import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Authenticate with Quandl. A key supplied through the QUANDL_API_KEY
# environment variable takes precedence so credentials need not be edited
# into the source.
# NOTE(review): the literal fallback is a secret committed to source; it is
# kept only so the script behaves as before when the variable is unset.
# Rotate the key and drop the fallback.
quandl.ApiConfig.api_key = (
    os.environ.get('QUANDL_API_KEY') or 'oNkm5gT_RW9pc8ZJy41k'
)
# Fetch the WIKI/TSLA dataset and preview its first rows.
df = quandl.get("WIKI/TSLA")
df.head()
Open | High | Low | Close | Volume | Ex-Dividend | Split Ratio | Adj. Open | Adj. High | Adj. Low | Adj. Close | Adj. Volume | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-06-29 | 19.00 | 25.0000 | 17.54 | 23.89 | 18766300.0 | 0.0 | 1.0 | 19.00 | 25.0000 | 17.54 | 23.89 | 18766300.0 |
2010-06-30 | 25.79 | 30.4192 | 23.30 | 23.83 | 17187100.0 | 0.0 | 1.0 | 25.79 | 30.4192 | 23.30 | 23.83 | 17187100.0 |
2010-07-01 | 25.00 | 25.9200 | 20.27 | 21.96 | 8218800.0 | 0.0 | 1.0 | 25.00 | 25.9200 | 20.27 | 21.96 | 8218800.0 |
2010-07-02 | 23.00 | 23.1000 | 18.71 | 19.20 | 5139800.0 | 0.0 | 1.0 | 23.00 | 23.1000 | 18.71 | 19.20 | 5139800.0 |
2010-07-06 | 20.00 | 20.0000 | 15.83 | 16.11 | 6866900.0 | 0.0 | 1.0 | 20.00 | 20.0000 | 15.83 | 16.11 | 6866900.0 |
# Keep only the adjusted closing price. Take an explicit copy: a new column
# is assigned to this frame later, and writing into a selection of another
# frame triggers pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
df.head()
Adj. Close | |
---|---|
Date | |
2010-06-29 | 23.89 |
2010-06-30 | 23.83 |
2010-07-01 | 21.96 |
2010-07-02 | 19.20 |
2010-07-06 | 16.11 |
# Draw the adjusted closing price as a black line on a wide figure
closing_prices = df['Adj. Close']
closing_prices.plot(color='black', figsize=(15, 6))
plt.legend(loc='upper left')
plt.show()
# Number of rows to look ahead
forecast = 30
# Target column: the adjusted close `forecast` rows later. The final
# `forecast` rows have nothing to shift in and therefore hold NaN.
df['Predictions'] = df['Adj. Close'].shift(periods=-forecast)
df
Adj. Close | Predictions | |
---|---|---|
Date | ||
2010-06-29 | 23.89 | 17.90 |
2010-06-30 | 23.83 | 17.60 |
2010-07-01 | 21.96 | 18.32 |
2010-07-02 | 19.20 | 18.78 |
2010-07-06 | 16.11 | 19.15 |
... | ... | ... |
2018-03-21 | 316.53 | NaN |
2018-03-22 | 309.10 | NaN |
2018-03-23 | 301.54 | NaN |
2018-03-26 | 304.18 | NaN |
2018-03-27 | 279.18 | NaN |
1949 rows × 2 columns
# Feature matrix: every column except the target. `columns=` is used because
# passing the axis positionally to DataFrame.drop() was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 on.
X = np.array(df.drop(columns=['Predictions']))
#Standardising our data
X = preprocessing.scale(X)
# The last `forecast` rows have no known target (the 'Predictions' column is
# NaN there), so set them aside as the inputs to forecast from and drop them
# from the training features.
X_forecast = X[-forecast:]
X = X[:-forecast]
# Target vector, trimmed by the same trailing rows so X and y stay aligned
y = np.array(df['Predictions'])
y = y[:-forecast]
# Reserve 20% of the samples for evaluation; the rest is used for fitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Create the linear regression model and fit it in one step
clf = LinearRegression().fit(X_train, y_train)
# Score of the fitted model on the held-out samples
cnf = clf.score(X_test, y_test)
# Plot the scaled feature against its shifted target
plt.plot(X, y)
[<matplotlib.lines.Line2D at 0x243e451ada0>]
# Run the fitted model on the rows set aside in X_forecast and show the result
predicted_forecast = clf.predict(X=X_forecast)
print(predicted_forecast)
[324.13021258 322.81289631 334.28330576 335.67380627 334.97123759 333.53682654 346.09524165 351.83288585 357.07287724 350.79854864 343.06053528 331.22420464 335.31276403 333.58561603 328.56029841 332.56103671 329.43850925 327.55523488 345.45122037 341.87007169 327.02830837 326.02324485 321.87613807 314.27473529 311.33760791 317.17283109 309.92271265 302.54574154 305.12182669 280.72708093]
#Visualize the predicted values
# Build the x-axis from the data instead of a hard-coded calendar range:
# start on the first business day after the last observed row and emit one
# business day per prediction, so the lengths always match whatever
# `forecast` is and weekends are skipped.
# NOTE(review): market holidays are not excluded by the business-day
# calendar; dates are approximate trading days.
last_observed = df.index[-1]
dates = pd.bdate_range(
    start=last_observed + pd.offsets.BDay(1),
    periods=len(predicted_forecast),
)
plt.plot(dates, predicted_forecast, color='b')
df['Adj. Close'].plot(color='g')
# Zoom in on roughly the last year of history plus the forecast
plt.xlim(xmin=datetime.date(2017, 4, 26))
(736445.0, 736952.9)