import quandl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import datetime

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression


# NOTE(review): this API key is hard-coded in source, so anyone who can read
# the file can use it. Consider loading it from an environment variable or a
# local, untracked config file, and rotating this key.
quandl.ApiConfig.api_key = 'oNkm5gT_RW9pc8ZJy41k'


# Fetch the "WIKI/TSLA" dataset through the Quandl client.
df = quandl.get("WIKI/TSLA")

# Notebook preview of the first rows (has no effect when run as a script).
df.head()


# Keep only the adjusted close column. The explicit .copy() makes `df` an
# independent DataFrame, so adding the 'Predictions' column further down does
# not trigger pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
df.head()


# Draw the adjusted close series as a black line on a wide (15x6) figure,
# put the legend in the upper-left corner and display the chart.
df['Adj. Close'].plot(
    color='black',
    figsize=(15, 6),
)
plt.legend(loc='upper left')
plt.show()


# Look-ahead horizon, counted in rows of `df`.
forecast = 30

# Target column: the adjusted close `forecast` rows later. The final
# `forecast` rows have no future value, so their target is NaN.
df['Predictions'] = df['Adj. Close'].shift(periods=-forecast)
df


# Feature matrix: every column except the target. `columns=` is used because
# the positional axis argument of DataFrame.drop (the old `drop(labels, 1)`
# form) is rejected by pandas 2.x.
X = np.array(df.drop(columns=['Predictions']))

# Standardise the features.
# NOTE(review): the scaling statistics are computed over all rows, including
# the rows later used for testing and forecasting, so information from those
# rows leaks into the training features. Left as-is to keep results unchanged.
X = preprocessing.scale(X)

# The last `forecast` rows have no known target; keep them aside as the
# inputs to predict from.
X_forecast = X[-forecast:]

# The remaining rows are the samples with a known target.
X = X[:-forecast]


# Targets, trimmed by the same number of trailing rows so that X and y stay
# aligned (the trimmed entries are the NaN targets).
y = np.array(df['Predictions'])
y = y[:-forecast]


# Hold out 20% of the samples for evaluation; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# Create the linear regression model and fit it on the training split
# (fit() returns the estimator itself).
clf = LinearRegression().fit(X_train, y_train)

# Score of the fitted model on the held-out split.
cnf = clf.score(X_test, y_test)


# Plot the standardised feature values against their targets.
plt.plot(X, y)

[<matplotlib.lines.Line2D at 0x243e451ada0>]


# Run the fitted model on the held-back last `forecast` rows and print the
# resulting predictions.
predicted_forecast = clf.predict(X_forecast)
print(predicted_forecast)

[324.13021258 322.81289631 334.28330576 335.67380627 334.97123759
 333.53682654 346.09524165 351.83288585 357.07287724 350.79854864
 343.06053528 331.22420464 335.31276403 333.58561603 328.56029841
 332.56103671 329.43850925 327.55523488 345.45122037 341.87007169
 327.02830837 326.02324485 321.87613807 314.27473529 311.33760791
 317.17283109 309.92271265 302.54574154 305.12182669 280.72708093]


# Visualize the predicted values

# Build one date per prediction, starting the day after the last date in the
# data. Deriving the range from `df` and `predicted_forecast` (instead of
# hard-coded start/end strings) keeps the x and y lengths in sync when the
# forecast horizon or the downloaded data changes. Business days are used
# because the horizon is counted in rows of the price series, not in calendar
# days.
# NOTE(review): assumes `df.index` is a DatetimeIndex; exchange holidays are
# not excluded by the plain business-day calendar.
dates = pd.bdate_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=len(predicted_forecast),
)

# Forecast in blue, historical adjusted close in green, on the same axes.
plt.plot(dates, predicted_forecast, color='b')
df['Adj. Close'].plot(color='g')

# Zoom in: only show the chart from 2017-04-26 onwards.
plt.xlim(xmin=datetime.date(2017, 4, 26))

(736445.0, 736952.9)

	Open	High	Low	Close	Volume	Ex-Dividend	Split Ratio	Adj. Open	Adj. High	Adj. Low	Adj. Close	Adj. Volume
Date
2010-06-29	19.00	25.0000	17.54	23.89	18766300.0	0.0	1.0	19.00	25.0000	17.54	23.89	18766300.0
2010-06-30	25.79	30.4192	23.30	23.83	17187100.0	0.0	1.0	25.79	30.4192	23.30	23.83	17187100.0
2010-07-01	25.00	25.9200	20.27	21.96	8218800.0	0.0	1.0	25.00	25.9200	20.27	21.96	8218800.0
2010-07-02	23.00	23.1000	18.71	19.20	5139800.0	0.0	1.0	23.00	23.1000	18.71	19.20	5139800.0
2010-07-06	20.00	20.0000	15.83	16.11	6866900.0	0.0	1.0	20.00	20.0000	15.83	16.11	6866900.0

	Adj. Close
Date
2010-06-29	23.89
2010-06-30	23.83
2010-07-01	21.96
2010-07-02	19.20
2010-07-06	16.11

	Adj. Close	Predictions
Date
2010-06-29	23.89	17.90
2010-06-30	23.83	17.60
2010-07-01	21.96	18.32
2010-07-02	19.20	18.78
2010-07-06	16.11	19.15
...	...	...
2018-03-21	316.53	NaN
2018-03-22	309.10	NaN
2018-03-23	301.54	NaN
2018-03-26	304.18	NaN
2018-03-27	279.18	NaN

Predicting Stock Prices

Since the key column in this DataFrame is the adjusted close column, we create a new DataFrame that keeps only the date index and the adjusted close column.

Creating our X variable

Creating our y variable