In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [4]:
# Read the salary table from the CSV file next to the notebook into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer='Salary.csv')
In [7]:
# Display the loaded frame to inspect its columns and rows.
# NOTE(review): dataset.head() would give a shorter preview if the full dump is not needed.
dataset
Out[7]:
YearsExperience Salary
0 1.1 39343
1 1.3 46205
2 1.5 37731
3 2.0 43525
4 2.2 39891
5 2.9 56642
6 3.0 60150
7 3.2 54445
8 3.2 64445
9 3.7 57189
10 3.9 63218
11 4.0 55794
12 4.0 56957
13 4.1 57081
14 4.5 61111
15 4.9 67938
16 5.1 66029
17 5.3 83088
18 5.9 81363
19 6.0 93940
20 6.8 91738
21 7.1 98273
22 7.9 101302
23 8.2 113812
24 8.7 109431
25 9.0 105582
26 9.5 116969
27 9.6 112635
28 10.3 122391
29 10.5 121872
30 11.2 127345
31 11.5 126756
32 12.3 128765
33 12.9 135675
34 13.5 139465
In [13]:
# Feature matrix: every row, first column only. The 0:1 slice (rather than a
# scalar index) keeps the result two-dimensional, one column wide.
x = dataset.iloc[:, 0:1].values
In [11]:
# Target array: every row, all columns from index 1 onward, kept two-dimensional.
y = dataset.iloc[:, 1:].values
In [14]:
# Scatter the raw observations before any modelling.
# plt.subplots() is used instead of an axes rectangle of [0, 0, 1, 1]: an axes
# that spans the whole figure leaves no margin inside the canvas for tick marks
# and axis labels.
fig, ax = plt.subplots()
ax.set_xlabel('YearsExperience')
ax.set_ylabel('Salary')
ax.set_title('Salary vs. years of experience')
# The scatter call stays last so the cell still evaluates to the PathCollection.
ax.scatter(x, y, color='r')
Out[14]:
<matplotlib.collections.PathCollection at 0x8c9e8b0>
In [20]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
In [21]:
from sklearn.linear_model import LinearRegression
In [22]:
# Create an unfitted ordinary least-squares linear regression estimator with default settings.
regressor=LinearRegression()
In [23]:
# Fit the linear model on the training split only. fit() returns the estimator,
# so leaving the call as the last expression shows its repr as the cell output.
regressor.fit(X=x_train, y=y_train)
Out[23]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
In [24]:
# Predict the target for the held-out test rows with the fitted linear model.
y_pred = regressor.predict(X=x_test)
In [25]:
# Show the model's predictions for the x_test rows.
y_pred
Out[25]:
array([[120057.87672477],
       [ 88127.64484315],
       [ 73456.99776241],
       [118331.91824468],
       [ 97620.41648363],
       [ 71731.03928232],
       [ 63101.24688189]])
In [26]:
# Show the actual targets of the held-out rows, for comparison with y_pred.
y_test
Out[26]:
array([[121872],
       [ 91738],
       [ 66029],
       [122391],
       [101302],
       [ 67938],
       [ 63218]], dtype=int64)
In [28]:
# Overlay the fitted model's predictions (blue line) on all observations (red
# points). Axis labels and a title are set so the figure can be read on its own.
plt.scatter(x, y, color='r')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.title('Linear regression fit')
# The plot call stays last so the cell still evaluates to the list of Line2D objects.
plt.plot(x, regressor.predict(x), color='blue')
Out[28]:
[<matplotlib.lines.Line2D at 0xc16f610>]
In [32]:
from sklearn.preprocessing import PolynomialFeatures
In [33]:
# Build degree-2 polynomial features from x. Fitting the transformer and
# applying it are written as two explicit steps on the same array.
poly = PolynomialFeatures(degree=2)
poly.fit(x)
x_poly = poly.transform(x)
In [34]:
# Refit the same `regressor` object on the degree-2 polynomial features of all
# rows (x_poly, y), not on the train split used for the earlier fit.
# NOTE(review): this replaces the straight-line model fitted on (x_train, y_train);
# cells that call regressor.predict on raw x or x_test will presumably fail if
# re-run after this point. Consider a separately named estimator, and fitting on
# the training split so the two models are compared on the same held-out rows.
regressor.fit(x_poly,y)
Out[34]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
In [35]:
# Overlay the polynomial model's predictions (blue line) on all observations
# (red points). Axis labels and a title are set so the figure can be read on its own.
plt.scatter(x, y, color='r')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.title('Polynomial regression fit (degree 2)')
# Use transform(), not fit_transform(): `poly` was already fitted when x_poly
# was built, and a transformer should not be refitted at prediction time.
# The plot call stays last so the cell still evaluates to the list of Line2D objects.
plt.plot(x, regressor.predict(poly.transform(x)), color='blue')
Out[35]:
[<matplotlib.lines.Line2D at 0xc2a9630>]
In [37]:
# Predict every row of x with the model fitted on the polynomial features.
# transform() reuses the already-fitted `poly` instead of refitting it at
# prediction time, which is what fit_transform() would do.
# NOTE(review): this rebinds y_pred, which earlier held the test-set
# predictions of the straight-line model.
y_pred = regressor.predict(poly.transform(x))
In [38]:
# Show the predictions computed from the polynomial features for every row of x.
y_pred
Out[38]:
array([[ 34222.96719981],
       [ 36421.1693715 ],
       [ 38603.85950911],
       [ 43992.71970409],
       [ 46121.11772245],
       [ 53448.35351841],
       [ 54479.58945519],
       [ 56530.42730319],
       [ 56530.42730319],
       [ 61589.65677413],
       [ 63586.20250289],
       [ 64578.65835449],
       [ 64578.65835449],
       [ 65567.23619757],
       [ 69482.76748471],
       [ 73336.25063557],
       [ 75239.72415989],
       [ 77127.68565014],
       [ 82698.49791645],
       [ 83613.39359769],
       [ 90792.95074096],
       [ 93421.29752913],
       [100259.58992282],
       [102759.9624299 ],
       [106849.68977135],
       [109256.990074  ],
       [113191.59707473],
       [113966.88444932],
       [119285.31183296],
       [120769.96043733],
       [125844.07328434],
       [127960.52294814],
       [133433.75634349],
       [137375.80503225],
       [141178.24541436]])
In [40]:
# Show the observed targets, for comparison with the predictions in y_pred.
y
Out[40]:
array([[ 39343],
       [ 46205],
       [ 37731],
       [ 43525],
       [ 39891],
       [ 56642],
       [ 60150],
       [ 54445],
       [ 64445],
       [ 57189],
       [ 63218],
       [ 55794],
       [ 56957],
       [ 57081],
       [ 61111],
       [ 67938],
       [ 66029],
       [ 83088],
       [ 81363],
       [ 93940],
       [ 91738],
       [ 98273],
       [101302],
       [113812],
       [109431],
       [105582],
       [116969],
       [112635],
       [122391],
       [121872],
       [127345],
       [126756],
       [128765],
       [135675],
       [139465]], dtype=int64)
In [ ]: