import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the salary data from 'Salary.csv' (resolved relative to the current
# working directory) into a DataFrame.
dataset = pd.read_csv('Salary.csv')
# Bare expression: when run as the last line of a notebook cell this renders
# the whole frame as the table shown below.
dataset
| | YearsExperience | Salary |
|---|---|---|
| 0 | 1.1 | 39343 |
| 1 | 1.3 | 46205 |
| 2 | 1.5 | 37731 |
| 3 | 2.0 | 43525 |
| 4 | 2.2 | 39891 |
| 5 | 2.9 | 56642 |
| 6 | 3.0 | 60150 |
| 7 | 3.2 | 54445 |
| 8 | 3.2 | 64445 |
| 9 | 3.7 | 57189 |
| 10 | 3.9 | 63218 |
| 11 | 4.0 | 55794 |
| 12 | 4.0 | 56957 |
| 13 | 4.1 | 57081 |
| 14 | 4.5 | 61111 |
| 15 | 4.9 | 67938 |
| 16 | 5.1 | 66029 |
| 17 | 5.3 | 83088 |
| 18 | 5.9 | 81363 |
| 19 | 6.0 | 93940 |
| 20 | 6.8 | 91738 |
| 21 | 7.1 | 98273 |
| 22 | 7.9 | 101302 |
| 23 | 8.2 | 113812 |
| 24 | 8.7 | 109431 |
| 25 | 9.0 | 105582 |
| 26 | 9.5 | 116969 |
| 27 | 9.6 | 112635 |
| 28 | 10.3 | 122391 |
| 29 | 10.5 | 121872 |
| 30 | 11.2 | 127345 |
| 31 | 11.5 | 126756 |
| 32 | 12.3 | 128765 |
| 33 | 12.9 | 135675 |
| 34 | 13.5 | 139465 |
# Split the frame by column position: the first column becomes the feature
# matrix and every remaining column becomes the target. Slices (rather than a
# single integer index) are used so both arrays stay two-dimensional.
x = dataset.iloc[:, :1].values
y = dataset.iloc[:, 1:].values

# Scatter the raw observations on an axes created explicitly from the
# rectangle [0, 0, 1, 1] passed to add_axes.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.scatter(x, y, color='r')
<matplotlib.collections.PathCollection at 0x8c9e8b0>
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Fit a linear regression on the training portion only.
regressor = LinearRegression()
regressor.fit(x_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# Predict the target for the held-out rows, then display the predictions
# (bare expression at the end of the cell).
y_pred = regressor.predict(x_test)
y_pred
array([[120057.87672477],
[ 88127.64484315],
[ 73456.99776241],
[118331.91824468],
[ 97620.41648363],
[ 71731.03928232],
[ 63101.24688189]])
# Display the held-out targets so they can be compared with the predictions
# stored in y_pred.
y_test
array([[121872],
[ 91738],
[ 66029],
[122391],
[101302],
[ 67938],
[ 63218]], dtype=int64)
# Raw observations in red ...
plt.scatter(x, y, color='r')
# ... with the fitted model's predictions over every x drawn as a blue line.
plt.plot(x, regressor.predict(x), color='blue')
[<matplotlib.lines.Line2D at 0xc16f610>]
from sklearn.preprocessing import PolynomialFeatures

# Expand the feature matrix into polynomial terms up to degree 2.
poly = PolynomialFeatures(degree=2)
x_poly = poly.fit_transform(x)

# NOTE: this re-fits the existing `regressor` object, replacing whatever it
# had learned before, and it trains on the full (x, y) data rather than on the
# x_train / y_train split.
regressor.fit(x_poly, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# Overlay the polynomial model's predictions (blue) on the raw observations
# (red).
plt.scatter(x, y, color='r')
# `poly` was already fitted when x_poly was built, so the features are expanded
# with transform() here; fit_transform() would needlessly re-fit the
# transformer at prediction time.
plt.plot(x, regressor.predict(poly.transform(x)), color='blue')
[<matplotlib.lines.Line2D at 0xc2a9630>]
# Predict over every row with the polynomial model; this rebinds y_pred.
# `poly` was already fitted when x_poly was built, so only transform() is
# needed; calling fit_transform() again would re-fit the transformer at
# prediction time.
y_pred = regressor.predict(poly.transform(x))
# Bare expression: displays the predictions when run as a notebook cell.
y_pred
array([[ 34222.96719981],
[ 36421.1693715 ],
[ 38603.85950911],
[ 43992.71970409],
[ 46121.11772245],
[ 53448.35351841],
[ 54479.58945519],
[ 56530.42730319],
[ 56530.42730319],
[ 61589.65677413],
[ 63586.20250289],
[ 64578.65835449],
[ 64578.65835449],
[ 65567.23619757],
[ 69482.76748471],
[ 73336.25063557],
[ 75239.72415989],
[ 77127.68565014],
[ 82698.49791645],
[ 83613.39359769],
[ 90792.95074096],
[ 93421.29752913],
[100259.58992282],
[102759.9624299 ],
[106849.68977135],
[109256.990074 ],
[113191.59707473],
[113966.88444932],
[119285.31183296],
[120769.96043733],
[125844.07328434],
[127960.52294814],
[133433.75634349],
[137375.80503225],
[141178.24541436]])
# Display the actual target values for comparison with the predictions in
# y_pred.
y
array([[ 39343],
[ 46205],
[ 37731],
[ 43525],
[ 39891],
[ 56642],
[ 60150],
[ 54445],
[ 64445],
[ 57189],
[ 63218],
[ 55794],
[ 56957],
[ 57081],
[ 61111],
[ 67938],
[ 66029],
[ 83088],
[ 81363],
[ 93940],
[ 91738],
[ 98273],
[101302],
[113812],
[109431],
[105582],
[116969],
[112635],
[122391],
[121872],
[127345],
[126756],
[128765],
[135675],
[139465]], dtype=int64)