Build a model using the linear regression algorithm on any dataset.
import seaborn as sns
[ ]: iris = sns.load_dataset('iris')
[ ]: iris
[ ]: sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
.. ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica
[150 rows x 5 columns]
[ ]: iris = iris[['petal_length','petal_width']]
[ ]: iris
[ ]: petal_length petal_width
0 1.4 0.2
1 1.4 0.2
2 1.3 0.2
3 1.5 0.2
4 1.4 0.2
.. ... ...
145 5.2 2.3
146 5.0 1.9
147 5.2 2.0
148 5.4 2.3
149 5.1 1.8
[150 rows x 2 columns]
[ ]: x=iris['petal_length']
[ ]: y=iris['petal_width']
[ ]: import matplotlib.pyplot as plt
plt.scatter(x,y)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
[ ]: Text(0, 0.5, 'Petal Width')
[ ]: from sklearn.model_selection import train_test_split
[ ]: x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.4,random_state=23)
[ ]: x_train
[ ]: 77 5.0
29 1.6
92 4.0
23 1.7
128 5.6
...
39 1.5
91 4.6
31 1.5
40 1.3
83 5.1
Name: petal_length, Length: 90, dtype: float64
[ ]: x_train.shape
[ ]: (90,)
[ ]: import numpy as np
[ ]: x_train=np.array(x_train).reshape(-1,1)
[ ]: x_train
[ ]: array([[5. ],
[1.6],
[4. ],
[1.7],
[5.6],
[4. ],
[4.8],
[5.6],
[5.1],
[4.9],
[1.4],
[1.6],
[5.6],
[1.4],
[1.6],
[5.5],
[5.1],
[4. ],
[1.4],
[4.1],
[5.3],
[4.5],
[5.8],
[6.6],
[4.3],
[1.3],
[4. ],
[4.9],
[4.9],
[4. ],
[1.5],
[4.5],
[4.5],
[3.9],
[5. ],
[4.8],
[3.8],
[5.1],
[6.3],
[6.1],
[1.2],
[5.7],
[3. ],
[1.5],
[5.9],
[4.8],
[1.4],
[4.5],
[4.2],
[5.2],
[1.3],
[1. ],
[3.5],
[1.1],
[4.7],
[4.2],
[1.2],
[5.8],
[4.3],
[4.5],
[1.6],
[6.9],
[4.6],
[5.1],
[5.6],
[4.7],
[1.5],
[1.6],
[5.5],
[5.8],
[4.4],
[1.3],
[5.2],
[3.3],
[5.7],
[3.5],
[1.3],
[1.5],
[1.5],
[5.1],
[1.4],
[1.4],
[4.9],
[1.4],
[4.4],
[1.5],
[4.6],
[1.5],
[1.3],
[5.1]])
[ ]: x_train.shape
[ ]: (90, 1)
[ ]: x_test = np.array(x_test).reshape(-1,1)
[ ]: x_test
[ ]: array([[5.4],
[6. ],
[4.1],
[1.5],
[5. ],
[4.9],
[1.7],
[5.5],
[1.7],
[3.6],
[4.7],
[1.6],
[5.9],
[1.5],
[1.5],
[5.1],
[4.5],
[4.7],
[6.1],
[1.4],
[5.3],
[1.4],
[1.6],
[1.3],
[5.6],
[1.4],
[1.9],
[4.8],
[4.4],
[3.9],
[1.5],
[3.9],
[1.3],
[6.7],
[1.5],
[1.7],
[4.6],
[3.3],
[4.2],
[6. ],
[5.7],
[1.9],
[3.7],
[1.4],
[4.4],
[5. ],
[4.5],
[6.4],
[1.5],
[4.1],
[6.1],
[5.4],
[1.4],
[5.6],
[4.5],
[4.7],
[6.7],
[4.2],
[1.4],
[5.1]])
[ ]: from sklearn.linear_model import LinearRegression
[ ]: lr = LinearRegression()
[ ]: lr.fit(x_train, y_train)
[ ]: LinearRegression()
[ ]: c = lr.intercept_
[ ]: c
[ ]: -0.3511327422143746
[ ]: m = lr.coef_
[ ]: m
[ ]: array([0.41684538])
[ ]: y_predit_train = m*x_train + c
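[ ]: y_predit_train.flatten()
[ ]: array([1.73309416, 0.31581987, 1.31624878, 0.3575044 , 1.98320139,
1.31624878, 1.64972508, 1.98320139, 1.7747787 , 1.69140962,
0.23245079, 0.31581987, 1.98320139, 0.23245079, 0.31581987,
1.94151685, 1.7747787 , 1.31624878, 0.23245079, 1.35793332,
1.85814777, 1.52467147, 2.06657046, 2.40004677, 1.44130239,
0.19076625, 1.31624878, 1.69140962, 1.69140962, 1.31624878,
0.27413533, 1.52467147, 1.52467147, 1.27456424, 1.73309416,
1.64972508, 1.2328797 , 1.7747787 , 2.27499315, 2.19162408,
0.14908171, 2.02488593, 0.8994034 , 0.27413533, 2.108255 ,
1.64972508, 0.23245079, 1.52467147, 1.39961786, 1.81646324,
0.19076625, 0.06571264, 1.10782609, 0.10739718, 1.60804055,
1.39961786, 0.14908171, 2.06657046, 1.44130239, 1.52467147,
0.31581987, 2.52510038, 1.56635601, 1.7747787 , 1.98320139,
1.60804055, 0.27413533, 0.31581987, 1.94151685, 2.06657046,
1.48298693, 0.19076625, 1.81646324, 1.02445701, 2.02488593,
1.10782609, 0.19076625, 0.27413533, 0.27413533, 1.7747787 ,
0.23245079, 0.23245079, 1.69140962, 0.23245079, 1.48298693,
0.27413533, 1.56635601, 0.27413533, 0.19076625, 1.7747787 ])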
[ ]: y_predit_train1 = lr.predict(x_train)
[ ]: y_predit_train1
[ ]: array([1.73309416, 0.31581987, 1.31624878, 0.3575044 , 1.98320139,
1.31624878, 1.64972508, 1.98320139, 1.7747787 , 1.69140962,
0.23245079, 0.31581987, 1.98320139, 0.23245079, 0.31581987,
1.94151685, 1.7747787 , 1.31624878, 0.23245079, 1.35793332,
1.85814777, 1.52467147, 2.06657046, 2.40004677, 1.44130239,
0.19076625, 1.31624878, 1.69140962, 1.69140962, 1.31624878,
0.27413533, 1.52467147, 1.52467147, 1.27456424, 1.73309416,
1.64972508, 1.2328797 , 1.7747787 , 2.27499315, 2.19162408,
0.14908171, 2.02488593, 0.8994034 , 0.27413533, 2.108255 ,
1.64972508, 0.23245079, 1.52467147, 1.39961786, 1.81646324,
0.19076625, 0.06571264, 1.10782609, 0.10739718, 1.60804055,
1.39961786, 0.14908171, 2.06657046, 1.44130239, 1.52467147,
0.31581987, 2.52510038, 1.56635601, 1.7747787 , 1.98320139,
1.60804055, 0.27413533, 0.31581987, 1.94151685, 2.06657046,
1.48298693, 0.19076625, 1.81646324, 1.02445701, 2.02488593,
1.10782609, 0.19076625, 0.27413533, 0.27413533, 1.7747787 ,
0.23245079, 0.23245079, 1.69140962, 0.23245079, 1.48298693,
0.27413533, 1.56635601, 0.27413533, 0.19076625, 1.7747787 ])
[ ]: import matplotlib.pyplot as plt
plt.scatter(x_train,y_train)
plt.plot(x_train,y_predit_train1, color = 'red')
plt.xlabel('Petal Length')