import pandas as pd
# Load the salary data from CSV into `dataset`. The cell then evaluates
# `dataset` and `type(dataset)`; the recorded output shows only the type.
[5]: dataset=pd.read_csv("Salary_Data.csv")
dataset
type(dataset)
[5]: pandas.core.frame.DataFrame
# Display the whole DataFrame; the printout shows NaN entries in both columns.
[6]: dataset
[6]: YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
5 2.9 NaN
6 3.0 60150.0
7 3.2 54445.0
8 3.2 64445.0
9 3.7 57189.0
10 3.9 63218.0
11 NaN 55794.0
12 4.0 56957.0
13 4.1 57081.0
14 4.5 61111.0
15 4.9 67938.0
16 5.1 66029.0
17 5.3 83088.0
18 5.9 81363.0
19 6.0 93940.0
20 6.8 91738.0
21 7.1 NaN
22 7.9 101302.0
23 8.2 113812.0
24 8.7 109431.0
25 9.0 NaN
26 9.5 116969.0
27 9.6 112635.0
28 NaN 122391.0
29 10.5 121872.0
# Take every column except the last as the feature array (NumPy, via .values).
# The column slice `:-1` keeps the result 2-D: one single-element row per record,
# with the missing YearsExperience entries carried over as nan.
[7]: X=dataset.iloc[:,:-1].values
[8]: X
[8]: array([[ 1.1],
[ 1.3],
[ 1.5],
[ 2. ],
[ 2.2],
[ 2.9],
[ 3. ],
[ 3.2],
[ 3.2],
[ 3.7],
[ 3.9],
[ nan],
[ 4. ],
[ 4.1],
[ 4.5],
[ 4.9],
[ 5.1],
[ 5.3],
[ 5.9],
[ 6. ],
[ 6.8],
[ 7.1],
[ 7.9],
[ 8.2],
[ 8.7],
[ 9. ],
[ 9.5],
[ 9.6],
[ nan],
[10.5]])
# Select the last column as the target. Without .values this is still a
# pandas Series (the printout ends with "Name: Salary, dtype: float64").
[9]: Y=dataset.iloc[:,-1]
[10]: Y
[10]: 0 39343.0
1 46205.0
2 37731.0
3 43525.0
4 39891.0
5 NaN
6 60150.0
7 54445.0
8 64445.0
9 57189.0
10 63218.0
11 55794.0
12 56957.0
13 57081.0
14 61111.0
15 67938.0
16 66029.0
17 83088.0
18 81363.0
19 93940.0
20 91738.0
21 NaN
22 101302.0
23 113812.0
24 109431.0
25 NaN
26 116969.0
27 112635.0
28 122391.0
29 121872.0
Name: Salary, dtype: float64
# Rebind Y to the NumPy array behind the last column; it is 1-D and still
# contains the nan entries.
[11]: Y=dataset.iloc[:,-1].values
[12]: Y
[12]: array([ 39343., 46205., 37731., 43525., 39891., nan, 60150.,
54445., 64445., 57189., 63218., 55794., 56957., 57081.,
61111., 67938., 66029., 83088., 81363., 93940., 91738.,
nan, 101302., 113812., 109431., nan, 116969., 112635.,
122391., 121872.])
[13]: import numpy as np
[22]: from sklearn.impute import SimpleImputer
# Inspect the runtime type of X.
[23]: type(X)
[23]: numpy.ndarray
# Inspect the shape of X; the output shows it is already 2-D.
[32]: X.shape
[32]: (30, 1)
# Imputer configured to treat np.nan as missing and fill with the mean.
[36]: imp=SimpleImputer(missing_values=np.nan,strategy="mean")
# Fit the imputer on X and bind the filled result to X1.
[38]: X1=imp.fit_transform(X)
[39]: Y.shape
[39]: (30,)
# Y is 1-D here; reshape it to a single column before imputing.
# NOTE(review): presumably needed because SimpleImputer requires 2-D input - confirm.
[40]: Y=Y.reshape((-1,1))
[41]: Y.shape
[41]: (30, 1)
# NOTE(review): this refits the same `imp` on Y, replacing whatever it learned
# from X. Use a separate imputer for Y if the X fit is needed later.
[42]: Y1=imp.fit_transform(Y)
# NOTE(review): likely bug - this flattens the original Y, not the imputed Y1,
# so the array displayed below still contains nan. Y1 is never reshaped or
# displayed; flatten and inspect Y1 if a 1-D imputed target is wanted.
[43]: Y=Y.reshape((-1))
Y
array([ 39343., 46205., 37731., 43525., 39891., nan, 60150.,
54445., 64445., 57189., 63218., 55794., 56957., 57081.,
61111., 67938., 66029., 83088., 81363., 93940., 91738.,
nan, 101302., 113812., 109431., nan, 116969., 112635.,
122391., 121872.])