1

Run Settings
LanguageC
Language Version
Run Command
a) Loading the dataset b) Identifying the dependent and independent variables c) Dealing with missing data import pandas as pd [5]: dataset=pd.read_csv("Salary_Data.csv") dataset [6]: YearsExperience Salary 0 1.1 39343.0 1 1.3 46205.0 2 1.5 37731.0 3 2.0 43525.0 4 2.2 39891.0 5 2.9 NaN 6 3.0 60150.0 7 3.2 54445.0 8 3.2 64445.0 9 3.7 57189.0 10 3.9 63218.0 11 NaN 55794.0 12 4.0 56957.0 13 4.1 57081.0 14 4.5 61111.0 15 4.9 67938.0 16 5.1 66029.0 17 5.3 83088.0 18 5.9 81363.0 19 6.0 93940.0 20 6.8 91738.0 21 7.1 NaN 22 7.9 101302.0 23 8.2 113812.0 24 8.7 109431.0 25 9.0 NaN 26 9.5 116969.0 27 9.6 112635.0 28 NaN 122391.0 29 10.5 121872.0 type(dataset) pandas.core.frame.DataFrame dataset.shape (30, 2) [7]: X=dataset.iloc[:,:-1].values type(X) numpy.ndarray [8]: X [8]: array([[ 1.1], [ 1.3], [ 1.5], [ 2. ], [ 2.2], [ 2.9], [ 3. ], [ 3.2], [ 3.2], [ 3.7], [ 3.9], [ nan], [ 4. ], [ 4.1], [ 4.5], [ 4.9], [ 5.1], [ 5.3], [ 5.9], [ 6. ], [ 6.8], [ 7.1], [ 7.9], [ 8.2], [ 8.7], [ 9. ], [ 9.5], [ 9.6], [ nan], [10.5]]) [9]: Y=dataset.iloc[:,-1] [10]: Y [10]: 0 39343.0 1 46205.0 2 37731.0 3 43525.0 4 39891.0 5 NaN 6 60150.0 7 54445.0 8 64445.0 9 57189.0 10 63218.0 11 55794.0 12 56957.0 13 57081.0 14 61111.0 15 67938.0 16 66029.0 17 83088.0 18 81363.0 19 93940.0 20 91738.0 21 NaN 22 101302.0 23 113812.0 24 109431.0 25 NaN 26 116969.0 27 112635.0 28 122391.0 29 121872.0 Name: Salary, dtype: float64 [11]: Y=dataset.iloc[:,-1].values [12]: Y [12]: array([ 39343., 46205., 37731., 43525., 39891., nan, 60150., 54445., 64445., 57189., 63218., 55794., 56957., 57081., 61111., 67938., 66029., 83088., 81363., 93940., 91738., nan, 101302., 113812., 109431., nan, 116969., 112635., 122391., 121872.]) [13]: import numpy as np [22]: from sklearn.impute import SimpleImputer [23]: type(X) [23]: numpy.ndarray [32]: X.shape [32]: (30, 1) [36]: imp=SimpleImputer(missing_values=np.nan,strategy="mean") [38]: X1=imp.fit_transform(X) [39]: Y.shape [39]: (30,) [40]: Y=Y.reshape((-1,1)) [41]: Y.shape 
[41]: (30, 1) [42]: Y1=imp.fit_transform(Y) [43]: Y=Y.reshape((-1)) Y array([ 39343., 46205., 37731., 43525., 39891., nan, 60150., 54445., 64445., 57189., 63218., 55794., 56957., 57081., 61111., 67938., 66029., 83088., 81363., 93940., 91738., nan, 101302., 113812., 109431., nan, 116969., 112635., 122391., 121872.])
Editor Settings
Theme
Key bindings
Full width
Lines