2

Run Settings
LanguageC
Language Version
Run Command
# Notebook walkthrough on sample.csv: one-hot encode the `nation` column,
# split the rows into train/test sets, and scale the training features.
#
# Rebuilt from a flattened notebook export: the `[n]:` prompts and stray page
# numbers are dropped, the wrapped `test_size=0.5` literal is rejoined, and
# each cell's recorded output is kept as a comment under the statement that
# produced it. Cells that only displayed a bare expression now call print()
# so the value is also shown when this runs as a plain script.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, StandardScaler

# --- Load the raw table ------------------------------------------------------
dataset = pd.read_csv("sample.csv")
print(dataset)
# Recorded output:
#     Index   nation purchase   age  salary
# 0       0    India       No  25.0   35000
# 1       1   Russia      Yes  27.0   40000
# 2       2  Germany       No  50.0   60000
# 3       3   Russia       No  35.0   40000
# 4       4  Germany      Yes  40.0   50000
# 5       5    India      Yes  35.0   40000
# 6       6   Russia       No  39.1   20000
# 7       7    India      Yes  40.0   40000
# 8       8  Germany       No  50.0   30000
# 9       9    India      Yes  37.0   40000
# 10     10  Germany       No  21.0   70000
# 11     11    India      Yes  39.1   80000
# 12     12   Russia       No  63.0   40000

# --- One-hot encode `nation` -------------------------------------------------
label_binzr = LabelBinarizer()
label_binzr_output = label_binzr.fit_transform(dataset["nation"])
# Wrap the encoded array in a DataFrame so each column carries its class name.
result = pd.DataFrame(label_binzr_output, columns=label_binzr.classes_)
print(result)
# Recorded output:
#     Germany  India  Russia
# 0         0      1       0
# 1         0      0       1
# 2         1      0       0
# 3         0      0       1
# 4         1      0       0
# 5         0      1       0
# 6         0      0       1
# 7         0      1       0
# 8         1      0       0
# 9         0      1       0
# 10        1      0       0
# 11        0      1       0
# 12        0      0       1

print(type(result))
# Recorded notebook display: pandas.core.frame.DataFrame
print(type(label_binzr_output))
# Recorded notebook display: numpy.ndarray

# --- Build features and target -----------------------------------------------
# Positional columns 0, 3, 4 are `Index`, `age`, `salary` (see output below).
# NOTE(review): `Index` looks like a row identifier, yet it is kept as a
# feature and scaled further down -- confirm this is intended.
X = dataset.iloc[:, [0, 3, 4]]
print(X)
# Recorded output:
#     Index   age  salary
# 0       0  25.0   35000
# 1       1  27.0   40000
# 2       2  50.0   60000
# 3       3  35.0   40000
# 4       4  40.0   50000
# 5       5  35.0   40000
# 6       6  39.1   20000
# 7       7  40.0   40000
# 8       8  50.0   30000
# 9       9  37.0   40000
# 10     10  21.0   70000
# 11     11  39.1   80000
# 12     12  63.0   40000

# Positional column 1 is `nation`; `.values` yields a NumPy object array.
Y = dataset.iloc[:, 1].values
print(repr(Y))
# Recorded output:
# array(['India', 'Russia', 'Germany', 'Russia', 'Germany', 'India',
#        'Russia', 'India', 'Germany', 'India', 'Germany', 'India',
#        'Russia'], dtype=object)

# --- Train/test split --------------------------------------------------------
# random_state is fixed so the split is repeatable; with the 13 recorded rows
# the training side received 6 of them (see X_train below).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.5, random_state=0
)
print(X_train)
# Recorded output:
#     Index   age  salary
# 7       7  40.0   40000
# 9       9  37.0   40000
# 3       3  35.0   40000
# 0       0  25.0   35000
# 5       5  35.0   40000
# 12     12  63.0   40000

print(type(X_train))
# Recorded notebook display: pandas.core.frame.DataFrame
print(repr(Y_train))
# Recorded output:
# array(['India', 'India', 'Russia', 'India', 'India', 'Russia'],
#       dtype=object)
print(type(Y_train))
# Recorded notebook display: numpy.ndarray

# --- Scale the training features ---------------------------------------------
# The scaler is fitted on the training rows only and applied to those rows.
sc_X = StandardScaler()
X_train2 = sc_X.fit_transform(X_train)
print(repr(X_train2))
# Recorded output (columns follow X_train: Index, age, salary):
# array([[ 0.25537696,  0.07177362,  0.4472136 ],
#        [ 0.76613088, -0.18661142,  0.4472136 ],
#        [-0.76613088, -0.35886811,  0.4472136 ],
#        [-1.53226176, -1.22015156, -2.23606798],
#        [-0.25537696, -0.35886811,  0.4472136 ],
#        [ 1.53226176,  2.05272557,  0.4472136 ]])
Editor Settings
Theme
Key bindings
Full width
Lines