# Step 4: Encoding categorical data
#
# Relies on X and Y being defined by earlier steps of this script (not shown
# here). X must support 2-D indexing such as X[:, 0]; column 0 is the one
# treated as categorical below.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace the labels in column 0 with integer codes, in place.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])

# Creating a dummy variable
# OneHotEncoder(categorical_features=[0]) was deprecated in scikit-learn 0.20
# and removed in 0.22; selecting the column is now ColumnTransformer's job.
#   - remainder="passthrough" keeps the other columns, placed after the
#     one-hot columns (the same layout the old keyword produced).
#   - sparse_threshold=0 forces a dense result, replacing the old .toarray().
#   - astype(float) restores the float dtype the old encoder returned.
# NOTE: ColumnTransformer fits a clone, so `onehotencoder` itself stays unfitted.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    [("onehot", onehotencoder, [0])],
    remainder="passthrough",
    sparse_threshold=0,
)
X = column_transformer.fit_transform(X).astype(float)

# Encode the target labels as integer codes.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)

print("---------------------")
print("Step 4: Encoding categorical data")
print("X")
print(X)
print("Y")
print(Y)
# Step 5: Splitting the datasets into training sets and Test sets
#
# Holds out 20% of the samples (test_size=0.2) as the test set; random_state=0
# fixes the shuffle so repeated runs give the same split. X and Y come from
# the earlier steps of this script.
from sklearn.model_selection import train_test_split

_split = train_test_split(X, Y, test_size=0.2, random_state=0)
X_train, X_test, Y_train, Y_test = _split

print("---------------------")
print("Step 5: Splitting the datasets into training sets and Test sets")

# Print each partition under its own name, in the same order as before.
for _label, _part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("Y_train", Y_train),
    ("Y_test", Y_test),
):
    print(_label)
    print(_part)