# In this video we will work with One Hot Encoding:
import pandas as pd
import numpy as np
# Load the raw dataset from disk.
df = pd.read_csv('Datapreprocessing.csv')

# Count the missing (NaN) entries in each column. The result is only
# displayed when run interactively; it is not stored.
df.isnull().sum()

# Fill the NaN values of Occupation, Employment Status and Employment Type
# with the most frequent value of the respective column. (The third column
# name is spelled 'Employement Type' here and must match the CSV header.)
col = ['Occupation', 'Employment Status', 'Employement Type']
most_frequent = df.mode().iloc[0]  # first mode of every column
df[col] = df[col].fillna(most_frequent)

# Every column except the last is a feature; the last column is the label.
# The feature matrix is extracted twice so that two separate encoding
# pipelines can be run on it further down.
features = df.iloc[:, :-1].values
features1 = df.iloc[:, :-1].values
labels = df.iloc[:, -1].values
from sklearn.preprocessing import Imputer, OneHotEncoder

# Replace the NaN entries of columns 1 and 6 with the column mean (axis=0).
# NOTE(review): `Imputer` was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer releases the replacement is
# `sklearn.impute.SimpleImputer(strategy='mean')`.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)

# Two-step transformation: fit once, then transform.
# `features` and `features1` are extracted from the same DataFrame slice, so
# the statistics learned from `features` are applied to both arrays instead
# of refitting the imputer for every call.
impute_cols = [1, 6]
imputer.fit(features[:, impute_cols])
features[:, impute_cols] = imputer.transform(features[:, impute_cols])
features1[:, impute_cols] = imputer.transform(features1[:, impute_cols])
#------------------------------- L A B E L E N C O D I N G ------------------#
from sklearn.preprocessing import LabelEncoder

# Replace the values of columns 0, 2, 3, 4 and 5 with integer codes, first in
# `features` and then in `features1`. The single encoder instance is refit
# for every column, so afterwards it only holds the classes of the last
# column it processed.
encode = LabelEncoder()
for matrix in (features, features1):
    for idx in (0, 2, 3, 4, 5):
        matrix[:, idx] = encode.fit_transform(matrix[:, idx])

# Wrap the label-encoded feature matrix in a DataFrame.
df1 = pd.DataFrame(features)
#--------------------------- ONE HOT ENCODING --------------------------------#
# One-hot encode `features` one column per pass. With `categorical_features`
# the encoder stacks the untouched columns to the right of the new dummy
# columns, so the position of every remaining column shifts after each pass.
# NOTE(review): the indices 7, 9, 11 and 13 appear to track where original
# columns 2-5 end up after the preceding passes; they depend on the number of
# categories found in this particular dataset - confirm against the data.
# NOTE(review): `categorical_features` was deprecated in scikit-learn 0.20 and
# removed in 0.22 (superseded by `sklearn.compose.ColumnTransformer`).
for shifted_idx in (0, 7, 9, 11, 13):
    hotencode = OneHotEncoder(categorical_features=[shifted_idx])
    encoded = hotencode.fit_transform(features)
    features = encoded.toarray()
#--
# One-hot encode the label-encoded columns 0, 2, 3, 4 and 5 of `features1`.
# All five columns are encoded in a single call: encoding them one pass at a
# time with their original indices does not work, because each pass places
# the new dummy columns first, so after the first pass index 2 (and then 3,
# 4, 5) no longer refers to the original categorical column.
# The result holds the dummy columns of the encoded features first, followed
# by the untouched columns.
# NOTE(review): `categorical_features` was deprecated in scikit-learn 0.20 and
# removed in 0.22 (superseded by `sklearn.compose.ColumnTransformer`).
hotencode = OneHotEncoder(categorical_features=[0, 2, 3, 4, 5])
features1 = hotencode.fit_transform(features1).toarray()