Use the iris data set. The test dataset consists of the rows at indices 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, ... (the multiples of 10); the remaining data points form your training dataset.
1) Use the MLP (NN) classifier from the sklearn package (with random_state = 123, if applicable). For this classifier, tune only the following hyper-parameters (using KFold CV):
Here is the code:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from sklearn.metrics import accuracy_score
# Load the raw iris table from disk.
iris_data = pd.read_csv('IRIS.csv')

# Integer-encode the class labels: setosa -> 0, versicolor -> 1, and every
# other value (Iris-virginica in the expected data) -> 2.
species_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1}
species = [species_codes.get(label, 2) for label in iris_data['species']]
iris_data['species'] = species

print('Shape of data: ', iris_data.shape)
# Bare expression: only displays a preview in a notebook/REPL session.
iris_data.head()
# preparing dataset
# Hold out every 10th row (positions 0, 10, 20, ...) as the test set and keep
# the remaining rows for training. Rows are selected by position with a
# boolean mask instead of an outer merge on values: the mask preserves the
# original column dtypes and never drops a training row just because it is an
# exact duplicate of some test row.
is_test_row = np.arange(len(iris_data)) % 10 == 0
test_rows = iris_data[is_test_row]  # keeps the original index labels
train_rows = iris_data[~is_test_row].reset_index(drop=True)

# Targets are the integer-encoded 'species' column.
y_train = train_rows['species']
y_test = test_rows['species']

# The first four columns are used as the feature matrix.
X_train = train_rows.iloc[:, :4]
X_test = test_rows.iloc[:, :4]

print(len(X_train), len(X_test))
# Bare expression: only displays a preview in a notebook/REPL session.
X_train.head()
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
# Baseline: one MLP fitted on the whole training partition.
clf = MLPClassifier(random_state=123, max_iter=300)
model = clf.fit(X_train, y_train)

# Score the baseline on the held-out rows and report it as a percentage.
baseline_predictions = model.predict(X_test)
baseline_accuracy = accuracy_score(y_test, baseline_predictions)
print('Accuracy without kfold CV: ')
print(baseline_accuracy * 100)
# KFold Cross Validation approach
from sklearn.base import clone

# Shuffle before splitting so each fold is a random subset of the training
# rows; the seed matches the random_state used for the classifier.
# NOTE(review): IRIS.csv is presumably ordered by species, in which case
# unshuffled contiguous folds would be heavily class-imbalanced - confirm.
kf = KFold(n_splits=5, shuffle=True, random_state=123)
accuracy_model = list()

# Iterate over each train/validation split of the training data
for train_index, test_index in kf.split(X_train):
    # kf.split yields positional indices, so select rows with .iloc
    X_train1, X_test1 = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train1, y_test1 = y_train.iloc[train_index], y_train.iloc[test_index]

    # Fit a fresh, unfitted copy of the estimator on this fold's training
    # part only; cloning leaves `clf` / `model` (fitted on all of X_train)
    # untouched.
    fold_model = clone(clf).fit(X_train1, y_train1)

    # Score on the fold's held-out part, not on the final test set, so the
    # test set stays unseen during cross-validation.
    fold_accuracy = accuracy_score(y_test1, fold_model.predict(X_test1), normalize=True) * 100
    accuracy_model.append(fold_accuracy)

# Print the per-fold accuracies and their mean
print('Accuracy after kfold CV: ')
print(accuracy_model)
print('Mean CV accuracy: ', np.mean(accuracy_model))
Here is the result:
For any doubt, please comment below.
Get Answers For Free
Most questions are answered within 1 hour.