Consider the kernel SVM. Implement it for a generic kernel, and then apply it to a dataset with 7 features whose values lie between -2 and 2 and one target with values 0 and 1. Split the dataset (80% train, 20% test) and apply k-fold cross-validation with k = 10 on the training data to choose the optimal hyperparameters (you must decide on reasonable hyperparameter ranges) for the following kernels:
(i) Linear kernel,
(ii) RBF kernel.
Preparing data:
# creating dataset of 7 features
import random
import pandas as pd
import numpy as np
# Synthetic dataset: n_feat integer features drawn uniformly from
# {-2, ..., 2} and one binary target drawn uniformly from {0, 1}.
n_rows = 500  # number of rows
n_feat = 7  # number of features

# One column (list of n_rows draws) per feature.
features = [
    [random.randint(-2, 2) for _ in range(n_rows)]
    for _ in range(n_feat)
]

# The target is generated exactly once; it is independent of the features,
# so there is no reason to rebuild it on every pass of the feature loop.
target = [random.randint(0, 1) for _ in range(n_rows)]

# creating a dataframe of features: columns feat_1 .. feat_<n_feat>, then target
data = pd.DataFrame()
for col_number, column in enumerate(features, start=1):
    data['feat_' + str(col_number)] = column
data['target'] = target
data.head()
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing: the first 7 columns are the
# features, everything from column 7 onwards is the target.
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, :7],
    data.iloc[:, 7:],
    test_size=0.20,
)

# Feature frames become 2-D arrays; the single-column target frames are
# flattened to 1-D label vectors.
X_train, X_test = np.array(X_train), np.array(X_test)
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()
type(X_train)
Applying kernels:
# Linear
from sklearn import linear_model
from sklearn.model_selection import GridSearchCV

# SGDClassifier with hinge loss is a linear SVM trained by stochastic
# gradient descent; the loss is spelled out so that this is explicit.
linear_clf = linear_model.SGDClassifier(loss='hinge')
# Baseline fit with default hyperparameters (kept so linear_clf is usable
# on its own; the grid search below works on clones of it).
linear_clf.fit(X_train, y_train)

# Regularisation strengths to try on the classifier.
param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

# 10-fold cross-validation on the training data. The score is accuracy so
# that best_score_ really is the "Best Accuracy" reported below (it was
# previously the mean F1 score printed under the accuracy label).
grid_search_linear = GridSearchCV(
    linear_clf,
    param_grid,
    cv=10,
    verbose=1,
    scoring='accuracy',
)
# fit
grid_search_linear.fit(X_train, y_train)
print("Best HyperParameter: ", grid_search_linear.best_params_)
print("Best Accuracy: %.2f%%" % (grid_search_linear.best_score_ * 100))
# RBF
# SVC with RBF kernel
from sklearn import svm

# The estimator must actually use the RBF kernel (it was previously
# constructed with kernel='linear').
rbf_svc = svm.SVC(kernel='rbf')
# Baseline fit with default hyperparameters; the grid search below works
# on clones of this estimator.
rbf_svc.fit(X_train, y_train)

# SVC has no 'alpha' parameter. For an RBF kernel the hyperparameters to
# tune are the penalty C and the kernel width gamma, searched on a log grid.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1, 10],
}

# 10-fold cross-validation over the RBF estimator (the search was previously
# run on linear_clf, so the RBF model was never tuned). The score is accuracy
# so that it matches the "Best Accuracy" label printed below.
grid_search_rbf = GridSearchCV(
    rbf_svc,
    param_grid,
    cv=10,
    verbose=1,
    scoring='accuracy',
)
# fit
grid_search_rbf.fit(X_train, y_train)
print("Best HyperParameter: ", grid_search_rbf.best_params_)
print("Best Accuracy: %.2f%%" % (grid_search_rbf.best_score_ * 100))
Here are the results:
Linear:
RBF:
Data:
Get Answers For Free
Most questions are answered within 1 hour.