
Python Program to Implement Stochastic Gradient Descent Assignment Solution.


Instructions

Objective
Write a Python program to implement stochastic gradient descent.

Requirements and Specifications

Program to implement stochastic gradient descent in Python.
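Although the solution below relies on scikit-learn classifiers, the core technique the assignment names, stochastic gradient descent, fits in a few lines of NumPy. The following is a minimal illustrative sketch, not part of the original solution; the logistic loss, the fixed learning rate lr, and the name sgd_logistic are all assumptions made here for illustration:

import numpy as np

def sgd_logistic(X, y, lr=0.1, epochs=10, seed=0):
    # Stochastic gradient descent for logistic regression:
    # one randomly chosen sample updates the weights at each step.
    rng = np.random.default_rng(seed)
    w = np.zeros(X.shape[1])
    b = 0.0
    for _ in range(epochs):
        for i in rng.permutation(len(X)):
            z = X[i] @ w + b
            p = 1.0 / (1.0 + np.exp(-z))  # sigmoid
            grad = p - y[i]               # d(log-loss)/dz
            w -= lr * grad * X[i]
            b -= lr * grad
    return w, b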

Source Code

!pip install --upgrade --no-cache-dir gdown

## Download Dataset from drive link

!gdown --id 1H7ONGAS2hZgOBIq8csIdjjpNGVs4aWPL

import pandas as pd

import numpy as np

from sklearn import preprocessing

from sklearn.metrics import confusion_matrix

from sklearn import svm

import seaborn

import matplotlib.pyplot as plt

from sklearn.metrics import ConfusionMatrixDisplay  # plot_confusion_matrix was removed in scikit-learn 1.2

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier

from sklearn.decomposition import PCA

## Load Dataset

df = pd.read_csv('creditcard.csv')

df = df.astype({col: 'float32' for col in df.select_dtypes('float64').columns})

df = df.astype({col: 'int32' for col in df.select_dtypes('int64').columns})

df.head()

## Normalize Dataset

scaler = MinMaxScaler(feature_range=(0, 1))

normed = scaler.fit_transform(df)

df_normed = pd.DataFrame(data=normed, columns=df.columns)

df_normed.head()
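For reference, MinMaxScaler rescales each column to x' = (x - min) / (max - min). The same result can be computed by hand, as the optional check below shows (illustrative only; it assumes no column is constant, which would zero the denominator):

vals = df.values.astype('float64')

manual = (vals - vals.min(axis=0)) / (vals.max(axis=0) - vals.min(axis=0))

print(np.allclose(manual, normed, atol=1e-5))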

## Describe

df.describe()

## Check correlation between variables

plt.figure()

seaborn.heatmap(df.corr(), cmap="YlGnBu") # Displaying the Heatmap

#seaborn.set(font_scale=2,style='white')

plt.title('Heatmap correlation')

plt.show()

# Balance

df_1 = df[df['Class'] == 1]

df_0 = df[df['Class'] == 0].iloc[:len(df_1),:]

df = pd.concat([df_0, df_1], ignore_index=True)  # DataFrame.append was removed in pandas 2.0

df = df.sample(frac=1).reset_index(drop=True)  # shuffle the balanced dataset

scaler = MinMaxScaler(feature_range=(0, 1))

normed = scaler.fit_transform(df)

df_normed = pd.DataFrame(data=normed, columns=df.columns)

df_normed.head()

## Split into train and test

train = df_normed.sample(frac=0.7)

val = df_normed.loc[~df_normed.index.isin(train.index)]

train.reset_index(drop=True, inplace=True)

val.reset_index(drop=True, inplace=True)
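An equivalent split can be produced with scikit-learn's helper, which additionally supports stratifying on the class label so both splits keep the same class ratio (an optional refinement, not used in the original solution):

from sklearn.model_selection import train_test_split

train, val = train_test_split(df_normed, train_size=0.7, stratify=df_normed['Class'], random_state=0)

train = train.reset_index(drop=True)

val = val.reset_index(drop=True)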

## Split data into X and y

y_train = train['Class']

X_train = train.drop(columns = ['Time', 'Amount', 'Class'])

y_val = val['Class']

X_val = val.drop(columns = ['Time', 'Amount', 'Class'])

## PCA

pca = PCA(n_components=2)

pca.fit(X_train)

X_train = pca.transform(X_train)

# Reuse the PCA fitted on the training data; fitting a second PCA on the
# validation set would project the two splits onto different axes.
X_val = pca.transform(X_val)
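Optionally, one can check how much of the variance the two retained components explain (not part of the original solution):

print('Explained variance kept:', pca.explained_variance_ratio_.sum())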

## Create Model

model = svm.SVC(kernel='linear', C=1.0)

model.fit(X_train, y_train)
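Since the assignment asks specifically for stochastic gradient descent, the same kind of linear SVM can also be trained with SGD through sklearn.linear_model.SGDClassifier (the hinge loss corresponds to a linear SVM); the hyperparameters here are illustrative choices, not the original author's:

from sklearn.linear_model import SGDClassifier

sgd_model = SGDClassifier(loss='hinge', alpha=1e-4, max_iter=1000, random_state=0)

sgd_model.fit(X_train, y_train)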

## Measure Accuracy

y_pred = model.predict(X_val)

model_acc = accuracy_score(y_val, y_pred)

print(f"The accuracy of the model is: {model_acc}")
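For a fraud-detection task, precision and recall are usually more informative than raw accuracy, even on a balanced sample; an optional check:

from sklearn.metrics import classification_report

print(classification_report(y_val, y_pred, digits=3))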

## Confusion matrix

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 8))

ConfusionMatrixDisplay.from_estimator(model, X_train, y_train, ax=axes[0])

ConfusionMatrixDisplay.from_estimator(model, X_val, y_val, ax=axes[1])

plt.show()

## Display clustering

x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1

y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),

                     np.arange(y_min, y_max, 0.02))

plt.figure(figsize=(8,8))

ypred = model.predict(np.c_[xx.ravel(), yy.ravel()])

ypred = ypred.reshape(xx.shape)

plt.contourf(xx, yy, ypred, cmap = plt.cm.coolwarm, alpha = 0.8)

idx = np.where(y_train == 1)[0]

plt.scatter(X_train[idx, 0], X_train[idx, 1], c='red', marker='o', edgecolors='black', label='1')

idx = np.where(y_train == 0)[0]

plt.scatter(X_train[idx, 0], X_train[idx, 1], c='blue', marker='s', edgecolors='black', label='0')

plt.legend()

plt.show()

# Part 2) Now cluster for each pair of consecutive features

fig, ax = plt.subplots(nrows = 2, ncols = 14, figsize=(30,10))

j = 0

k = 0

for i in range(27):

  X_train = train.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2]

  X_val = val.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2]

  model2 = KNeighborsClassifier(n_neighbors=2)

  model2.fit(X_train.values, y_train)  # .values avoids a feature-name warning when predicting on raw arrays

  y_min, y_max = X_val.values[:, 1].min() - 1, X_val.values[:, 1].max() + 1

  x_min, x_max = X_val.values[:, 0].min() - 1, X_val.values[:, 0].max() + 1

  xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),

                      np.arange(y_min, y_max, 0.02))

  ypred = model2.predict(np.c_[xx.ravel(), yy.ravel()])

  ypred = ypred.reshape(xx.shape)

  ax[j,k].contourf(xx, yy, ypred, cmap = plt.cm.coolwarm, alpha = 0.8)

  idx = np.where(y_val == 1)[0]

  ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c='red', marker='o', edgecolors='black', label='1')

  idx = np.where(y_val == 0)[0]

  ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c='blue', marker='s', edgecolors='black', label='0')

  ax[j,k].axis('off')

  ax[j,k].legend()

  ax[j,k].set_title(f'V{i+1} vs. V{i+2}')

  k += 1

  if k%14 == 0:

    j += 1

    k = 0

plt.show()

Since the dataset has only two classes, k = 2 neighbors is a natural choice for the KNN classifier. The multi-plot figure above shows how each pair of consecutive features separates the two classes.

# Random Forest

fig, ax = plt.subplots(nrows = 2, ncols = 14, figsize=(30,10))

j = 0

k = 0

for i in range(27):

  X_train = train.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2]

  X_val = val.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2]

  model3 = RandomForestClassifier(max_depth=2, random_state=0)

  model3.fit(X_train.values, y_train)  # .values avoids a feature-name warning when predicting on raw arrays

  y_min, y_max = X_val.values[:, 1].min() - 1, X_val.values[:, 1].max() + 1

  x_min, x_max = X_val.values[:, 0].min() - 1, X_val.values[:, 0].max() + 1

  xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),

                      np.arange(y_min, y_max, 0.02))

  ypred = model3.predict(np.c_[xx.ravel(), yy.ravel()])

  ypred = ypred.reshape(xx.shape)

  ax[j,k].contourf(xx, yy, ypred, cmap = plt.cm.coolwarm, alpha = 0.8)

  idx = np.where(y_val == 1)[0]

  ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c='red', marker='o', edgecolors='black', label='1')

  idx = np.where(y_val == 0)[0]

  ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c='blue', marker='s', edgecolors='black', label='0')

  ax[j,k].axis('off')

  ax[j,k].legend()

  ax[j,k].set_title(f'V{i+1} vs. V{i+2}')

  k += 1

  if k%14 == 0:

    j += 1

    k = 0

plt.show()