# Program to Implement Stochastic Gradient Descent: Assignment Solution

July 03, 2024
Professor Liam
🇦🇺 Australia
Python
Professor Liam Taylor holds a Master's degree in Computer Science from a prominent university in Australia and has completed over 600 assignments related to Python file handling. His expertise includes designing file handling libraries, implementing data serialization techniques, and optimizing file access patterns. Professor Taylor specializes in multimedia file processing, metadata extraction, and developing cross-platform file handling solutions using Python.
Key Topics
• Instructions
• Requirements and Specifications
Tip of the day
News

## Instructions

Objective

Write a Python program that implements stochastic gradient descent.

## Requirements and Specifications

Source Code

```!pip install --upgrade --no-cache-dir gdown ## Download Dataset from drive link !gdown --id 1H7ONGAS2hZgOBIq8csIdjjpNGVs4aWPL import pandas as pd import numpy as np from sklearn import preprocessing from sklearn.metrics import confusion_matrix from sklearn import svm import seaborn import matplotlib.pyplot as plt from sklearn.metrics import plot_confusion_matrix from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import accuracy_score from sklearn.ensemble import RandomForestClassifier from sklearn.decomposition import PCA ## Load Dataset df = pd.read_csv('creditcard.csv') df = df.astype({col: 'float32' for col in df.select_dtypes('float64').columns}) df = df.astype({col: 'int32' for col in df.select_dtypes('int64').columns}) df.head() ## Normalize Dataset scaler = MinMaxScaler(feature_range=(0, 1)) normed = scaler.fit_transform(df) df_normed = pd.DataFrame(data=normed, columns=df.columns) df_normed.head() ## Describe df.describe() ## Check correlation between variables plt.figure() seaborn.heatmap(df.corr(), cmap="YlGnBu") # Displaying the Heatmap #seaborn.set(font_scale=2,style='white') plt.title('Heatmap correlation') plt.show() # Balance df_1 = df[df['Class'] == 1] df_0 = df[df['Class'] == 0].iloc[:len(df_1),:] df = df_0.append(df_1, ignore_index = True) df = df.sample(frac=1) scaler = MinMaxScaler(feature_range=(0, 1)) normed = scaler.fit_transform(df) df_normed = pd.DataFrame(data=normed, columns=df.columns) df_normed.head() ## Split into train and test train = df_normed.sample(frac=0.7) val = df_normed.loc[~df_normed.index.isin(train.index)] train.reset_index(drop=True, inplace=True) val.reset_index(drop=True, inplace=True) ## Split data into X and y y_train = train['Class'] X_train = train.drop(columns = ['Time', 'Amount', 'Class']) y_val = val['Class'] X_val = val.drop(columns = ['Time', 'Amount', 'Class']) ## PCA pca = PCA(n_components = 2) pca.fit(X_train) X_train = 
pca.transform(X_train) pca = PCA(n_components = 2) pca.fit(X_val) X_val = pca.transform(X_val) ## Create Model model = svm.SVC(kernel = 'linear',C=1.0) model.fit(X_train, y_train) ## Measure Accuracy y_pred = model.predict(X_val) model_acc = accuracy_score(y_val, y_pred) print(f"The accuracy of the model is: {model_acc}") ## Confusion matrix fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(8,8)) plot_confusion_matrix(model, X_train, y_train, ax = axes[0]) plot_confusion_matrix(model, X_val, y_val, ax = axes[1]) plt.show() ## Display clustering x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1 y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02)) plt.figure(figsize=(8,8)) ypred = model.predict(np.c_[xx.ravel(), yy.ravel()]) ypred = ypred.reshape(xx.shape) plt.contourf(xx, yy, ypred, cmap = plt.cm.coolwarm, alpha = 0.8) idx = np.where(y_train == 1)[0] plt.scatter(X_train[idx,0], X_train[idx,1], c = 'red',cmap = plt.cm.coolwarm, marker='o',edgecolors='black', label = '1') idx = np.where(y_train == 0)[0] plt.scatter(X_train[idx,0], X_train[idx,1], c = 'blue',cmap = plt.cm.coolwarm, marker='s',edgecolors='black', label = '0') plt.legend() plt.legend() # Part 2) Now cluster for each pair of consecutive features fig, ax = plt.subplots(nrows = 2, ncols = 14, figsize=(30,10)) j = 0 k = 0 for i in range(27): X_train = train.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2] X_val = val.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2] model2 = KNeighborsClassifier(n_neighbors=2) model2.fit(X_train,y_train) y_min, y_max = X_val.values[:, 1].min() - 1, X_val.values[:, 1].max() + 1 x_min, x_max = X_val.values[:, 0].min() - 1, X_val.values[:, 0].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02)) ypred = model2.predict(np.c_[xx.ravel(), yy.ravel()]) ypred = ypred.reshape(xx.shape) ax[j,k].contourf(xx, yy, ypred, 
cmap = plt.cm.coolwarm, alpha = 0.8) idx = np.where(y_val == 1)[0] ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c = 'red',cmap = plt.cm.coolwarm, marker='o',edgecolors='black', label = '1') idx = np.where(y_val == 0)[0] ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c = 'blue',cmap = plt.cm.coolwarm, marker='s',edgecolors='black', label = '0') ax[j,k].axis('off') ax[j,k].legend() ax[j,k].set_title(f'V{i+1} vs. V{i+2}') k+=1 if k%14 == 0: j += 1 k = 0 plt.show() Now, we see that since we only have two classes, the best number of clusters/neighbors to select is 2. We see how each pair of variables is clustered in the multi-plot figure shown above. # Random Forest fig, ax = plt.subplots(nrows = 2, ncols = 14, figsize=(30,10)) j = 0 k = 0 for i in range(27): X_train = train.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2] X_val = val.drop(columns = ['Time', 'Amount', 'Class']).iloc[:,i:i+2] model3 = RandomForestClassifier(max_depth=2, random_state=0) model3.fit(X_train,y_train) y_min, y_max = X_val.values[:, 1].min() - 1, X_val.values[:, 1].max() + 1 x_min, x_max = X_val.values[:, 0].min() - 1, X_val.values[:, 0].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02)) ypred = model3.predict(np.c_[xx.ravel(), yy.ravel()]) ypred = ypred.reshape(xx.shape) ax[j,k].contourf(xx, yy, ypred, cmap = plt.cm.coolwarm, alpha = 0.8) idx = np.where(y_val == 1)[0] ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c = 'red',cmap = plt.cm.coolwarm, marker='o',edgecolors='black', label = '1') idx = np.where(y_val == 0)[0] ax[j,k].scatter(X_val.values[idx,0], X_val.values[idx,1], c = 'blue',cmap = plt.cm.coolwarm, marker='s',edgecolors='black', label = '0') ax[j,k].axis('off') ax[j,k].legend() ax[j,k].set_title(f'V{i+1} vs. V{i+2}') k+=1 if k%14 == 0: j += 1 k = 0 plt.show() ```

## Similar Samples

Discover a wide range of programming homework samples at Programming Homework Help. Our examples showcase expert solutions to complex coding challenges, helping you understand various programming concepts and improve your coding skills. Get inspired and learn how to tackle your assignments effectively!