
Create a Program to Implement Classification in Python Assignment Solution.


Instructions

Objective
Write a Python program to implement classification.

Requirements and Specifications

A program to implement classification in Python.

Source Code

K Nearest Neighbors (KNN)

### Name: Gedion

### Dataset

- Pima Indians Diabetes Database (dataset link).

This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.

The dataset consists of several medical predictor variables and one target variable, Outcome. Predictor variables include the number of pregnancies the patient has had, their BMI, insulin level, age, and so on.

- We will try to classify whether or not a patient has diabetes using KNN.

### Read the Dataset

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

import math as m

%matplotlib inline

import warnings

warnings.filterwarnings('ignore')

from sklearn.model_selection import learning_curve, train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, log_loss

df = pd.read_csv("diabetes.csv")

df.head()

df.shape
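
Since the model will predict the Outcome column, it helps to check how balanced the two classes are before training; the accuracy numbers later are easier to interpret with that baseline in mind. A minimal sketch, reusing the df loaded above:

```python
# Class balance of the target. In the commonly distributed version of this
# dataset roughly two thirds of the patients are non-diabetic (Outcome = 0);
# always verify on the file actually loaded.
print(df["Outcome"].value_counts())
print(df["Outcome"].value_counts(normalize=True).round(3))
```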

### Preprocessing

#### deal with null values

df.isnull().sum()

print("Shape of data:",df.shape)

print("Number of missing values in the data", df.isnull().sum().sum())

df.describe().T

df.info()
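
isnull() reports no missing values, but in this dataset several columns (for example Glucose, BloodPressure, SkinThickness, Insulin, and BMI) are known to use 0 as a placeholder for measurements that were not recorded. Whether to impute these is a design choice and is not part of the original pipeline; a minimal, optional sketch using median imputation:

```python
# Optional: treat zeros in these columns as missing and impute with the median.
# This is an assumption about the data encoding, not a step from the original notebook.
cols_zero_as_missing = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
df[cols_zero_as_missing] = df[cols_zero_as_missing].replace(0, np.nan)
df[cols_zero_as_missing] = df[cols_zero_as_missing].fillna(df[cols_zero_as_missing].median())
```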

### Visualization

#### Number of records per Pregnancies value

sns.countplot(x="Pregnancies", data=df)

#### Age distribution

sns.histplot(df['Age'], kde=True)  # sns.distplot is deprecated in recent seaborn versions

#### Difference between Pregnancies and Age effect

sns.pointplot(data=df, x="Pregnancies", y="Age", hue="Outcome")

#### The correlation heatmap

plt.figure(figsize=(16,8), dpi=150)

sns.heatmap(df.corr(),annot=True)

### Feature & target selection

y = df['Outcome']

X = df.drop('Outcome', axis = 1)

### Split the dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2 ,stratify=y, random_state = 42)
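
The stratify=y argument keeps the proportion of diabetic and non-diabetic patients the same in the training and test splits, which matters because the classes are imbalanced. A quick sketch to confirm this, using the splits created above:

```python
# Verify that stratified splitting preserved the class proportions.
print(y_train.value_counts(normalize=True).round(3))
print(y_test.value_counts(normalize=True).round(3))
```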

### Evaluation phase

K = np.arange(3,12)

train_accuracy =np.empty(len(K))

test_accuracy = np.empty(len(K))

for i, k in enumerate(K):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

train_accuracy

test_accuracy

plt.title('KNN Accuracy Versus Number of Neighbors')

plt.plot(K, test_accuracy, label='Testing Accuracy')

plt.plot(K, train_accuracy, label='Training accuracy')

plt.legend()

plt.xlabel('Number of neighbors')

plt.ylabel('Accuracy')

plt.show()

- The maximum testing accuracy is obtained for k = 4, 6, and 8 (a sketch for reading these values directly off the accuracy arrays follows below).
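
Rather than reading the best k off the plot, the same conclusion can be checked directly from the arrays computed in the evaluation loop. A small sketch, assuming the K, train_accuracy, and test_accuracy arrays defined above:

```python
# k values from the grid whose testing accuracy ties for the maximum.
best_ks = K[test_accuracy == test_accuracy.max()]
print("Best k values by testing accuracy:", best_ks)
print("Best testing accuracy:", test_accuracy.max())
```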

### Training phase

knn4 = KNeighborsClassifier(n_neighbors=4)

knn4.fit(X_train, y_train)

knn6 = KNeighborsClassifier(n_neighbors=6)

knn6.fit(X_train, y_train)

knn8 = KNeighborsClassifier(n_neighbors=8)

knn8.fit(X_train, y_train)

y_pred4 = knn4.predict(X_train)

y_pred6 = knn6.predict(X_train)

y_pred8 = knn8.predict(X_train)

accuracy_score(y_train, y_pred4)

# KNN when K=4

knn_accuracy4 = accuracy_score(y_train, y_pred4)

knn_confusionMatrix4 = confusion_matrix(y_train, y_pred4)

knn_classification4 = classification_report(y_train, y_pred4)

knn_log_loss4 = log_loss(y_train, knn4.predict_proba(X_train))  # log_loss expects predicted probabilities, not hard class labels

# KNN when K=6

knn_accuracy6 = accuracy_score(y_train, y_pred6)

knn_confusionMatrix6 = confusion_matrix(y_train, y_pred6)

knn_classification6 = classification_report(y_train, y_pred6)

knn_log_loss6 = log_loss(y_train, knn6.predict_proba(X_train))

# KNN when K=8

knn_accuracy8 = accuracy_score(y_train, y_pred8)

knn_confusionMatrix8 = confusion_matrix(y_train, y_pred8)

knn_classification8 = classification_report(y_train, y_pred8)

knn_log_loss8 = log_loss(y_train, knn8.predict_proba(X_train))

print("- Accuracy score of KNN")

print(f"K=4: {knn_accuracy4}")

print(f"K=6: {knn_accuracy6}")

print(f"K=8: {knn_accuracy8}\n\n")

print("- Confusion matrix of KNN")

print(f"K=4:\n {knn_confusionMatrix4}\n")

print(f"K=6:\n {knn_confusionMatrix6}\n")

print(f"K=8:\n {knn_confusionMatrix8}\n\n")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_classification4}\n")

print(f"\nK=6:\n {knn_classification6}\n")

print(f"\nK=8:\n {knn_classification8}")

print("- Log Loss of KNN")

print(f"\nK=4:\n {knn_log_loss4}\n")

print(f"\nK=6:\n {knn_log_loss6}\n")

print(f"\nK=8:\n {knn_log_loss8}")

### Testing phase

y_pred4 = knn4.predict(X_test)

y_pred6 = knn6.predict(X_test)

y_pred8 = knn8.predict(X_test)

# KNN when K=4

knn_accuracy4 = accuracy_score(y_test, y_pred4)

knn_confusionMatrix4 = confusion_matrix(y_test, y_pred4)

knn_classification4 = classification_report(y_test, y_pred4)

knn_log_loss4 = log_loss(y_test, knn4.predict_proba(X_test))  # log_loss expects predicted probabilities, not hard class labels

# KNN when K=6

knn_accuracy6 = accuracy_score(y_test, y_pred6)

knn_confusionMatrix6 = confusion_matrix(y_test, y_pred6)

knn_classification6 = classification_report(y_test, y_pred6)

knn_log_loss6 = log_loss(y_test, knn6.predict_proba(X_test))

# KNN when K=8

knn_accuracy8 = accuracy_score(y_test, y_pred8)

knn_confusionMatrix8 = confusion_matrix(y_test, y_pred8)

knn_classification8 = classification_report(y_test, y_pred8)

knn_log_loss8 = log_loss(y_test, knn8.predict_proba(X_test))

print("- Accuracy score of KNN")

print(f"K=4: {knn_accuracy4}")

print(f"K=6: {knn_accuracy6}")

print(f"K=8: {knn_accuracy8}\n\n")

print("- Confusion matrix of KNN")

print(f"K=4:\n {knn_confusionMatrix4}\n")

print(f"K=6:\n {knn_confusionMatrix6}\n")

print(f"K=8:\n {knn_confusionMatrix8}\n\n")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_classification4}\n")

print(f"\nK=6:\n {knn_classification6}\n")

print(f"\nK=8:\n {knn_classification8}")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_log_loss4}\n")

print(f"\nK=6:\n {knn_log_loss6}\n")

print(f"\nK=8:\n {knn_log_loss8}")