
Create a Program to Implement Classification in Python Assignment Solution.


Instructions

Objective
Write a Python program to implement classification.

Requirements and Specifications

A program to implement classification in Python.

Source Code

K Nearest Neighbors (KNN)

### Name: Gedion

### Dataset

- Pima Indians Diabetes Database (dataset link).

This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.

The dataset consists of several medical predictor variables and one target variable, Outcome. Predictor variables include the number of pregnancies the patient has had, their BMI, insulin level, age, and so on.

- We will try to classify whether or not a patient has diabetes using KNN.

### Read the Dataset

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

import math as m

%matplotlib inline

import warnings

warnings.filterwarnings('ignore')

from sklearn.model_selection import learning_curve, train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, log_loss

df = pd.read_csv("diabetes.csv")

df.head()

df.shape
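
Since the model will predict the Outcome column, it helps to check how balanced the two classes are before training; the accuracy numbers later are easier to interpret with that baseline in mind. A minimal sketch, reusing the df loaded above:

```python
# Class balance of the target. In the commonly distributed version of this
# dataset roughly two thirds of the patients are non-diabetic (Outcome = 0);
# always verify on the file actually loaded.
print(df["Outcome"].value_counts())
print(df["Outcome"].value_counts(normalize=True).round(3))
```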

### Preprocessing

#### deal with null values

df.isnull().sum()

print("Shape of data:",df.shape)

print("Number of missing values in the data", df.isnull().sum().sum())

df.describe().T

df.info()
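
isnull() reports no missing values, but in this dataset several columns (for example Glucose, BloodPressure, SkinThickness, Insulin, and BMI) are known to use 0 as a placeholder for measurements that were not recorded. Whether to impute these is a design choice and is not part of the original pipeline; a minimal, optional sketch using median imputation:

```python
# Optional: treat zeros in these columns as missing and impute with the median.
# This is an assumption about the data encoding, not a step from the original notebook.
cols_zero_as_missing = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
df[cols_zero_as_missing] = df[cols_zero_as_missing].replace(0, np.nan)
df[cols_zero_as_missing] = df[cols_zero_as_missing].fillna(df[cols_zero_as_missing].median())
```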

### Visualization

#### Number of records per Pregnancies value

sns.countplot(x="Pregnancies", data=df)

#### Age distribution

sns.histplot(df['Age'], kde=True)  # sns.distplot is deprecated in recent seaborn versions

#### Difference between Pregnancies and Age effect

sns.pointplot(data=df, x="Pregnancies", y="Age", hue="Outcome")

#### The correlation heatmap

plt.figure(figsize=(16,8), dpi=150)

sns.heatmap(df.corr(),annot=True)

### Feature & target selection

y = df['Outcome']

X = df.drop('Outcome', axis = 1)

### Split the dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2 ,stratify=y, random_state = 42)
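
The stratify=y argument keeps the proportion of diabetic and non-diabetic patients the same in the training and test splits, which matters because the classes are imbalanced. A quick sketch to confirm this, using the splits created above:

```python
# Verify that stratified splitting preserved the class proportions.
print(y_train.value_counts(normalize=True).round(3))
print(y_test.value_counts(normalize=True).round(3))
```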

### Evaluation phase

K = np.arange(3,12)

train_accuracy =np.empty(len(K))

test_accuracy = np.empty(len(K))

for i, k in enumerate(K):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

train_accuracy

test_accuracy

plt.title('KNN Accuracy Versus Number of Neighbors')

plt.plot(K, test_accuracy, label='Testing Accuracy')

plt.plot(K, train_accuracy, label='Training accuracy')

plt.legend()

plt.xlabel('Number of neighbors')

plt.ylabel('Accuracy')

plt.show()

- The maximum testing accuracy is obtained for k = 4, 6, and 8 (a sketch for reading these values directly off the accuracy arrays follows below).
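
Rather than reading the best k off the plot, the same conclusion can be checked directly from the arrays computed in the evaluation loop. A small sketch, assuming the K, train_accuracy, and test_accuracy arrays defined above:

```python
# k values from the grid whose testing accuracy ties for the maximum.
best_ks = K[test_accuracy == test_accuracy.max()]
print("Best k values by testing accuracy:", best_ks)
print("Best testing accuracy:", test_accuracy.max())
```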

### Training phase

knn4 = KNeighborsClassifier(n_neighbors=4)

knn4.fit(X_train, y_train)

knn6 = KNeighborsClassifier(n_neighbors=6)

knn6.fit(X_train, y_train)

knn8 = KNeighborsClassifier(n_neighbors=8)

knn8.fit(X_train, y_train)

y_pred4 = knn4.predict(X_train)

y_pred6 = knn6.predict(X_train)

y_pred8 = knn8.predict(X_train)

accuracy_score(y_train, y_pred4)

# KNN when K=4

knn_accuracy4 = accuracy_score(y_train, y_pred4)

knn_confusionMatrix4 = confusion_matrix(y_train, y_pred4)

knn_classification4 = classification_report(y_train, y_pred4)

knn_log_loss4 = log_loss(y_train, knn4.predict_proba(X_train))  # log_loss expects predicted probabilities, not hard class labels

# KNN when K=6

knn_accuracy6 = accuracy_score(y_train, y_pred6)

knn_confusionMatrix6 = confusion_matrix(y_train, y_pred6)

knn_classification6 = classification_report(y_train, y_pred6)

knn_log_loss6 = log_loss(y_train, knn6.predict_proba(X_train))

# KNN when K=8

knn_accuracy8 = accuracy_score(y_train, y_pred8)

knn_confusionMatrix8 = confusion_matrix(y_train, y_pred8)

knn_classification8 = classification_report(y_train, y_pred8)

knn_log_loss8 = log_loss(y_train, knn8.predict_proba(X_train))

print("- Accuracy score of KNN")

print(f"K=4: {knn_accuracy4}")

print(f"K=6: {knn_accuracy6}")

print(f"K=8: {knn_accuracy8}\n\n")

print("- Confusion matrix of KNN")

print(f"K=4:\n {knn_confusionMatrix4}\n")

print(f"K=6:\n {knn_confusionMatrix6}\n")

print(f"K=8:\n {knn_confusionMatrix8}\n\n")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_classification4}\n")

print(f"\nK=6:\n {knn_classification6}\n")

print(f"\nK=8:\n {knn_classification8}")

print("- Log Loss of KNN")

print(f"\nK=4:\n {knn_log_loss4}\n")

print(f"\nK=6:\n {knn_log_loss6}\n")

print(f"\nK=8:\n {knn_log_loss8}")

### Testing phase

y_pred4 = knn4.predict(X_test)

y_pred6 = knn6.predict(X_test)

y_pred8 = knn8.predict(X_test)

# KNN when K=4

knn_accuracy4 = accuracy_score(y_test, y_pred4)

knn_confusionMatrix4 = confusion_matrix(y_test, y_pred4)

knn_classification4 = classification_report(y_test, y_pred4)

knn_log_loss4 = log_loss(y_test, knn4.predict_proba(X_test))  # log_loss expects predicted probabilities, not hard class labels

# KNN when K=6

knn_accuracy6 = accuracy_score(y_test, y_pred6)

knn_confusionMatrix6 = confusion_matrix(y_test, y_pred6)

knn_classification6 = classification_report(y_test, y_pred6)

knn_log_loss6 = log_loss(y_test, knn6.predict_proba(X_test))

# KNN when K=8

knn_accuracy8 = accuracy_score(y_test, y_pred8)

knn_confusionMatrix8 = confusion_matrix(y_test, y_pred8)

knn_classification8 = classification_report(y_test, y_pred8)

knn_log_loss8 = log_loss(y_test, knn8.predict_proba(X_test))

print("- Accuracy score of KNN")

print(f"K=4: {knn_accuracy4}")

print(f"K=6: {knn_accuracy6}")

print(f"K=8: {knn_accuracy8}\n\n")

print("- Confusion matrix of KNN")

print(f"K=4:\n {knn_confusionMatrix4}\n")

print(f"K=6:\n {knn_confusionMatrix6}\n")

print(f"K=8:\n {knn_confusionMatrix8}\n\n")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_classification4}\n")

print(f"\nK=6:\n {knn_classification6}\n")

print(f"\nK=8:\n {knn_classification8}")

print("- Classification report of KNN")

print(f"\nK=4:\n {knn_log_loss4}\n")

print(f"\nK=6:\n {knn_log_loss6}\n")

print(f"\nK=8:\n {knn_log_loss8}")