+1 (315) 557-6473 

Program To Create a Classification Model in Python Language Assignment Solution.


Instructions

Objective
Write a Python program that creates a classification model.

Requirements and Specifications

  • Some parts of the code are in the files Hw1_code_someResults.pdf and Help.zip. Analyze how the code from Help.zip is embedded into the complete code (Hw1_code_someResults.pdf). Hw1_code_someResults.pdf uses the Iris Flowers dataset. You must replace the Iris dataset with one of the datasets given below, or with any classification dataset you like. (My choice is the Abalone dataset; this selection was confirmed with the professor.) All datasets should be for classification with numerical attribute values, have more than two classes, not too many attributes, and no (explicitly) missing values.
All students must use different datasets. Send me an e-mail with your choice. The principle will be FIRST COME, FIRST SERVED. (Iris Flowers dataset and Pima-Indians Diabetes are excluded as a choice)
  •  Abalone Dataset — this is my data selection, confirmed with the professor.
UCI Machine Learning Repository: Abalone Data Set
PART1: for plotting you must use matplotlib
  1. Completion of all parts of listings 1, 2, and 3 shown in file Hw1_code_someResults.zip.
  2.  Analysis of results of PART1
PART2: for plotting you must use matplotlib
Listing 4: Pairwise Pearson Correlation, Skew for Each Attribute, Univariate Density Plot, Correlation Matrix Plot (Help.zip).
Listing 5: Rescaling Data, Standardize Data, Normalize Data, Binarization (Help.zip).
Completion of Listings 4 and 5 of Part2
Analysis of results PART2
NOTE: For completion of PART2 use Lec3 and resources such as https://scikit-learn.org/stable/user_guide.html
https://scikit-learn.org/stable/modules/preprocessing.html
PART3: for plotting you must use seaborn
Listing 8: Complete any 6 calculations and plots using the seaborn package that are different from the matplotlib implementation
Analysis of results PART3
NOTE: you may use the following resources for completion of PART3
Source Code
# -*- coding: utf-8 -*-
"""Part1.ipynb
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1gGWDGWMkPRi3Y3UGoAPBxWWRK-FaiLsO
"""
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from numpy import set_printoptions
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Binarizer
import seaborn as sns
"""## Listing 1: Load Abalone Dataset"""
filename = 'abalone.data'
# Column labels for the header-less data file; the last column (Rings) is the class.
# NOTE(review): the UCI description appears to call the fifth attribute
# "Whole weight" - the label is left unchanged because other code in this file
# selects the column by this exact name.
names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole Height', 'Shucked Weight', 'Viscera Weight', 'Shell Weight', 'Rings']
dataset = read_csv(filename, names = names, delimiter = ',')
# Drop Na
dataset = dataset.dropna()
# Encode 'Sex' as integer codes, numbered in order of first appearance.
# Replacing the whole column (rather than writing ints into it through .loc)
# gives it a real integer dtype, so the numeric-only helpers used later
# (describe, hist, scatter_matrix) keep the column instead of skipping it.
sex_uniq = dataset['Sex'].unique()
sex_codes = {sex: i for i, sex in enumerate(sex_uniq)}
dataset['Sex'] = dataset['Sex'].map(sex_codes).astype(int)
"""## Listing 2: Dimensions of dataset
### a) Shape of data-set
"""
# (number of rows, number of columns) of the cleaned frame
n_rows_cols = dataset.shape
print(n_rows_cols)
"""### b) Few rows of the data-set"""
first_rows = dataset.head(20)
print(first_rows)
"""### c) Statistical Description of the dataset"""
summary_stats = dataset.describe()
print(summary_stats)
"""### d) Class distribution of the dataset"""
# Rings is the output (class) column: count how many rows carry each value
rows_per_class = dataset.groupby('Rings').size()
print(rows_per_class)
"""## Listing 3: Univariate plots to better understand each attribute. Multivariate plots to better understand the relationships between attributes
### a) Univariate Plot
"""
# DataFrame.plot opens its own figure (sized by figsize), so no separate
# pyplot.figure() call is made first - that would only leave an empty figure
# behind. A 3x3 grid has one cell for each of the nine columns, which keeps
# the individual box plots readable.
dataset.plot(figsize=(20,20), kind='box', subplots = True, layout=(3,3), sharex=False, sharey = False)
pyplot.show()
"""### b) Visualize data-set using histogram plots"""
# One histogram per numeric column
dataset.hist(figsize=(20,20))
pyplot.show()
"""### Part c) Visualize dataset using scatter plots"""
# Grid of pairwise scatter plots between the numeric columns
scatter_matrix(dataset, figsize=(20,20))
pyplot.show()
PART 2
# -*- coding: utf-8 -*-
"""Part2.ipynb
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1mSRYWD3HL7iPpyUP8Cmc9IXU_YMEGjx6
"""
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from numpy import set_printoptions
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Binarizer
import seaborn as sns
"""## Listing 1: Load Abalone Dataset"""
filename = 'abalone.data'
# Column labels for the header-less data file; the last column (Rings) is the class.
# NOTE(review): the UCI description appears to call the fifth attribute
# "Whole weight" - the label is left unchanged because other code in this file
# selects the column by this exact name.
names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole Height', 'Shucked Weight', 'Viscera Weight', 'Shell Weight', 'Rings']
dataset = read_csv(filename, names = names, delimiter = ',')
# Drop Na
dataset = dataset.dropna()
# Encode 'Sex' as integer codes, numbered in order of first appearance.
# Replacing the whole column (rather than writing ints into it through .loc)
# gives it a real integer dtype, so correlation, skew and the scalers below
# all see an ordinary numeric column.
sex_uniq = dataset['Sex'].unique()
sex_codes = {sex: i for i, sex in enumerate(sex_uniq)}
dataset['Sex'] = dataset['Sex'].map(sex_codes).astype(int)
"""# Part 2
## Listing 4)
### a) Pairwise Correlation: Plot correlation using Seaborn
"""
# Pairwise correlation between all columns (pandas defaults to Pearson)
corr = dataset.corr()
plt.figure(figsize=(10,10))
sns.heatmap(corr)
plt.show()
"""###b) Skew for each attribute"""
# Print the result explicitly: outside a notebook a bare expression is
# evaluated and thrown away, so the skew values would never be displayed.
print(dataset.skew())
"""### c) Univariate Density Plot
We create a plot for each variable
"""
# Eight input attributes -> one density curve each, on a grid of 4 rows x 2 columns
fig, axes_grid = plt.subplots(4,2)
density_columns = ['Sex', 'Length', 'Diameter', 'Height', 'Whole Height', 'Shucked Weight', 'Viscera Weight', 'Shell Weight']
# flatten() walks the grid row by row, so the attributes fill it left-to-right, top-to-bottom
for panel, column in zip(axes_grid.flatten(), density_columns):
  dataset[column].plot.kde(ax=panel)
pyplot.show()
"""### d) Correlation Matrix Plot"""
corr = dataset.corr()
# plot correlation matrix
fig = pyplot.figure(figsize=(18,18))
ax = fig.add_subplot(111)
cax = ax.matshow(corr, vmin=-1, vmax=1)
fig.colorbar(cax)
# Take tick positions and labels from the matrix itself instead of a
# hard-coded count of 9: the labels then always line up with the rows and
# columns that corr() actually returned.
ticks = numpy.arange(len(corr.columns))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(corr.columns)
ax.set_yticklabels(corr.index)
pyplot.show()
"""## Listing 5)
### a) Rescaling Data
"""
array = dataset.values # convert dataset to numpy array
# separate array into input and output components
X = array[:,0:-1]
# Y is only the class column (the last one, Rings), not the whole table
Y = array[:,-1]
# Rescale every input attribute to the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)
# summarize transformed data
set_printoptions(precision=3)
names_scaled = names[:-1]
# Rebuild dataframe of rescaled data
dataset_scaled = pd.DataFrame(data = X_scaled, columns = names_scaled)
# print explicitly so the preview also shows up when run as a plain script
print(dataset_scaled.head())
"""### b) Standarize Data"""
# separate array into input and output components
X = array[:,0:-1]
# Y is only the class column (the last one, Rings), not the whole table
Y = array[:,-1]
# Fit the scaler on the inputs, then apply it to the same inputs
standarizer = StandardScaler().fit(X)
X_standarized = standarizer.transform(X)
# summarize transformed data
set_printoptions(precision=3)
names_standarized = names[:-1]
# Rebuild dataframe of standardized data
dataset_standarized= pd.DataFrame(data = X_standarized, columns = names_standarized)
# print explicitly so the preview also shows up when run as a plain script
print(dataset_standarized.head())
"""### c) Normalize Data"""
# separate array into input and output components
X = array[:,0:-1]
# Y is only the class column (the last one, Rings), not the whole table
Y = array[:,-1]
# Normalizer works per row (sample), not per column
normalizer = Normalizer().fit(X)
X_normalized = normalizer.transform(X)
# summarize transformed data
set_printoptions(precision=3)
names_normalized = names[:-1]
# Rebuild dataframe of normalized data
dataset_normalized = pd.DataFrame(data = X_normalized, columns = names_normalized)
# print explicitly so the preview also shows up when run as a plain script
print(dataset_normalized.head())
"""### d) Binarization"""
# separate array into input and output components
X = array[:,0:-1]
# Y is only the class column (the last one, Rings), not the whole table
Y = array[:,-1]
# With threshold 0.0 every value greater than zero becomes 1, everything else 0
binarizer = Binarizer(threshold=0.0).fit(X)
X_binarized = binarizer.transform(X)
# summarize transformed data
set_printoptions(precision=3)
names_binarized = names[:-1]
# Rebuild dataframe of binarized data
dataset_binarized = pd.DataFrame(data = X_binarized, columns = names_binarized)
# print explicitly so the preview also shows up when run as a plain script
print(dataset_binarized.head())
PART 3
# -*- coding: utf-8 -*-
"""Part3.ipynb
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1oBcqN_8CnvzAisy7GsypkjOtEQSifJRB
"""
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from numpy import set_printoptions
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Binarizer
import seaborn as sns
"""## Listing 1: Load Abalone Dataset"""
filename = 'abalone.data'
# Column labels for the header-less data file; the last column (Rings) is the class.
# NOTE(review): the UCI description appears to call the fifth attribute
# "Whole weight" - the label is left unchanged because other code in this file
# selects the column by this exact name.
names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole Height', 'Shucked Weight', 'Viscera Weight', 'Shell Weight', 'Rings']
dataset = read_csv(filename, names = names, delimiter = ',')
# Drop Na
dataset = dataset.dropna()
# Encode 'Sex' as integer codes, numbered in order of first appearance.
# Replacing the whole column (rather than writing ints into it through .loc)
# gives it a real integer dtype instead of leaving it as a generic object column.
sex_uniq = dataset['Sex'].unique()
sex_codes = {sex: i for i, sex in enumerate(sex_uniq)}
dataset['Sex'] = dataset['Sex'].map(sex_codes).astype(int)
"""# Part 3)
## Listing 8)
# Relation between Each variable with the other
"""
# Every column except 'Sex'
names_plot = names[1:]
# One scatter figure per pair of neighbouring columns (Length vs Diameter,
# Diameter vs Height, ...). zip() over the list and its one-step shift yields
# exactly those pairs, so no index counters or "already used" list are needed.
for vari, varj in zip(names_plot, names_plot[1:]):
  sns.relplot(x = vari, y = varj, data = dataset)
  plt.grid(True)
pyplot.show()
"""The figures above show one scatter plot for each pair of adjacent variables (Length vs. Diameter, Diameter vs. Height, and so on), displaying the relation (correlation) between the two variables
## Histogram plot of Rings with distribution
"""
# Histogram of the class column with a KDE curve drawn on top
rings = dataset['Rings']
hist_axes = sns.histplot(rings, kde = True)
# histplot returns the axes it drew on; switch the grid on there
hist_axes.grid(True)
plt.show()
"""## Joint Plot for all variables
"""
# Global seaborn style; it stays in effect for every plot created afterwards
sns.set_style('dark')
# Every column except 'Sex'
names_plot = names[1:]
# One joint plot per pair of neighbouring columns (Length vs Diameter,
# Diameter vs Height, ...). zip() over the list and its one-step shift yields
# exactly those pairs, so no index counters or "already used" list are needed.
for vari, varj in zip(names_plot, names_plot[1:]):
  sns.jointplot(x=vari, y= varj, data = dataset)
  plt.grid(True)
pyplot.show()
"""## RegPlot for each variable"""
# Every column except 'Sex'
names_plot = names[1:]
# One regression plot per pair of neighbouring columns (Length vs Diameter,
# Diameter vs Height, ...). Each figure is shown inside the loop, so no
# further show() call is needed once the loop has finished.
for vari, varj in zip(names_plot, names_plot[1:]):
  sns.regplot(x = vari, y = varj, data = dataset)
  plt.grid(True)
  plt.show()
"""## KDE Plot for each pair"""
# Every column except 'Sex'
names_plot = names[1:]
# One bivariate density plot per pair of neighbouring columns (Length vs
# Diameter, Diameter vs Height, ...).
# The columns are passed with the x=/y=/data= keywords and the contours are
# filled with fill=True: passing two vectors positionally together with
# shade/shade_lowest is deprecated in the seaborn releases that provide
# histplot and is rejected by newer ones. The default threshold already
# leaves the lowest density level unfilled.
for vari, varj in zip(names_plot, names_plot[1:]):
  sns.kdeplot(x = vari, y = varj, data = dataset, fill = True)
  plt.grid(True)
  plt.show()
"""## Finally, show a PairPlot"""
# Scatter-plot grid over the columns, coloured by the class column (Rings),
# with density curves on the diagonal
pairplot_options = {
  'hue': 'Rings',
  'diag_kind': "kde",
  'kind': "scatter",
  'palette': "husl",
}
sns.pairplot(dataset, **pairplot_options)
plt.show()