Program To Implement Clustering Assignment Solution.

Instructions

Objective

Write a program to implement hierarchical clustering in Python.

Requirements and Specifications

Source Code

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
"""# Read original data with all coefficients
We will read the original .csv file and then extract the desired column
"""
# Load the CSV and discard every row that contains a missing value
data = pd.read_csv('original_data.csv').dropna()
# Preview of the first rows (notebook leftover; the result is not used)
data.head()
"""# Get data for column 'Degree = 4 Coefficients'
The column contains the points in a string '[ .. ]', so we will have to parse that string to remove the brackets and extract the float values
"""
# Number of values expected in every cell of the 'Degree=4 Coefficients'
# column (presumably degree + 1 polynomial coefficients -- confirm against
# the code that produced the CSV).
N_COEFFICIENTS = 5

# Raw cell contents of the column, one string per sample
pointsraw = data['Degree=4 Coefficients'].to_numpy()

# One row per sample, one column per coefficient
X = np.zeros((pointsraw.shape[0], N_COEFFICIENTS))
for row, raw in enumerate(pointsraw):
    # Trim whitespace first so padding around the cell cannot leave a bracket
    # behind, then drop the enclosing '[' / ']'. Commas are treated like
    # whitespace so both '[1 2 3]' and '[1, 2, 3]' are accepted.
    tokens = raw.strip().strip('[]').replace(',', ' ').split()
    # Check the count explicitly: assigning a single value to X[row, :] would
    # otherwise be broadcast silently across the whole row.
    if len(tokens) != N_COEFFICIENTS:
        raise ValueError(
            f"row {row}: expected {N_COEFFICIENTS} coefficients, "
            f"got {len(tokens)} in {raw!r}"
        )
    X[row, :] = [float(tok) for tok in tokens]
"""# Hierarchical Clustering"""
# Build the linkage matrix from the rows of X using Ward's method
Z = linkage(X, method='ward', metric='euclidean')
"""# Dendogram"""
# Open a 25 x 15 inch figure for the tree
plt.figure(figsize=(25, 15))
# Draw the dendrogram with vertical, small-font leaf labels on the x axis
dendrogram(Z, leaf_rotation=90., leaf_font_size=8.)
# Title and both axis captions share the same font size
for annotate, caption in (
    (plt.title, 'Hierarchical Clustering Dendrogram'),
    (plt.xlabel, 'Index'),
    (plt.ylabel, 'Euclidean Distance'),
):
    annotate(caption, fontsize=25)
plt.show()

Python Program to Implement Clustering Assignment Solution.

Instructions

Requirements and Specifications