+1 (315) 557-6473 

Python Program to Implement Clustering Assignment Solution.


Instructions

Objective
Write a program to implement clustering in Python.

Requirements and Specifications

A program that implements clustering in Python.

Source Code

import pandas as pd

import numpy as np

from matplotlib import pyplot as plt

from scipy.cluster.hierarchy import dendrogram, linkage

"""# Read original data with all coefficients

We will read the original .csv file and then extract the desired column

"""

# Load the CSV and, in the same step, discard every row that contains a
# missing value so the later string parsing never sees NaN.
data = pd.read_csv('original_data.csv').dropna()

# Preview the first rows (the result is only displayed when this runs as the
# last expression of an interactive/notebook cell).
data.head()

"""# Get data for column 'Degree=4 Coefficients'

The column contains the points in a string '[ .. ]', so we will have to parse that string to remove the brackets and extract the float values

"""

# Extract the raw coefficient strings and convert them to a NumPy array.
# Each entry is expected to be text of the form '[ c0 c1 c2 c3 c4 ]'.
pointsraw = data['Degree=4 Coefficients'].to_numpy()

# Matrix holding one row per sample and one column per coefficient
# (5 values per row). The row count follows the data instead of being
# assumed.
X = np.zeros((pointsraw.shape[0], 5))

# Parse each raw sample into a row of floats
for i, points_str in enumerate(pointsraw):
    # Remove surrounding whitespace and the enclosing brackets. Stripping by
    # character (rather than slicing [1:-1]) keeps the first/last digit intact
    # when the string is padded.
    points_str = points_str.strip().strip('[]')

    # Split on whitespace; commas are treated as separators too so both
    # '[1 2 3]' and '[1, 2, 3]' style strings are accepted.
    points_lst = points_str.replace(',', ' ').split()

    # Convert to float
    points_i = list(map(float, points_lst))

    # Fail loudly on a malformed row: without this check a single-value row
    # would be silently broadcast across the whole matrix row.
    if len(points_i) != X.shape[1]:
        raise ValueError(
            'row {}: expected {} coefficients, got {}'.format(
                i, X.shape[1], len(points_i)
            )
        )

    # Store the parsed coefficients in the matrix
    X[i, :] = points_i

"""# Hierarchical Clustering"""

# Build the agglomerative linkage matrix from the rows of X using Ward's
# method on Euclidean distances (arguments: observations, method, metric).
Z = linkage(X, 'ward', 'euclidean')

"""# Dendrogram"""

# Open a large canvas so the leaf labels remain legible
plt.figure(figsize=(25, 15))

# Draw the dendrogram for the linkage matrix Z, with the x-axis leaf labels
# rotated vertically and set in a small font
dendrogram(Z, leaf_rotation=90., leaf_font_size=8.)

# Axis labels and title
plt.xlabel('Index', fontsize=25)
plt.ylabel('Euclidean Distance', fontsize=25)
plt.title('Hierarchical Clustering Dendrogram', fontsize=25)

# Display the finished figure
plt.show()