Apply K- Means clustering algorithm on any dataset.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Load the Iris dataset that ships with scikit-learn.
iris_data = load_iris()
print(iris_data.feature_names)

# One row per sample: the feature columns plus the integer class label,
# stored under 'Species'.
df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
df['Species'] = iris_data.target

# Quick inspection. Outside a notebook a bare expression displays nothing,
# so each result is printed explicitly (df.info() already prints by itself).
print(df.head(10))
print(np.unique(df['Species']))
print(df.shape)
df.info()
# Scatter plot of sepal length against sepal width, one colour per species,
# with all three species drawn on the same axes.
species_styles = [
    (0, 'red', 'Iris-Setosa'),
    (1, 'green', 'Iris-Versicolor'),
    (2, 'blue', 'Iris-Virginica'),
]

ax = None  # the first call creates the axes; later calls draw onto them
for species_id, color, label in species_styles:
    ax = df[df.Species == species_id].plot.scatter(
        x='sepal length (cm)',
        y='sepal width (cm)',
        color=color,
        label=label,
        ax=ax,
    )
ax.set_title("Scatter Plot")
import seaborn as sns

# Pairwise correlation between the columns of df, shown as an annotated
# heatmap (annot=True writes each coefficient into its cell).
correl = df.corr()
# NOTE(review): with no `ax` argument, heatmap draws on the currently active
# axes. When this runs as a plain script rather than as separate notebook
# cells, create a new figure first so it does not overwrite an earlier plot.
sns.heatmap(correl, annot=True)
from sklearn.model_selection import train_test_split

# Feature matrix and class labels taken straight from the loaded dataset.
X = iris_data.data
y = iris_data.target

# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=50, test_size=0.2
)
from sklearn.cluster import KMeans

# K-Means is unsupervised: it is fitted on the features only. (The original
# passed y_train to fit(), where it is accepted but ignored.) random_state
# and n_init are pinned so the clustering is reproducible.
kmean = KMeans(n_clusters=3, n_init=10, random_state=50)
kmean.fit(X_train)

# Raw cluster ids for each sample. The numbering of clusters is arbitrary
# and has no relation to the numbering of the true classes.
train_labels = kmean.predict(X_train)
test_labels = kmean.predict(X_test)

# Translate each cluster id into the class that is most common among the
# training samples assigned to that cluster. Without this step the accuracy
# would depend on how the clusters happened to be numbered.
cluster_to_class = np.full(kmean.n_clusters, -1)
for cluster_id in range(kmean.n_clusters):
    members = y_train[train_labels == cluster_id]
    if members.size:
        cluster_to_class[cluster_id] = np.bincount(members).argmax()

train_pred = cluster_to_class[train_labels]
test_pred = cluster_to_class[test_labels]

# Share of samples whose cluster's majority class matches their true class,
# for the training split and the held-out split respectively.
print(accuracy_score(y_train, train_pred))
print(accuracy_score(y_test, test_pred))