Demonstrate the following Similarity and Dissimilarity Measures using Python:
a) Pearson's Correlation
b) Cosine Similarity
c) Jaccard Similarity
d) Euclidean Distance
e) Manhattan Distance
import matplotlib.pyplot as plt
import numpy as np
# Seed NumPy's global random generator so the draws below are repeatable
# when the notebook is run top-to-bottom from a fresh kernel.
np.random.seed(42)

# X: 10 samples drawn from the standard normal distribution.
# NOTE(review): the array recorded in the output below does not look like the
# first 10 draws after seed(42); it appears to come from a later point in the
# same stream (the cell was probably re-run without re-seeding). Expect
# different numbers on a clean run -- confirm before relying on the printed values.
X = np.random.randn(10)
X  # bare expression: displays the array when run as a notebook cell
[7]: array([-0.56228753, -1.01283112, 0.31424733, -0.90802408, -1.4123037 ,
1.46564877, -0.2257763 , 0.0675282 , -1.42474819, -0.54438272])
# Y: X plus independent standard-normal noise, so Y is positively related to X
# without being an exact copy of it.
Y = X + np.random.randn(10)
Y  # bare expression: displays the array when run as a notebook cell
[9]: array([-0.45136494, -2.1638247 , 0.68994535, -1.50866277, -1.70399745,
0.86394216, 1.62650188, 0.05403098, -2.48245912, 0.27816219])
# Scatter plot of the paired samples to eyeball the relationship between X and Y.
plt.scatter(X, Y)
plt.xlabel('X Values')
plt.ylabel('Y Values')
plt.show()
1
0.0.1 Pearson's Correlation
from scipy.stats import pearsonr

# pearsonr returns the correlation coefficient together with a p-value;
# only the coefficient is needed here, so the p-value is discarded.
corr, _ = pearsonr(X, Y)
print('Pearsons correlation: %.3f' % corr)
Pearsons correlation: 0.778
0.0.2 Cosine Similarity
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity works on 2-D inputs (one row per sample), so each 1-D
# vector is reshaped into a single-row matrix first.
cos_sim = cosine_similarity(X.reshape(1, -1), Y.reshape(1, -1))

# The result is a 1x1 matrix. Pull out the scalar explicitly: formatting the
# array itself with %f relies on implicit array-to-scalar conversion, which
# NumPy has deprecated for arrays with ndim > 0.
print('Cosine Similarity: %.3f' % cos_sim[0, 0])
Cosine Similarity: 0.805
2
0.0.3 Jaccard Similarity
from sklearn.metrics import jaccard_score

# Two binary membership vectors: both are 1 in the first two positions,
# only A is 1 in the third, and neither is 1 in the fourth.
A = [1, 1, 1, 0]
B = [1, 1, 0, 0]

# Jaccard = |A and B| / |A or B| over the positive entries = 2 / 3.
jacc = jaccard_score(A, B)
print('Jaccard Similarity: %.3f' % jacc)
Jaccard Similarity: 0.667
0.0.4 Euclidean Distance
from scipy.spatial import distance

# Euclidean (L2) distance: square root of the summed squared differences
# between corresponding elements of X and Y.
dst = distance.euclidean(X, Y)
print('Euclidean distance: %.3f' % dst)
Euclidean distance: 2.741
0.0.5 Manhattan Distance
# Import repeated so this section can be run on its own.
from scipy.spatial import distance

# Manhattan (city-block, L1) distance: sum of the absolute differences
# between corresponding elements of X and Y.
# Note: this rebinds `dst`, replacing the Euclidean result computed earlier.
dst = distance.cityblock(X, Y)
print('Manhattan distance: %.3f' % dst)
Manhattan distance: 6.878