def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors, rounded to 5 decimals.

    Both inputs are reshaped to a single row and handed to
    ``sk_cosine_similarity`` (presumably scikit-learn's
    ``cosine_similarity`` -- confirm against the file's imports).  An earlier
    note in this function recorded that a hand-written implementation gave
    almost the same values.

    Args:
        vec1: First vector.  Any object with a ``reshape`` method is used
            as-is; anything else (e.g. a plain list) is converted with
            ``numpy.asarray`` first.
        vec2: Second vector, handled the same way as ``vec1``.

    Returns:
        The similarity rounded to 5 decimal places.

    Raises:
        AssertionError: If the rounded similarity is outside ``[0, 1]``.
            Note that this rejects negative similarities as well.
    """
    # Local import keeps this block self-contained.
    import numpy as np

    # Plain sequences have no ``reshape``; objects that already provide one
    # are left untouched so existing callers see no change.
    if not hasattr(vec1, "reshape"):
        vec1 = np.asarray(vec1)
    if not hasattr(vec2, "reshape"):
        vec2 = np.asarray(vec2)

    vec1 = vec1.reshape(1, -1)
    vec2 = vec2.reshape(1, -1)

    alpha = sk_cosine_similarity(vec1, vec2)
    # The result is a 1x1 matrix; take the single entry.
    alpha = round(alpha[0][0], 5)

    # The message must be a value: ``print(...)`` returns None, which would
    # leave the AssertionError without any message.
    assert 0.0 <= alpha <= 1.0, f"alpha is {alpha}"
    return alpha
def test_cosine_similarity(self):
    """Check ``cosine_similarity`` against ``sk_cosine_similarity``.

    Both results are rounded to 5 decimal places before being compared
    for equality on a fixed pair of 20-element vectors.
    """
    vec_a = [
        -0.046193234622478485,
        -0.09216824918985367,
        0.023753443732857704,
        -0.03982221707701683,
        0.030631808564066887,
        0.06340867280960083,
        -0.09439295530319214,
        -0.1576867550611496,
        0.459428995847702,
        -0.22166694700717926,
        0.21970123052597046,
        0.19883397221565247,
        -0.19289985299110413,
        -0.157765731215477,
        0.0013831154210492969,
        0.29028451442718506,
        0.18202221393585205,
        0.14411108195781708,
        0.43273560404777527,
        -0.31332970261573792,
    ]
    vec_b = [
        0.23711472749710083,
        0.0747479647397995,
        0.20933881402015686,
        -0.1695360243320465,
        0.2809278070926666,
        0.2502232491970062,
        -0.0907953605055809,
        0.07467399537563324,
        -0.04727679118514061,
        -0.028494318947196007,
        -0.0278947614133358,
        0.2525108754634857,
        -0.06464426219463348,
        0.18594379723072052,
        0.13334108889102936,
        0.3466702401638031,
        0.30664315819740295,
        0.10267733037471771,
        0.04714057222008705,
        0.1208021491765976,
    ]

    # Value produced by the function under test.
    raw_actual = cosine_similarity(vec_a, vec_b)
    actual = round(raw_actual, 5)

    # Reference value: the vectors are wrapped as single-row matrices and
    # the only entry of the resulting matrix is read back.
    reference_matrix = sk_cosine_similarity([vec_a], [vec_b])
    expected = round(reference_matrix[0][0], 5)

    print(actual)
    self.assertEqual(actual, expected)
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Both inputs are reshaped to a single row and handed to
    ``sk_cosine_similarity`` (presumably scikit-learn's
    ``cosine_similarity`` -- confirm against the file's imports).

    Args:
        vec1: First vector.  Any object with a ``reshape`` method is used
            as-is; anything else (e.g. a plain list) is converted with
            ``numpy.asarray`` first.
        vec2: Second vector, handled the same way as ``vec1``.

    Returns:
        The single entry of the 1x1 similarity matrix, unrounded.
    """
    # Local import keeps this block self-contained.
    import numpy as np

    # Plain sequences have no ``reshape``; objects that already provide one
    # are left untouched so existing callers see no change.
    if not hasattr(vec1, "reshape"):
        vec1 = np.asarray(vec1)
    if not hasattr(vec2, "reshape"):
        vec2 = np.asarray(vec2)

    row1 = vec1.reshape(1, -1)
    row2 = vec2.reshape(1, -1)
    return sk_cosine_similarity(row1, row2)[0][0]
def fill_similarity_matrix(self, src_filename, dst_filename):
    """Build a sparse cosine-similarity matrix from a saved matrix.

    The matrix stored at ``src_filename`` is loaded with
    ``sparse.load_npz`` (presumably ``scipy.sparse`` -- confirm against the
    file's imports) and kept on ``self.centered_training_coo``.  Its CSR
    form is passed to ``sk_cosine_similarity`` with ``dense_output=False``,
    and the result is written to ``dst_filename`` with ``sparse.save_npz``.

    Args:
        src_filename: Path of the ``.npz`` file to load.
        dst_filename: Path the similarity matrix is saved to.

    Returns:
        The similarity matrix that was saved.
    """
    loaded = sparse.load_npz(src_filename)
    self.centered_training_coo = loaded

    as_csr = self.centered_training_coo.tocsr()
    pairwise = sk_cosine_similarity(as_csr, dense_output=False)

    sparse.save_npz(dst_filename, pairwise)
    return pairwise