def get_keywords_keywords_distance(prod_tuple, q_tuple):
    """Average the cosine distances of the selected keyword matches.

    The ``kwsVectors`` attribute of each argument is decoded with
    ``MyUtils_strings.fromlls_toarrays`` and wrapped in a NumPy array. A
    matrix of pairwise cosine similarities is filled in (rows follow
    ``prod_tuple``'s keyword vectors, columns follow ``q_tuple``'s), and
    ``MyUtils.pick_maxmatches_matrix`` chooses similarities from it
    (presumably the best match per keyword -- confirm against that helper).
    Each chosen similarity is converted back to a distance
    (``1 - similarity``) before averaging.

    Returns:
        The ``np.average`` of the resulting distances, or ``None`` when a
        ``NameError`` is raised anywhere during the computation.
    """
    try:
        prod_vectors = np.array(
            MyUtils_strings.fromlls_toarrays(prod_tuple.kwsVectors))
        q_vectors = np.array(
            MyUtils_strings.fromlls_toarrays(q_tuple.kwsVectors))
        n_rows = len(prod_vectors)
        n_cols = len(q_vectors)

        # Every cell starts at -1.0 and is overwritten by the loop below.
        similarities = np.full((n_rows, n_cols), -1.0)
        for row, prod_vec in enumerate(prod_vectors):
            for col, q_vec in enumerate(q_vectors):
                similarities[row, col] = 1 - distance.cosine(u=prod_vec, v=q_vec)

        chosen_similarities = MyUtils.pick_maxmatches_matrix(similarities)
        chosen_distances = [1 - sim for sim in chosen_similarities]
        return np.average(chosen_distances)
    except NameError:
        return None
def get_kws_similarity(kwsVectors_stringls_1, kwsVectors_stringls_2):
    """Aggregate the cosine similarity between two sets of keyword vectors.

    Both arguments are decoded with ``MyUtils_strings.fromlls_toarrays``.
    A matrix of pairwise cosine similarities is filled in (rows follow the
    first argument's vectors, columns the second's), and the values chosen
    from it by ``MyUtils.pick_maxmatches_matrix`` are averaged. The matrix
    and the aggregated value are both logged at DEBUG level.

    Returns:
        The ``numpy.average`` of the similarities chosen by
        ``MyUtils.pick_maxmatches_matrix``.
    """
    vectors_a = MyUtils_strings.fromlls_toarrays(kwsVectors_stringls_1)
    vectors_b = MyUtils_strings.fromlls_toarrays(kwsVectors_stringls_2)
    n_rows = len(vectors_a)
    n_cols = len(vectors_b)

    # Every cell starts at -1.0 and is overwritten by the loop below.
    similarities = numpy.full((n_rows, n_cols), -1.0)
    for row, vec_a in enumerate(vectors_a):
        for col, vec_b in enumerate(vectors_b):
            cosine_dist = scipy.spatial.distance.cosine(u=vec_a, v=vec_b)
            similarities[row, col] = 1 - cosine_dist
    logging.debug("\nThe keywords sim.matrix : %s", similarities)

    chosen_similarities = MyUtils.pick_maxmatches_matrix(similarities)
    aggregated = numpy.average(chosen_similarities)
    logging.debug("Aggregated keywords' similarity: %s", aggregated)
    return aggregated