def __process_cos_match(self):
     """Score every row's 'Steps' text against each later row and write the result.

     For each pair (master row, later row) a record is produced holding the
     master's value from the ``self.uniq_header`` column, the later row's value
     from the same column ('Potential Match') and the value returned by
     ``get_cosine`` multiplied by 100 ('Similarity Index').

     When ``self.is_new_text == 1`` only the first row is used as master, i.e.
     it is compared against all rows that follow it and nothing else.

     The collected records are wrapped in a DataFrame and handed to
     ``self.__write_xlsx`` under the name "recomendation".
     """
     frame = self.data_frame
     row_count = len(frame.index)
     # list to store the analysis
     recommendations = []
     if row_count:
         ids = frame[self.uniq_header]
         steps = frame['Steps']
         # Vectorise each text exactly once instead of once per pair.  Rows are
         # addressed by position (.iloc) so the pairing also works when the
         # dataframe index is not the default 0..n-1 range.
         vectors = [text_to_vector(str(steps.iloc[pos]))
                    for pos in range(row_count)]
         # With is_new_text == 1 only the first row acts as master.
         master_count = 1 if self.is_new_text == 1 else row_count
         for master_pos in range(master_count):
             master_id = str(ids.iloc[master_pos])
             master_vector = vectors[master_pos]
             # Compare against the rows immediately following the master row
             for match_pos in range(master_pos + 1, row_count):
                 # Generate the cosine similarity match value
                 cosine = get_cosine(master_vector, vectors[match_pos])
                 recommendations.append({
                     self.uniq_header: master_id,
                     'Potential Match': str(ids.iloc[match_pos]),
                     'Similarity Index': cosine * 100
                 })
     # Create dataframe and write
     self.__write_xlsx(pd.DataFrame(recommendations), "recomendation")
Exemplo n.º 2
0
 def test_get_cosine_same(self):
     """Check that cc.get_cosine() gives 1.0 for two identical vectors.

     Both arguments are Counters built from the same term/count mapping.
     """
     term_counts = {
         "hello": 16,
         "Language": 30,
         "python": 66
     }
     cosine = cc.get_cosine(Counter(term_counts), Counter(term_counts))
     # Compare with a tolerance: the result is a float, so an exact
     # equality check could fail on a last-digit rounding difference.
     self.assertAlmostEqual(
         1.0, cosine,
         msg="Cosine of two identical vectors should be 1.0")
Exemplo n.º 3
0
 def test_get_positive_cosine(self):
     """Check cc.get_cosine() for two vectors that share only some terms.

     "hello" and "python" occur in both Counters; the remaining terms occur
     in one Counter only.  The expected value lies between 0 and 1.
     """
     first_vector = Counter({
         "hello": 16,
         "Language": 30,
         "python": 66
     })
     second_vector = Counter({
         "Mac": 9,
         "MANGO": 27,
         "python": 88,
         "hello": 5
     })
     positive_cosine = cc.get_cosine(first_vector, second_vector)
     # Compare with a tolerance: the result is a float, so an exact
     # equality check could fail on a last-digit rounding difference.
     self.assertAlmostEqual(
         0.8562387195638202, positive_cosine,
         msg="Cosine of the partially overlapping vectors "
             "should be about 0.8562")
Exemplo n.º 4
0
 def test_get_cosine_none(self):
     """Check that cc.get_cosine() gives 0.0 when both vectors hold only an
     empty term with a count of zero."""
     first_vector = Counter({"": 0})
     second_vector = Counter({"": 0})
     result = cc.get_cosine(first_vector, second_vector)
     self.assertEqual(0.0, result, "Value should be equal to 0")
Exemplo n.º 5
0
 def test_get_negative_cosine(self):
     """Check that cc.get_cosine() gives 0.0 for two vectors that have no
     term in common (the "negative" case: no match at all)."""
     first_vector = Counter({"hello_World": 99})
     second_vector = Counter({"TEST": 888})
     result = cc.get_cosine(first_vector, second_vector)
     self.assertEqual(0.0, result, "Value should be 0.0")