def test_similarity_regression_6(self):
    """Two pairs that share a first token are expected to be not similar."""
    non_matching_pairs = (
        ("pedro bernaola galva n", "pedro neto"),
        ("ahmed metwally", "ahmed hassan m h ali"),
    )
    # Plain loop (no subTest) so the first failing pair aborts the test.
    for left, right in non_matching_pairs:
        self.assertEqual(calculate_author_similarity(left, right), False)
def test_similarity_regression_9(self):
    """'zhang xiu' is expected to match four variants of itself.

    The variants are the identical string, the two tokens swapped, and
    both orderings with 'xiu' shortened to 'x'.
    """
    reference = "zhang xiu"
    variants = ("zhang xiu", "xiu zhang", "zhang x", "x zhang")
    for variant in variants:
        self.assertEqual(calculate_author_similarity(reference, variant), True)
def test_regression_10(self):
    """Check four name pairs against their expected similarity verdicts."""
    # (first name, second name, expected result), checked in this order.
    cases = (
        ("John Michael", "John Meyer Michael", True),
        ("Mohn Michael", "John Michael", False),
        ("rhys hill", "donna r hill", False),
        ("jose pereira", "jose m g torres pereira", False),
    )
    for first, second, expected in cases:
        self.assertEqual(calculate_author_similarity(first, second), expected)
def start(attribute, transformation_function=default_func):
    """Find similar author records for every author listed in ``Author.csv``.

    For each row of ``Author.csv`` (read from ``file_path`` and indexed by
    ``Id``), the ``Name`` value is passed through ``transformation_function``
    and turned into a search string by ``get_search_query``. That string is
    matched against column ``attribute`` of ``names.authorspapers`` using a
    boolean-mode full-text search. Rows for which
    ``calculate_author_similarity`` returns a truthy value are kept.

    The output file ``test_id15.csv`` (written under ``file_path``) holds one
    entry per author: the author's own id followed by the space-separated ids
    of the similar rows, excluding rows whose id equals the author's id.

    Args:
        attribute: Name of the column to select and search. It is
            interpolated directly into the SQL text (identifiers cannot be
            bound as parameters), so it must come from a trusted source.
        transformation_function: Callable applied to each author name before
            it is searched for and compared.
    """
    authors = pandas.read_csv(os.path.join(file_path, "Author.csv"), index_col="Id")

    # The statement text is the same for every author, so build it once.
    # Only the search string is bound per execution.
    search_query = (
        'SELECT Id,{} FROM names.authorspapers '
        'WHERE MATCH({}) AGAINST (%s IN BOOLEAN MODE)'
    ).format(attribute, attribute)

    # Collect results in a dict and convert once at the end, instead of
    # enlarging a Series one element at a time.
    ids_by_author = {}

    read_connector = pymysql.connect(user="******", password="******",
                                     host="localhost", charset="utf8mb4")
    try:
        with read_connector.cursor() as cursor:
            for counter, (key, value) in enumerate(authors.iterrows(), start=1):
                transformed_name = transformation_function(str(value['Name']))
                cursor.execute(search_query, (get_search_query(transformed_name),))

                # Keep ids of rows judged similar, skipping the author's own row.
                other_ids = [
                    str(row[0])
                    for row in cursor
                    if calculate_author_similarity(transformed_name, row[1])
                    and row[0] != key
                ]
                ids_by_author[key] = str(key) + " " + ' '.join(other_ids)

                # Coarse progress indicator.
                if counter % 5000 == 0:
                    print(counter)
    finally:
        # Release the connection even when a query or the similarity check fails.
        read_connector.close()

    result_id = pandas.Series(ids_by_author, dtype=object)
    result_id.to_csv(os.path.join(file_path, "test_id15.csv"))
def test_similarity_regression_4(self):
    """'p d h hill' and 'hillary d protas' are expected to be not similar."""
    first, second = "p d h hill", "hillary d protas"
    self.assertEqual(calculate_author_similarity(first, second), False)
def test_similarity_regression_3(self):
    """'ernest j' and 'john e' are expected to be not similar."""
    self.assertEqual(
        calculate_author_similarity("ernest j", "john e"),
        False,
    )
def test_similarity_regression_2(self):
    """The single-letter name 'x' is expected to be not similar to 'xin xin'."""
    single_letter_name = "x"
    full_name = "xin xin"
    verdict = calculate_author_similarity(single_letter_name, full_name)
    self.assertEqual(verdict, False)
def test_similarity_regression_1(self):
    """'yufeng xin' and 'xinzhi xing' are expected to be not similar."""
    self.assertEqual(
        calculate_author_similarity("yufeng xin", "xinzhi xing"),
        False,
    )
def test_similarity_6(self):
    """'chin a bing jin' and 'anton j lin chin' are expected to be not similar."""
    first, second = "chin a bing jin", "anton j lin chin"
    self.assertEqual(calculate_author_similarity(first, second), False)
def test_similarity_regression_7(self):
    """'j p olivier dex sardan' and 'olivier peulen' are expected to be not similar."""
    longer_name = "j p olivier dex sardan"
    shorter_name = "olivier peulen"
    verdict = calculate_author_similarity(longer_name, shorter_name)
    self.assertEqual(verdict, False)
def test_similarity_4(self):
    """'chin anton jin' and 'anton j chin' are expected to be similar."""
    self.assertEqual(
        calculate_author_similarity("chin anton jin", "anton j chin"),
        True,
    )
def test_similarity_3(self):
    """'chin jen lin' and 'lin j chin' are expected to be not similar."""
    first, second = "chin jen lin", "lin j chin"
    self.assertEqual(calculate_author_similarity(first, second), False)
def test_similarity_2(self):
    """'chin jen lin' and 'chin j lin' are expected to be similar."""
    full_form = "chin jen lin"
    shortened_form = "chin j lin"
    verdict = calculate_author_similarity(full_form, shortened_form)
    self.assertEqual(verdict, True)
def test_similarity(self):
    """'Chin Jen Lin' and 'Chin A lin' are expected to be not similar."""
    self.assertEqual(
        calculate_author_similarity("Chin Jen Lin", "Chin A lin"),
        False,
    )
def test_similarity_regression_5(self):
    """'dah ming chiu' is expected to match two variants of itself.

    The variants are one with an extra 'w' token and one with 'chiu'
    moved to the front.
    """
    reference = "dah ming chiu"
    for variant in ("dah ming w chiu", "chiu dah ming"):
        self.assertEqual(calculate_author_similarity(reference, variant), True)
def test_similarity_regression_8(self):
    """'howard ottensen' is checked against two candidates.

    It is expected to be not similar to 'howard o meyer' and similar to
    'h ottensen'.
    """
    reference = "howard ottensen"
    # (candidate, expected result), checked in this order.
    expectations = (
        ("howard o meyer", False),
        ("h ottensen", True),
    )
    for candidate, expected in expectations:
        self.assertEqual(calculate_author_similarity(reference, candidate), expected)
def test_similarity_5(self):
    """'chin a j' and 'anton j b chin' are expected to be not similar."""
    first, second = "chin a j", "anton j b chin"
    self.assertEqual(calculate_author_similarity(first, second), False)