def test_one_clutter_after(self):
    # A randomly picked title is compared with a copy of itself that has a
    # randomly picked junk entry appended; the check is expected to be
    # truthy whichever side carries the junk.
    title = paper_titles[random.randint(0, len(paper_titles) - 1)]
    trailing = junk[random.randint(0, len(junk) - 1)]

    # Junk trails the second reference.
    plain = title
    cluttered = title + trailing
    shared = set(plain) & set(cluttered)
    self.assertTrue(arrays_contain_same_reference(plain, cluttered, shared))

    # Junk trails the first reference rather than the second.
    cluttered = title + trailing
    plain = title
    shared = set(cluttered) & set(plain)
    self.assertTrue(arrays_contain_same_reference(cluttered, plain, shared))
def test_one_clutter_before(self):
    # A randomly picked title is compared with a copy of itself that has a
    # randomly picked junk entry prepended; the check is expected to be
    # truthy whichever side carries the junk.
    title = paper_titles[random.randint(0, len(paper_titles) - 1)]
    leading = junk[random.randint(0, len(junk) - 1)]

    # Junk precedes the second reference.
    plain = title
    cluttered = leading + title
    shared = set(plain) & set(cluttered)
    self.assertTrue(arrays_contain_same_reference(plain, cluttered, shared))

    # Same check with the arguments the other way round.
    cluttered = leading + title
    plain = title
    shared = set(cluttered) & set(plain)
    self.assertTrue(arrays_contain_same_reference(cluttered, plain, shared))
def test_noclutter_similar(self):
    # The two halves of a randomly picked "similar" pair, with nothing
    # added to either, are expected NOT to be reported as the same
    # reference.
    pick = random.randint(0, len(paper_title_similar_pairs) - 1)
    pair = paper_title_similar_pairs[pick]
    left = pair[0]
    right = pair[1]
    shared = set(left) & set(right)
    self.assertFalse(arrays_contain_same_reference(left, right, shared))
def test_both_clutter_after_repeated_different(self):
    # Hand-built case where the two trailing junk lists overlap with each
    # other (both contain "junk"), and the first junk list also repeats a
    # word from the title ("this").
    title = ["this", "is", "the", "paper", "title"]
    tail_a = ["this", "here", "junk", "words"]
    tail_b = ["yet", "more", "stuff", "junk"]

    left = title + tail_a
    right = title + tail_b
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_noclutter_cutoff(self):
    # What do we want to be the case here? If we remove the last word in
    # one copy of a title, it is sort of like the untruncated copy has junk
    # at the end.
    #
    # Both sides must come from the SAME title. Truncating a different
    # entry (paper_titles[1]) would compare two distinct papers, which
    # test_noclutter_invalid expects not to match.
    first = paper_titles[0]
    second = paper_titles[0][:-1]
    common = set(first).intersection(set(second))
    # Go with a more inclusive assumption for now...
    self.assertTrue(arrays_contain_same_reference(first, second, common))
def test_both_sameauthor_difftitle_before(self):
    # The same list of author words is put in front of each half of the
    # first "similar" title pair; the check is expected to be truthy.
    # NOTE(review): test_noclutter_similar expects similar pairs without a
    # prefix NOT to match, so this expected value looks like it records
    # current behaviour rather than the ideal one -- confirm.
    authors = ["john", "baker", "jill", "smith", "bea", "zisserman"]
    similar_pair = paper_title_similar_pairs[0]

    left = authors + similar_pair[0]
    right = authors + similar_pair[1]
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_both_clutter_after(self):
    # A randomly picked title is followed by two DIFFERENT randomly picked
    # junk entries, one per side; the check is expected to be truthy.
    paper_ind = random.randint(0, len(paper_titles) - 1)

    # random.sample() returns two distinct indices in one call. Unlike a
    # "re-roll until different" loop it cannot spin forever when junk has
    # a single entry: it raises ValueError if fewer than two are available.
    junk_first_ind, junk_second_ind = random.sample(range(len(junk)), 2)

    first = paper_titles[paper_ind] + junk[junk_first_ind]
    second = paper_titles[paper_ind] + junk[junk_second_ind]
    common = set(first).intersection(set(second))
    self.assertTrue(arrays_contain_same_reference(first, second, common))
def test_one_clutter_after_repeatword(self):
    # The trailing clutter contains a word ('semantic') that also occurs
    # in the paper title. Checked with the clutter on either side.
    title = [
        'whats', 'point', 'semantic', 'segmentation',
        'with', 'point', 'supervision',
    ]
    clutter = [
        '3rd', 'workshop', 'semantic', 'perception',
        'mapping', 'exploration', 'spme',
    ]

    # Clutter on the second reference.
    left = title
    right = title + clutter
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))

    # Clutter on the first reference.
    left = title + clutter
    right = title
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_one_clutter_before_repeatword(self):
    # The leading clutter contains a word ('image') that also occurs in
    # the paper title; the check is still expected to be truthy.
    title = [
        'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
        'convolutional', 'nets', 'atrous', 'convolution', 'fully',
        'connected', 'crfs',
    ]
    prefix = [
        'navab', 'hornegger', 'wells', 'frangi', 'eds', 'medical',
        'image', 'computing', 'computer-assisted', 'intervention',
        'miccai', '2015',
    ]

    left = title
    right = prefix + title
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_one_clutter_before_after_repeatword_after(self):
    # The second reference wraps the title in clutter on both sides; only
    # the trailing clutter repeats a title word ('semantic').
    title = [
        'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
        'convolutional', 'nets', 'atrous', 'convolution', 'fully',
        'connected', 'crfs',
    ]
    prefix = [
        'torch7', 'matlab-like', 'environment', 'machine', 'learning',
        'biglearn', 'nips', 'workshop', 'dai', 'sun', '2015',
    ]
    suffix = [
        '3rd', 'workshop', 'semantic', 'perception', 'mapping',
        'exploration', 'spme',
    ]

    left = title
    right = prefix + title + suffix
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_one_clutter_before_after_repeatword_before(self):
    # The second reference wraps the title in clutter on both sides; only
    # the leading clutter repeats a title word ('image').
    title = [
        'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
        'convolutional', 'nets', 'atrous', 'convolution', 'fully',
        'connected', 'crfs',
    ]
    prefix = [
        'navab', 'hornegger', 'wells', 'frangi', 'eds', 'medical',
        'image', 'computing', 'computer-assisted', 'intervention',
        'miccai', '2015',
    ]
    suffix = [
        'volume', 'jmlr', 'proceedings', 'pages', '195-206', 'jmlrorg',
        '2012', 'karen', 'simonyan', 'andrew', 'zisserman',
    ]

    left = title
    right = prefix + title + suffix
    shared = set(left) & set(right)
    self.assertTrue(arrays_contain_same_reference(left, right, shared))
def test_noclutter_invalid(self):
    # The first two fixture titles, with nothing added to either, are
    # expected NOT to be reported as the same reference.
    left = paper_titles[0]
    right = paper_titles[1]
    shared = set(left) & set(right)
    self.assertFalse(arrays_contain_same_reference(left, right, shared))
def test_noclutter(self):
    # Every fixture title, compared with itself and with no clutter on
    # either side, is expected to be reported as the same reference.
    for title in paper_titles:
        # A set intersected with itself is just that set, so the words
        # shared by (title, title) are exactly the title's own words.
        common = set(title)
        self.assertTrue(
            arrays_contain_same_reference(title, title, common),
            # Name the offending title so a failure is easy to trace.
            msg="title was not matched against itself: %r" % (title,))