def testGetCollocationVector(self):
    """Check Collocation.get_vector against a hand-computed vector.

    A reference dict is built from one context via
    Collocation.get_reference and served by a stub word object; the
    vector computed for "and @began@ to" must have 11 entries and equal
    the hand-written expectation below.

    Raises:
        Exception: if Collocation.COLLOCATION_BOUNDS differs from the
            value this test was written against.
    """
    # This test is handwritten, and is dependent on COLLOCATION_BOUNDS being
    # as they were on 3/23/2012.
    bounds_on_2012_03_23 = [
        (-1, -1), (1, 1), (-2, -2), (2, 2), (-2, -1), (-1, 1),
        (1, 2), (-3, -1), (-2, 1), (-1, 2), (1, 3),
    ]
    if Collocation.COLLOCATION_BOUNDS != bounds_on_2012_03_23:
        raise Exception("COLLOCATION_BOUNDS need to be as they were on 3/23/2012")

    # The literal is kept on one line so its whitespace stays exactly as given.
    context_string = '''He focused , stared , reached out and picked it up. It was light as filigree in his hand , the heel held to the shoe only by one fragment of cleanly cut leather . He held it in his hands and @began@ to blub . Oh master Conroy , do n't ! Don't upset yourself , lovie , do n't take on so !'''
    calculated_dict = Collocation.get_reference([context_string])

    # Minimal stand-in for a word object: get_vector is handed this stub,
    # which only exposes the reference dict computed above.
    class Word():
        def get_collocation_reference_vector(self):
            return calculated_dict

    w = Word()
    sample_context = "and @began@ to"

    # For reference:
    # {'and':0, 'to':1, 'hands':2, 'blub':3, 'hands and':4, 'and to':5,
    #  'to blub': 6, 'his hands and':7, 'hands and to':8, 'and to blub':9,
    #  'to blub .':10 }
    expected_collocation_vector = [1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
    calculated_collocation_vector = Collocation.get_vector(sample_context, w)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(calculated_collocation_vector), 11)
    self.assertEqual(expected_collocation_vector, calculated_collocation_vector)
def testGetCollocationReference(self):
    """Check Collocation.get_reference against a hand-written dict.

    The reference built from a single context must contain 11 entries
    and equal the expected mapping of (bounds, text) keys to indices
    listed below.

    Raises:
        Exception: if Collocation.COLLOCATION_BOUNDS differs from the
            value this test was written against.
    """
    # This test is handwritten, and is dependent on COLLOCATION_BOUNDS being
    # as they were on 3/23/2012.
    bounds_on_2012_03_23 = [
        (-1, -1), (1, 1), (-2, -2), (2, 2), (-2, -1), (-1, 1),
        (1, 2), (-3, -1), (-2, 1), (-1, 2), (1, 3),
    ]
    if Collocation.COLLOCATION_BOUNDS != bounds_on_2012_03_23:
        raise Exception("COLLOCATION_BOUNDS need to be as they were on 3/23/2012")

    # The literal is kept on one line so its whitespace stays exactly as given.
    context_string = '''He focused , stared , reached out and picked it up. It was light as filigree in his hand , the heel held to the shoe only by one fragment of cleanly cut leather . He held it in his hands and @began@ to blub . Oh master Conroy , do n't ! Don't upset yourself , lovie , do n't take on so !'''
    calculated_dict = Collocation.get_reference([context_string])

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(11, len(calculated_dict))

    # Keys pair a bounds tuple with the text expected for it; values are
    # the indices the entries are expected to receive.
    expected_dict = {
        ((-1, -1), 'and'): 0,
        ((1, 1), 'to'): 1,
        ((-2, -2), 'hands'): 2,
        ((2, 2), 'blub'): 3,
        ((-2, -1), 'hands and'): 4,
        ((-1, 1), 'and to'): 5,
        ((1, 2), 'to blub'): 6,
        ((-3, -1), 'his hands and'): 7,
        ((-2, 1), 'hands and to'): 8,
        ((-1, 2), 'and to blub'): 9,
        ((1, 3), 'to blub .'): 10,
    }
    self.assertEqual(expected_dict, calculated_dict)