Пример #1
0
class UtilityMethodsTests(unittest.TestCase):
    """Tests for UtilityMethods.updatePhraseTextAndDimensionsMap.

    Each test starts from a dimension map holding 'project' -> 0 and
    'cluster' -> 1, with settings['dimensions'] set to 2.
    """

    # Marker meaning "settings had no 'dimensions' entry before the test".
    _NO_VALUE = object()

    def setUp(self):
        # Two phrases already present in the map plus two that are new to it.
        self.phraseVector = {
            'project': 1,
            'cluster': 1,
            'highdimensional': 1,
            'streams': 1,
        }
        self.phraseTextAndDimensionMap = TwoWayMap()
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'project', 0)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 1)
        # Forward map expected once all four phrases have been given an id.
        self.finalPhraseToIdMap = {
            'project': 0,
            'cluster': 1,
            'streams': 2,
            'highdimensional': 3,
        }
        # settings is defined outside this class, so remember the value we
        # are about to overwrite; tearDown puts it back.
        # NOTE(review): assumes settings is dict-like (get/pop) - confirm.
        self._savedDimensions = settings.get('dimensions', self._NO_VALUE)
        settings['dimensions'] = 2

    def tearDown(self):
        # Undo the changes made to settings['dimensions'] by setUp and by
        # individual tests so they do not leak into other test cases.
        if self._savedDimensions is self._NO_VALUE:
            settings.pop('dimensions', None)
        else:
            settings['dimensions'] = self._savedDimensions

    def test_updatePhraseTextAndDimensionsMap_PhraseMapHasLesserDimensions(self):
        # With room for 4 dimensions, the forward map is expected to end up
        # equal to finalPhraseToIdMap (all four phrases mapped).
        settings['dimensions'] = 4
        UtilityMethods.updatePhraseTextAndDimensionsMap(
            self.phraseVector, self.phraseTextAndDimensionMap, **settings)
        self.assertEqual(
            self.finalPhraseToIdMap,
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updatePhraseTextAndDimensionsMap_PhraseMapHasMaximumDimensions(self):
        # With dimensions left at 2, the forward map is expected to still
        # contain only 'project' and 'cluster'.
        UtilityMethods.updatePhraseTextAndDimensionsMap(
            self.phraseVector, self.phraseTextAndDimensionMap, **settings)
        for k in ['streams', 'highdimensional']:
            del self.finalPhraseToIdMap[k]
        self.assertEqual(
            self.finalPhraseToIdMap,
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
Пример #2
0
class UtilityMethodsTests(unittest.TestCase):
    """Tests for UtilityMethods.updatePhraseTextAndDimensionsMap.

    The fixture is a dimension map with 'project' -> 0 and 'cluster' -> 1,
    and settings['dimensions'] forced to 2 for the duration of each test.
    """

    # Sentinel: settings contained no 'dimensions' key when setUp ran.
    _MISSING = object()

    def setUp(self):
        # Phrase vector mixing phrases the map knows with ones it does not.
        self.phraseVector = {
            'project': 1,
            'cluster': 1,
            'highdimensional': 1,
            'streams': 1
        }
        self.phraseTextAndDimensionMap = TwoWayMap()
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'project', 0)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 1)
        # Expected forward map when every phrase in the vector gets an id.
        self.finalPhraseToIdMap = {
            'project': 0,
            'cluster': 1,
            'streams': 2,
            'highdimensional': 3
        }
        # settings lives outside this class; keep the previous value so that
        # tearDown can restore it after the test has modified it.
        # NOTE(review): assumes settings is dict-like (get/pop) - confirm.
        self._previousDimensions = settings.get('dimensions', self._MISSING)
        settings['dimensions'] = 2

    def tearDown(self):
        # Restore settings['dimensions'] so later tests see the value they
        # would have seen had this test case never run.
        if self._previousDimensions is self._MISSING:
            settings.pop('dimensions', None)
        else:
            settings['dimensions'] = self._previousDimensions

    def test_updatePhraseTextAndDimensionsMap_PhraseMapHasLesserDimensions(
            self):
        # Map holds 2 phrases but 4 dimensions are allowed: all four phrases
        # are expected in the forward map afterwards.
        settings['dimensions'] = 4
        UtilityMethods.updatePhraseTextAndDimensionsMap(
            self.phraseVector, self.phraseTextAndDimensionMap, **settings)
        self.assertEqual(
            self.finalPhraseToIdMap,
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updatePhraseTextAndDimensionsMap_PhraseMapHasMaximumDimensions(
            self):
        # Map already holds 2 phrases and only 2 dimensions are allowed: the
        # forward map is expected to stay at 'project' and 'cluster'.
        UtilityMethods.updatePhraseTextAndDimensionsMap(
            self.phraseVector, self.phraseTextAndDimensionMap, **settings)
        for k in ['streams', 'highdimensional']:
            del self.finalPhraseToIdMap[k]
        self.assertEqual(
            self.finalPhraseToIdMap,
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
class UtilityMethodsTests(unittest.TestCase):
    """Tests for the phrase bookkeeping helpers exposed by UtilityMethods.

    setUp builds a dimension map holding 'project' -> 0 and 'cluster' -> 1,
    a phrase-object map holding 'project', 'cluster' and 'abcd' (each with
    score 8), and sets stream_settings['dimensions'] to 2.  tearDown restores
    the 'dimensions' value that was in place before the test.
    """

    def setUp(self):
        self.phraseVector = {
            'project': 1,
            'cluster': 1,
            'highdimensional': 1,
            'streams': 1,
        }
        self.phraseTextAndDimensionMap = TwoWayMap()
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'project', 0)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 1)
        # 'abcd' is last seen three inactivity periods before test_time.
        # NOTE(review): presumably the setting is a timedelta - confirm.
        self.phraseTextToPhraseObjectMap = {
            'project': Phrase('project', test_time, score=8),
            'cluster': Phrase('cluster', test_time, score=8),
            'abcd': Phrase(
                'abcd',
                test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
                score=8),
        }
        self.vector = Vector({0: 1, 1: 1, 2: 1, 3: 1})
        # stream_settings is defined outside this class; save the value that
        # tearDown must restore before overriding it.
        self.initial_max_dimensions = stream_settings['dimensions']
        stream_settings['dimensions'] = 2

    def tearDown(self):
        stream_settings['dimensions'] = self.initial_max_dimensions

    def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self):
        # Update 60 seconds after test_time.  The two phrases missing from the
        # object map are expected to be added (3 + 2 = 5 entries).
        # NOTE(review): the expected score 5 for 'project' presumably reflects
        # decay of the initial 8 plus the new occurrence - confirm.
        UtilityMethods.updatePhraseTextToPhraseObject(
            self.phraseVector,
            test_time + timedelta(seconds=60),
            self.phraseTextToPhraseObjectMap,
            **stream_settings)
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

    def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(self):
        # 'cluster' has a phrase object but no dimension, and the single
        # allowed dimension is taken by 'project'.  The dimension map must be
        # left alone while the phrase objects are still updated.
        stream_settings['dimensions'] = 1
        self.phraseTextAndDimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')
        UtilityMethods.updatePhraseTextToPhraseObject(
            self.phraseVector,
            test_time + timedelta(seconds=60),
            self.phraseTextToPhraseObjectMap,
            **stream_settings)
        self.assertEqual(
            {'project': 0},
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['cluster'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

    def test_createOrAddNewPhraseObject(self):
        # One unseen phrase and one existing phrase, both at test_time: the
        # map grows from 3 to 4 entries, the new phrase scores 1 and the
        # existing 'project' goes from 8 to 9.
        UtilityMethods.createOrAddNewPhraseObject(
            'new_phrase', self.phraseTextToPhraseObjectMap, test_time,
            **stream_settings)
        UtilityMethods.createOrAddNewPhraseObject(
            'project', self.phraseTextToPhraseObjectMap, test_time,
            **stream_settings)
        self.assertEqual(4, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['new_phrase'].score)
        self.assertEqual(9, self.phraseTextToPhraseObjectMap['project'].score)

    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions(self):
        # Both dimensions are in use; 'added' (score 10) outscores the
        # existing phrases (score 8) and is expected to take dimension 1.
        self.phraseTextToPhraseObjectMap['added'] = Phrase(
            'added', test_time, score=10)
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap,
            test_time, **stream_settings)
        self.assertEqual(
            {'project': 0, 'added': 1},
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions_and_entire_map_is_changed(self):
        # Two higher-scoring phrases ('added' = 10, 'are' = 11) are expected
        # to replace both existing dimensions.
        for phrase, score in zip(['added', 'are'], range(10, 12)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(
                phrase, test_time, score=score)
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap,
            test_time, **stream_settings)
        self.assertEqual(
            {'added': 1, 'are': 0},
            self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updateDimensions_when_phraseTextToIdMap_has_lesser_than_max_dimensions(self):
        # Four dimensions allowed, new phrases scored 7..10.
        stream_settings['dimensions'] = 4
        for phrase, score in zip(['new', 'phrases', 'are', 'added'], range(7, 11)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(
                phrase, test_time, score=score)
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap,
            test_time, **stream_settings)
        # Only the set of phrases is checked here, not the ids they receive.
        self.assertEqual(
            set(['project', 'phrases', 'are', 'added']),
            set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))
        self.assertEqual(4, len(self.phraseTextAndDimensionMap))

    def test_updateDimensions_when_phrases_with_lower_id_are_removed_from_phraseTextToIdMap(self):
        # After the update the assigned dimension ids must be exactly 0, 1, 2.
        stream_settings['dimensions'] = 3
        for phrase, score in zip(['new', 'phrases', 'are'], range(100, 103)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(
                phrase, test_time, score=score)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 2)
        self.phraseTextToPhraseObjectMap['cluster'].score = 100
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap,
            test_time, **stream_settings)
        # list(...) keeps the comparison valid where range() is not a list.
        self.assertEqual(
            list(range(3)),
            sorted(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD).values()))

    def test_updateDimensions_remove_old_phrases(self):
        phrase_map = self.phraseTextToPhraseObjectMap
        stale_time = phrase_map['abcd'].latestOccuranceTime
        # While 'abcd' looks recently seen it must survive the update...
        phrase_map['abcd'].latestOccuranceTime = test_time
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, phrase_map, test_time,
            **stream_settings)
        self.assertTrue('abcd' in phrase_map)
        # ...and once its old timestamp is restored it must be dropped.
        phrase_map['abcd'].latestOccuranceTime = stale_time
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, phrase_map, test_time,
            **stream_settings)
        self.assertTrue('abcd' not in phrase_map)

    def test_updateDimensions_when_dimensions_have_to_be_removed(self):
        # 'abcdx' and 'abcdxy' hold dimensions but have no phrase object, and
        # 'cluster' is made stale; only 'project' and the newly added
        # 'new_text' are expected to remain in the dimension map.
        stream_settings['dimensions'] = 4
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdx', 2)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdxy', 3)
        self.phraseTextToPhraseObjectMap['new_text'] = Phrase(
            'new_text', test_time, score=7)
        self.phraseTextToPhraseObjectMap['cluster'].latestOccuranceTime = (
            test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'])
        UtilityMethods.updateDimensions(
            self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap,
            test_time, **stream_settings)
        # Only the set of phrases is checked here, not the ids they receive.
        self.assertEqual(
            set(['project', 'new_text']),
            set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))

    def test_checkCriticalErrorsInPhraseTextToIdMap_larger_than_expected_dimensions(self):
        # Imported locally so the class does not depend on a file-level import.
        import sys
        # A third entry with dimensions == 2 must make the check exit.
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'sdfsd', 3)
        # Prefix for whatever the check prints before exiting; written
        # without a newline so the two appear on one line.
        sys.stdout.write('Ignore this message: ')
        self.assertRaises(
            SystemExit,
            UtilityMethods.checkCriticalErrorsInPhraseTextToIdMap,
            self.phraseTextAndDimensionMap,
            **stream_settings)

    def test_pruningConditionDeterministic(self):
        # A phrase last seen three inactivity periods ago must be prunable;
        # one seen at test_time must not be.
        phrase1 = Phrase(
            'dsf',
            test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
            1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(UtilityMethods.pruningConditionDeterministic(
            phrase1, test_time, **stream_settings))
        self.assertFalse(UtilityMethods.pruningConditionDeterministic(
            phrase2, test_time, **stream_settings))

    def test_pruningConditionRandom(self):
        # Same two phrases as the deterministic test.
        # NOTE(review): if this condition is genuinely randomised the
        # assertions may be flaky - confirm against UtilityMethods.
        phrase1 = Phrase(
            'dsf',
            test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
            1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(UtilityMethods.pruningConditionRandom(
            phrase1, test_time, **stream_settings))
        self.assertFalse(UtilityMethods.pruningConditionRandom(
            phrase2, test_time, **stream_settings))

    def test_pruneUnnecessaryPhrases(self):
        # The stale phrase 'dsf' must be removed; the fresh 'abc' must stay.
        phraseTextToPhraseObjectMap = {
            'dsf': Phrase(
                'dsf',
                test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
                1),
            'abc': Phrase('abc', test_time, 1),
        }
        UtilityMethods.pruneUnnecessaryPhrases(
            phraseTextToPhraseObjectMap, test_time,
            UtilityMethods.pruningConditionRandom, **stream_settings)
        self.assertTrue('dsf' not in phraseTextToPhraseObjectMap)
        self.assertTrue('abc' in phraseTextToPhraseObjectMap)