def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self):
    """Check phrase scores after an update issued 60 seconds past ``test_time``.

    Expects the phrase map to hold five entries, with 'project' scored 5
    and 'streams' scored 1.
    """
    phraseMap = self.phraseTextToPhraseObjectMap
    UtilityMethods.updatePhraseTextToPhraseObject(
        self.phraseVector,
        test_time + timedelta(seconds=60),
        phraseMap,
        **stream_settings)
    self.assertEqual(5, len(phraseMap))
    # Same assertions as before, driven from a table: (phrase, expected score).
    for phrase, expectedScore in (('project', 5), ('streams', 1)):
        self.assertEqual(expectedScore, phraseMap[phrase].score)
# Run the clustering loop over ``dataIterator``.
#
# Each item is converted with ``self.convertDataToMessageMethod`` and is
# processed only when ``DataStreamMethods.messageInOrder`` accepts its
# timestamp. For an accepted message the phrase map is updated first. A stream
# id not yet in ``self.streamIdToStreamObjectMap`` gets a new ``Stream`` that is
# passed straight to ``self.getClusterAndUpdateExistingClusters``; a known
# stream is snapshotted into a ``Vector``, updated with
# ``VectorUpdateMethods.exponentialDecay``, and passed to
# ``getClusterAndUpdateExistingClusters`` again when the Euclidean distance
# between the updated stream and the snapshot exceeds 10.
# ``clusterAnalysisMethod`` is called with ``time.time()`` and the message
# counter ``i`` as ``numberOfMessages``.
#
# NOTE(review): this definition is flattened onto one line, so block nesting
# cannot be confirmed from here - in particular whether the
# ``updateDimensionsMethod`` / ``clusterFilteringMethod`` calls belong to the
# ``distance>10`` branch, and at which level ``i`` is incremented. Verify
# against the original layout before restructuring.
def cluster(self, dataIterator): i = 1 for data in dataIterator: message = self.convertDataToMessageMethod(data, **self.stream_settings) # message = data if DataStreamMethods.messageInOrder(message.timeStamp): UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings) if message.streamId not in self.streamIdToStreamObjectMap: self.streamIdToStreamObjectMap[message.streamId] = Stream(message.streamId, message) self.getClusterAndUpdateExistingClusters(self.streamIdToStreamObjectMap[message.streamId]) else: previousStreamObject=Vector(vectorInitialValues=self.streamIdToStreamObjectMap[message.streamId]) self.streamIdToStreamObjectMap[message.streamId].updateForMessage(message, VectorUpdateMethods.exponentialDecay, **self.stream_settings ) streamObject=self.streamIdToStreamObjectMap[message.streamId] distance = Vector.euclideanDistance(streamObject, previousStreamObject) if distance>10: # print i, len(self.clusters), distance self.getClusterAndUpdateExistingClusters(self.streamIdToStreamObjectMap[message.streamId]) self.updateDimensionsMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) self.clusterFilteringMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) # self.clusterAnalysisMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) self.clusterAnalysisMethod.call(time.time(), hdStreamClusteringObject=self, currentMessageTime=message.timeStamp, numberOfMessages=i) # print i, len(self.clusters) i+=1
def cluster(self, dataIterator):
    """Cluster the stream behind every in-order message of *dataIterator*.

    Each item is converted with ``self.convertDataToMessageMethod``; messages
    rejected by ``DataStreamMethods.messageInOrder`` are skipped. For an
    accepted message, in this order:

    1. the phrase map is updated from the message vector and timestamp;
    2. the message's stream is created on first sighting, otherwise updated
       with ``VectorUpdateMethods.exponentialDecay``;
    3. ``updateDimensionsMethod``, ``clusterFilteringMethod`` and
       ``clusterAnalysisMethod`` are called with the message timestamp;
    4. the stream is handed to ``self.getClusterAndUpdateExistingClusters``.

    Args:
        dataIterator: iterable of raw items accepted by
            ``self.convertDataToMessageMethod``.
    """
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        if not DataStreamMethods.messageInOrder(message.timeStamp):
            continue
        timeStamp = message.timeStamp
        UtilityMethods.updatePhraseTextToPhraseObject(
            message.vector, timeStamp,
            self.phraseTextToPhraseObjectMap, **self.stream_settings)

        # Re-read the map from self on every message: the hooks below receive
        # this object and could rebind the attribute between iterations.
        streams = self.streamIdToStreamObjectMap
        streamId = message.streamId
        if streamId in streams:
            streams[streamId].updateForMessage(
                message, VectorUpdateMethods.exponentialDecay,
                **self.stream_settings)
        else:
            streams[streamId] = Stream(streamId, message)
        streamObject = streams[streamId]

        # The hooks run before the stream itself is clustered.
        for hook in (self.updateDimensionsMethod,
                     self.clusterFilteringMethod,
                     self.clusterAnalysisMethod):
            hook.call(timeStamp, hdStreamClusteringObject=self,
                      currentMessageTime=timeStamp)
        self.getClusterAndUpdateExistingClusters(streamObject)
def run(self, dataIterator, estimationMethod, parameterSpecificDataCollectionMethod=None):
    """Drive *estimationMethod* over the in-order messages of *dataIterator*.

    Args:
        dataIterator: iterable of raw items; each is converted with
            ``self.convertDataToMessageMethod``.
        estimationMethod: wrapped in ``FixedIntervalMethod`` together with
            ``self.timeUnitInSeconds``; the wrapper's ``call`` is invoked
            with the timestamp of every in-order message.
        parameterSpecificDataCollectionMethod: optional callable. When it is
            not ``None`` it is invoked as
            ``f(estimationObject=self, message=message)`` for every in-order
            message, before the phrase map is updated.
    """
    # Keep the caller's argument intact; use a separate name for the wrapper.
    intervalEstimation = FixedIntervalMethod(estimationMethod, self.timeUnitInSeconds)
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        if not CDA.messageInOrder(message.timeStamp):
            continue
        # Identity test: a callable with a custom __ne__ must not be able to
        # disable (or force) the collection step.
        if parameterSpecificDataCollectionMethod is not None:
            parameterSpecificDataCollectionMethod(estimationObject=self, message=message)
        UtilityMethods.updatePhraseTextToPhraseObject(
            message.vector, message.timeStamp,
            self.phraseTextToPhraseObjectMap, **self.stream_settings)
        intervalEstimation.call(
            message.timeStamp, estimationObject=self,
            currentMessageTime=message.timeStamp)
def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(self):
    """Update the phrase map with ``dimensions`` set to 1 and 'cluster' unmapped.

    Sets ``stream_settings['dimensions']`` to 1 (the shared mapping is not
    restored here), removes 'cluster' from the ``MAP_FORWARD`` side of the
    dimension map and runs the update 60 seconds after ``test_time``.
    Afterwards the forward map must equal ``{'project': 0}`` and the phrase
    map must still hold five entries, scoring 5 / 5 / 1 for
    'project' / 'cluster' / 'streams'.
    """
    stream_settings['dimensions'] = 1
    dimensionMap = self.phraseTextAndDimensionMap
    dimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')

    phraseMap = self.phraseTextToPhraseObjectMap
    UtilityMethods.updatePhraseTextToPhraseObject(
        self.phraseVector,
        test_time + timedelta(seconds=60),
        phraseMap,
        **stream_settings)

    self.assertEqual({'project': 0}, dimensionMap.getMap(TwoWayMap.MAP_FORWARD))
    self.assertEqual(5, len(phraseMap))
    # (phrase, expected score) pairs, checked in the original order.
    for phrase, expectedScore in (('project', 5), ('cluster', 5), ('streams', 1)):
        self.assertEqual(expectedScore, phraseMap[phrase].score)
# Run the clustering loop over ``dataIterator``.
#
# Each item is converted with ``self.convertDataToMessageMethod`` and is
# processed only when ``DataStreamMethods.messageInOrder`` accepts its
# timestamp. For an accepted message the phrase map is updated first. A stream
# id not yet in ``self.streamIdToStreamObjectMap`` gets a new ``Stream`` that is
# passed straight to ``self.getClusterAndUpdateExistingClusters``; a known
# stream is snapshotted into a ``Vector``, updated with
# ``VectorUpdateMethods.exponentialDecay``, and passed to
# ``getClusterAndUpdateExistingClusters`` again when the Euclidean distance
# between the updated stream and the snapshot exceeds 10.
# ``clusterAnalysisMethod`` is called with ``time.time()`` and the message
# counter ``i`` as ``numberOfMessages``.
#
# NOTE(review): this definition is flattened onto one line, so block nesting
# cannot be confirmed from here - in particular whether the
# ``updateDimensionsMethod`` / ``clusterFilteringMethod`` calls belong to the
# ``distance > 10`` branch, and at which level ``i`` is incremented. Verify
# against the original layout before restructuring.
def cluster(self, dataIterator): i = 1 for data in dataIterator: message = self.convertDataToMessageMethod(data, **self.stream_settings) # message = data if DataStreamMethods.messageInOrder(message.timeStamp): UtilityMethods.updatePhraseTextToPhraseObject( message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings) if message.streamId not in self.streamIdToStreamObjectMap: self.streamIdToStreamObjectMap[message.streamId] = Stream( message.streamId, message) self.getClusterAndUpdateExistingClusters( self.streamIdToStreamObjectMap[message.streamId]) else: previousStreamObject = Vector( vectorInitialValues=self.streamIdToStreamObjectMap[ message.streamId]) self.streamIdToStreamObjectMap[ message.streamId].updateForMessage( message, VectorUpdateMethods.exponentialDecay, **self.stream_settings) streamObject = self.streamIdToStreamObjectMap[ message.streamId] distance = Vector.euclideanDistance( streamObject, previousStreamObject) if distance > 10: # print i, len(self.clusters), distance self.getClusterAndUpdateExistingClusters( self.streamIdToStreamObjectMap[message.streamId]) self.updateDimensionsMethod.call( message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) self.clusterFilteringMethod.call( message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) # self.clusterAnalysisMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp) self.clusterAnalysisMethod.call( time.time(), hdStreamClusteringObject=self, currentMessageTime=message.timeStamp, numberOfMessages=i) # print i, len(self.clusters) i += 1
def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(
        self):
    """Exercise the phrase update when ``dimensions`` is 1 and 'cluster' has no dimension.

    The shared ``stream_settings`` mapping gets ``dimensions`` set to 1 (it is
    not reset in this method) and 'cluster' is removed from the
    ``MAP_FORWARD`` side of the dimension map. After an update 60 seconds
    past ``test_time`` the forward map must be ``{'project': 0}``, the phrase
    map must contain five phrases, and 'project', 'cluster' and 'streams'
    must score 5, 5 and 1 respectively.
    """
    stream_settings['dimensions'] = 1
    self.phraseTextAndDimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')

    updateArguments = (
        self.phraseVector,
        test_time + timedelta(seconds=60),
        self.phraseTextToPhraseObjectMap,
    )
    UtilityMethods.updatePhraseTextToPhraseObject(
        *updateArguments, **stream_settings)

    forwardMap = self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)
    self.assertEqual({'project': 0}, forwardMap)

    phraseObjects = self.phraseTextToPhraseObjectMap
    self.assertEqual(5, len(phraseObjects))
    self.assertEqual(5, phraseObjects['project'].score)
    self.assertEqual(5, phraseObjects['cluster'].score)
    self.assertEqual(1, phraseObjects['streams'].score)
def cluster(self, dataIterator):
    """Process *dataIterator* message by message and cluster each message's stream.

    Items are converted with ``self.convertDataToMessageMethod``. A message
    whose timestamp is rejected by ``DataStreamMethods.messageInOrder`` is
    ignored. Otherwise the phrase map is updated, the stream for
    ``message.streamId`` is created (unknown id) or updated with
    ``VectorUpdateMethods.exponentialDecay`` (known id), the
    ``updateDimensionsMethod`` / ``clusterFilteringMethod`` /
    ``clusterAnalysisMethod`` hooks are called with the message timestamp,
    and the stream is passed to ``self.getClusterAndUpdateExistingClusters``.

    Args:
        dataIterator: iterable of raw items accepted by
            ``self.convertDataToMessageMethod``.
    """
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        if not DataStreamMethods.messageInOrder(message.timeStamp):
            continue

        UtilityMethods.updatePhraseTextToPhraseObject(
            message.vector, message.timeStamp,
            self.phraseTextToPhraseObjectMap, **self.stream_settings)

        # Look the map up once per message; it is read again from self on the
        # next iteration in case a hook replaced it.
        streamMap = self.streamIdToStreamObjectMap
        if message.streamId not in streamMap:
            streamMap[message.streamId] = Stream(message.streamId, message)
        else:
            streamMap[message.streamId].updateForMessage(
                message, VectorUpdateMethods.exponentialDecay,
                **self.stream_settings)
        streamObject = streamMap[message.streamId]

        hookArguments = dict(hdStreamClusteringObject=self,
                             currentMessageTime=message.timeStamp)
        self.updateDimensionsMethod.call(message.timeStamp, **hookArguments)
        self.clusterFilteringMethod.call(message.timeStamp, **hookArguments)
        self.clusterAnalysisMethod.call(message.timeStamp, **hookArguments)
        # Clustering happens last, after the hooks have run.
        self.getClusterAndUpdateExistingClusters(streamObject)
def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self):
    """Verify phrase scores once the map is updated 60 seconds after ``test_time``.

    The phrase map must contain five phrases; 'project' must score 5 and
    'streams' must score 1.
    """
    updateArguments = (
        self.phraseVector,
        test_time + timedelta(seconds=60),
        self.phraseTextToPhraseObjectMap,
    )
    UtilityMethods.updatePhraseTextToPhraseObject(
        *updateArguments, **stream_settings)

    phraseObjects = self.phraseTextToPhraseObjectMap
    self.assertEqual(5, len(phraseObjects))
    self.assertEqual(5, phraseObjects['project'].score)
    self.assertEqual(1, phraseObjects['streams'].score)