def generateExperimentData2(self, fixedType):
    """Run the dimension-estimation experiment, variant 2.

    With fixedType, sweep the first-n-dimension configuration over
    dimension counts from 10k to 200k (each rounded down to a prime);
    otherwise perform a single run with the top-n-dimension setting.
    """
    global previousTime
    settings = experts_twitter_stream_settings
    settings['cluster_analysis_method'] = JustifyDimensionsEstimation.modifiedClusterAnalysisMethod2
    if fixedType:
        settings['dimensions_performance_type'] = JustifyDimensionsEstimation.first_n_dimension
        # experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
        # Neutralize all decay so the dimension count is the only factor
        # that varies between runs of the sweep.
        settings['phrase_decay_coefficient'] = 1.0
        settings['stream_decay_coefficient'] = 1.0
        settings['stream_cluster_decay_coefficient'] = 1.0
        for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
            settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
            previousTime = time.time()
            HDStreaminClustering(**settings).cluster(
                TwitterIterators.iterateTweetsFromExperts(
                    expertsDataStartTime=datetime(2011, 3, 19),
                    expertsDataEndTime=datetime(2011, 3, 20, 5)))
    else:
        settings['dimensions_performance_type'] = JustifyDimensionsEstimation.top_n_dimension
        previousTime = time.time()
        HDStreaminClustering(**settings).cluster(
            TwitterIterators.iterateTweetsFromExperts(
                expertsDataStartTime=datetime(2011, 3, 19),
                expertsDataEndTime=datetime(2011, 3, 20, 5)))
def generateExperimentData2(self, fixedType):
    """Run the dimension-estimation experiment, variant 2.

    fixedType=True sweeps the first-n-dimension configuration across
    increasing dimension counts; fixedType=False performs one run with
    the top-n-dimension configuration. Both use the same data window.
    """
    global previousTime
    data_window = dict(expertsDataStartTime=datetime(2011, 3, 19),
                       expertsDataEndTime=datetime(2011, 3, 20, 5))
    experts_twitter_stream_settings["cluster_analysis_method"] = (
        JustifyDimensionsEstimation.modifiedClusterAnalysisMethod2)
    if fixedType:
        experts_twitter_stream_settings["dimensions_performance_type"] = (
            JustifyDimensionsEstimation.first_n_dimension)
        # experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
        # Disable every decay coefficient so only dimensionality varies.
        for coefficient in ("phrase_decay_coefficient",
                            "stream_decay_coefficient",
                            "stream_cluster_decay_coefficient"):
            experts_twitter_stream_settings[coefficient] = 1.0
        for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
            experts_twitter_stream_settings["dimensions"] = getLargestPrimeLesserThan(dimensions)
            previousTime = time.time()
            HDStreaminClustering(**experts_twitter_stream_settings).cluster(
                TwitterIterators.iterateTweetsFromExperts(**data_window))
    else:
        experts_twitter_stream_settings["dimensions_performance_type"] = (
            JustifyDimensionsEstimation.top_n_dimension)
        previousTime = time.time()
        HDStreaminClustering(**experts_twitter_stream_settings).cluster(
            TwitterIterators.iterateTweetsFromExperts(**data_window))
def generate(self):
    """Write the first self.length expert tweets to self.fileName as
    JSON, then gzip the resulting file in place."""
    written = 0
    for tweet in TwitterIterators.iterateTweetsFromExperts():
        FileIO.writeToFileAsJson(tweet, self.fileName)
        written += 1
        if written == self.length:
            break
    os.system('gzip %s' % self.fileName)
def generateExperimentData(self, with_vanilla_lsh):
    """LSH-justification experiment: run one clustering pass with either
    the vanilla LSH baseline or the modified LSH scheme."""
    global previousTime
    conf = experts_twitter_stream_settings
    if with_vanilla_lsh:
        conf['lsh_type'] = JustifyNotUsingVanillaLSH.with_vanilla_lsh
        # Vanilla baseline: decay disabled, no cluster filtering, plain
        # list signatures, fixed prime dimensionality, no dimension updates.
        conf['phrase_decay_coefficient'] = 1.0
        conf['stream_decay_coefficient'] = 1.0
        conf['stream_cluster_decay_coefficient'] = 1.0
        conf['cluster_filtering_method'] = emptyClusterFilteringMethod
        conf['signature_type'] = 'signature_type_list'
        conf['dimensions'] = getLargestPrimeLesserThan(10000)
        conf['update_dimensions_method'] = emptyUpdateDimensionsMethod
    else:
        conf['lsh_type'] = JustifyNotUsingVanillaLSH.with_modified_lsh
        conf['cluster_analysis_method'] = JustifyNotUsingVanillaLSH.modifiedClusterAnalysisMethod
    previousTime = time.time()
    HDStreaminClustering(**conf).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 3, 19),
            expertsDataEndTime=datetime(2011, 3, 27)))
def generateExperimentData(self, withOutPruning):
    """Memory-pruning experiment: cluster the expert stream with pruning
    either disabled (plus no cluster filtering) or enabled (plus the
    pruning-specific analysis method)."""
    global previousTime
    # NOTE(review): the settings key is spelled 'pruing_type' (sic); it must
    # match the consumer's spelling, so confirm before renaming it anywhere.
    if withOutPruning:
        experts_twitter_stream_settings['cluster_filtering_method'] = emptyClusterFilteringMethod
        experts_twitter_stream_settings['pruing_type'] = JustifyMemoryPruning.without_memory_pruning
    else:
        experts_twitter_stream_settings['pruing_type'] = JustifyMemoryPruning.with_memory_pruning
        experts_twitter_stream_settings['cluster_analysis_method'] = JustifyMemoryPruning.modifiedClusterAnalysisMethod
    previousTime = time.time()
    HDStreaminClustering(**experts_twitter_stream_settings).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 3, 19),
            expertsDataEndTime=datetime(2011, 3, 27)))
def generate(self):
    """Dump expert tweets to self.fileName as JSON until self.length
    tweets have been written, then gzip the file."""
    for count, tweet in enumerate(TwitterIterators.iterateTweetsFromExperts(), 1):
        FileIO.writeToFileAsJson(tweet, self.fileName)
        if count == self.length:
            break
    os.system('gzip %s' % self.fileName)
def generateExperimentData(self, withOutDecay):
    """Exponential-decay experiment: cluster one week of the expert
    stream with decay either neutralized or active."""
    global previousTime
    conf = experts_twitter_stream_settings
    if withOutDecay:
        conf['decay_type'] = JustifyExponentialDecay.without_decay
        # Coefficients of 1.0 make every decay step a no-op.
        conf['phrase_decay_coefficient'] = 1.0
        conf['stream_decay_coefficient'] = 1.0
        conf['stream_cluster_decay_coefficient'] = 1.0
    else:
        conf['decay_type'] = JustifyExponentialDecay.with_decay
        conf['cluster_analysis_method'] = JustifyExponentialDecay.modifiedClusterAnalysisMethod
    previousTime = time.time()
    HDStreaminClustering(**conf).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 4, 1),
            expertsDataEndTime=datetime(2011, 4, 8)))
def generateExperimentData(self, withoutTrie):
    """Trie-justification experiment: cluster the expert stream using
    either a sorted-list signature store or the trie-based one."""
    global previousTime
    settings = experts_twitter_stream_settings
    if withoutTrie:
        settings['trie_type'] = JustifyTrie.with_sorted_list
        settings['signature_type'] = 'signature_type_list'
    else:
        settings['trie_type'] = JustifyTrie.with_trie
        settings['cluster_analysis_method'] = JustifyTrie.modifiedClusterAnalysisMethod
        settings['cluster_filtering_method'] = emptyClusterFilteringMethod
    previousTime = time.time()
    HDStreaminClustering(**settings).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 3, 19),
            expertsDataEndTime=datetime(2011, 3, 27)))
def generateDocsByLength(): for length in [1500000]: # for length in [150]: fileName = clustering_quality_experts_folder + 'data/%s' % str( length) print fileName i = 0 for tweet in TwitterIterators.iterateTweetsFromExperts(): FileIO.writeToFileAsJson(tweet, fileName) i += 1 if i == length: break
def generateExperimentData(self):
    """Sweep the first-n-dimension configuration over dimension counts
    from 10k to 200k (each rounded down to a prime) and cluster the full
    expert-tweet stream at each setting.

    A failure at one dimension count is reported and the sweep continues
    with the remaining counts.
    """
    import traceback  # local import: file's import header not in view
    global previousTime
    experts_twitter_stream_settings['dimensions_performance_type'] = JustifyDimensionsEstimation.first_n_dimension
    experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
    experts_twitter_stream_settings['cluster_analysis_method'] = JustifyDimensionsEstimation.modifiedClusterAnalysisMethod
    for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
        experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
        previousTime = time.time()
        try:
            HDStreaminClustering(**experts_twitter_stream_settings).cluster(
                TwitterIterators.iterateTweetsFromExperts())
        except Exception:
            # Previously a silent `pass`, which hid every failure; report
            # the error so a broken run is visible, but keep sweeping.
            traceback.print_exc()
def generateDocsByLength(): for length in [1500000]: # for length in [150]: fileName = clustering_quality_experts_folder + "data/%s" % str(length) print fileName i = 0 for tweet in TwitterIterators.iterateTweetsFromExperts(): FileIO.writeToFileAsJson(tweet, fileName) i += 1 if i == length: break
def generateExperimentData(self, with_vanilla_lsh):
    """LSH-justification experiment: one clustering pass with either the
    vanilla LSH baseline configuration or the modified LSH scheme."""
    global previousTime
    if with_vanilla_lsh:
        # Vanilla baseline: no decay, no filtering, list signatures,
        # fixed prime dimensionality, and no dimension updates.
        experts_twitter_stream_settings.update({
            'lsh_type': JustifyNotUsingVanillaLSH.with_vanilla_lsh,
            'phrase_decay_coefficient': 1.0,
            'stream_decay_coefficient': 1.0,
            'stream_cluster_decay_coefficient': 1.0,
            'cluster_filtering_method': emptyClusterFilteringMethod,
            'signature_type': 'signature_type_list',
            'dimensions': getLargestPrimeLesserThan(10000),
            'update_dimensions_method': emptyUpdateDimensionsMethod,
        })
    else:
        experts_twitter_stream_settings.update({
            'lsh_type': JustifyNotUsingVanillaLSH.with_modified_lsh,
            'cluster_analysis_method': JustifyNotUsingVanillaLSH.modifiedClusterAnalysisMethod,
        })
    previousTime = time.time()
    HDStreaminClustering(**experts_twitter_stream_settings).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 3, 19),
            expertsDataEndTime=datetime(2011, 3, 27)))
def generateClusters():
    """Cluster the full expert-tweet stream with the current settings."""
    analyzer = TwitterStreamAnalysis(**experts_twitter_stream_settings)
    analyzer.generateClusters(TwitterIterators.iterateTweetsFromExperts())