def _adaptiveAggregation(V, n, yIntervals, weightF, param, freq):
    '''Apply adaptive aggregation algorithm to the given vocabulary.

    Algorithm 2 from paper.

    For every time frame produced by `_arrangeIntervals`, the scores of each
    word are summed over all periods in that frame, each contribution being
    weighted by `f(mu_v, mu_t)`, where `mu_v` is the middle of the period and
    `mu_t` the middle of the whole frame.

    Arguments:
    V           Mapping indexed by period key. Each value is iterated as
                (word, score) pairs.
    n           Number of top scoring words kept for each time frame.
    yIntervals  Passed through to `_arrangeIntervals`.
    weightF     Passed to `_selectWeightingFunction` together with `param`.
    param       Parameter for the selected weighting function.
    freq        Passed through to `_arrangeIntervals`.

    Returns a tuple `(finalVocabs, periodGroups)` of SortedDicts, both keyed
    by `str(int(mu_t))`:
    finalVocabs   The (at most) n `(word, score)` pairs with highest
                  aggregated score, in descending score order.
    periodGroups  The time frame (group of period keys) that was aggregated.
    '''
    # Initialize returned parameters
    finalVocabs = SortedDict()
    periodGroups = SortedDict()

    # Select weighting function
    f = _selectWeightingFunction(weightF, param)

    # Iterate over time frames
    for t in _arrangeIntervals(V, yIntervals, freq):
        mu_t = getRangeMiddle(t[0], t[-1])
        V_prime = SortedDict({tx: V[tx] for tx in t})

        score = defaultdict(float)
        # .items() (rather than the Python 2 only .iteritems()) keeps this
        # working on both Python 2 and Python 3.
        for years_v, words_v in V_prime.items():
            mu_v = getRangeMiddle(years_v)
            fvt = f(mu_v, mu_t)
            for word, score_wv in words_v:
                score[word] += fvt * score_wv

        # Top n terms w sorted by score_w. sorted() is stable, so words with
        # equal score keep the order in which they were first accumulated.
        scoreList = sorted(score.items(),
                           key=lambda pair: pair[1],
                           reverse=True)
        topN = scoreList[:n]

        # Frames whose middle truncates to the same integer share a key; the
        # later frame then replaces the earlier one.
        label = str(int(mu_t))
        finalVocabs[label] = topN
        periodGroups[label] = t
    return finalVocabs, periodGroups
def doSpaceEmbedding(monitor, results, aggMetadata):
    '''Create 2D word embedding from given set of results.

    Each entry of `results` is embedded via `_getPairwiseDistances` and
    `_getMDSEmbedding`. From the second entry onwards, the new locations are
    mapped through the transform returned by `_findTransform` against the
    previous entry's words and locations. Every cloud is then passed through
    `_normalizeCloud`.

    Arguments:
    monitor      Object whose `_models` attribute is indexed by the labels
                 of `results`.
    results      Mapping of label -> iterable of (word, value) pairs; only
                 the word is used here.
    aggMetadata  Iterable of keys to keep in the output. Every key must be
                 one of the `str(int(getRangeMiddle(label)))` keys produced
                 here, otherwise a KeyError is raised.

    Returns a SortedDict mapping each key of `aggMetadata` to a list with one
    `wordLocationAsDict(word, location)` item per word.
    '''
    # NOTE(review): doSpaceEmbedding is defined a second time right after
    # this definition; the later one is the one bound at import time.
    # Consider removing one of the two.
    embeddedResults = SortedDict()

    wordsT0 = None
    locsT0 = None
    # .items() (rather than the Python 2 only .iteritems()) keeps this
    # working on both Python 2 and Python 3.
    for label, r in results.items():
        model = monitor._models[label]
        wordsT1 = [w for w, _ in r]
        dists = _getPairwiseDistances(wordsT1, model)
        locsT1 = _getMDSEmbedding(dists)
        if wordsT0 is not None:
            # Map this cloud using the transform computed against the
            # previous cloud.
            T = _findTransform(wordsT0, locsT0, wordsT1, locsT1)
            locsT1 = locsT1.dot(T)
        locsT1 = _normalizeCloud(locsT1)

        # The current cloud becomes the reference for the next iteration
        wordsT0 = wordsT1
        locsT0 = locsT1

        str_label = str(int(getRangeMiddle(label)))
        embeddedResults[str_label] = [
            wordLocationAsDict(word, locsT1[i, :])
            for i, word in enumerate(wordsT1)
        ]

    # Aggregation step (more like throwing away some years)
    embeddedResultsAgg = {year: embeddedResults[year] for year in aggMetadata}
    embeddedResultsAgg = SortedDict(embeddedResultsAgg)
    return embeddedResultsAgg
def doSpaceEmbedding(monitor, results, aggMetadata):
    '''Create 2D word embedding from given set of results.

    Each entry of `results` is embedded via `_getPairwiseDistances` and
    `_getMDSEmbedding`. From the second entry onwards, the new locations are
    mapped through the transform returned by `_findTransform` against the
    previous entry's words and locations. Every cloud is then passed through
    `_normalizeCloud`.

    Arguments:
    monitor      Object whose `_models` attribute is indexed by the labels
                 of `results`.
    results      Mapping of label -> iterable of (word, value) pairs; only
                 the word is used here.
    aggMetadata  Iterable of keys to keep in the output. Every key must be
                 one of the `str(int(getRangeMiddle(label)))` keys produced
                 here, otherwise a KeyError is raised.

    Returns a SortedDict mapping each key of `aggMetadata` to a list with one
    `wordLocationAsDict(word, location)` item per word.
    '''
    embeddedResults = SortedDict()

    wordsT0 = None
    locsT0 = None
    # .items() (rather than the Python 2 only .iteritems()) keeps this
    # working on both Python 2 and Python 3.
    for label, r in results.items():
        model = monitor._models[label]
        wordsT1 = [w for w, _ in r]
        dists = _getPairwiseDistances(wordsT1, model)
        locsT1 = _getMDSEmbedding(dists)
        if wordsT0 is not None:
            # Map this cloud using the transform computed against the
            # previous cloud.
            T = _findTransform(wordsT0, locsT0, wordsT1, locsT1)
            locsT1 = locsT1.dot(T)
        locsT1 = _normalizeCloud(locsT1)

        # The current cloud becomes the reference for the next iteration
        wordsT0 = wordsT1
        locsT0 = locsT1

        str_label = str(int(getRangeMiddle(label)))
        embeddedResults[str_label] = [
            wordLocationAsDict(word, locsT1[i, :])
            for i, word in enumerate(wordsT1)
        ]

    # Aggregation step (more like throwing away some years)
    embeddedResultsAgg = {year: embeddedResults[year] for year in aggMetadata}
    embeddedResultsAgg = SortedDict(embeddedResultsAgg)
    return embeddedResultsAgg