示例#1
0
def _adaptiveAggregation(V, n, yIntervals, weightF, param, freq):
    '''Apply adaptive aggregation algorithm to the given vocabulary.
    Algorithm 2 from paper.

    For every time frame yielded by ``_arrangeIntervals(V, yIntervals, freq)``
    the per-word scores of the frame's entries in ``V`` are accumulated, each
    entry being weighted by ``f(mu_v, mu_t)`` where ``f`` is the function
    returned by ``_selectWeightingFunction(weightF, param)``. The ``n``
    highest scoring words of each frame are kept.

    Parameters:
        V: mapping indexed by the keys contained in each time frame; every
            value is an iterable of ``(word, score)`` pairs.
        n: number of top-scoring words kept per time frame (used as a slice
            bound).
        yIntervals: forwarded unchanged to ``_arrangeIntervals``.
        weightF: forwarded unchanged to ``_selectWeightingFunction``.
        param: forwarded unchanged to ``_selectWeightingFunction``.
        freq: forwarded unchanged to ``_arrangeIntervals``.

    Returns:
        A ``(finalVocabs, periodGroups)`` tuple of ``SortedDict`` objects,
        both keyed by ``str(int(mu_t))``:
        ``finalVocabs`` maps to the list of ``(word, score)`` tuples sorted by
        descending score, ``periodGroups`` maps to the time frame itself.
    '''
    # Initialize returned parameters
    finalVocabs = SortedDict()
    periodGroups = SortedDict()

    # Select weighting function
    f = _selectWeightingFunction(weightF, param)

    # Iterate over time frames
    for t in _arrangeIntervals(V, yIntervals, freq):
        # Reference point of the whole frame, computed from its first and
        # last entries.
        mu_t = getRangeMiddle(t[0], t[-1])
        V_prime = SortedDict({tx: V[tx] for tx in t})

        score = defaultdict(float)
        # items() rather than iteritems(): the latter does not exist on
        # Python 3, while items() is available on both major versions.
        for years_v, words_v in V_prime.items():
            mu_v = getRangeMiddle(years_v)
            fvt = f(mu_v, mu_t)
            for word, score_wv in words_v:
                score[word] += fvt * score_wv

        # Top n terms w sorted by score_w
        topN = sorted(score.items(), key=lambda pair: pair[1],
                      reverse=True)[:n]

        # NOTE(review): two frames whose mu_t truncates to the same integer
        # share a key, so the later frame overwrites the earlier one.
        frameKey = str(int(mu_t))
        finalVocabs[frameKey] = topN
        periodGroups[frameKey] = t
    return finalVocabs, periodGroups
示例#2
0
def _adaptiveAggregation(V, n, yIntervals, weightF, param, freq):
    '''Apply adaptive aggregation algorithm to the given vocabulary.
    Algorithm 2 from paper.

    For every time frame yielded by ``_arrangeIntervals(V, yIntervals, freq)``
    the per-word scores of the frame's entries in ``V`` are accumulated, each
    entry being weighted by ``f(mu_v, mu_t)`` where ``f`` is the function
    returned by ``_selectWeightingFunction(weightF, param)``. The ``n``
    highest scoring words of each frame are kept.

    Parameters:
        V: mapping indexed by the keys contained in each time frame; every
            value is an iterable of ``(word, score)`` pairs.
        n: number of top-scoring words kept per time frame (used as a slice
            bound).
        yIntervals: forwarded unchanged to ``_arrangeIntervals``.
        weightF: forwarded unchanged to ``_selectWeightingFunction``.
        param: forwarded unchanged to ``_selectWeightingFunction``.
        freq: forwarded unchanged to ``_arrangeIntervals``.

    Returns:
        A ``(finalVocabs, periodGroups)`` tuple of ``SortedDict`` objects,
        both keyed by ``str(int(mu_t))``:
        ``finalVocabs`` maps to the list of ``(word, score)`` tuples sorted by
        descending score, ``periodGroups`` maps to the time frame itself.
    '''
    # Initialize returned parameters
    finalVocabs = SortedDict()
    periodGroups = SortedDict()

    # Select weighting function
    f = _selectWeightingFunction(weightF, param)

    # Iterate over time frames
    for t in _arrangeIntervals(V, yIntervals, freq):
        # Reference point of the whole frame, computed from its first and
        # last entries.
        mu_t = getRangeMiddle(t[0], t[-1])
        V_prime = SortedDict({tx: V[tx] for tx in t})

        score = defaultdict(float)
        # items() rather than iteritems(): the latter does not exist on
        # Python 3, while items() is available on both major versions.
        for years_v, words_v in V_prime.items():
            mu_v = getRangeMiddle(years_v)
            fvt = f(mu_v, mu_t)
            for word, score_wv in words_v:
                score[word] += fvt * score_wv

        # Top n terms w sorted by score_w
        topN = sorted(score.items(), key=lambda pair: pair[1],
                      reverse=True)[:n]

        # NOTE(review): two frames whose mu_t truncates to the same integer
        # share a key, so the later frame overwrites the earlier one.
        frameKey = str(int(mu_t))
        finalVocabs[frameKey] = topN
        periodGroups[frameKey] = t
    return finalVocabs, periodGroups
示例#3
0
def doSpaceEmbedding(monitor, results, aggMetadata):
    '''Create 2D word embedding from given set of results.

    For each ``(label, r)`` entry of ``results`` the words of ``r`` are
    embedded via ``_getPairwiseDistances`` and ``_getMDSEmbedding`` using the
    model ``monitor._models[label]``. Every cloud after the first one is
    multiplied by the transform returned by ``_findTransform`` (computed
    against the previous entry's words and locations) and then passed through
    ``_normalizeCloud``.

    Parameters:
        monitor: object exposing a ``_models`` mapping indexed by the labels
            of ``results``.
        results: mapping from label to an iterable of 2-item pairs whose
            first item is the word. Entries are processed in the mapping's
            iteration order, and that order determines which cloud each one
            is aligned to.
        aggMetadata: iterable of keys to keep in the output; each key must
            equal ``str(int(getRangeMiddle(label)))`` for some processed
            label, otherwise the lookup fails.

    Returns:
        A ``SortedDict`` mapping each key of ``aggMetadata`` to a list with
        one ``wordLocationAsDict(word, location)`` item per word.
    '''
    embeddedResults = SortedDict()

    wordsT0 = None
    locsT0 = None
    # items() rather than iteritems(): the latter does not exist on Python 3,
    # while items() is available on both major versions.
    for label, r in results.items():
        model = monitor._models[label]
        wordsT1 = [w for w, _ in r]

        dists = _getPairwiseDistances(wordsT1, model)
        locsT1 = _getMDSEmbedding(dists)

        # The first cloud is kept as is; later clouds are mapped onto the
        # previous (already transformed) cloud.
        if wordsT0 is not None:
            T = _findTransform(wordsT0, locsT0, wordsT1, locsT1)
            locsT1 = locsT1.dot(T)
            locsT1 = _normalizeCloud(locsT1)

        wordsT0 = wordsT1
        locsT0 = locsT1

        str_label = str(int(getRangeMiddle(label)))
        embeddedResults[str_label] = [
            wordLocationAsDict(word, locsT1[i, :])
            for i, word in enumerate(wordsT1)
        ]

    # Aggregation step (more like throwing away some years)
    return SortedDict(
        {year: embeddedResults[year] for year in aggMetadata}
    )
示例#4
0
def doSpaceEmbedding(monitor, results, aggMetadata):
    '''Create 2D word embedding from given set of results.

    For each ``(label, r)`` entry of ``results`` the words of ``r`` are
    embedded via ``_getPairwiseDistances`` and ``_getMDSEmbedding`` using the
    model ``monitor._models[label]``. Every cloud after the first one is
    multiplied by the transform returned by ``_findTransform`` (computed
    against the previous entry's words and locations) and then passed through
    ``_normalizeCloud``.

    Parameters:
        monitor: object exposing a ``_models`` mapping indexed by the labels
            of ``results``.
        results: mapping from label to an iterable of 2-item pairs whose
            first item is the word. Entries are processed in the mapping's
            iteration order, and that order determines which cloud each one
            is aligned to.
        aggMetadata: iterable of keys to keep in the output; each key must
            equal ``str(int(getRangeMiddle(label)))`` for some processed
            label, otherwise the lookup fails.

    Returns:
        A ``SortedDict`` mapping each key of ``aggMetadata`` to a list with
        one ``wordLocationAsDict(word, location)`` item per word.
    '''
    embeddedResults = SortedDict()

    wordsT0 = None
    locsT0 = None
    # items() rather than iteritems(): the latter does not exist on Python 3,
    # while items() is available on both major versions.
    for label, r in results.items():
        model = monitor._models[label]
        wordsT1 = [w for w, _ in r]

        dists = _getPairwiseDistances(wordsT1, model)
        locsT1 = _getMDSEmbedding(dists)

        # The first cloud is kept as is; later clouds are mapped onto the
        # previous (already transformed) cloud.
        if wordsT0 is not None:
            T = _findTransform(wordsT0, locsT0, wordsT1, locsT1)
            locsT1 = locsT1.dot(T)
            locsT1 = _normalizeCloud(locsT1)

        wordsT0 = wordsT1
        locsT0 = locsT1

        str_label = str(int(getRangeMiddle(label)))
        embeddedResults[str_label] = [
            wordLocationAsDict(word, locsT1[i, :])
            for i, word in enumerate(wordsT1)
        ]

    # Aggregation step (more like throwing away some years)
    return SortedDict(
        {year: embeddedResults[year] for year in aggMetadata}
    )