示例#1
0
def get_trending_topics_summary(n=10):
    words = frequency.word_frequency.keys()
    counts = {w: frequency.get_wf(w, g.ts) for w in words}
    counts = {w: fl for w, fl in counts.iteritems()
            if fl[1] > 0 and fl[1] > 3 * fl[0]}
    mcounts = [(w, log((fl[1] + 0.003) / (max(fl[0],1) + 0.003))) for w, fl in counts.iteritems()]
    mcounts.sort(key = lambda x: -x[1])

    summaries = []
    g.penalty = defaultdict(lambda: 0)
    i = 0
    while len(summaries) < n:
        keyword = mcounts[i][0]
        #keywords = get_expanded_keywords([keyword])
        keywords = set([keyword])
        print keywords

        # select top starting bigrams that contain one of the keywords
        # to use as seeds for the sentences
        bigrams = [b for b in g.nw.items() if b[0][0] in keywords or b[0][1] in keywords]
        start = max(
                bigrams, 
                key=lambda x: x[1] - 10 * g.penalty[x[0][0]] - 10 * g.penalty[x[0][1]]
        )[0]

        summary = build_summary(start)

        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)
        
        i += 1
示例#2
0
def summarize_partial(start, n=3):
    """Generate ``n`` summaries, each seeded from the same starting bigram.

    Resets the global penalty table, then repeatedly calls
    ``build_summary`` with ``start`` (coerced to a tuple) until ``n``
    truthy summaries have been collected, displaying each one as it is
    produced.
    """
    g.penalty = defaultdict(lambda: 0)
    seed = tuple(start)
    collected = []

    while True:
        if len(collected) >= n:
            break

        candidate = build_summary(seed)
        if not candidate:
            continue

        collected.append(candidate)
        show_summaries([candidate], keywords=seed)
示例#3
0
def summarize_top(n=10):
    summaries = []
    g.penalty = defaultdict(lambda: 0)

    while len(summaries) < n:

        # select top starting bigrams
        # to use as seeds for the sentences
        start = max(
                g.nw.items(), 
                key=lambda x: x[1] - 10 * g.penalty[x[0][0]] - 10 * g.penalty[x[0][1]]
        )[0]
        print "start: %s" % list(start)

        summary = build_summary(start)

        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)
示例#4
0
    # NOTE(review): fragment — the enclosing def (which binds `summaries`,
    # `keywords`, and `n`) is outside this view; presumably a keyword-driven
    # variant of the summarizers above. Confirm against the full file.
    while len(summaries) < n:

        # select top starting bigrams that contain one of the keywords
        # to use as seeds for the sentences
        # put bigrams containing '_S' or '_E' further down the list
        bigrams = [b for b in g.nw.items() if b[0][0] in keywords or b[0][1] in keywords]
        # Score = raw bigram weight, minus 10x each word's accumulated
        # penalty, minus a flat 100 when the bigram touches the sentence
        # start ('_S') or end ('_E') marker.
        start = max(bigrams, key=lambda x: \
                x[1] - 10 * g.penalty[x[0][0]] - 10 * g.penalty[x[0][1]] - 
                (0 if x[0][0] != '_S' and x[0][1] != '_E' else 100))
        # max() returned a (bigram, weight) pair; keep only the bigram.
        start = start[0]

        # Unlike the other variants, build_summary is also given the
        # keyword set here.
        summary = build_summary(start, keywords)

        if summary:
            summaries.append(summary)
            show_summaries([summary], keywords=start)

def get_trending_topics_summary(n=10):
    words = frequency.word_frequency.keys()
    counts = {w: frequency.get_wf(w, g.ts) for w in words}
    counts = {w: fl for w, fl in counts.iteritems()
            if fl[1] > 0 and fl[1] > 3 * fl[0]}
    mcounts = [(w, log((fl[1] + 0.003) / (max(fl[0],1) + 0.003))) for w, fl in counts.iteritems()]
    mcounts.sort(key = lambda x: -x[1])

    summaries = []
    g.penalty = defaultdict(lambda: 0)
    i = 0
    while len(summaries) < n:
        keyword = mcounts[i][0]
        #keywords = get_expanded_keywords([keyword])