示例#1
0
文件: preprocess.py 项目: jmelion/tts
def make_social_graph():
    """Build a Graph containing one edge and one stored link per known link.

    Each record returned by ``get_links()`` is read positionally: item 0 is
    passed to ``store_link`` as the first argument (presumably the link/e-mail
    id -- confirm against ``get_links``), items 1 and 2 are the two endpoints
    handed to both ``add_edge`` and ``store_link``.

    Returns:
        The populated ``Graph`` instance.
    """
    print('Constructing social graph...')
    records = get_links()
    graph = Graph()
    for record in records:
        key, first_end, second_end = record[0], record[1], record[2]
        # Add the edge first, then remember which record produced it.
        graph.add_edge(first_end, second_end)
        graph.store_link(key, first_end, second_end)
    return graph
示例#2
0
文件: preprocess.py 项目: jmelion/tts
def _mean_known_weight(words, weights):
    """Return the mean of ``weights[word]`` over the words found in *weights*.

    Words whose lookup raises ``KeyError`` are excluded from both the sum and
    the count. Returns 0 when none of the words has a weight.
    """
    total = 0
    known = 0
    for word in words:
        try:
            total += weights[word]
        except KeyError:
            continue
        known += 1
    if not known:
        return 0
    # float() keeps this a true division under Python 2 for integer weights.
    return total / float(known)


def weighted_social_graph(weights):
    """Build a Graph whose edges are weighted by e-mail subject-line words.

    Every record from ``get_links()`` is registered with ``store_link``. Then
    ``subject.txt`` is read line by line: the first whitespace-delimited token
    of a line is looked up in ``social_graph.links``; lines with no matching
    entry (and blank lines) are skipped. For the remaining lines the subject
    text is passed through ``remove_punc`` and ``remove_stopwords``, the mean
    weight of the words present in *weights* is computed, and
    ``add_weighted_edge`` is called with the first two items of the stored
    link and that mean (0 when no word has a weight).

    Args:
        weights: Mapping from word to numeric weight. Words for which the
            lookup raises ``KeyError`` are ignored.

    Returns:
        The populated ``Graph`` instance.
    """
    print('Constructing weighted social graph...')
    links = get_links()
    social_graph = Graph()
    for link in links:
        social_graph.store_link(link[0], link[1], link[2])
    stored_links = social_graph.links

    with open('subject.txt') as subject_file:
        # Count lines with enumerate so skipped e-mails still advance the
        # counter; otherwise the progress message would be repeated for every
        # skipped line once the counter sits on a multiple of 10000.
        for line_number, email in enumerate(subject_file, 1):
            if line_number % 10000 == 0:
                print('processing email ' + str(line_number))

            fields = email.split()
            if not fields:
                # Blank line: there is no id to look up.
                continue
            email_id = fields[0]

            # Skip e-mails without a stored link before doing any text cleanup.
            try:
                link = stored_links[email_id]
            except KeyError:
                continue

            # The subject starts two characters after the id (presumably a
            # two-character separator -- confirm against the subject.txt
            # format).
            subject_line = remove_punc(email[len(email_id) + 2:])
            words = remove_stopwords(subject_line.split())

            email_weight = _mean_known_weight(words, weights)
            social_graph.add_weighted_edge(link[0], link[1], email_weight)

    return social_graph