Пример #1
0
def main():
    """Compute and save behavior statistics for all users, batch by batch.

    The user ids are read from 'tables/users.txt' via ``load_user_list`` and
    the topics via ``load_topic``. Users are then handled in fixed-size
    batches: each batch is passed to ``get_behavior_statics`` and the result
    is handed to ``save_behavior_statics`` together with the running topic
    and comment counters, which that call returns updated. The final
    counters are logged at the end.
    """
    log.info('Loading user ids...')
    uid_list = load_user_list('tables/users.txt')  # load users
    log.info('Loading user id, done.')

    # Load the topics of all groups.
    topic_dict = load_topic()

    # There is a lot of content to store, so only a fixed number of users
    # is processed at a time.
    batch_size = 100
    total_users = len(uid_list)
    topic_count = 0
    comment_count = 0
    for start in range(0, total_users, batch_size):
        # Clamp the upper bound so the last (possibly partial) batch is
        # sliced and reported with its real extent.
        end = min(start + batch_size, total_users)
        puid_list = uid_list[start:end]

        behavior = get_behavior_statics(puid_list, topic_dict)
        # Save to file; the counters are threaded through so they
        # accumulate across batches.
        topic_count, comment_count = save_behavior_statics(
            behavior, topic_count, comment_count)
        log.info('Processing uid range: [%d, %d)' % (start, end))

    log.info('Total number of topics used: %d' % topic_count)
    log.info('Total number of comments used: %d' % comment_count)
    log.info('Thank God. It\'s done.')
Пример #2
0
def filter_user(interest_info):
    """Placeholder for filtering users by their interest information.

    Note: some users may later be filtered out based on the interest
    information. For now this does nothing and returns None.
    """
    return None
    
# Script entry point: prepares user-interest data for a single group whose
# ID is given as the first command-line argument.
if __name__ == '__main__':
    # The group ID is mandatory; exit with status 1 when it is missing.
    if len(sys.argv) < 2:
        print 'Group ID not provided.'
        sys.exit(1)

    group_id = sys.argv[1]
    log.info('Prepare user interest for group: %s' % group_id)

    # Load the user list from social/<group_id>/all-users-<group_id>.
    print 'Loading users...'
    path = 'social/' + group_id + '/all-users-' + group_id
    uid_list = load_user_list(path)
    total_user = len(uid_list)
    # The user count is reported both to the log and to stdout.
    log.info('Number of users loaded: %d' % total_user)
    print 'Number of users loaded: %d' % total_user

    # Load the LDA model and its dictionary from ldamodels/<group_id>/.
    print 'Loading model and dict...'
    model_path = 'ldamodels/' + group_id + '/title-comment-' + group_id + '.ldamodel'
    dict_path = 'ldamodels/' + group_id + '/dict-title-comment-' + group_id + '.dict'
    log.info('Loading LDA model...')
    ldamodel = models.ldamodel.LdaModel.load(model_path) # load model
    log.info('Loading dict...')
    dictionary = corpora.dictionary.Dictionary.load(dict_path) # load dict

    # Build the paths of the raw topic and comment tables for this group.
    # NOTE(review): these paths are not used within the visible lines;
    # presumably they are consumed by code that follows - confirm.
    print 'Gen user interest...'
    topic_path = 'tables/' + group_id + '/TopicInfo-raw-all-' + group_id
    comment_path = 'tables/' + group_id + '/CommentInfo-raw-all-' + group_id