def reposts2tree(weibo_info, repost_weibo, max_reposts=1000):
    """Build the repost tree of a weibo and collect basic spread statistics.

    The original weibo becomes the root node.  Every repost becomes a node
    that is attached beneath the most recently added node whose name equals
    one of the ``/@name`` mentions found in the repost text (mentions are
    tried in the order they appear); when no mention matches an existing
    node, the repost is attached directly to the root.

    Args:
        weibo_info: dict describing the original weibo.  Reads ``timestamp``,
            ``mid`` and ``user`` (with ``uname``, ``uid``, ``location`` and
            ``photo_url``), plus ``retweeted`` when ``RUN_TYPE == 1``.
        repost_weibo: iterable of repost dicts.  Reads ``timestamp``,
            ``mid``, ``text`` and ``user`` (same keys as above).
        max_reposts: maximum number of reposts to process, counted after
            sorting.  Defaults to 1000; ``None`` disables the cap.

    Returns:
        A ``(tree_nodes, tree_stats)`` tuple.  ``tree_nodes`` is the list of
        ``Tree`` nodes with the root at index 0.  ``tree_stats`` holds
        ``spread_begin``, ``spread_end``, ``repost_people_count`` and either
        ``retweet_count`` (``RUN_TYPE == 1``) or ``reposts_count``.
    """
    mention_re = re.compile(u'/@([a-zA-Z-_\u0391-\uFFE5]+)')

    # Root node: the original weibo.
    author = weibo_info['user']
    root = Tree(author['uname'], {
        'location': author['location'],
        'datetime': weibo_info['timestamp'],
        'mid': weibo_info['mid'],
        'photo_url': author['photo_url'],
        'weibo_url': base62.weiboinfo2url(author['uid'], weibo_info['mid']),
    })
    tree_nodes = [root]

    tree_stats = {
        'spread_begin': weibo_info['timestamp'],
        'spread_end': weibo_info['timestamp'],
    }
    if RUN_TYPE == 1:
        tree_stats['retweet_count'] = weibo_info['retweeted']
    else:
        tree_stats['reposts_count'] = 0

    # Distinct participants are tracked in a local set so the bookkeeping
    # does not depend on which RUN_TYPE-specific keys exist in tree_stats.
    people = {author['uid']}

    # Process reposts oldest-first: a parent can only be found if it was
    # added to the tree before its child.  ``sorted`` is stable, so reposts
    # with equal timestamps keep their input order.
    reposts = sorted(repost_weibo, key=lambda item: item['timestamp'])
    reposts = reposts[:max_reposts]

    for repost in reposts:
        reposter = repost['user']
        current = Tree(reposter['uname'], {
            'location': reposter['location'],
            'datetime': repost['timestamp'],
            'mid': repost['mid'],
            'photo_url': reposter['photo_url'],
            'weibo_url': base62.weiboinfo2url(reposter['uid'], repost['mid']),
        })

        # Try each mentioned name in order; for each, prefer the most
        # recently added node carrying that name.
        parent = None
        for name in mention_re.findall(repost['text']):
            for candidate in reversed(tree_nodes):
                if candidate.node == name:
                    parent = candidate
                    break
            if parent is not None:
                break
        if parent is None:
            # No mention resolved to a known node: direct repost of the root.
            parent = root

        parent.append_child(current)
        tree_nodes.append(current)

        if repost['timestamp'] > tree_stats['spread_end']:
            tree_stats['spread_end'] = repost['timestamp']
        people.add(reposter['uid'])

    tree_stats['repost_people_count'] = len(people)

    return tree_nodes, tree_stats
# Example #2
0
def reposts2tree(source_weibo, reposts, per_page, page_count):
    """Turn a weibo and its reposts into a tree plus spread statistics.

    The source weibo is the root.  Reposts are ordered by ``id``, limited to
    the first ``per_page * page_count`` entries, and each one is hung beneath
    the most recently added node whose name equals one of the ``/@name``
    mentions in its text (mentions tried in order of appearance).  A repost
    with no matching mention is attached to the root.

    Args:
        source_weibo: dict for the original weibo.  Reads ``id``, ``mid``,
            ``created_at``, ``reposts_count`` and ``user`` (with ``name``,
            ``id``, ``location`` and ``profile_image_url``).
        reposts: iterable of repost dicts.  Reads ``id``, ``mid``,
            ``created_at``, ``text`` and ``user`` (same keys as above).
        per_page: multiplied with ``page_count`` to cap the processed reposts.
        page_count: see ``per_page``.

    Returns:
        ``(tree_nodes, tree_stats)``: the list of ``Tree`` nodes (root first)
        and a dict with ``spread_begin``, ``spread_end`` (``datetime``
        objects), ``reposts_count`` and ``repost_people_count``.
    """
    time_format = '%a %b %d %H:%M:%S +0800 %Y'
    mention_re = re.compile(u'/@([a-zA-Z-_\u0391-\uFFE5]+)')

    # Root node built from the source weibo.
    author = source_weibo['user']
    root = Tree(author['name'], {
        'location': author['location'],
        'datetime': source_weibo['created_at'],
        'wid': source_weibo['id'],
        'img_url': author['profile_image_url'],
        'weibo_url': base62.weiboinfo2url(author['id'], source_weibo['mid']),
    })
    tree_nodes = [root]

    posted_at = datetime.datetime.strptime(source_weibo['created_at'],
                                           time_format)
    tree_stats = {
        'spread_begin': posted_at,
        'spread_end': posted_at,
        'reposts_count': source_weibo['reposts_count'],
    }
    people = {author['id']}

    # Order by id, then keep only the requested number of pages.
    limit = per_page * page_count
    ordered = sorted(reposts, key=lambda item: item['id'])[:limit]

    for repost in ordered:
        reposter = repost['user']
        current = Tree(reposter['name'], {
            'location': reposter['location'],
            'datetime': repost['created_at'],
            'wid': repost['id'],
            'img_url': reposter['profile_image_url'],
            'weibo_url': base62.weiboinfo2url(reposter['id'], repost['mid']),
        })
        tree_nodes.append(current)

        # Walk the mentions in order; for each, scan the earlier nodes from
        # newest to oldest and stop at the first name match.
        parent = None
        for name in mention_re.findall(repost['text']):
            for candidate in reversed(tree_nodes[:-1]):
                if candidate.node == name:
                    parent = candidate
                    break
            if parent is not None:
                break
        if parent is None:
            # Nothing matched: treat it as a direct repost of the root.
            parent = tree_nodes[0]
        parent.append_child(current)

        reposted_at = datetime.datetime.strptime(repost['created_at'],
                                                 time_format)
        if reposted_at > tree_stats['spread_end']:
            tree_stats['spread_end'] = reposted_at
        people.add(reposter['id'])

    tree_stats['repost_people_count'] = len(people)

    return tree_nodes, tree_stats