def reposts2tree(weibo_info, repost_weibo):
    """Build a repost tree and spread statistics for one source weibo.

    The source weibo becomes the root node. Every repost becomes a node that
    is attached to the most recently inserted node whose name equals one of
    the ``/@name`` mentions found in the repost text (mentions are tried in
    the order they appear). A repost with no resolvable mention is attached
    to the root.

    NOTE(review): a second ``reposts2tree`` with a different signature is
    defined later in this file; if both live in the same module the later
    definition shadows this one -- confirm which is intended.

    Args:
        weibo_info: dict for the source weibo. Reads ``mid``, ``timestamp``,
            ``user.uname``, ``user.uid``, ``user.location``,
            ``user.photo_url`` and, when ``RUN_TYPE == 1``, ``retweeted``.
        repost_weibo: iterable of repost dicts. Reads ``mid``, ``text``,
            ``timestamp``, ``user.uname`` and ``user.uid``; ``user.location``
            and ``user.photo_url`` are assumed to follow the same schema as
            ``weibo_info`` -- TODO confirm against the caller.

    Returns:
        A ``(tree_nodes, tree_stats)`` tuple. ``tree_nodes`` is the list of
        ``Tree`` nodes with the root at index 0. ``tree_stats`` holds
        ``spread_begin``, ``spread_end``, ``repost_people_count`` and either
        ``retweet_count`` (``RUN_TYPE == 1``) or ``reposts_count``.
    """
    root_user = weibo_info['user']
    root_infos = {
        'location': root_user['location'],
        'datetime': weibo_info['timestamp'],
        'mid': weibo_info['mid'],
        'photo_url': root_user['photo_url'],
        # Fixed: the original referenced the undefined name `source_weibo`.
        'weibo_url': base62.weiboinfo2url(root_user['uid'], weibo_info['mid']),
    }
    root = Tree(root_user['uname'], root_infos)
    tree_nodes = [root]

    tree_stats = {
        'spread_begin': weibo_info['timestamp'],
        'spread_end': weibo_info['timestamp'],
    }
    if RUN_TYPE == 1:
        tree_stats['retweet_count'] = weibo_info['retweeted']
    else:
        tree_stats['reposts_count'] = 0

    # Distinct participants are tracked in a local set so both RUN_TYPE
    # branches share one code path (the original stored the set under two
    # different keys and then read only one of them).
    people = set([root_user['uid']])

    # Parent lookup below only searches nodes that were inserted earlier, so
    # reposts are processed oldest first.
    # NOTE(review): the original sorted an undefined `reposts` by `x['uid']`;
    # `timestamp` is used here because it is the ordering the parent search
    # relies on -- confirm this matches the intended ordering.
    reposts = sorted(repost_weibo, key=lambda item: item['timestamp'])
    reposts = reposts[:1000]  # cap the amount of work per tree

    mention_re = re.compile(u'/@([a-zA-Z-_\u0391-\uFFE5]+)')

    for repost in reposts:
        repost_user = repost['user']
        # Fixed: node metadata now comes from the repost itself rather than
        # being copied from the source weibo.
        child_infos = {
            'location': repost_user['location'],
            'datetime': repost['timestamp'],
            'mid': repost['mid'],
            'photo_url': repost_user['photo_url'],
            'weibo_url': base62.weiboinfo2url(repost_user['uid'], repost['mid']),
        }
        child = Tree(repost_user['uname'], child_infos)

        # Resolve the parent before inserting the child, so the child can
        # never match itself. Newest nodes are checked first.
        parent = root
        for mentioned in mention_re.findall(repost['text']):
            match = next(
                (cand for cand in reversed(tree_nodes) if cand.node == mentioned),
                None,
            )
            if match is not None:
                parent = match
                break

        tree_nodes.append(child)
        parent.append_child(child)

        created_at = repost['timestamp']
        if created_at > tree_stats['spread_end']:
            tree_stats['spread_end'] = created_at

        # Fixed: use `uid`, the same key used for the root user and the URL.
        people.add(repost_user['uid'])

    tree_stats['repost_people_count'] = len(people)
    return tree_nodes, tree_stats
def reposts2tree(source_weibo, reposts, per_page, page_count):
    """Turn a source weibo and its reposts into a tree plus spread statistics.

    The source weibo is the root (index 0 of the returned node list). Reposts
    are sorted by ``id``, truncated to ``per_page * page_count`` entries and
    inserted one by one. Each repost is attached to the most recently inserted
    earlier node whose name equals one of the ``/@name`` mentions in its text,
    trying the mentions in order; if none resolves it is attached to the root.

    Args:
        source_weibo: dict for the original weibo. Reads ``id``, ``mid``,
            ``created_at``, ``reposts_count`` and ``user`` (``name``,
            ``location``, ``profile_image_url``, ``id``).
        reposts: iterable of repost dicts with the same fields plus ``text``.
        per_page: multiplied by ``page_count`` to cap the reposts processed.
        page_count: see ``per_page``.

    Returns:
        ``(tree_nodes, tree_stats)`` where ``tree_stats`` contains
        ``spread_begin``, ``spread_end``, ``reposts_count`` and
        ``repost_people_count``.
    """
    time_format = '%a %b %d %H:%M:%S +0800 %Y'
    mention_pattern = re.compile(u'/@([a-zA-Z-_\u0391-\uFFE5]+)')

    # Root node built from the source weibo.
    author = source_weibo['user']
    root_infos = {
        'location': author['location'],
        'datetime': source_weibo['created_at'],
        'wid': source_weibo['id'],
        'img_url': author['profile_image_url'],
        'weibo_url': base62.weiboinfo2url(author['id'], source_weibo['mid']),
    }
    tree_nodes = [Tree(author['name'], root_infos)]

    posted_at = datetime.datetime.strptime(source_weibo['created_at'], time_format)
    tree_stats = {}
    tree_stats['spread_begin'] = posted_at
    tree_stats['spread_end'] = posted_at
    tree_stats['reposts_count'] = source_weibo['reposts_count']
    participants = set()
    participants.add(author['id'])
    tree_stats['repost_peoples'] = participants

    # Order by id, then keep only the requested number of pages.
    ordered = sorted(reposts, key=lambda item: item['id'])
    ordered = ordered[:per_page * page_count]

    for entry in ordered:
        reposter = entry['user']
        child_infos = {
            'location': reposter['location'],
            'datetime': entry['created_at'],
            'wid': entry['id'],
            'img_url': reposter['profile_image_url'],
            'weibo_url': base62.weiboinfo2url(reposter['id'], entry['mid']),
        }
        child = Tree(reposter['name'], child_infos)
        tree_nodes.append(child)

        mentioned_names = mention_pattern.findall(entry['text'])

        # Candidates are all earlier nodes, newest first (the child itself,
        # now last in the list, is excluded).
        earlier_nodes = tree_nodes[-2::-1]
        parent = tree_nodes[0]
        for name in mentioned_names:
            hit = next((cand for cand in earlier_nodes if cand.node == name), None)
            if hit is not None:
                parent = hit
                break
        parent.append_child(child)

        reposted_at = datetime.datetime.strptime(entry['created_at'], time_format)
        if reposted_at > tree_stats['spread_end']:
            tree_stats['spread_end'] = reposted_at
        tree_stats['repost_peoples'].add(reposter['id'])

    # Only the count is reported; the working set is dropped from the stats.
    tree_stats['repost_people_count'] = len(tree_stats['repost_peoples'])
    del tree_stats['repost_peoples']
    return tree_nodes, tree_stats