def populate_node(root_node):
    """Build the reply tree underneath ``root_node``.

    Starting from ``root_node``, repeatedly queries ``collection`` for
    documents whose ``in_reply_to_status_id`` equals the ``id`` of the node
    being processed. Every match becomes an ``AnyNode`` created with
    ``parent=`` set to that node, and is then queued so that its own replies
    are looked up as well.

    Args:
        root_node: Node exposing an ``id`` attribute; the tree is grown
            beneath it. Nothing is returned.
    """
    # The list is used as a stack from its tail: append()/pop() are O(1),
    # whereas pop(0)/insert(0, ...) shift every element on each call.
    # The processing order (last queued, first processed) is the same.
    pending = [root_node]
    while pending:
        current = pending.pop()

        # All documents recorded as direct replies to the current node.
        replies = collection.find({'in_reply_to_status_id': current.id})
        for reply in replies:
            child_node = AnyNode(
                id=reply['id'],
                name=reply['user']['name'],
                text=reply['text'],
                parent=current,
            )
            # NOTE(review): the id= keyword above presumably already sets
            # this attribute; assignment kept to preserve existing behavior.
            child_node.id = reply['id']
            pending.append(child_node)
def _collect_new_roots(tweets, progress_message):
    """Return the conversation roots of ``tweets`` that were not seen before.

    Each tweet is resolved with ``find_root_tweet``; a root is kept only if
    its ``id`` is not yet in ``process_set``, and its id is then recorded
    there. ``progress_message`` is a format string printed with the running
    index on every 1000th tweet (including index 0).
    """
    new_roots = []
    for i, tweet in enumerate(tweets):
        root = find_root_tweet(tweet)
        if root['id'] not in process_set:
            process_set.add(root['id'])
            new_roots.append(root)
        if i % 1000 == 0:
            print(progress_message.format(i))
    return new_roots


def export_conversation_trees_to_db(user_id):
    """Export every conversation tree involving ``user_id``.

    Steps:
      1. Select tweets written by the user that reply to somebody, and
         tweets whose ``in_reply_to_user_id`` is the user.
      2. Resolve each tweet to its conversation root, skipping roots whose
         id is already in ``process_set``.
      3. Build an ``AnyNode`` tree per root via ``populate_node``.
      4. Serialize each tree with ``JsonExporter`` and insert the resulting
         document into ``collection_trees``.

    Progress and summary statistics are printed along the way.

    Args:
        user_id: Value matched against ``user.id`` and
            ``in_reply_to_user_id`` in ``collection``.
    """
    print("Selecting all tweets from id: {}...".format(user_id))

    # Tweets by the user that mention / reply to somebody.
    conversation_query = collection.find(
        {'user.id': user_id, 'in_reply_to_user_id': {"$ne": None}})
    # Tweets in which the user is the one being replied to.
    conversation_query2 = collection.find({'in_reply_to_user_id': user_id})

    # Count once and reuse for both report lines.
    # NOTE(review): if `collection` is a PyMongo collection, Cursor.count()
    # is deprecated since PyMongo 3.7 and removed in 4.0; prefer
    # collection.count_documents(filter) when upgrading.
    mentioning_count = conversation_query.count()
    mentioned_count = conversation_query2.count()
    print("SET A: {} B: {} ".format(mentioning_count, mentioned_count))

    # NOTE(review): `process_set` is defined outside this function, so roots
    # recorded by earlier calls are presumably skipped here as well -- confirm
    # that this cross-call de-duplication is intended.
    root_tweets = _collect_new_roots(
        conversation_query, "Processing mentions {}")
    root_tweets += _collect_new_roots(
        conversation_query2, "Processing mentioned {}")

    print("SET A: {} B: {} UNION: {}".format(
        mentioning_count, mentioned_count, len(process_set)))

    root_nodes_list = []
    for i, root_tweet in enumerate(root_tweets):
        root_node = AnyNode(
            id=root_tweet['id'],
            name=root_tweet['user']['name'],
            text=root_tweet['text'],
        )
        # NOTE(review): presumably redundant with the id= keyword above;
        # kept to preserve existing behavior.
        root_node.id = root_tweet['id']
        if i % 1000 == 0:
            pprint.pprint("Populating: {}".format(i))
        populate_node(root_node)
        root_nodes_list.append(root_node)

    pprint.pprint('Total sum of root nodes: {}'.format(len(root_tweets)))

    # One exporter is enough; it is configured identically for every tree.
    exporter = JsonExporter(indent=2, sort_keys=True)
    count = 0
    for tree in root_nodes_list:
        # Conversation size = all descendants plus the root itself.
        count += len(tree.descendants) + 1
        # Round-trip through JSON text to obtain a plain dict for insertion.
        collection_trees.insert_one(json.loads(exporter.export(tree)))

    print("Total conversation count: {}".format(count))

    # Guard against users without any conversation: dividing by an empty
    # root list would raise ZeroDivisionError.
    average = count / len(root_tweets) if root_tweets else 0
    print("Average conversation length: {}".format(average))