def connect_users_cities(graph, args):
    """Create IS_FROM relationships between each User node and its City nodes.

    Looks up existing City nodes by name and connects every city listed in a
    user's 'city' property, then writes all relationships in one batch.
    """
    city_lookup = graph_util.query_label_to_dict(graph, "City", "name")
    print("Creating IS_FROM relationships..")
    # One relationship per (user, city-name) pair; assumes user['city'] is
    # iterable and every name is present in city_lookup.
    rels = [
        Relationship(user, "IS_FROM", city_lookup[city_name])
        for user in tqdm(graph.nodes.match("User"))
        for city_name in user['city']
    ]
    print("Done.")
    print("Writing %d relationships.. " % len(rels))
    graph_util.create_in_batch(graph, rels)
    print("Done.")
def write_hydrated_tweets(graph, args):
    """Read fully hydrated tweets from JSON and write them as Tweet nodes."""
    print("Reading tweets..")
    with open("../data/tweets_fully_hydrated.json", encoding='utf-8', mode='r') as f:
        tweets = json.load(f)
    print("Done.")
    print("Creating Tweet nodes..")
    tweet_nodes = []
    with tqdm(total=len(tweets)) as progress:
        for raw_tweet in tweets:
            tweet_nodes.append(create_hydrated_tweet(raw_tweet))
            progress.update(1)
    print("Done.")
    print("Writing %d tweets to database.." % len(tweet_nodes))
    graph_util.create_in_batch(graph, tweet_nodes)
    print("Done.")
def write_users(graph, args):
    """Read user objects from JSON and write them to the graph as User nodes."""
    print("Reading Userobjects..")
    with open("../data/users_with-loc_with-gender.json", encoding='utf-8', mode='r') as f:
        users = json.load(f)
    print("Read.")
    print("Creating User nodes..")
    user_nodes = [create_user_node(raw_user) for raw_user in tqdm(users)]
    print("Done.")
    print("Writing %d users to database.." % len(user_nodes))
    graph_util.create_in_batch(graph, user_nodes)
    print("Done.")
def write_tweets(graph, args):
    """DEPRECATED: load POS-tagged tweets from a pickled DataFrame and write Tweet nodes.

    Kept only for reproducing the original import; prefer write_hydrated_tweets.
    """
    print("WARNING, DEPRECATED")  # fixed typo ("DEPRECTED") in the warning message
    print("Reading tweets..")
    # NOTE(review): pickle is only safe on trusted, locally produced files —
    # never point this at untrusted data.
    with open("../../Datar/twitter/180524_data_posstagged.pickle", mode='rb') as f:
        tweets = pickle.load(f)  # stream directly instead of read()+loads()
    tweets.fillna("N/A", inplace=True)  # stop neo4j from breaking!!
    print("Done.")
    print("Creating Tweet nodes..")
    tweet_nodes = []
    with tqdm(total=len(tweets)) as pbar:
        # iterrows yields (index, row); the index is unused here
        for _, tweet in tweets.iterrows():
            tweet_nodes.append(create_tweet_node(tweet))
            pbar.update(1)
    print("Done.")
    print("Writing %d tweets to database.." % len(tweet_nodes))
    graph_util.create_in_batch(graph, tweet_nodes)
    print("Done.")
def annotate_politicians(graph, args):
    """Create Party nodes, tag known politician Users, and link them to their party.

    Reads a party -> [screen_name, ...] mapping from JSON, writes one Party node
    per party, adds the 'Politician' label to every matching User node, and
    batch-writes the IS_MEMBER_OF relationships.
    """
    with open("../data/politician_user_handles.json", encoding='utf-8', mode='r') as f:
        temp = json.load(f)
    parties = {}
    politicians = {}
    # flip the dict around to have things more readable
    for party in temp.keys():
        parties[party] = Node('Party', name=party)  # create party nodes along the way
        for politician in temp[party]:
            politicians[politician] = party
    relationships = []
    graph_util.create_in_batch(graph, list(parties.values()))
    print("Done.")
    print("Creating party membership relations..")
    for politician in tqdm(politicians.keys()):
        politician_node = graph.nodes.match("User").where(
            "_.screen_name = \"%s\"" % politician).first()
        if politician_node is not None:
            # merge binds the node to the graph so the label update can be pushed
            graph.merge(politician_node)
            politician_node.add_label("Politician")
            graph.push(politician_node)
            relationships.append(
                Relationship(politician_node, "IS_MEMBER_OF",
                             parties[politicians[politician]]))
    print("Done.")
    # BUG FIX: the original printed the literal "%d" — the format argument was missing.
    print("Writing %d party membership relations to database.." % len(relationships))
    graph_util.create_in_batch(graph, relationships)
    print("Done.")
def connect_tweets_users(graph, args):
    """Link tweets to their authors (TWEETED) and to mentioned users (MENTIONS).

    Tweets whose author screen_name is not a known User are skipped entirely.
    All relationships are written in one batch at the end.
    """
    user_nodes = graph_util.query_label_to_dict(graph, "User", "screen_name")
    rels = []
    print("Creating relationships between users via tweets..")
    for tweet in tqdm(graph.nodes.match("Tweet")):
        author = user_nodes.get(tweet['screen_name'])
        if author is None:
            continue  # unknown author: nothing to relate for this tweet
        rels.append(Relationship(author, "TWEETED", tweet))  # relate the tweet
        mentions = tweet['mentions']
        # 'mentions' may not be a list (e.g. missing value) — guard before iterating
        if isinstance(mentions, list):
            for handle in mentions:
                mentioned_user = user_nodes.get(handle)  # user object by screen_name
                if mentioned_user is not None:
                    rels.append(Relationship(tweet, "MENTIONS", mentioned_user))
    print("Done.")
    # lets write mentions
    print("Writing %d relationships to database.." % len(rels))
    graph_util.create_in_batch(graph, rels)
    print("Done.")
def write_locations(graph, args):
    """Create State and City nodes from User locations and link each city to its state.

    Collects every city/state name attached to User nodes, writes one node per
    distinct name, then creates IS_IN relationships using a city -> state
    mapping parsed from ../data/german_cities_raw.txt.
    """

    def create_state_node_dict(states):
        # One State node per distinct state name.
        result = {}
        for state in set(states):
            result[state] = Node("State", name=state)
        return result

    def load_city_mapping():
        """Parse 'City (XX)' lines into a {city: state_abbrev} dict."""
        city_mapping = {}
        find_state = re.compile(r"(\(\w{2}\))")
        find_city = re.compile(r"(.+ )")
        with open("../data/german_cities_raw.txt", encoding='utf-8', mode='r') as f:
            for entry in f:
                city = find_city.findall(entry)[0].strip()
                state = find_state.findall(entry)[0].replace("(", "").replace(")", "")
                city_mapping[city] = state
        return city_mapping

    city_mapping = load_city_mapping()
    cities = []
    states = []
    # Assumes user['city'] and user['state'] are iterables of names.
    for user in graph.nodes.match("User"):
        cities.extend(user['city'])
        states.extend(user['state'])

    states = create_state_node_dict(states)
    print("Writing state nodes..")
    graph_util.create_in_batch(graph, list(states.values()))
    print("Done.")

    city_nodes = [Node("City", name=city) for city in set(cities)]
    print("Creating city nodes..")
    graph_util.create_in_batch(graph, city_nodes)
    print("Done.")

    # FIX: removed a stray no-op `city_nodes` expression statement that was here.
    city_to_states = []
    for city_node in city_nodes:
        city_to_states.append(
            Relationship(city_node, "IS_IN",
                         states[city_mapping[city_node['name']]]))
    print("Creating city to state relationships..")
    graph_util.create_in_batch(graph, city_to_states)
    print("Done.")