def save(G, fname):
    json.dump(dict(nodes=[[n, G.node[n]] for n in G.nodes()],
                   edges=[[u, v, G.edge[u][v]] for u, v in G.edges()]),
              open(fname, 'w'), indent=2)
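# The save() helper above uses the networkx 1.x attribute API (G.node[n],
# G.edge[u][v]) and json_graph.dump, which later networkx releases removed.
# A minimal sketch of the same helper for networkx 2.x+, where those accessors
# became G.nodes[n] and G.edges[u, v], might look like this; the name save_nx2
# is illustrative and not part of the original code.
import json

def save_nx2(G, fname):
    data = dict(nodes=[[n, G.nodes[n]] for n in G.nodes()],
                edges=[[u, v, G.edges[u, v]] for u, v in G.edges()])
    with open(fname, 'w') as fh:
        json.dump(data, fh, indent=2)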
def wikilinks_graph(self):
    """
    Generate a wikilinks graph using networkx
    :rtype: Graph
    """
    import tempfile
    from networkx.readwrite import json_graph
    import networkx as nx
    import re
    import requests

    tmpdir = tempfile.gettempdir()
    graph_object = tmpdir + '/' + str(self.id) + '.wikilinks.json'

    def _get_links(ngram):
        ngram_links = json.loads(requests.get(template_query.format(ngram)).text)
        try:
            ngram_links = ngram_links['query']['pages'].values()[0]['links']
        except KeyError:
            return []
        ngram_links = [re.sub(r' \(.+\)', '', link['title'].lower()) for link in ngram_links]
        ngram_links = set([ngram for ngram in ngram_links if len(ngram.split()) > 1])
        return ngram_links

    if not os.path.exists(graph_object):
        graph = nx.Graph()
        links_dict = {}
        template_query = u'http://en.wikipedia.org/w/api.php?action=query&titles={0}&prop=links&plnamespace=0&pllimit=500&format=json'
        article_ngrams = list(self.articlecollocation_set.values_list('ngram', flat=True))
        for i, ngram1 in enumerate(article_ngrams):
            if ngram1 in links_dict:
                ngram1_links = links_dict[ngram1]
            else:
                ngram1_links = _get_links(ngram1)
                links_dict[ngram1] = ngram1_links
            for j in range(i + 1, len(article_ngrams)):
                ngram2 = article_ngrams[j]
                if ngram2 in links_dict:
                    ngram2_links = links_dict[ngram2]
                else:
                    ngram2_links = _get_links(ngram2)
                    links_dict[ngram2] = ngram2_links
                if ngram1 in ngram2_links or ngram2 in ngram1_links:
                    graph.add_edge(ngram1, ngram2)
        json_graph.dump(graph, open(graph_object, 'w'))
        return graph
    else:
        graph = json_graph.load(open(graph_object))
        return graph
def generate_local_instance(edge_type='prior_nodes'):
    # Massive database call. Iterate through this if the database gets too large.
    objs = Objecttype.objects.all()
    G = nx.DiGraph()
    i = 0
    # Generate subgraphs for each node (along with metadata) and merge them.
    for each in objs:
        sub_G = nbh_subgraph(each, edge_type)
        G.add_edges_from(sub_G.edges())
        G.add_nodes_from(zip(sub_G.node.keys(), sub_G.node.values()))
        print i
        i = i + 1
    # Serialize the data into JSON format and save it locally.
    g_json = json_graph.node_link_data(G)  # node-link format to serialize
    json_graph.dump(g_json, open("static/local_instance.json", 'w'))
def consumer(q):
    while not q.empty():
        module = open(q.get())
        try:
            this_module = get_module(module)
            if not filter_on_package('com.nytimes', this_module):
                continue
            graph.add_node(this_module)
            imports = map(get_package_from_line, filter(is_import_line, module))
            for impt in imports:
                if filter_on_package('com.nytimes', impt):
                    graph.add_node(impt)
                    graph.add_edge(this_module, impt)
        except StopIteration:
            print module.name
    dc = nx.degree_centrality(graph)
    nx.set_node_attributes(graph, 'degree_cent', dc)
    print json_graph.dump(graph, open('graph.json', 'w'))
    nx.draw_networkx_nodes(g, pos, nodelist=inds, node_color='gray',
                           node_size=map(node_size, inds))
    nx.draw_networkx_edges(g, pos, alpha=0.05)
    plt.show()
else:
    # TODO: Create a script that compiles all external files into the
    # govtrack file (see virtualenv for details on how to do this).
    # With that done, create all of the HTML, CSS, and JS files on
    # invocation and write them to a temp directory.

    # Serialize the graph to the network.json file
    pwd = os.path.dirname(os.path.realpath(__file__))
    browser_dir = os.path.join(pwd, 'browser')
    network_file = os.path.join(browser_dir, 'js', 'network.json')
    with open(network_file, 'w') as fout:
        if args.resize is not None:
            g.graph['resize'] = args.resize
        json_graph.dump(g, fout)

    # Switch to the browser directory and start up a simple HTTP server
    os.chdir(browser_dir)
    Server = type('Server', (TCPServer, object), {'allow_reuse_address': True})
    httpd = Server(("", 8080), SimpleHTTPRequestHandler)
    p = Process(target=httpd.serve_forever)
    p.start()
    webbrowser.open("http://localhost:8080")
    try:
        print "Press Ctrl-c to quit..."
        p.join()
    except KeyboardInterrupt:
        p.terminate()
def files_to_json(chosen_pred, clasp_filename, clasp_is_timestamped,
                  clasp_is_optimizing, cost_pred, gringo_text_filename,
                  is_directed, json_data_filename, json_time_filename,
                  json_soln_filename):
    edge_id_map = None
    costs = None

    # If "gringo -t" output is given, then we use that.
    if gringo_text_filename and cost_pred:
        costs = parse_costs_from_gringo_text(cost_pred, gringo_text_filename)
    # Otherwise, if we do not have the graph information, default to
    # using an undirected fully connected graph (i.e. a complete graph).
    # We parse the node names from the clasp answer file.
    else:
        nodes = parse_nodes_from_solution_file(chosen_pred, clasp_filename,
                                               clasp_is_timestamped)
        nodes, edges, costs = create_complete_graph(nodes)
        is_directed = False

    # Just a check that nothing went wrong...
    if costs is None or len(costs) == 0:
        return (False, 'Costs cannot be None or length 0')

    # Create a dictionary that, when converted to JSON, is compatible with
    # the vis.js javascript library.
    visjs_json_dict, edge_id_map = create_visjs_dict(costs, is_directed)

    # Create two assisting dictionaries:
    #  - timing the animation: the time differences between answers
    #  - edge sets: which edges belong to which answer
    # These dictionaries are also converted to JSON later.
    timing_dict, answer_sets_dict = create_timing_and_answer_set_dicts(
        chosen_pred, clasp_filename, clasp_is_timestamped,
        clasp_is_optimizing, edge_id_map)

    # Create directories for the JSON files, if they do not exist.
    mkpath(os.path.dirname(json_data_filename))
    mkpath(os.path.dirname(json_time_filename))
    mkpath(os.path.dirname(json_soln_filename))

    # Save the JSON files.
    # Write the graph to a JSON file.
    with open(json_data_filename, 'w') as fh:
        nxjson.dump(visjs_json_dict, fh, indent=2)
    # Write the timings to a JSON file.
    with open(json_time_filename, 'w') as fh:
        nxjson.dump(timing_dict, fh, indent=2)
    # Write the solutions to a JSON file.
    with open(json_soln_filename, 'w') as fh:
        nxjson.dump(answer_sets_dict, fh, indent=2)

    return (True, 'Success')
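# Note on the three writes above: visjs_json_dict, timing_dict and
# answer_sets_dict are plain Python dicts, so assuming nxjson.dump simply
# JSON-encodes the object it is given, the standard-library call sketched
# below would write equivalent files (the write_json helper is illustrative,
# not part of the original module).
import json

def write_json(obj, filename):
    with open(filename, 'w') as fh:
        json.dump(obj, fh, indent=2)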
def main():
    LOGGER.setLevel(logging.DEBUG)
    APILOGGER.setLevel(logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('seed', metavar='U', type=str, nargs='+',
                        help='seed users')
    parser.add_argument('--saved', dest='savefile', metavar='SAVED', type=str,
                        help='saved progress .json', default='')
    args = parser.parse_args()

    oauth = {}
    with open('oauth.json') as raw:
        oauth = json.load(raw)
    tapi = api.Wrapper(oauth['access'], oauth['accessSecret'],
                       oauth['consumer'], oauth['consumerSecret'])

    if args.savefile:
        with open(args.savefile) as saved:
            graph = nxjson.load(saved)
    else:
        graph = nx.DiGraph()

    seed = set(map(int, args.seed))
    users = seed.union(*[tapi.followers(acct) for acct in seed])
    graph.add_nodes_from(users)
    users = graph.nodes()

    progress = 0
    total = 0
    for user in users:
        total += 1
        if graph.node[user].get('username'):
            continue
        info = tapi.info(user)
        if not info.get('screen_name') or info.get('protected'):
            graph.remove_node(user)
            continue
        progress += 1
        LOGGER.debug('Adding user %s aka @%s', user, info.get('screen_name'))
        graph.add_node(user, name=info.get('name', ''), location=info.get('location', ''),
                       followers=info.get('followers_count', 0), lang=info.get('lang', ''),
                       following=info.get('friends_count', 0), username=info.get('screen_name', ''),
                       protected=info.get('protected'))
        if progress == 100:
            progress = 0
            with open('tmp.json', 'w') as garph:
                nxjson.dump(graph, garph)
            LOGGER.info('Saved info for %s/%s users', total, len(users))
    LOGGER.info('Info collected')

    with open('garph.json', 'w') as garph:
        nxjson.dump(graph, garph)

    nodeSet = set(graph.nodes())
    users = graph.nodes()  # Removed some nodes earlier, can't try to access them
    progress = 0
    while users:
        # For some reason there are no edges to the seed users, but they are in
        # users because we got their data.
        user = users.pop()
        if graph.node[user].get('complete'):
            continue
        graph.node[user]['complete'] = True
        try:
            followers = tapi.followers(user)
        except api.NoDataError:
            continue
        relevant = followers & nodeSet
        graph.node[user]['follower_ids'] = ','.join(map(str, followers))
        LOGGER.debug('Adding edges for user %s aka @%s', user, graph.node[user]['username'])
        graph.add_edges_from([(follower, user) for follower in relevant])
        progress += 1
        if progress == 5:
            progress = 0
            with open('tmp.json', 'w') as garph:
                nxjson.dump(graph, garph)
            LOGGER.info('Saved graph with %s user edgesets remaining', len(users))

    with open('garph.json', 'w') as garph:
        nxjson.dump(graph, garph)
    nx.write_gexf(graph, 'garph.gexf')
    LOGGER.info('Edges collected')
    LOGGER.info('Job complete')
def dbpedia_graph(self, redirects=True):
    """
    Generate a dbpedia category TREE using networkx
    :rtype: nx.Graph
    """
    import tempfile
    import requests
    from networkx.readwrite import json_graph

    tmpdir = tempfile.gettempdir()
    if redirects:
        graph_object = tmpdir + '/' + str(self.id) + 'redirects.' + '.dbpedia.json'
    else:
        graph_object = tmpdir + '/' + str(self.id) + '.dbpedia.json'

    if not os.path.exists(graph_object):
        stop_uris_set = open(settings.ABS_PATH('stop_uri.txt')).read().split()
        stop_uris_set = set([x.split('/')[-1] for x in stop_uris_set])

        def recurse_populate_graph(resource, graph, depth):
            if resource in stop_uris_set:
                return
            if depth == 0:
                return
            if 'Category' in resource:
                query = u'SELECT ?broader, ?related, ?broaderof WHERE' \
                        u' {{{{ <http://dbpedia.org/resource/{0}> skos:broader ?broader }}' \
                        u' UNION {{ ?broaderof skos:broader <http://dbpedia.org/resource/{0}> }}' \
                        u' UNION {{ ?related skos:related <http://dbpedia.org/resource/{0}> }}' \
                        u' UNION {{ <http://dbpedia.org/resource/{0}> skos:related ?related }}}}'.format(resource)
                results = []
                sparql = SPARQLWrapper("http://dbpedia.org/sparql")
                sparql.setReturnFormat(JSON)
                sparql.setQuery(query)
                results.extend(sparql.query().convert()['results']['bindings'])
                for result in results:
                    for rel_type, value in result.iteritems():
                        uri = value['value']
                        parent_resource = uri.split('/')[-1]
                        #print ' ' * (3 - depth), resource, '->', parent_resource
                        graph.add_edge(resource, parent_resource, type=rel_type)
                        recurse_populate_graph(parent_resource, graph, depth - 1)
            else:
                if resource == 'cumulative gain':
                    resource = 'Discounted_cumulative_gain'
                elif resource == 'world wide web conference':
                    resource = 'International_World_Wide_Web_Conference'
                wiki_cat_query = u'http://en.wikipedia.org/w/api.php?action=query&titles={0}&prop=categories&cllimit=50&clshow=!hidden&format=json&redirects'
                results = json.loads(requests.get(wiki_cat_query.format(resource)).text)['query']['pages'].values()[0]
                if 'missing' in results:
                    results = json.loads(requests.get(wiki_cat_query.format(resource.title())).text)['query']['pages'].values()[0]
                    if 'missing' in results:
                        print results, resource
                        results = []
                    else:
                        results = [c['title'].replace(' ', '_') for c in results['categories']]
                else:
                    results = [c['title'].replace(' ', '_') for c in results['categories']]
                rel_type = "subject"
                for parent_resource in results:
                    #print ' ' * (3 - depth), resource, '->', parent_resource
                    graph.add_edge(resource, parent_resource, type=rel_type)
                    recurse_populate_graph(parent_resource, graph, depth - 1)

        import networkx as nx
        from SPARQLWrapper import SPARQLWrapper, JSON

        graph = nx.Graph()
        ngrams = set(self.articlecollocation_set.values_list('ngram', flat=True))
        ngrams = self.CollocationModel.COLLECTION_MODEL.objects.filter(ngram__in=ngrams)
        for ngram in ngrams:
            if 'dbpedia' in ngram.source or (redirects and 'wiki_redirect' in ngram.source):
                recurse_populate_graph(ngram.ngram, graph, 2)
        json_graph.dump(graph, open(graph_object, 'w'))
    else:
        graph = json_graph.load(open(graph_object))
    return graph
        advisor_id = match.split('id=')[1].split('"')[0]
        current = current.split(match)[1]
        advisor_name = current.split('</a>')[0]
        advisors[advisor_id] = advisor_name

    author_id = name + author_id
    if not G.has_node(author_id):
        G.add_node(author_id, name=name)
    for advisor in advisors:
        advisor_id = advisors[advisor] + advisor
        if not G.has_node(advisor_id):
            G.add_node(advisor_id, name=advisors[advisor])
        if not G.has_edge(advisor_id, author_id):
            G.add_edge(advisor_id, author_id)
    done = (i / total_files) * 100
    if i % 300 == 0:
        print done

print 'Graph created'
print 'Nodes:'
print len(G.nodes())
print 'Edges:'
print len(G.edges())
print 'Writing file...'
nx.write_gexf(G, './data/genealogy.gexf')
with open('./data/genealogy.json', 'w') as outfile:
    json_graph.dump(G, outfile)
nx.write_edgelist(G, './data/genealogy.csv')
print 'done'
def save_output_graph(contacts, residues, residues_info, prg, options):
    descriptions_dict = load_json(options.descriptions_dict)

    g = nx.MultiDiGraph()
    for r in residues:
        r_id = ""
        if options.dont_normalize:
            r_id += r.get_parent().get_id()
        r_id += str(r.get_id()[1])
        resname = r.resname.strip()
        kwargs = {'resname': resname}
        if residues_info.has_key(r_id):
            kwargs['conf'] = residues_info[r_id]['conf']
        g.add_node(r_id, **kwargs)

    if prg == '--':
        edge_type = 'dist'
    else:
        edge_type = 'contact'

    all_nodes = set(g.nodes())
    for (num1, num2), (r1, r2), desc in contacts:
        tmp_desc = desc
        if prg == '--':
            short_desc = 'close-doublet'
        else:
            if prg == 'MC':
                tmp_desc = re.sub("_\d+$", "", tmp_desc)
            elif prg == 'FR':
                if re.match('^n', tmp_desc):
                    tmp_desc = ""
                    desc = ""
            short_desc = descriptions_dict[prg].get(tmp_desc, 'UNK_SHORT_DESC')
        if prg == "RV":
            # RNA-View uppercases all chains, so we should check the lowercase version
            _num1 = num1[0].lower() + num1[1:]
            _num2 = num2[0].lower() + num2[1:]
            if num1 not in all_nodes and _num1 in all_nodes:
                num1 = _num1
            if num2 not in all_nodes and _num2 in all_nodes:
                num2 = _num2
        n_type = r1 + r2
        if desc != "":
            g.add_edge(num1, num2, type=edge_type, prg=prg, desc=short_desc,
                       full_desc=desc, n_type=n_type)
            if prg != 'FR':
                if prg == '--':
                    rev_short_desc = 'close-doublet'
                else:
                    rev_short_desc = reverse_desc(short_desc)
                g.add_edge(num2, num1, type=edge_type, prg=prg, desc=rev_short_desc,
                           full_desc="REV:" + desc, reverse=True, n_type=n_type[::-1])

    if re.match(r"^.*\.gz$", options.output_graph):
        f = gzip.open(options.output_graph, "w")
    else:
        f = open(options.output_graph, "w")
    json_graph.dump(g, f, indent=2)
    f.close()
def save_to_jsonfile(graph, filename):
    g = graph
    g_json = json_graph.node_link_data(g)
    json_graph.dump(g_json, open(filename, 'w'))
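# save_to_jsonfile() above converts the graph to node-link data and then passes
# that dict to json_graph.dump. On networkx versions that no longer ship
# json_graph.dump/load, a minimal sketch of the same save/reload round trip
# using node_link_data/node_link_graph and the standard json module could look
# like this; the helper names are illustrative, not from the original code.
import json
from networkx.readwrite import json_graph

def save_node_link(graph, filename):
    with open(filename, 'w') as fh:
        json.dump(json_graph.node_link_data(graph), fh)

def load_node_link(filename):
    with open(filename) as fh:
        return json_graph.node_link_graph(json.load(fh))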