def pull(self):
    query = CypherQuery(self.graph, "START a=node({a}) RETURN a")
    results = query.execute(a=self._id)
    node, = results[0].values
    super(Node, self).properties.clear()
    super(Node, self).properties.update(node.properties)
    self._Node__stale.clear()
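A minimal usage sketch for pull(), assuming a local Neo4j server and the py2neo 1.6 API used throughout these snippets; the node and its properties are hypothetical:

from py2neo import neo4j

graph = neo4j.GraphDatabaseService()
alice, = graph.create({"name": "Alice"})  # hypothetical node
# ... the node may be modified server-side by another client ...
alice.pull()  # re-read server-side properties into the local cache
print alice["name"]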
def get_movie(title):
    query = CypherQuery(graph,
        "MATCH (movie:Movie {title:{title}}) "
        "OPTIONAL MATCH (movie)<-[r]-(person:Person) "
        "RETURN movie.title as title,"
        "collect([person.name, head(split(lower(type(r)),'_')), r.roles]) as cast "
        "LIMIT 1")
    results = query.execute(title=title)
    row = results.data[0]
    return {"title": row["title"],
            "cast": [dict(zip(("name", "job", "role"), member))
                     for member in row["cast"]]}
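For orientation, calling this against the standard Neo4j movies dataset would return a dict shaped roughly like the following (values indicative only):

# get_movie("The Matrix")
# => {"title": "The Matrix",
#     "cast": [{"name": "Keanu Reeves", "job": "acted", "role": [u"Neo"]},
#              ...]}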
def cypher_prop(self):
    # noinspection PyUnresolvedReferences
    from py2neo.neo4j import CypherQuery
    cypher_ns = SimpleNamespace()
    cypher_ns.stream = six.create_bound_method(
        lambda s, q, **ps: CypherQuery(s, q).stream(**ps), self.graph_db)
    cypher_ns.execute = six.create_bound_method(
        lambda s, q, **ps: CypherQuery(s, q).execute(**ps), self.graph_db)
    return cypher_ns
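Assuming the method above is exposed as a property (say `cypher`) on a wrapper object holding `graph_db` -- only the method body is shown, so the attribute name is an assumption -- usage would look like:

# ns = wrapper.cypher                       # hypothetical property access
# results = ns.execute("MATCH (n) RETURN count(n)")
# for record in ns.stream("MATCH (n) RETURN n LIMIT 10"):
#     print record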
def get_search():
    try:
        q = request.query["q"]
    except KeyError:
        return []
    else:
        query = CypherQuery(graph,
            "MATCH (movie:Movie) "
            "WHERE movie.title =~ {title} "
            "RETURN movie")
        results = query.execute(title="(?i).*" + q + ".*")
        response.content_type = "application/json"
        return json.dumps([{"movie": row["movie"].get_cached_properties()}
                           for row in results.data])
def unity():
    results = CypherQuery(
        graph,
        'MATCH (game)-[:USES_MECHANIC]->(mechanic) '
        'WHERE mechanic.mechanic = "Trick-taking" '
        'RETURN game.bgg_name order by game.bgg_name'
    ).execute()
    return render_template("games.html", games=results)
def export_funded_relationships_to_csv(self, out_file_name='funded_relations.tab',
                                       limit=9999999):
    """Export edges to csv file to be read in with Gephi.

    Abstraction layer for export_relations_to_csv."""
    rel_type = 'funded'
    query_str = ('match (a)-[r:' + rel_type + ']->(b) '
                 'return a.permalink as source, r, b.permalink as target, '
                 'id(r) as id')
    initial_dict = {'label': rel_type, 'source_id': ''}
    funded_fields = [
        u'source', u'target', u'type', u'source_id', u'id', u'label', u'name',
        u'category_code', u'crunchbase_url', u'round_code', u'raised_amount',
        u'permalink', u'source_url', u'raised_currency_code', u'funded_year'
    ]
    # Source descriptions have lots of non-standard characters--u'source_description',
    # Probably don't need: u'funded_month', u'funded_day'
    result = CypherQuery(
        self, 'match ()-[r:' + rel_type + ']->() return count(r);').execute()
    count, = result.data[0].values
    count = min(count, limit)
    print 'Count rels', count
    self.export_relations_to_csv('funded', query_str, count, out_file_name,
                                 funded_fields, initial_dict, sep='\n')
def export_company_node_to_csv(self, out_file_name='company_nodes.tab',
                               limit=9999999):
    """Export company nodes to csv file to be read in with Gephi.

    Abstraction layer for export_nodes_to_csv."""
    node_type = 'company'
    query_str = 'match (n:' + node_type + ') return n '
    initial_dict = {'label': node_type}
    company_fields = [
        u'nodes', u'id', u'label', u'name', u'category_code',
        u'crunchbase_url', u'description', u'number_of_employees',
        u'alias_list', u'deadpooled_year', u'total_money_raised', u'error',
        u'founded_year'
    ]
    # Probably don't need: u'deadpooled_month', u'created_at', u'updated_at',
    # u'founded_day', u'deadpooled_day', u'deadpooled_url', u'twitter_username',
    # u'homepage_url', u'blog_url', u'blog_feed_url', u'founded_month',
    # u'email_address',
    result = CypherQuery(
        self, 'match (n:' + node_type + ') return count(n);').execute()
    count, = result.data[0].values
    count = min(count, limit)
    print '\nBeginning export of {} company nodes'.format(count)
    self.export_nodes_to_csv('company', query_str, count, out_file_name,
                             company_fields, initial_dict, sep='\n')
def export_financial_nodes_to_csv(self, out_file_name='financial_nodes.tab',
                                  limit=9999999):
    """Export financial institution nodes to csv file to be read in with Gephi.

    Abstraction layer for export_nodes_to_csv."""
    node_type = 'funder'
    query_str = 'match (n:' + node_type + ') return n '
    initial_dict = {'label': node_type}
    funder_fields = [
        u'nodes', u'id', u'label', u'name', u'permalink', u'crunchbase_url',
        u'homepage_url', u'description', u'overview', u'twitter_username',
        u'founded_year', u'alias_list', u'tag_list', u'deadpooled_month',
        u'deadpooled_year', u'total_money_raised', u'error'
    ]
    # Don't need: u'blog_url', u'blog_feed_url', u'phone_number',
    # u'email_address', u'founded_month', u'created_at', u'updated_at',
    # u'founded_day', u'deadpooled_day', u'deadpooled_url',
    result = CypherQuery(
        self, 'match (n:' + node_type + ') return count(n);').execute()
    count, = result.data[0].values
    count = min(count, limit)
    print '\nBeginning export of {} financial-institution nodes'.format(count)
    self.export_nodes_to_csv(node_type, query_str, count, out_file_name,
                             funder_fields, initial_dict, sep='\n')
def export_person_nodes_to_csv(self, out_file_name='person_nodes.tab',
                               limit=9999999):
    """Export person nodes to csv file to be read in with Gephi.

    Abstraction layer for export_nodes_to_csv."""
    node_type = 'person'
    query_str = 'match (n:' + node_type + ') return n'
    initial_dict = {'label': node_type}
    person_fields = [
        u'nodes', u'id', u'label', u'first_name', u'last_name',
        u'affiliation_name', u'alias_list', u'crunchbase_url', u'born_year'
    ]
    # Probably don't need: u'created_at', u'updated_at', u'twitter_username',
    # u'blog_feed_url', u'blog_url', u'born_month', u'homepage_url', u'born_day',
    result = CypherQuery(
        self, 'match (n:' + node_type + ') return count(n);').execute()
    count, = result.data[0].values
    count = min(count, limit)
    print '\nBeginning export of {} person nodes'.format(count)
    self.export_nodes_to_csv(node_type, query_str, count, out_file_name,
                             person_fields, initial_dict, sep='\n')
def execute(graph, query, params=None, row_handler=None,
            metadata_handler=None, error_handler=None):
    query = CypherQuery(graph, query)
    data, metadata = [], None
    try:
        results = query.execute(**params or {})
    except CypherError as err:
        if error_handler:
            error_handler(err.message, err.exception, err.stack_trace)
        else:
            raise
    else:
        metadata = Metadata(results.columns)
        if metadata_handler:
            metadata_handler(metadata)
        if row_handler:
            for record in results:
                row_handler(list(record))
            return data, metadata
        else:
            return [list(record) for record in results], metadata
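A minimal sketch of the handler-based calling convention above; `graph` is assumed to be a GraphDatabaseService as in the other snippets, and the callback signatures follow the code:

def print_row(row):
    print row

def print_metadata(metadata):
    print metadata.columns  # Metadata is built from results.columns

data, metadata = execute(graph, "MATCH (n) RETURN n LIMIT 5",
                         row_handler=print_row,
                         metadata_handler=print_metadata)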
def get_graph():
    query = CypherQuery(graph,
        "MATCH (m:Movie)<-[:ACTED_IN]-(a:Person) "
        "RETURN m.title as movie, collect(a.name) as cast "
        "LIMIT {limit}")
    results = query.execute(limit=request.query.get("limit", 100))
    nodes = []
    rels = []
    i = 0
    for movie, cast in results.data:
        nodes.append({"title": movie, "label": "movie"})
        target = i
        i += 1
        for name in cast:
            actor = {"title": name, "label": "actor"}
            try:
                source = nodes.index(actor)
            except ValueError:
                nodes.append(actor)
                source = i
                i += 1
            rels.append({"source": source, "target": target})
    return {"nodes": nodes, "links": rels}
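The dict returned above is the node/link shape consumed by D3-style force layouts; an indicative (hypothetical) result:

# {"nodes": [{"title": "The Matrix", "label": "movie"},
#            {"title": "Keanu Reeves", "label": "actor"}],
#  "links": [{"source": 1, "target": 0}]}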
def export_nodes_to_csv(self, type, query_str, count, out_file, fields,
                        initial_dict={}, sep=','):
    """Export general nodes to csv for import to Gephi.

    :param str type: person, funder, or company
    :param str query_str: cypher query string to return the nodes or relations
    :param str out_file: output file
    :param list fields: fields to write
    :param dict initial_dict: dict with any vars not in node
    :param str sep: separator to use in output file
    :rtype None:
    """
    field_set = set()
    header_set = set()
    n_exported = 0
    n_errors = 0
    query_size = 5000
    with open(out_file, 'wb') as fil:
        dw = csv.DictWriter(fil, fields, extrasaction='ignore',
                            dialect='excel-tab')
        # Generate and write the header line
        header = dict()
        for txt in fields:
            header[txt] = txt
            header_set.add(txt)
        dw.writerow(header)
        for first in xrange(0, count, query_size):
            query_str_with_limits = (query_str + ' skip ' + str(first) +
                                     ' limit ' + str(query_size) + ';')
            query = CypherQuery(self, query_str_with_limits)
            for item in query.stream():
                for anode in item:
                    anode = item.values[0]
                    try:
                        d = copy.copy(initial_dict)
                        anode['permalink'] = self.get_permalink(anode)
                        d['nodes'] = anode['permalink']
                        d['id'] = anode._id
                        nd = dict()
                        for key, val in anode.get_properties().iteritems():
                            field_set.add(key)
                            nd[key] = val
                        d.update(nd)
                        dw.writerow(d)
                        n_exported += 1
                        if (n_exported % 1000) == 0:
                            print 'Nodes exported ', n_exported
                    except UnicodeEncodeError as uee:
                        n_errors += 1
                        print 'Unicode Error Inside on Nodes', uee.args
                    except ValueError as ve:
                        n_errors += 1
                        print 'Value Error Inside on Nodes', ve.args
    print '\nExport of {} {} nodes complete. There were {} errors.'.format(
        n_exported, type, n_errors)
    print ' Unexported fields: {}'.format(field_set - header_set)
def export_relations_to_csv(self, type, query_str, count, out_file, fields,
                            initial_dict={}, sep=','):
    """Export general relationships to csv for import to Gephi.

    :param str type: person, funder, or company
    :param str query_str: cypher query string to return the nodes or relations
    :param str out_file: output file
    :param list fields: fields to write
    :param dict initial_dict: dict with any vars not in node
    :param str sep: separator to use in output file
    :rtype None:
    """
    field_set = set()
    header_set = set()
    n_exported = 0
    n_errors = 0
    query_size = 2000
    with open(out_file, 'wb') as fil:
        dw = csv.DictWriter(fil, fields, extrasaction='ignore',
                            dialect='excel-tab')
        # Generate and write the header line
        header = dict()
        for txt in fields:
            header[txt] = txt
            header_set.add(txt)
        dw.writerow(header)
        for first in xrange(0, count, query_size):
            query_str_with_limits = (query_str + ' skip ' + str(first) +
                                     ' limit ' + str(query_size) + ';')
            query = CypherQuery(self, query_str_with_limits)
            for relationship in query.stream():
                # print 'try to pull out rels parts', len(relationship)
                rel_parts = relationship.values[1]
                # Copy so each row starts clean and the shared default
                # initial_dict is never mutated across rows
                d = copy.copy(initial_dict)
                d['label'] = rel_parts.type
                d['type'] = rel_parts.type
                d['source'] = self.encode_chars(rel_parts.start_node)
                d['target'] = self.encode_chars(rel_parts.end_node)
                d['permalink'] = (rel_parts.start_node['permalink'] + '__' +
                                  rel_parts.end_node['permalink'])
                edge_properties = self.encode_chars(rel_parts.get_properties())
                d['source']['overview'] = ''
                d['target']['overview'] = ''
                print 'Edge_props (cleaned)', edge_properties
                print ' Source props (cleaned)', d['permalink']
                # print ' Source', d['source']
                # TODO: unicode errors are in source or target node information
                try:
                    d['id'] = d['permalink']
                    for key in d:
                        field_set.add(key)
                    d.update(edge_properties)
                    dw.writerow(self.encode_chars(d))
                    n_exported += 1
                    if (n_exported % 1000) == 0:
                        print 'Relationships exported: ', n_exported
                except UnicodeEncodeError as uee:
                    n_errors += 1
                    print 'Unicode Error Inside in Export Relationships', uee.args
                except ValueError as err:
                    n_errors += 1
                    print 'Unknown Error Inside in Export Relationships', err.args
    print '\nExport of {} {} relationships complete. There were {} errors.'.format(
        n_exported, type, n_errors)
    print ' Unexported fields: {}'.format(field_set - header_set)
def date_range(self, start_date=None, end_date=None):
    """ Fetch the calendar node representing the date range defined by
    `start_date` and `end_date`. If either is unspecified, this defines
    an open-ended range. Either `start_date` or `end_date` must be
    specified.
    """
    #   (CAL)
    #     |
    # [:RANGE]
    #     |
    #     v
    # (START)<-[:START_DATE]-(RANGE)-[:END_DATE]->(END)
    range_ = GregorianCalendar.DateRange(start_date, end_date)
    start, end = range_.start_date, range_.end_date
    if start and end:
        # if start and end are equal, return the day node instead
        if (start.year, start.month, start.day) == (end.year, end.month, end.day):
            return start.get_node(self)
        if (start.year, start.month) == (end.year, end.month):
            root = self.month(start.year, start.month)
        elif start.year == end.year:
            root = self.year(start.year)
        else:
            root = self._calendar
        query = """\
        START z=node({z}), s=node({s}), e=node({e})
        CREATE UNIQUE (s)<-[:START_DATE]-(r {r})-[:END_DATE]->(e),
                      (z)-[:DATE_RANGE]->(r {r})
        RETURN r
        """
        params = {
            "z": root._id,
            "s": start.get_node(self)._id,
            "e": end.get_node(self)._id,
            "r": {
                "start_date": str(start),
                "end_date": str(end),
            },
        }
    elif start:
        query = """\
        START z=node({z}), s=node({s})
        CREATE UNIQUE (s)<-[:START_DATE]-(r {r}),
                      (z)-[:DATE_RANGE]->(r {r})
        RETURN r
        """
        params = {
            "z": self._calendar._id,
            "s": start.get_node(self)._id,
            "r": {
                "start_date": str(start),
            },
        }
    elif end:
        query = """\
        START z=node({z}), e=node({e})
        CREATE UNIQUE (r {r})-[:END_DATE]->(e),
                      (z)-[:DATE_RANGE]->(r {r})
        RETURN r
        """
        params = {
            "z": self._calendar._id,
            "e": end.get_node(self)._id,
            "r": {
                "end_date": str(end),
            },
        }
    else:
        raise ValueError("Either start or end date must be supplied "
                         "for a date range")
    return CypherQuery(self._graph, query).execute_one(**params)
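A hedged usage sketch, kept as comments because the GregorianCalendar constructor is not shown above and is therefore an assumption:

# from datetime import date
# calendar = GregorianCalendar(...)  # assumed setup
# r = calendar.date_range(date(2000, 1, 1), date(2000, 12, 31))
# # r is the range node returned by execute_one(); equal start and end
# # dates would instead return the single day node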
import json

from py2neo.neo4j import CypherQuery, GraphDatabaseService, WriteBatch
from py2neo import neo4j

db = neo4j.GraphDatabaseService()

business_index_query = CypherQuery(db, "CREATE INDEX ON :Business(id)")
business_index_query.execute()

category_index_query = CypherQuery(db, "CREATE INDEX ON :Category(name)")
category_index_query.execute()

create_business_query = '''
// MERGE ON categories
CREATE (b:Business {id: {business_id}, name: {name}, lat: {latitude},
                    lon: {longitude}, stars: {stars},
                    review_count: {review_count}})
'''

merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''

print "Beginning business batch"
with open('data/yelp_academic_dataset_business.json', 'r') as f:
    business_batch = WriteBatch(db)
    count = 0
    for b in (json.loads(l) for l in f):
        business_batch.append_cypher(create_business_query, b)
        count += 1
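The snippet ends with the loop still filling the batch; a py2neo 1.6 WriteBatch only reaches the server when it is submitted, so the full script presumably flushes it periodically, along these lines (interval is hypothetical):

# if count % 10000 == 0:
#     business_batch.submit()          # send accumulated statements to Neo4j
#     business_batch = WriteBatch(db)  # start a fresh batch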