def construct_traversals(root, node, visited, path):
    """Recursively record every association path from ``root`` to ``node``.

    Results accumulate in the module-level ``traversals`` mapping as
    ``traversals[root][node.label] -> set of dotted path strings``.

    :param root: key into the module-level ``traversals`` dict
    :param node: current Node subclass being visited
    :param visited: Node subclasses already on this path (no revisits)
    :param path: association names walked so far
    """
    # A neighbor is worth recursing into only if it keeps the walk acyclic
    # and the walk did not just pass THROUGH a terminal or '_related*'
    # association (checked on path[-1], or on the neighbor itself when the
    # path is still empty).
    recurse = lambda neighbor: (
        neighbor
        # no backtracking
        and neighbor not in visited
        and neighbor != node
        # no traveling THROUGH terminal nodes
        and (path[-1] not in terminal_nodes if path else neighbor.label not in terminal_nodes)
        and (not path[-1].startswith('_related') if path else not neighbor.label.startswith('_related')))
    # Follow edges where ``node`` is the source ...
    for edge in Edge._get_edges_with_src(node.__name__):
        neighbor = [n for n in Node.get_subclasses()
                    if n.__name__ == edge.__dst_class__][0]
        if recurse(neighbor):
            construct_traversals(
                root, neighbor, visited+[node], path+[edge.__src_dst_assoc__])
    # ... and edges where ``node`` is the destination.
    for edge in Edge._get_edges_with_dst(node.__name__):
        neighbor = [n for n in Node.get_subclasses()
                    if n.__name__ == edge.__src_class__][0]
        if recurse(neighbor):
            construct_traversals(
                root, neighbor, visited+[node], path+[edge.__dst_src_assoc__])
    # Record the dotted path for this node label, creating the set lazily.
    traversals[root][node.label] = traversals[root].get(node.label) or set()
    traversals[root][node.label].add('.'.join(path))
def grant_graph_permissions(engine, roles, grant_users):
    """Grant ``roles`` on every Node and Edge table to each listed user.

    :param engine: SQLAlchemy engine connected as a role allowed to GRANT
    :param roles: privilege list, e.g. ``'SELECT, INSERT'``
    :param grant_users: iterable of database user names to grant to
    """
    for grant_user in grant_users:
        for cls in Node.get_subclasses() + Edge.get_subclasses():
            stmt = "GRANT {roles} ON TABLE {table} TO {user};".format(
                roles=roles, table=cls.__tablename__, user=grant_user)
            # Parenthesized print works under both Python 2 and 3; the
            # original used a Python 2-only `print` statement.
            print(stmt.strip())
            # stmt already ends with ';', so this forms a valid batch.
            engine.execute(text("BEGIN;" + stmt + "COMMIT;"))
def execute_for_all_graph_tables(engine, sql, *args, **kwargs):
    """Execute a SQL statment that has a python format variable {table}
    to be replaced with the tablename for all Node and Edge tables
    """
    graph_classes = Node.__subclasses__() + Edge.__subclasses__()
    for graph_cls in graph_classes:
        # Merge the caller's kwargs with this class's table name, then
        # format and run the statement.
        fmt_args = dict(kwargs)
        fmt_args['table'] = graph_cls.__tablename__
        execute(engine, sql.format(**fmt_args))
def tearDownClass(cls):
    """Recreate the database for tests that follow.
    """
    cls.create_all_tables()
    # Re-grant permissions to test user
    grant_template = "GRANT ALL PRIVILEGES ON TABLE {} TO test"
    for subclass in Node.__subclasses__() + Edge.__subclasses__():
        statment = grant_template.format(subclass.__tablename__)
        cls.engine.execute('BEGIN; %s; COMMIT;' % statment)
def _clear_tables(self):
    """Delete every row from all graph tables and the voided-row tables.

    Skips the abstract base tables (``Node.__tablename__`` /
    ``Edge.__tablename__``) and only clears the concrete subclass tables.
    """
    conn = g.engine.connect()
    conn.execute('commit')
    # Call get_subclass_table_names on the class itself, consistent with
    # the Edge loop below (original needlessly instantiated Node()).
    for table in Node.get_subclass_table_names():
        if table != Node.__tablename__:
            conn.execute('delete from {}'.format(table))
    for table in Edge.get_subclass_table_names():
        if table != Edge.__tablename__:
            conn.execute('delete from {}'.format(table))
    conn.execute('delete from _voided_nodes')
    conn.execute('delete from _voided_edges')
    conn.close()
def construct_traversals_from_node(root_node, app):
    """Iteratively enumerate all traversal paths starting at ``root_node``.

    :param root_node: Node subclass to start walking from
    :param app: object used to cache a ``name_to_subclass`` mapping
    :returns: dict of ``label -> list of dotted association-path strings``,
        only for labels that have at least one path
    """
    traversals = {node.label: set() for node in Node.get_subclasses()}
    # Stack of (node, path-of-association-names, visited-node-classes).
    # (Removed a dead `path = []` assignment that was immediately
    # overwritten inside the loop.)
    to_visit = [(root_node, [], [])]
    while to_visit:
        node, path, visited = to_visit.pop()
        if path:
            path_string = '.'.join(path)
            if path_string in traversals[node.label]:
                continue
            traversals[node.label].add(path_string)
            # stop at terminal nodes
            if path[-1] in terminal_nodes:
                continue
        # Don't walk back up the tree
        if not is_valid_direction(node, visited or [root_node]):
            continue
        # Lazily build and cache the class-name -> class mapping on `app`.
        name_to_subclass = getattr(app, 'name_to_subclass', None)
        if name_to_subclass is None:
            name_to_subclass = app.name_to_subclass = {
                n.__name__: n for n in Node.get_subclasses()
            }
        neighbors_dst = {(name_to_subclass[edge.__dst_class__],
                          edge.__src_dst_assoc__)
                         for edge in Edge._get_edges_with_src(node.__name__)
                         if name_to_subclass[edge.__dst_class__]}
        neighbors_src = {(name_to_subclass[edge.__src_class__],
                          edge.__dst_src_assoc__)
                         for edge in Edge._get_edges_with_dst(node.__name__)
                         if name_to_subclass[edge.__src_class__]}
        to_visit.extend([
            (neighbor, path + [edge], visited + [node])
            for neighbor, edge in neighbors_dst.union(neighbors_src)
            if neighbor not in visited
        ])
    # .items() works on both Python 2 and 3; .iteritems() is Py2-only and
    # would crash this function under Python 3.
    return {
        label: list(paths)
        for label, paths in traversals.items()
        if paths
    }
def export_to_csv(self, data_dir, silent=False):
    """Export all non-deleted nodes and all edges to CSV files in ``data_dir``.

    Nodes go to per-type files via ``create_node_files``/``node_to_csv``;
    edges go to ``rels.csv`` as tab-separated ``start end type`` rows.
    Edges whose endpoints were not exported are skipped.

    :param data_dir: directory to write CSV files into
    :param silent: when True, suppress progress bars and counts
    """
    node_ids = dict()
    if not silent:
        i = 0
        node_count = self.psqlgraphDriver.nodes().not_sysan(
            {'to_delete': True}).count()
        print("Exporting {n} nodes:".format(n=node_count))
        if node_count != 0:
            pbar = self.start_pbar(node_count)
    edge_file = open(os.path.join(data_dir, 'rels.csv'), 'w')
    print('start\tend\ttype\t', file=edge_file)
    self.create_node_files(data_dir)
    batch_size = 1000
    id_count = 0
    for node_type in Node.get_subclasses():
        nodes = self.psqlgraphDriver.nodes(node_type).not_sysan(
            {'to_delete': True}).yield_per(batch_size)
        for node in nodes:
            self.convert_node(node)
            self.node_to_csv(str(id_count), node)
            # Remember the sequential export id so edges can refer to it.
            node_ids[node.node_id] = id_count
            id_count += 1
            if not silent and node_count != 0:
                i = self.update_pbar(pbar, i)
    if not silent and node_count != 0:
        self.update_pbar(pbar, node_count)
    self.close_files()
    if not silent:
        i = 0
        edge_count = self.psqlgraphDriver.get_edge_count()
        print("Exporting {n} edges:".format(n=edge_count))
        if edge_count != 0:
            # BUG FIX: the edge progress bar was started with node_count,
            # which made edge progress reporting wrong.
            pbar = self.start_pbar(edge_count)
    for edge_type in Edge.get_subclasses():
        edges = self.psqlgraphDriver.edges(edge_type).yield_per(batch_size)
        for edge in edges:
            src = node_ids.get(edge.src_id, '')
            dst = node_ids.get(edge.dst_id, '')
            # Only write edges whose both endpoints were exported above.
            if src != '' and dst != '':
                edge_file.write(
                    str(src) + '\t' + str(dst) + '\t' + edge.label + '\n')
            if not silent and edge_count != 0:
                i = self.update_pbar(pbar, i)
    edge_file.close()
    if not silent and edge_count != 0:
        self.update_pbar(pbar, edge_count)
def _run(connection):
    """Create any missing tables/indexes and migrate index uniqueness."""
    create_all(connection)
    # migrate indexes: map each existing index name to its uniqueness flag
    rows = connection.execute(
        "SELECT i.relname, ix.indisunique "
        "FROM pg_class i, pg_index ix "
        "WHERE i.oid = ix.indexrelid")
    exist_index_uniqueness = dict(iter(rows))
    for graph_cls in Node.__subclasses__() + Edge.__subclasses__():
        for index in graph_cls.__table__.indexes:
            if index.name not in exist_index_uniqueness:
                # create the missing index
                index.create(connection)
            elif index.unique != exist_index_uniqueness[index.name]:
                # recreate indexes whose uniqueness changed
                index.drop(connection)
                index.create(connection)
def import_metadata(self):
    """Validate and import this importer's metadata as a 'dataset' node.

    Creates a signpost document (with url + ark identifier), merges a
    dataset node keyed by the document id, and links it to each imported
    keyword node via a 'member_of' edge.
    """
    with self.driver.session_scope():
        if not self.validate_metadata():
            return
        doc = self.signpost.create()
        doc.urls = [self.metadata['url']]
        doc.identifiers = {
            'ark': self.search_identifier(
                'ark:/31807/osdc-' + doc.did.split('-')[0])
        }
        doc.patch()
        # 'url' and 'keywords' are handled above/below, not node props.
        properties = self.metadata.copy()
        del properties['url']
        del properties['keywords']
        node = Node(node_id=doc.did, label='dataset', properties=properties)
        self.driver.node_merge(node=node)
        keyword_nodes = self.import_keywords()
        for keyword in keyword_nodes:
            self.driver.edge_insert(
                Edge(node.node_id, keyword.node_id, 'member_of'))
        # Parenthesized print works under both Python 2 and 3; the
        # original used a Python 2-only `print` statement.
        print('metadata %s created' % doc.did)
def create_indexes(host, user, password, database):
    """Create lookup indexes on every Node and Edge table.

    :param host: postgres host
    :param user: postgres user
    :param password: postgres password
    :param database: database name
    """
    print('Creating indexes')
    engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format(
        user=user, host=host, pwd=password, db=database))

    def index(table, columns):
        # One CREATE INDEX statement per column expression.
        return ["CREATE INDEX ON {} ({})".format(table, col)
                for col in columns]

    # BUG FIX: the original used bare map(engine.execute, ...). Under
    # Python 3 map() is lazy, so none of the statements ever executed.
    # Explicit loops run the statements on both Python 2 and 3.
    for scls in Node.get_subclasses():
        tablename = scls.__tablename__
        for statement in index(tablename, ['node_id']):
            engine.execute(statement)
        for statement in [
                "CREATE INDEX ON {} USING gin (_sysan)".format(tablename),
                "CREATE INDEX ON {} USING gin (_props)".format(tablename),
                "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename),
        ]:
            engine.execute(statement)
    for scls in Edge.get_subclasses():
        for statement in index(scls.__tablename__,
                               ['src_id', 'dst_id', 'dst_id, src_id']):
            engine.execute(statement)
def create_indexes(host, user, password, database):
    """Create lookup indexes on every Node and Edge table.

    :param host: postgres host
    :param user: postgres user
    :param password: postgres password
    :param database: database name
    """
    print('Creating indexes')
    engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format(
        user=user, host=host, pwd=password, db=database))

    def index(table, columns):
        # One CREATE INDEX statement per column expression.
        return ["CREATE INDEX ON {} ({})".format(table, col)
                for col in columns]

    # BUG FIX: the original used bare map(engine.execute, ...). Under
    # Python 3 map() is lazy, so none of the statements ever executed.
    # Explicit loops run the statements on both Python 2 and 3.
    for scls in Node.get_subclasses():
        tablename = scls.__tablename__
        for statement in index(tablename, ['node_id']):
            engine.execute(statement)
        for statement in [
                "CREATE INDEX ON {} USING gin (_sysan)".format(tablename),
                "CREATE INDEX ON {} USING gin (_props)".format(tablename),
                "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename),
        ]:
            engine.execute(statement)
    for scls in Edge.get_subclasses():
        for statement in index(scls.__tablename__,
                               ['src_id', 'dst_id', 'dst_id, src_id']):
            engine.execute(statement)
def create_indexes(host, port, user, password, database, use_ssl=False):
    """Create lookup indexes on every Node and Edge table."""
    print("Creating indexes")
    # added for Postgresql SSL
    connect_args = {"sslmode": "require"} if use_ssl else {}
    engine = create_engine(
        _get_connection_string(user=user, password=password, host=host,
                               port=port, database=database),
        connect_args=connect_args,
    )

    def build_index_sql(table, columns):
        # One CREATE INDEX statement per column expression.
        return ["CREATE INDEX ON {} ({})".format(table, col)
                for col in columns]

    for node_cls in Node.get_subclasses():
        table = node_cls.__tablename__
        for sql in build_index_sql(table, ["node_id"]):
            engine.execute(sql)
        for sql in (
                "CREATE INDEX ON {} USING gin (_sysan)".format(table),
                "CREATE INDEX ON {} USING gin (_props)".format(table),
                "CREATE INDEX ON {} USING gin (_sysan, _props)".format(table),
        ):
            engine.execute(sql)
    for edge_cls in Edge.get_subclasses():
        for sql in build_index_sql(edge_cls.__tablename__,
                                   ["src_id", "dst_id", "dst_id, src_id"]):
            engine.execute(sql)
#!/usr/bin/env python from psqlgraph import Node, Edge from gdcdatamodel import models as md CACHE_EDGES = { Node.get_subclass_named(edge.__src_class__): edge for edge in Edge.get_subclasses() if 'RelatesToCase' in edge.__name__ } LEVEL_1_SQL = """ INSERT INTO {cache_edge_table} (src_id, dst_id, _props, _sysan, acl) SELECT {cls_table}.node_id, node_case.node_id, '{{}}'::jsonb, '{{}}'::jsonb, '{{}}'::text[] FROM {cls_table} -- Step directly to case JOIN {cls_to_case_edge_table} ON {cls_table}.node_id = {cls_to_case_edge_table}.src_id JOIN node_case ON node_case.node_id = {cls_to_case_edge_table}.dst_id -- Append only, e.g. insert only those missing WHERE NOT EXISTS ( SELECT 1 FROM {cache_edge_table} WHERE {cls_table}.node_id = {cache_edge_table}.src_id AND node_case.node_id = {cache_edge_table}.dst_id) """ APPEND_CACHE_FROM_PARENT_SQL = """
related_cases_from_cache, related_cases_from_parents, ) logger = get_logger('gdcdatamodel') # These are properties that are defined outside of the JSONB column in # the database, inform later code to skip these excluded_props = ['id', 'type'] # At module load time, evaluate which classes have already been # registered as subclasses of the abstract bases Node and Edge to # prevent double-registering loaded_nodes = [c.__name__ for c in Node.get_subclasses()] loaded_edges = [c.__name__ for c in Edge.get_subclasses()] def remove_spaces(s): """Returns a stripped string with all of the spaces removed. :param str s: String to remove spaces from """ return s.replace(' ', '') def register_class(cls): """Register a class in `globals`. This allows us to import the ORM classes from :mod:`gdcdatamodel.models`
cache_related_cases_on_delete, related_cases_from_cache, related_cases_from_parents, ) logger = get_logger('gdcdatamodel') # These are properties that are defined outside of the JSONB column in # the database, inform later code to skip these excluded_props = ['id', 'type'] # At module load time, evaluate which classes have already been # registered as subclasses of the abstract bases Node and Edge to # prevent double-registering loaded_nodes = [c.__name__ for c in Node.get_subclasses()] loaded_edges = [c.__name__ for c in Edge.get_subclasses()] def remove_spaces(s): """Returns a stripped string with all of the spaces removed. :param str s: String to remove spaces from """ return s.replace(' ', '') def register_class(cls): """Register a class in `globals`. This allows us to import the ORM classes from :mod:`gdcdatamodel.models`