示例#1
0
def construct_traversals(root, node, visited, path):
    """Recursively record the association paths that lead to ``node``.

    Every edge leaving ``node`` and then every edge entering it is
    followed into the neighbouring Node subclass, provided that
    neighbour passes ``_may_enter``.  After all neighbours have been
    explored, the dotted form of ``path`` is added to
    ``traversals[root][node.label]``.

    :param root: key into the module-level ``traversals`` mapping
    :param node: Node subclass currently being expanded
    :param list visited: Node subclasses already on the current walk
    :param list path: association names followed so far

    """

    def _subclass_named(class_name):
        # First Node subclass whose class name matches; an IndexError is
        # raised when there is no such subclass.
        matches = [n for n in Node.get_subclasses()
                   if n.__name__ == class_name]
        return matches[0]

    def _may_enter(candidate):
        if not candidate:
            return False
        # no backtracking
        if not (candidate not in visited and candidate != node):
            return False
        # The checks below look at the last association followed; on the
        # first hop (empty path) the neighbour's own label is used.
        last_step = path[-1] if path else candidate.label
        # no traveling THROUGH terminal nodes
        if last_step in terminal_nodes:
            return False
        return not last_step.startswith('_related')

    for edge in Edge._get_edges_with_src(node.__name__):
        target = _subclass_named(edge.__dst_class__)
        if _may_enter(target):
            construct_traversals(
                root, target, visited + [node],
                path + [edge.__src_dst_assoc__])

    for edge in Edge._get_edges_with_dst(node.__name__):
        source = _subclass_named(edge.__src_class__)
        if _may_enter(source):
            construct_traversals(
                root, source, visited + [node],
                path + [edge.__dst_src_assoc__])

    recorded = traversals[root]
    recorded[node.label] = recorded.get(node.label) or set()
    recorded[node.label].add('.'.join(path))
def grant_graph_permissions(engine, roles, grant_users):
    """Grant ``roles`` on every Node and Edge table to each user.

    One ``GRANT`` statement is printed and then executed per
    (user, table) pair, each wrapped in its own ``BEGIN``/``COMMIT``.

    :param engine: object whose ``execute`` method runs the statement
    :param roles: privilege list, interpolated verbatim into the GRANT
    :param grant_users: iterable of user names to grant to

    """
    # The list of graph tables does not depend on the user: look it up once.
    graph_classes = Node.get_subclasses() + Edge.get_subclasses()
    for grant_user in grant_users:
        for cls in graph_classes:
            # NOTE: roles, table and user are formatted straight into the
            # SQL text, so they must come from trusted configuration.
            stmt = "GRANT {roles} ON TABLE {table} TO {user};".format(
                roles=roles, table=cls.__tablename__, user=grant_user)
            # Parenthesised form is valid both as a Python 2 print
            # statement and as a Python 3 function call.
            print(stmt.strip())
            engine.execute(text("BEGIN;" + stmt + "COMMIT;"))
def execute_for_all_graph_tables(engine, sql, *args, **kwargs):
    """Execute a SQL statement once for every Node and Edge table.

    ``sql`` is a python format string; its ``{table}`` variable is
    replaced with the tablename of each Node and Edge subclass in turn.
    Any other format variables are filled from ``kwargs``.  Positional
    ``args`` are accepted but not used.

    """
    graph_classes = Node.__subclasses__() + Edge.__subclasses__()
    for graph_cls in graph_classes:
        format_params = dict(kwargs)
        format_params['table'] = graph_cls.__tablename__
        execute(engine, sql.format(**format_params))
    def tearDownClass(cls):
        """Recreate the database for tests that follow.

        All tables are created again, then the ``test`` user is granted
        all privileges on the table of every Node and Edge subclass.

        """
        cls.create_all_tables()

        # Re-grant permissions to test user
        grant_template = "GRANT ALL PRIVILEGES ON TABLE {} TO test"
        graph_classes = Node.__subclasses__() + Edge.__subclasses__()
        for graph_cls in graph_classes:
            grant = grant_template.format(graph_cls.__tablename__)
            cls.engine.execute('BEGIN; %s; COMMIT;' % grant)
示例#5
0
 def _clear_tables(self):
     """Delete all rows from the node, edge and voided tables.

     Every table reported by ``get_subclass_table_names`` is emptied,
     except the ones named ``Node.__tablename__`` and
     ``Edge.__tablename__``; ``_voided_nodes`` and ``_voided_edges``
     are emptied last.
     """
     conn = g.engine.connect()
     try:
         conn.execute('commit')
         # NOTE(review): Node is instantiated here while Edge is used as
         # a class below -- confirm whether the difference is intended.
         for table in Node().get_subclass_table_names():
             if table != Node.__tablename__:
                 conn.execute('delete from {}'.format(table))
         for table in Edge.get_subclass_table_names():
             if table != Edge.__tablename__:
                 conn.execute('delete from {}'.format(table))
         conn.execute('delete from _voided_nodes')
         conn.execute('delete from _voided_edges')
     finally:
         # Release the connection even when one of the statements fails.
         conn.close()
示例#6
0
 def _clear_tables(self):
     """Delete all rows from the node, edge and voided tables.

     Every table reported by ``get_subclass_table_names`` is emptied,
     except the ones named ``Node.__tablename__`` and
     ``Edge.__tablename__``; ``_voided_nodes`` and ``_voided_edges``
     are emptied last.
     """
     conn = g.engine.connect()
     try:
         conn.execute('commit')
         # NOTE(review): Node is instantiated here while Edge is used as
         # a class below -- confirm whether the difference is intended.
         for table in Node().get_subclass_table_names():
             if table != Node.__tablename__:
                 conn.execute('delete from {}'.format(table))
         for table in Edge.get_subclass_table_names():
             if table != Edge.__tablename__:
                 conn.execute('delete from {}'.format(table))
         conn.execute('delete from _voided_nodes')
         conn.execute('delete from _voided_edges')
     finally:
         # Release the connection even when one of the statements fails.
         conn.close()
示例#7
0
def construct_traversals_from_node(root_node, app):
    """Collect the association paths that lead from ``root_node``.

    The graph of Node subclasses is walked iteratively with an explicit
    stack (depth-first).  Each time a node is reached through a
    non-empty path, the dotted path string is recorded under that
    node's label; a (label, path) pair that was already recorded is not
    expanded again.

    :param root_node: Node subclass the walk starts from
    :param app: object on which the class-name -> Node subclass map is
        cached as ``app.name_to_subclass``
    :returns: dict mapping node label to a list of dotted path strings;
        labels for which no path was found are left out

    """
    traversals = {node.label: set() for node in Node.get_subclasses()}

    # The class-name lookup is identical for every step of the walk, so
    # fetch it (or build it and cache it on ``app``) once, up front.
    name_to_subclass = getattr(app, 'name_to_subclass', None)
    if name_to_subclass is None:
        name_to_subclass = app.name_to_subclass = {
            n.__name__: n
            for n in Node.get_subclasses()
        }

    to_visit = [(root_node, [], [])]
    while to_visit:
        node, path, visited = to_visit.pop()
        if path:
            path_string = '.'.join(path)
            if path_string in traversals[node.label]:
                continue
            traversals[node.label].add(path_string)
            # stop at terminal nodes
            if path[-1] in terminal_nodes:
                continue
        # Don't walk back up the tree
        if not is_valid_direction(node, visited or [root_node]):
            continue
        # NOTE(review): indexing raises KeyError for an unknown class
        # name, so the ``if`` filters below never actually skip an edge
        # -- confirm whether ``.get()`` was intended.
        neighbors_dst = {(name_to_subclass[edge.__dst_class__],
                          edge.__src_dst_assoc__)
                         for edge in Edge._get_edges_with_src(node.__name__)
                         if name_to_subclass[edge.__dst_class__]}
        neighbors_src = {(name_to_subclass[edge.__src_class__],
                          edge.__dst_src_assoc__)
                         for edge in Edge._get_edges_with_dst(node.__name__)
                         if name_to_subclass[edge.__src_class__]}
        to_visit.extend([
            (neighbor, path + [edge], visited + [node])
            for neighbor, edge in neighbors_dst.union(neighbors_src)
            if neighbor not in visited
        ])
    # ``items()`` works on both Python 2 and Python 3.
    return {
        label: list(paths)
        for label, paths in traversals.items() if paths
    }
示例#8
0
    def export_to_csv(self, data_dir, silent=False):
        """Export nodes and edges from the graph driver to CSV files.

        Nodes selected with ``not_sysan({'to_delete': True})`` are passed
        to ``self.node_to_csv`` under a sequential integer id.  Edges are
        written to ``rels.csv`` inside ``data_dir`` as tab-separated
        ``start``/``end``/``type`` rows that use those ids; an edge whose
        source or destination id was not exported is skipped.

        :param data_dir: directory that receives ``rels.csv``; also
            passed to ``self.create_node_files``
        :param bool silent: when true, no counts are printed and no
            progress bar is started or updated

        """
        node_ids = dict()
        if not silent:
            i = 0
            node_count = self.psqlgraphDriver.nodes().not_sysan({'to_delete': True}).count()
            print("Exporting {n} nodes:".format(n=node_count))
            if node_count != 0:
                pbar = self.start_pbar(node_count)

        batch_size = 1000
        id_count = 0
        # ``with`` closes rels.csv even when the export fails part-way.
        with open(os.path.join(data_dir, 'rels.csv'), 'w') as edge_file:
            print('start\tend\ttype\t', file=edge_file)
            self.create_node_files(data_dir)
            for node_type in Node.get_subclasses():
                nodes = self.psqlgraphDriver.nodes(node_type).not_sysan({'to_delete': True}).yield_per(batch_size)
                for node in nodes:
                    self.convert_node(node)
                    self.node_to_csv(str(id_count), node)
                    node_ids[node.node_id] = id_count
                    id_count += 1

                    if not silent and node_count != 0:
                        i = self.update_pbar(pbar, i)

                # NOTE(review): this runs after every node type, whereas
                # the matching edge update happens once after the edge
                # loop -- confirm which placement is intended.
                if not silent and node_count != 0:
                    self.update_pbar(pbar, node_count)

            self.close_files()
            if not silent:
                i = 0
                edge_count = self.psqlgraphDriver.get_edge_count()
                print("Exporting {n} edges:".format(n=edge_count))
                if edge_count != 0:
                    # Size the bar by the number of edges, not nodes.
                    pbar = self.start_pbar(edge_count)

            for edge_type in Edge.get_subclasses():
                edges = self.psqlgraphDriver.edges(edge_type).yield_per(batch_size)
                for edge in edges:
                    # '' marks an endpoint that was never exported.
                    src = node_ids.get(edge.src_id, '')
                    dst = node_ids.get(edge.dst_id, '')
                    if src != '' and dst != '':
                        edge_file.write(str(src)+'\t'+str(dst)+'\t'+edge.label+'\n')
                    if not silent and edge_count != 0:
                        i = self.update_pbar(pbar, i)

        if not silent and edge_count != 0:
            self.update_pbar(pbar, edge_count)
示例#9
0
    def _run(connection):
        """Create all tables, then bring the table indexes up to date.

        An index declared on a Node or Edge table is created when no
        index of that name exists, and dropped and created again when
        the uniqueness of the existing index differs from the declared
        one.
        """
        create_all(connection)

        # migrate indexes
        rows = connection.execute("SELECT i.relname, ix.indisunique "
                                  "FROM pg_class i, pg_index ix "
                                  "WHERE i.oid = ix.indexrelid")
        uniqueness_by_name = dict(iter(rows))
        graph_classes = Node.__subclasses__() + Edge.__subclasses__()
        for graph_cls in graph_classes:
            for index in graph_cls.__table__.indexes:
                existing = uniqueness_by_name.get(index.name, None)
                if existing is not None:
                    if not (index.unique != existing):
                        # present with matching uniqueness: nothing to do
                        continue
                    # recreate indexes whose uniqueness changed
                    index.drop(connection)
                # create the missing (or just dropped) index
                index.create(connection)
示例#10
0
 def import_metadata(self):
     """Create a ``dataset`` node from ``self.metadata`` and link keywords.

     Inside one driver session: the metadata is validated (nothing is
     done when validation fails), a signpost document is created and
     patched with the metadata URL and an ``ark`` identifier, a
     ``dataset`` node carrying the remaining metadata is merged, and a
     ``member_of`` edge is inserted from that node to every keyword node
     returned by ``self.import_keywords()``.
     """
     with self.driver.session_scope():
         if not self.validate_metadata():
             return

         doc = self.signpost.create()
         doc.urls=[self.metadata['url']]
         doc.identifiers = {
             'ark':self.search_identifier('ark:/31807/osdc-'+doc.did.split('-')[0])
         }
         doc.patch()
         # 'url' and 'keywords' are not stored as node properties.
         properties = self.metadata.copy()
         del properties['url']
         del properties['keywords']

         node = Node(node_id=doc.did,label='dataset',properties=properties)
         self.driver.node_merge(node=node)
         keyword_nodes = self.import_keywords()
         for keyword in keyword_nodes:
             self.driver.edge_insert(Edge(node.node_id,keyword.node_id,'member_of'))
         # Parenthesised form is valid both as a Python 2 print statement
         # and as a Python 3 function call.
         print('metadata %s created' % doc.did)
示例#11
0
def create_indexes(host, user, password, database):
    """Create indexes on every Node and Edge table.

    Node tables get an index on ``node_id`` plus GIN indexes on
    ``_sysan``, ``_props`` and ``(_sysan, _props)``.  Edge tables get
    indexes on ``src_id``, ``dst_id`` and ``(dst_id, src_id)``.

    :param host: database host used in the connection URL
    :param user: database user used in the connection URL
    :param password: password used in the connection URL
    :param database: database name used in the connection URL

    """
    print('Creating indexes')
    engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format(
        user=user, host=host, pwd=password, db=database))

    def column_indexes(table, columns):
        # One plain CREATE INDEX statement per entry in ``columns``.
        return ["CREATE INDEX ON {} ({})".format(table, x) for x in columns]

    def run_all(statements):
        # Explicit loop: a bare ``map`` is lazy on Python 3 and would
        # never execute the statements.
        for statement in statements:
            engine.execute(statement)

    for scls in Node.get_subclasses():
        tablename = scls.__tablename__
        run_all(column_indexes(tablename, [
            'node_id',
        ]))
        run_all([
            "CREATE INDEX ON {} USING gin (_sysan)".format(tablename),
            "CREATE INDEX ON {} USING gin (_props)".format(tablename),
            "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename),
        ])
    for scls in Edge.get_subclasses():
        run_all(column_indexes(scls.__tablename__, [
            'src_id',
            'dst_id',
            'dst_id, src_id',
        ]))
def create_indexes(host, user, password, database):
    """Create indexes on every Node and Edge table.

    Node tables get an index on ``node_id`` plus GIN indexes on
    ``_sysan``, ``_props`` and ``(_sysan, _props)``.  Edge tables get
    indexes on ``src_id``, ``dst_id`` and ``(dst_id, src_id)``.

    :param host: database host used in the connection URL
    :param user: database user used in the connection URL
    :param password: password used in the connection URL
    :param database: database name used in the connection URL

    """
    print('Creating indexes')
    engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format(
        user=user, host=host, pwd=password, db=database))

    def column_indexes(table, columns):
        # One plain CREATE INDEX statement per entry in ``columns``.
        return ["CREATE INDEX ON {} ({})".format(table, x) for x in columns]

    def run_all(statements):
        # Explicit loop: a bare ``map`` is lazy on Python 3 and would
        # never execute the statements.
        for statement in statements:
            engine.execute(statement)

    for scls in Node.get_subclasses():
        tablename = scls.__tablename__
        run_all(column_indexes(tablename, [
            'node_id',
        ]))
        run_all([
            "CREATE INDEX ON {} USING gin (_sysan)".format(tablename),
            "CREATE INDEX ON {} USING gin (_props)".format(tablename),
            "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename),
        ])
    for scls in Edge.get_subclasses():
        run_all(column_indexes(scls.__tablename__, [
            'src_id',
            'dst_id',
            'dst_id, src_id',
        ]))
示例#13
0
def create_indexes(host, port, user, password, database, use_ssl=False):
    """Create indexes on every Node and Edge table.

    Node tables get an index on ``node_id`` plus GIN indexes on
    ``_sysan``, ``_props`` and ``(_sysan, _props)``.  Edge tables get
    indexes on ``src_id``, ``dst_id`` and ``(dst_id, src_id)``.

    :param host: passed to ``_get_connection_string``
    :param port: passed to ``_get_connection_string``
    :param user: passed to ``_get_connection_string``
    :param password: passed to ``_get_connection_string``
    :param database: passed to ``_get_connection_string``
    :param bool use_ssl: when true, the engine is created with
        ``sslmode=require`` in its connect args

    """
    print("Creating indexes")

    # added for Postgresql SSL
    connect_args = {}
    if use_ssl:
        connect_args["sslmode"] = "require"

    connection_string = _get_connection_string(user=user,
                                               password=password,
                                               host=host,
                                               port=port,
                                               database=database)
    engine = create_engine(connection_string, connect_args=connect_args)

    for node_cls in Node.get_subclasses():
        table = node_cls.__tablename__
        node_statements = [
            "CREATE INDEX ON {} ({})".format(table, "node_id"),
            "CREATE INDEX ON {} USING gin (_sysan)".format(table),
            "CREATE INDEX ON {} USING gin (_props)".format(table),
            "CREATE INDEX ON {} USING gin (_sysan, _props)".format(table),
        ]
        for statement in node_statements:
            engine.execute(statement)

    for edge_cls in Edge.get_subclasses():
        table = edge_cls.__tablename__
        for columns in ("src_id", "dst_id", "dst_id, src_id"):
            engine.execute("CREATE INDEX ON {} ({})".format(table, columns))
示例#14
0
#!/usr/bin/env python

from psqlgraph import Node, Edge
from gdcdatamodel import models as md

# Maps the Node subclass named by an edge's ``__src_class__`` to that edge
# class, for every Edge subclass whose class name contains 'RelatesToCase'.
CACHE_EDGES = {
    Node.get_subclass_named(edge.__src_class__): edge
    for edge in Edge.get_subclasses() if 'RelatesToCase' in edge.__name__
}

# SQL template with the placeholders {cache_edge_table}, {cls_table} and
# {cls_to_case_edge_table}.  For every {cls_table} row joined to a
# node_case row through {cls_to_case_edge_table}, it inserts a
# (src_id, dst_id) row with empty _props/_sysan/acl into
# {cache_edge_table}, unless that pair is already present.
# The doubled braces look like str.format escapes for a literal '{}' --
# presumably the template is filled in with str.format; confirm at the
# call site.
LEVEL_1_SQL = """

INSERT INTO {cache_edge_table} (src_id, dst_id, _props, _sysan, acl)
SELECT {cls_table}.node_id, node_case.node_id,
       '{{}}'::jsonb, '{{}}'::jsonb, '{{}}'::text[]
    FROM {cls_table}

    -- Step directly to case
    JOIN {cls_to_case_edge_table}
         ON {cls_table}.node_id = {cls_to_case_edge_table}.src_id
    JOIN node_case
         ON node_case.node_id = {cls_to_case_edge_table}.dst_id

    -- Append only, e.g. insert only those missing
    WHERE NOT EXISTS (
          SELECT 1 FROM {cache_edge_table}
          WHERE {cls_table}.node_id = {cache_edge_table}.src_id
          AND   node_case.node_id   = {cache_edge_table}.dst_id)
"""

APPEND_CACHE_FROM_PARENT_SQL = """
示例#15
0
    related_cases_from_cache,
    related_cases_from_parents,
)

# Module-level logger obtained under the name 'gdcdatamodel'.
logger = get_logger('gdcdatamodel')

# These properties are defined outside of the JSONB column in the
# database; the list tells later code to skip them.
excluded_props = ['id', 'type']


# At module load time, record the names of the classes that are already
# registered as subclasses of the abstract bases Node and Edge, so that
# they are not registered a second time.
loaded_nodes = [c.__name__ for c in Node.get_subclasses()]
loaded_edges = [c.__name__ for c in Edge.get_subclasses()]


def remove_spaces(s):
    """Return ``s`` with every space character removed.

    Only the literal ``' '`` character is dropped; other whitespace such
    as tabs and newlines is kept.

    :param str s: String to remove spaces from

    """
    space_free_parts = s.split(' ')
    return ''.join(space_free_parts)


def register_class(cls):
    """Register a class in `globals`.  This allows us to import the ORM
    classes from :mod:`gdcdatamodel.models`
示例#16
0
    cache_related_cases_on_delete,
    related_cases_from_cache,
    related_cases_from_parents,
)

# Module-level logger obtained under the name 'gdcdatamodel'.
logger = get_logger('gdcdatamodel')

# These properties are defined outside of the JSONB column in the
# database; the list tells later code to skip them.
excluded_props = ['id', 'type']

# At module load time, record the names of the classes that are already
# registered as subclasses of the abstract bases Node and Edge, so that
# they are not registered a second time.
loaded_nodes = [c.__name__ for c in Node.get_subclasses()]
loaded_edges = [c.__name__ for c in Edge.get_subclasses()]


def remove_spaces(s):
    """Return a copy of ``s`` that contains no space characters.

    Tabs, newlines and other whitespace are left untouched; only the
    literal ``' '`` character is removed.

    :param str s: String to remove spaces from

    """
    return ''.join(s.split(' '))


def register_class(cls):
    """Register a class in `globals`.  This allows us to import the ORM
    classes from :mod:`gdcdatamodel.models`