示例#1
0
    def __init__(self, graph_name, cog_home="cog_home", cog_path_prefix=None):
        """
        Set up a graph on top of a Cog store and prepare the views directory.

        :param graph_name: namespace that is created or loaded for this graph.
        :param cog_home: Home directory name, for most use cases use default.
        :param cog_path_prefix: sets the root directory location for Cog db.
            Only applied when truthy; otherwise the value already in the
            config is kept (the original notes name '/tmp' from cog.Config as
            the default and suggest the current directory under IPython).
        """
        # Traversal state: nothing has been visited yet.
        self.last_visited_vertices = None
        self.graph_name = graph_name
        self.cache = {}

        # cfg is not copied, so these assignments mutate the object it names.
        self.config = cfg
        self.config.COG_HOME = cog_home
        if cog_path_prefix:
            self.config.COG_PATH_PREFIX = cog_path_prefix

        dictConfig(self.config.logging_config)
        self.logger = logging.getLogger("torque")
        self.logger.debug("Torque init on graph: " + graph_name + " predicates: ")

        store = Cog(self.cache)
        self.cog = store
        store.create_or_load_namespace(self.graph_name)
        self.all_predicates = store.list_tables()

        # Make sure the directory used for saved views exists.
        views_dir = self.config.cog_views_dir()
        self.views_dir = views_dir
        if not os.path.exists(views_dir):
            os.mkdir(views_dir)

        self.logger.debug("predicates: " + str(self.all_predicates))
示例#2
0
 def test_list_tables(self):
     """Every table created in a namespace is reported by ``list_tables``."""
     cogdb = Cog(config=config)
     cogdb.create_namespace("test_ns")
     for table_name in ("table1", "table2", "table3"):
         cogdb.create_or_load_table(table_name, "test_ns")
     # assertEqual replaces the deprecated assertEquals alias. Sorting makes
     # the check independent of whatever order list_tables() returns while
     # still catching missing or duplicated names.
     self.assertEqual(sorted(cogdb.list_tables()), ['table1', 'table2', 'table3'])
示例#3
0
文件: lib_test.py 项目: uhaz1/cog
 def test_db(self):
     """Put one key/value tuple and read it back by key."""
     db_dir = "/tmp/" + DIR_NAME + "/test"
     cogdb = Cog(db_dir)
     cogdb.create_namespace("test")
     cogdb.create_table("db_test", "test")

     cogdb.put(('testKey', 'testVal'))

     # get() is expected to return the stored tuple preceded by a '0' element.
     expected = ('0', ('testKey', 'testVal'))
     self.assertEqual(cogdb.get("testKey"), expected)
示例#4
0
文件: db_test.py 项目: uhaz1/cog
 def test_db(self):
     """Put one record and print everything the table scanner yields."""
     data = ('user100', '{"firstname":"Hari","lastname":"seldon"}')
     cogdb = Cog(config=config)
     cogdb.create_namespace("test")
     cogdb.create_table("db_test", "test")
     cogdb.put(data)
     for r in cogdb.scanner():
         # The parenthesised form is valid on both Python 2 and Python 3;
         # the bare `print r` statement is a syntax error on Python 3.
         print(r)
示例#5
0
文件: torque.py 项目: uhaz1/cog
 def __init__(self, graph_name, cog_dir):
     """
     Open one Cog handle per predicate table found for the graph.

     :param graph_name: namespace passed to ``use_table`` for every predicate.
     :param cog_dir: database path handed to each ``Cog`` instance.
     """
     self.predicates = self.list_predicate_tables(cog_dir, graph_name)
     self.cog_dir = cog_dir
     self.graph_name = graph_name

     # Each predicate gets its own Cog instance bound to that predicate's table.
     handles = {}
     self.cogs = handles
     for name in self.predicates:
         handle = Cog(db_path=cog_dir)
         handle.use_table(name, graph_name)
         handles[name] = handle
示例#6
0
 def __init__(self, graph_name, cog_dir):
     """
     Bind the graph to a Cog store located at ``cog_dir``.

     :param graph_name: namespace created for this graph.
     :param cog_dir: database path passed to ``Cog``.
     """
     self.graph_name = graph_name
     self.cog_dir = cog_dir
     self.last_visited_vertices = None
     self.config = cfg

     store = Cog(db_path=cog_dir, config=cfg)
     self.cog = store
     # Call order kept from the original: tables are listed before the
     # namespace is created.
     self.all_predicates = store.list_tables()
     store.create_namespace(self.graph_name)
示例#7
0
文件: torque.py 项目: uhaz1/cog
class Loader:
    """Loads graph data from whitespace-delimited text files into a Cog store."""

    def __init__(self, db_path=None, config=cfg):
        """
        :param db_path: database path forwarded to ``Cog``.
        :param config: configuration object forwarded to ``Cog``.
        """
        self.cog = Cog(db_path=db_path, config=config)

    def load_triples(self, graph_data_path, graph_name):
        """
        Load ``vertex predicate vertex`` triples, one per line.

        Only the first three whitespace-separated tokens of each line are
        used. Blank lines are skipped.

        :param graph_data_path: path of the file to read.
        :param graph_name: namespace the triples are stored under.
        """
        self.cog.create_namespace(graph_name)
        with open(graph_data_path) as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    # A blank or whitespace-only line would otherwise raise
                    # IndexError on tokens[0].
                    continue
                # split() already removes surrounding whitespace, so no
                # per-token strip() is needed.
                this_vertex = tokens[0]
                predicate = tokens[1]
                other_vertex = tokens[2]
                # Original note: it won't create the table if it exists.
                self.cog.create_table(predicate, graph_name)
                put_node(self.cog, this_vertex, predicate, other_vertex)

    def load_edgelist(self, edgelist_file_path, graph_name, predicate="none"):
        """
        Load ``vertex vertex`` pairs, one per line, under a single predicate.

        Only the first two whitespace-separated tokens of each line are used.
        Blank lines are skipped.

        :param edgelist_file_path: path of the edge list file to read.
        :param graph_name: namespace the edges are stored under.
        :param predicate: predicate (table) name used for every edge.
        """
        self.cog.create_namespace(graph_name)
        with open(edgelist_file_path) as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    # Skip blank lines instead of failing with IndexError.
                    continue
                v1 = tokens[0]
                v2 = tokens[1]
                self.cog.create_table(predicate, graph_name)
                put_node(self.cog, v1, predicate, v2)
示例#8
0
 def test_db(self):
     """Put one record and check it is the last record the scanner yields."""
     data = ('user100', '{"firstname":"Hari","lastname":"seldon"}')
     cogdb = Cog(config=config)
     cogdb.create_namespace("test")
     cogdb.create_or_load_table("db_test", "test")
     cogdb.put(data)
     # Pre-bind res so an empty scan fails the assertion below instead of
     # aborting the test with a NameError.
     res = None
     for r in cogdb.scanner():
         res = r
     self.assertEqual(
         res, ('user100', '{"firstname":"Hari","lastname":"seldon"}'))
示例#9
0
文件: torque.py 项目: uhaz1/cog
 def put(self, vertex1, predicate, vertex2):
     """
     Hand the triple to ``put_node`` through a fresh Cog handle.

     The handle is remembered in ``self.cogs`` under ``predicate``.

     :param vertex1: first vertex of the triple.
     :param predicate: predicate; also the table the handle is switched to.
     :param vertex2: second vertex of the triple.
     :return: ``self``, so calls can be chained.
     """
     handle = Cog(db_path=self.cog_dir)
     handle.create_namespace(self.graph_name)
     # Original note: it won't create if it exists.
     handle.use_table(predicate, self.graph_name)
     put_node(handle, vertex1, predicate, vertex2)
     self.cogs[predicate] = handle
     return self
示例#10
0
文件: torque_test.py 项目: uhaz1/cog
    def test_aaa_before_all_tests(self):
        """Create the test database directory, load the triples and share handles."""
        db_dir = "/tmp/" + DIR_NAME
        if not os.path.exists(db_dir):
            os.mkdir(db_dir)

        # Two candidate locations are tried for the data file (presumably
        # depending on the directory the tests are started from).
        triples_path = "test-data/test.nq"
        if not os.path.exists(triples_path):
            triples_path = "test/test-data/test.nq"
        loader = Loader(db_dir)
        loader.load_triples(triples_path, "people")

        # Stored on the class so the other tests can reach them.
        TorqueTest.cog = Cog(db_dir)
        TorqueTest.g = Graph(graph_name="people", cog_dir=db_dir)
示例#11
0
文件: test_db_2.py 项目: humaohai/cog
    def test_db(self):
        """Write four records (one key twice) and check the later value is read back."""
        db_path = '/tmp/cogtestdb2'
        try:
            os.makedirs(db_path)
        except OSError:
            # Tolerate the failure only when the directory is already there.
            if not os.path.isdir(db_path):
                raise
        config.CUSTOM_COG_DB_PATH = db_path

        cogdb = Cog()
        cogdb.create_namespace("my_namespace")
        cogdb.create_table("new_db", "my_namespace")

        # 'key3' is written twice; the assertion expects the second value.
        entries = [('A', 'val'), ('B', 'val'), ('key3', 'val'), ('key3', 'val_updated')]
        for key, value in entries:
            cogdb.put(Record(key, value))

        stored = cogdb.get('key3')
        self.assertEqual(stored.value, 'val_updated')

        cogdb.close()
示例#12
0
class Graph:
    """
    Chainable graph traversal API backed by a Cog store.

        https://www.w3.org/TR/WD-rdf-syntax-971002/
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md
    """

    def __init__(self, graph_name, cog_dir):
        """
        :param graph_name: namespace created for this graph.
        :param cog_dir: database path passed to ``Cog``.
        """
        self.graph_name = graph_name
        self.cog_dir = cog_dir
        self.last_visited_vertices = None
        self.config = cfg

        store = Cog(db_path=cog_dir, config=cfg)
        self.cog = store
        # Call order kept from the original: list tables, then create the namespace.
        self.all_predicates = store.list_tables()
        store.create_namespace(self.graph_name)

    def load_edgelist(self, edgelist_file_path, graph_name, predicate="none"):
        """
        Delegate edge-list loading to the Cog store and refresh the predicate list.

        :param edgelist_file_path: path of the edge list file.
        :param graph_name: namespace to load into.
        :param predicate: predicate name forwarded to the store.
        """
        store = self.cog
        store.load_edgelist(edgelist_file_path, graph_name, predicate)
        self.all_predicates = store.list_tables()

    def load_triples(self, graph_data_path, graph_name):
        """
        Delegate triple loading to the Cog store and refresh the predicate list.

        :param graph_data_path: path of the triples file.
        :param graph_name: namespace to load into.
        :return: None
        """
        store = self.cog
        store.load_triples(graph_data_path, graph_name)
        self.all_predicates = store.list_tables()

    def put(self, vertex1, predicate, vertex2):
        """
        Store one triple and refresh the predicate list.

        :return: ``self``, so calls can be chained.
        """
        namespace = self.cog.use_namespace(self.graph_name)
        namespace.use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        self.all_predicates = self.cog.list_tables()
        return self

    def list_predicate_tables(self, cog_dir, graph_name):
        """
        Collect the text before the first '-' of every file name found in
        ``<cog_dir>/<graph_name>``.

        :return: a set of those prefixes; empty when the directory is missing.
        """
        path = "/".join([cog_dir, graph_name])
        if not os.path.exists(path):
            return set(())
        return {name.split("-")[0] for name in listdir(path) if isfile(join(path, name))}

    def v(self, vertex=None):
        """
        Start a traversal.

        With a truthy ``vertex`` the traversal starts at that single vertex;
        otherwise every record scanned from the node-set table becomes a
        starting vertex.

        :return: ``self``, so calls can be chained.
        """
        # TODO: need to check if node exists
        if vertex:
            self.last_visited_vertices = [Vertex(vertex)]
            return self

        visited = []
        self.last_visited_vertices = visited
        self.cog.use_namespace(self.graph_name).use_table(
            self.config.GRAPH_NODE_SET_TABLE_NAME)
        for record in self.cog.scanner():
            visited.append(Vertex(record))
        return self

    def out(self, predicates=None):
        """
        Hop along outgoing edges.

        :param predicates: list of string predicates; all known predicates
            are used when omitted or empty.
        :return: ``self``, so calls can be chained.
        """
        if predicates:
            assert type(predicates) == list
        self.__hop("out", predicates)
        return self

    def inc(self, predicates=None):
        """
        Hop along incoming edges.

        :param predicates: predicates to follow; all known predicates are
            used when omitted or empty.
        :return: ``self``, so calls can be chained.
        """
        self.__hop("in", predicates)
        return self

    def __hop(self, direction, predicates=None, tag=NOTAG):
        """
        Replace the visited vertices with their neighbours in ``direction``.

        Tags of the vertex a neighbour was reached from are copied onto it.
        """
        self.cog.use_namespace(self.graph_name)
        if not predicates:
            predicates = self.all_predicates
        # Any direction other than "out" is treated as incoming.
        node_key = out_nodes if direction == "out" else in_nodes

        reached = []
        for predicate in predicates:
            for origin in self.last_visited_vertices:
                record = self.cog.use_table(predicate).get(node_key(origin.id))
                if not record:
                    continue
                # The adjacency list is stored as a Python literal string.
                for adjacent_id in ast.literal_eval(record[1][1]):
                    neighbour = Vertex(adjacent_id)
                    neighbour.tags.update(origin.tags)
                    reached.append(neighbour)
        self.last_visited_vertices = reached

    def tag(self, tag_name):
        """
        Record each currently visited vertex's id under ``tag_name`` in its
        tags, so it shows up in the result set.

        :param tag_name: key to store the vertex id under.
        :return: ``self``, so calls can be chained.
        """
        for vertex in self.last_visited_vertices:
            vertex.tags[tag_name] = vertex.id
        return self

    def count(self):
        """Return the number of currently visited vertices."""
        return len(self.last_visited_vertices)

    def all(self):
        """
        Return all the nodes in the result.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md

        :return: ``{"result": [...]}`` with one dict per visited vertex,
            holding its ``id`` plus its tags.
        """
        rows = []
        for vertex in self.last_visited_vertices:
            row = {"id": vertex.id}
            row.update(vertex.tags)
            rows.append(row)
        return {"result": rows}
示例#13
0
 def test_db(self):
     """Put a Record and check that get() returns one with an equal value."""
     cogdb = Cog()
     cogdb.create_namespace("test")
     cogdb.create_table("db_test", "test")
     cogdb.put(Record('testKey','testVal'))

     fetched = cogdb.get("testKey")
     expected = Record('testKey', 'testVal')
     self.assertTrue(fetched.is_equal_val(expected))
     cogdb.close()
示例#14
0
 def test_list_tables(self):
     """Every table created in a namespace is reported by list_tables()."""
     cogdb = Cog()
     cogdb.create_namespace("test_ns")
     for table_name in ("table1", "table2", "table3"):
         cogdb.create_table(table_name, "test_ns")

     # Compared as sets, so the listing order does not matter.
     listed = set(cogdb.list_tables())
     self.assertEqual(listed, {'table2', 'table3', 'table1'})
     cogdb.close()
示例#15
0
 def test_db(self):
     """Put one Record and check the last scanned record has an equal value."""
     data = Record('user100','{"firstname":"Hari","lastname":"seldon"}')
     cogdb = Cog()
     cogdb.create_namespace("test")
     cogdb.create_table("db_test", "test")
     cogdb.put(data)

     # Keep only the final record the scanner yields (None if it yields nothing).
     last_seen = None
     for record in cogdb.scanner():
         last_seen = record
     print(last_seen)

     self.assertTrue(data.is_equal_val(last_seen))
     cogdb.close()
示例#16
0
    def test_db(self):
        """Write four tuples (one key twice) and check the later value is read back."""
        cogdb = Cog('/tmp/cogtestdb2')
        cogdb.create_namespace("my_namespace")
        cogdb.create_or_load_table("new_db", "my_namespace")

        # 'key3' is written twice; the assertion expects the second value.
        entries = [('A', 'val'), ('B', 'val'), ('key3', 'val'), ('key3', 'val_updated')]
        for entry in entries:
            cogdb.put(entry)

        stored = cogdb.get('key3')
        self.assertEqual(stored[1][1], 'val_updated')

        cogdb.close()
示例#17
0
class Graph:
    """
    Creates a graph object.

    Traversal methods (``v``, ``out``, ``inc``, ``has``, ``hasr``, ``tag``)
    return ``self`` so they can be chained. The vertices reached so far are
    kept in ``last_visited_vertices``.
    """

    def __init__(self, graph_name, cog_home="cog_home", cog_path_prefix=None):
        """
        :param graph_name: namespace that is created or loaded for this graph.
        :param cog_home: Home directory name, for most use cases use default.
        :param cog_path_prefix: sets the root directory location for Cog db. Default: '/tmp' set in cog.Config. Change this to current directory when running in an IPython environment.
        """

        # cfg is not copied, so these assignments mutate the object it names.
        self.config = cfg
        self.config.COG_HOME = cog_home

        if cog_path_prefix:
            self.config.COG_PATH_PREFIX = cog_path_prefix

        self.graph_name = graph_name
        self.cache = {}
        dictConfig(self.config.logging_config)
        self.logger = logging.getLogger("torque")

        self.logger.debug("Torque init on graph: " + graph_name + " predicates: ")

        self.cog = Cog(self.cache)
        self.cog.create_or_load_namespace(self.graph_name)

        self.all_predicates = self.cog.list_tables()
        self.views_dir = self.config.cog_views_dir()

        if not os.path.exists(self.views_dir):
            os.mkdir(self.views_dir)
        self.logger.debug("predicates: " + str(self.all_predicates))

        self.last_visited_vertices = None

    def _resolve_predicates(self, predicates):
        """
        Normalise a predicate argument into the list of table names to visit.

        :param predicates: ``None``, a single predicate, or a list of predicates.
        :return: ``self.all_predicates`` when ``predicates`` is ``None``,
            otherwise the predicates passed through ``hash_predicate``.
        """
        if predicates is None:
            return self.all_predicates
        if not isinstance(predicates, list):
            predicates = [predicates]
        return [hash_predicate(p) for p in predicates]

    def refresh(self):
        """Delegate to ``Cog.refresh_all()``."""
        self.cog.refresh_all()

    def load_triples(self, graph_data_path, graph_name=None):
        """
        Loads a list of triples.

        :param graph_data_path: path of the triples file.
        :param graph_name: namespace to load into; defaults to this graph's name.
        :return: None
        """

        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_triples(graph_data_path, graph_name)
        self.all_predicates = self.cog.list_tables()
        return None

    def load_csv(self, csv_path, id_column_name, graph_name=None):
        """
        Loads CSV to a graph. One column must be designated as ID column.

        :param csv_path: path of the CSV file.
        :param id_column_name: name of the ID column; must not be None.
        :param graph_name: namespace to load into; defaults to this graph's name.
        :raises ValueError: when ``id_column_name`` is None.
        :return: None
        """
        if id_column_name is None:
            # ValueError is still caught by callers handling Exception.
            raise ValueError("id_column_name must not be None")
        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_csv(csv_path, id_column_name, graph_name)
        self.all_predicates = self.cog.list_tables()

    def close(self):
        """Log the shutdown and close the underlying Cog store."""
        self.logger.info("closing graph: "+self.graph_name)
        self.cog.close()

    def put(self, vertex1, predicate, vertex2):
        """
        Store one triple and refresh the predicate list.

        :return: ``self``, so calls can be chained.
        """
        self.cog.use_namespace(self.graph_name).use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        self.all_predicates = self.cog.list_tables()
        return self

    def v(self, vertex=None, func=None):
        """
        Start a traversal.

        :param vertex: a vertex id or a list of vertex ids to start from.
            When None, every key scanned from the node-set table is used.
        :param func: optional filter applied to scanned keys (only when
            ``vertex`` is None); keys for which it returns a falsy value
            are skipped.
        :return: ``self``, so calls can be chained.
        """
        if vertex is not None:
            if isinstance(vertex, list):
                self.last_visited_vertices = [Vertex(v) for v in vertex]
            else:
                self.last_visited_vertices = [Vertex(vertex)]
        else:
            self.last_visited_vertices = []
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)
            for r in self.cog.scanner():
                if func is not None and not func(r.key):
                    continue
                self.last_visited_vertices.append(Vertex(r.key))
        return self

    def out(self, predicates=None, func=None):
        """
        Traverse forward through edges.

        :param predicates: A string or a List of strings; all known
            predicates are used when None.
        :param func: optional filter on adjacent vertex ids; may only be
            combined with a single (non-list) predicate.
        :return: ``self``, so calls can be chained.
        """

        if func:
            assert callable(func),  "func must be a lambda. Example: func = lambda d: int(d) > 5"
            assert not isinstance(predicates, list), "func cannot be used with a list of predicates"

        predicates = self._resolve_predicates(predicates)

        self.logger.debug("OUT: predicates: "+str(predicates))
        self.__hop("out", predicates=predicates, func=func)
        return self

    def inc(self, predicates=None, func=None):
        """
        Traverse backward through edges.

        :param predicates: A string or a List of strings; all known
            predicates are used when None.
        :param func: optional filter on adjacent vertex ids; may only be
            combined with a single (non-list) predicate.
        :return: ``self``, so calls can be chained.
        """

        if func:
            assert callable(func), "func must be a lambda. Example: func = lambda d: int(d) > 5"
            assert not isinstance(predicates, list), "func cannot be used with a list of predicates"

        predicates = self._resolve_predicates(predicates)

        self.__hop("in", predicates, func=func)
        return self

    def __adjacent_vertices(self, vertex, predicates, direction='out'):
        """
        Return the vertices adjacent to ``vertex`` over ``predicates``.

        :param vertex: vertex whose neighbours are looked up.
        :param predicates: iterable of table names to look in.
        :param direction: 'out' or 'in'; any other value yields an empty list.
        :return: list of ``Vertex`` objects, each with its edge set to the
            predicate it was found under.
        """
        self.cog.use_namespace(self.graph_name)
        if direction == 'out':
            node_key = out_nodes
        elif direction == 'in':
            node_key = in_nodes
        else:
            return []

        adjacent_vertices = []
        for predicate in predicates:
            record = self.cog.use_table(predicate).get(node_key(vertex.id))
            # Only existing records carry adjacency data. The original 'in'
            # branch had this test inverted and dereferenced None.
            if record is not None:
                for v_adj in record.value:
                    adjacent_vertices.append(Vertex(v_adj).set_edge(predicate))

        return adjacent_vertices

    def has(self, predicates, vertex):
        """
        Filters all outgoing edges from a vertex that matches a list of predicates.

        Keeps each currently visited vertex once per outgoing edge (over
        ``predicates``) that leads to ``vertex``.

        :param predicates: A string or a List of strings; all known
            predicates are used when None, as in ``out``.
        :param vertex: id the adjacent vertex must have.
        :return: ``self``, so calls can be chained.
        """

        predicates = self._resolve_predicates(predicates)

        has_vertices = []
        for lv in self.last_visited_vertices:
            adj_vertices = self.__adjacent_vertices(lv, predicates)
            for av in adj_vertices:
                if av.id == vertex:
                    has_vertices.append(lv)

        self.last_visited_vertices = has_vertices
        return self

    def hasr(self, predicates, vertex):
        """
        'Has' in reverse. Filters all incoming edges from a vertex that matches a list of predicates.

        Keeps each currently visited vertex once per incoming edge (over
        ``predicates``) that comes from ``vertex``.

        :param predicates: A string or a List of strings; all known
            predicates are used when None, as in ``inc``.
        :param vertex: id the adjacent vertex must have.
        :return: ``self``, so calls can be chained.
        """

        predicates = self._resolve_predicates(predicates)

        has_vertices = []
        for lv in self.last_visited_vertices:
            adj_vertices = self.__adjacent_vertices(lv, predicates, 'in')
            for av in adj_vertices:
                if av.id == vertex:
                    has_vertices.append(lv)

        self.last_visited_vertices = has_vertices
        return self

    def scan(self, limit=10, scan_type='v'):
        """
        Scans vertices or edges in a graph.

        :param limit: maximum number of records to return.
        :param scan_type: 'v' for vertices or 'e' for edges.
        :return: ``{"result": [{"id": ...}, ...]}``
        """
        assert type(scan_type) is str, "Scan type must be either 'v' for vertices or 'e' for edges."
        if scan_type == 'e':
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME)
        else:
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)
        result = []
        for i, r in enumerate(self.cog.scanner()):
            if i >= limit:
                break
            # Vertex scans report the record key, anything else the record value.
            v = Vertex(r.key) if scan_type == 'v' else Vertex(r.value)
            result.append({"id": v.id})
        return {"result": result}

    def __hop(self, direction, predicates=None, tag=NOTAG, func=None):
        """
        Replace the visited vertices with their neighbours in ``direction``.

        :param direction: "out" for outgoing edges; any other value is
            treated as incoming.
        :param predicates: iterable of table names to follow.
        :param tag: unused here; kept for interface compatibility.
        :param func: optional filter; adjacent ids for which it returns a
            falsy value are skipped.
        """
        self.logger.debug("__hop : direction: " + str(direction) + " predicates: " + str(predicates) + " graph name: "+self.graph_name)
        self.cog.use_namespace(self.graph_name)
        # A list is built explicitly: on Python 3 str(map(...)) would log the
        # map object's repr instead of the vertex ids.
        self.logger.debug("hopping from vertices: " + str([x.id for x in self.last_visited_vertices]))
        self.logger.debug("direction: " + str(direction) + " predicates: "+str(self.all_predicates))
        node_key = out_nodes if direction == "out" else in_nodes
        traverse_vertex = []
        for predicate in predicates:
            self.logger.debug("__hop predicate: "+predicate + " of "+ str(predicates))
            for v in self.last_visited_vertices:
                record = self.cog.use_table(predicate).get(node_key(v.id))
                if record is not None:
                    for v_adjacent in record.value:
                        if func is not None and not func(v_adjacent):
                            continue
                        v_adjacent_obj = Vertex(v_adjacent).set_edge(predicate)
                        # Tags travel with the path: copy them from the origin.
                        v_adjacent_obj.tags.update(v.tags)
                        traverse_vertex.append(v_adjacent_obj)
        self.last_visited_vertices = traverse_vertex

    def tag(self, tag_name):
        """
        Saves vertices with a tag name. Used to capture vertices while traversing a graph.

        :param tag_name: key under which each visited vertex's id is stored.
        :return: ``self``, so calls can be chained.
        """
        for v in self.last_visited_vertices:
            v.tags[tag_name] = v.id
        return self

    def count(self):
        """Return the number of currently visited vertices."""
        return len(self.last_visited_vertices)

    def all(self, options=None):
        """
        Returns all the vertices that are resultant of the graph query. Options 'e' would include the edges that were traversed.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md

        :param options: when it contains 'e', each item with edges also gets
            an ``edges`` list read from the edge-set table.
        :return: ``{"result": [...]}`` with one dict per visited vertex,
            holding its ``id``, optional ``edges`` and its tags.
        """
        result = []
        show_edge = options is not None and 'e' in options
        for v in self.last_visited_vertices:
            item = {"id": v.id}
            if show_edge and v.edges:
                # Select the edge-set table once per vertex, then read every edge.
                edge_table = self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME)
                item['edges'] = [edge_table.get(edge).value for edge in v.edges]
            item.update(v.tags)

            result.append(item)
        res = {"result": result}
        return res

    def view(self, view_name, js_src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"):
        """
        Returns html view of the resulting graph from a query.

        The HTML is assembled from the module's script/template fragments
        with the current result embedded as JSON, then persisted under the
        views directory as ``<view_name>.html``.

        :param view_name: name of the view; must not be None.
        :param js_src: URL of the JavaScript library inserted into the page.
        :return: the persisted ``View``.
        """
        assert view_name is not None, "a view name is required to create a view, it can be any string."
        result = self.all()
        view_html = script_part1 + graph_lib_src.format(js_src=js_src) + graph_template.format(plot_data_insert=json.dumps(result['result'])) + script_part2
        view = self.views_dir+"/{view_name}.html".format(view_name=view_name)
        view = View(view, view_html)
        view.persist()
        return view

    def getv(self, view_name):
        """
        Load a previously created view from the views directory.

        :param view_name: name the view was created with.
        :return: a ``View`` built from the stored HTML.
        """
        view = self.views_dir + "/{view_name}.html".format(view_name=view_name)
        assert os.path.isfile(view), "view not found, create a view by calling .view()"
        with open(view, 'r') as f:
            view_html = f.read()
        view = View(view, view_html)
        return view

    def lsv(self):
        """Return the names in the views directory, cut at the first '.'."""
        return [f.split(".")[0] for f in listdir(self.views_dir)]

    def get_new_graph_instance(self):
        """Return a new ``Graph`` with this graph's name, home and path prefix."""
        return Graph(self.graph_name, self.config.COG_HOME, self.config.COG_PATH_PREFIX)
示例#18
0
文件: torque.py 项目: uhaz1/cog
 def __init__(self, db_path=None, config=cfg):
     """
     Create the Cog instance this object works through.

     :param db_path: forwarded to ``Cog`` as ``db_path``.
     :param config: forwarded to ``Cog`` as ``config``; defaults to ``cfg``.
     """
     store = Cog(config=config, db_path=db_path)
     self.cog = store