def __init__(self, graph_name, cog_home="cog_home", cog_path_prefix=None):
    '''
    Set up configuration, logging, the backing Cog store and the views
    directory for one named graph.

    :param graph_name: name of the graph; also used as the Cog namespace.
    :param cog_home: Home directory name, for most use cases use default.
    :param cog_path_prefix: sets the root directory location for Cog db.
        Default: '/tmp' set in cog.Config. Change this to current directory
        when running in an IPython environment.
    '''
    # self.config is the module-level cfg object itself (not a copy), so the
    # assignments below are visible through cfg as well.
    self.config = cfg
    self.config.COG_HOME = cog_home
    if cog_path_prefix:
        self.config.COG_PATH_PREFIX = cog_path_prefix

    self.graph_name = graph_name
    self.cache = {}

    # Logging is configured before the logger is first used.
    dictConfig(self.config.logging_config)
    self.logger = logging.getLogger("torque")
    self.logger.debug("Torque init on graph: " + graph_name + " predicates: ")

    # The store is created with the (initially empty) cache dict.
    self.cog = Cog(self.cache)
    self.cog.create_or_load_namespace(self.graph_name)
    self.all_predicates = self.cog.list_tables()

    # Make sure the directory used for persisted views exists.
    views_dir = self.config.cog_views_dir()
    self.views_dir = views_dir
    if not os.path.exists(views_dir):
        os.mkdir(views_dir)

    self.logger.debug("predicates: " + str(self.all_predicates))
    # No traversal has been started yet.
    self.last_visited_vertices = None
def test_list_tables(self):
    """Create three tables in one namespace and check that all are listed."""
    cogdb = Cog(config=config)
    cogdb.create_namespace("test_ns")
    cogdb.create_or_load_table("table1", "test_ns")
    cogdb.create_or_load_table("table2", "test_ns")
    cogdb.create_or_load_table("table3", "test_ns")
    # Compare sorted names rather than a fixed sequence: the previously
    # expected order ('table2', 'table3', 'table1') looks incidental, and
    # sorting still catches missing or duplicated names.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(sorted(cogdb.list_tables()), ['table1', 'table2', 'table3'])
def test_db(self):
    """Store one key/value tuple in a fresh table and read it back by key."""
    key, value = 'testKey', 'testVal'
    db_location = "/tmp/" + DIR_NAME + "/test"

    cogdb = Cog(db_location)
    cogdb.create_namespace("test")
    cogdb.create_table("db_test", "test")
    cogdb.put((key, value))

    # The lookup is expected to come back as ('0', (key, value)).
    fetched = cogdb.get(key)
    self.assertEqual(fetched, ('0', (key, value)))
def test_db(self):
    """Put one record and print every record the scanner yields.

    This is a smoke test: it makes no assertions and only checks that
    put/scan run without raising.
    """
    data = ('user100', '{"firstname":"Hari","lastname":"seldon"}')
    cogdb = Cog(config=config)
    cogdb.create_namespace("test")
    cogdb.create_table("db_test", "test")
    cogdb.put(data)
    scanner = cogdb.scanner()
    for r in scanner:
        # Function-call form: valid on Python 3 and prints the same text as
        # the old `print r` statement on Python 2.
        print(r)
def __init__(self, graph_name, cog_dir):
    '''
    Discover the predicate tables of a graph and open one Cog per predicate.

    :param graph_name: name of the graph (passed as the namespace to use_table).
    :param cog_dir: directory handed to every Cog as db_path.
    '''
    self.predicates = self.list_predicate_tables(cog_dir, graph_name)
    self.graph_name = graph_name
    self.cog_dir = cog_dir

    # predicate name -> Cog instance pointed at that predicate's table
    self.cogs = {}
    for table_name in self.predicates:
        table_cog = Cog(db_path=cog_dir)
        table_cog.use_table(table_name, graph_name)
        self.cogs[table_name] = table_cog
def __init__(self, graph_name, cog_dir):
    '''
    Open the Cog store for a graph and make sure its namespace exists.

    :param graph_name: name of the graph; used as the Cog namespace.
    :param cog_dir: passed to Cog as db_path.
    '''
    self.config = cfg
    store = Cog(db_path=cog_dir, config=cfg)
    self.cog = store
    self.graph_name = graph_name
    self.cog_dir = cog_dir
    # Tables are listed before the namespace is created, as in the original
    # statement order.
    self.all_predicates = store.list_tables()
    # No traversal has been started yet.
    self.last_visited_vertices = None
    store.create_namespace(self.graph_name)
class Loader:
    """Loads graph data from text files into a Cog store via put_node."""

    def __init__(self, db_path=None, config=cfg):
        """
        :param db_path: forwarded to Cog as db_path.
        :param config: forwarded to Cog as config; defaults to the module-level cfg.
        """
        self.cog = Cog(db_path=db_path, config=config)

    def load_triples(self, graph_data_path, graph_name):
        """
        Load whitespace-separated triples, one per line.

        The first three tokens of each line are read as
        ``vertex predicate vertex``; any further tokens are ignored.
        Blank lines are skipped.

        :param graph_data_path: path of the text file to read.
        :param graph_name: namespace to create and load into.
        :raises IndexError: if a non-blank line has fewer than three tokens.
        """
        self.cog.create_namespace(graph_name)
        with open(graph_data_path) as f:
            for line in f:
                # split() already strips surrounding whitespace from tokens.
                tokens = line.split()
                if not tokens:
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                this_vertex = tokens[0]
                predicate = tokens[1]
                other_vertex = tokens[2]
                self.cog.create_table(predicate, graph_name)  # it wont create if it exists.
                put_node(self.cog, this_vertex, predicate, other_vertex)

    def load_edgelist(self, edgelist_file_path, graph_name, predicate="none"):
        """
        Load an edge list: each line holds two whitespace-separated vertices.

        Every edge is stored under the single given predicate. Blank lines
        are skipped.

        :param edgelist_file_path: path of the text file to read.
        :param graph_name: namespace to create and load into.
        :param predicate: table/predicate name used for every edge.
        :raises IndexError: if a non-blank line has fewer than two tokens.
        """
        self.cog.create_namespace(graph_name)
        with open(edgelist_file_path) as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    continue
                v1 = tokens[0]
                v2 = tokens[1]
                # Kept inside the loop so an empty file still creates no table.
                self.cog.create_table(predicate, graph_name)
                put_node(self.cog, v1, predicate, v2)
def test_db(self):
    """Put one record and check that the last scanned record equals it."""
    data = ('user100', '{"firstname":"Hari","lastname":"seldon"}')
    cogdb = Cog(config=config)
    cogdb.create_namespace("test")
    cogdb.create_or_load_table("db_test", "test")
    cogdb.put(data)
    scanner = cogdb.scanner()
    # Start from None so an empty scan fails the assertion below instead of
    # raising NameError on an unbound name.
    res = None
    for r in scanner:
        res = r
    self.assertEqual(
        res, ('user100', '{"firstname":"Hari","lastname":"seldon"}'))
def put(self, vertex1, predicate, vertex2):
    """
    Add the edge ``vertex1 -predicate-> vertex2`` to the graph.

    A new Cog is opened for the predicate, the edge is written through
    put_node, and the Cog is remembered in self.cogs under the predicate.

    :return: self, so calls can be chained.
    """
    predicate_cog = Cog(db_path=self.cog_dir)
    predicate_cog.create_namespace(self.graph_name)
    # it wont create if it exists.
    predicate_cog.use_table(predicate, self.graph_name)
    put_node(predicate_cog, vertex1, predicate, vertex2)
    self.cogs[predicate] = predicate_cog
    return self
def test_aaa_before_all_tests(self):
    """Create the scratch directory, load the sample triples and publish the
    shared Cog / Graph objects on TorqueTest."""
    scratch_dir = "/tmp/" + DIR_NAME
    if not os.path.exists(scratch_dir):
        os.mkdir(scratch_dir)

    # The data file is looked up relative to the working directory; fall back
    # to the path under test/ when the first candidate is absent.
    if os.path.exists("test-data/test.nq"):
        triples_path = "test-data/test.nq"
    else:
        triples_path = "test/test-data/test.nq"

    loader = Loader(scratch_dir)
    loader.load_triples(triples_path, "people")

    TorqueTest.cog = Cog(scratch_dir)
    TorqueTest.g = Graph(graph_name="people", cog_dir=scratch_dir)
def test_db(self):
    """Write several records, overwrite one key, and check the latest value wins."""
    db_path = '/tmp/cogtestdb2'
    try:
        os.makedirs(db_path)
    except OSError:
        # An already-existing directory is fine; anything else is re-raised.
        if not os.path.isdir(db_path):
            raise
    config.CUSTOM_COG_DB_PATH = db_path

    cogdb = Cog()
    # create a namespace
    cogdb.create_namespace("my_namespace")
    # create new table
    cogdb.create_table("new_db", "my_namespace")

    # put some data; 'key3' is written twice so the second value replaces the first
    entries = [
        ('A', 'val'),
        ('B', 'val'),
        ('key3', 'val'),
        ('key3', 'val_updated'),
    ]
    for entry_key, entry_value in entries:
        cogdb.put(Record(entry_key, entry_value))

    stored = cogdb.get('key3')
    self.assertEqual(stored.value, 'val_updated')
    cogdb.close()
class Graph:
    """
    Chainable graph traversal on top of a Cog store.

    https://www.w3.org/TR/WD-rdf-syntax-971002/
    https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md
    """

    def __init__(self, graph_name, cog_dir):
        '''
        Open the Cog store and make sure the graph's namespace exists.

        :param graph_name: name of the graph; used as the Cog namespace.
        :param cog_dir: passed to Cog as db_path.
        '''
        self.config = cfg
        store = Cog(db_path=cog_dir, config=cfg)
        self.cog = store
        self.graph_name = graph_name
        self.cog_dir = cog_dir
        self.all_predicates = store.list_tables()
        # No traversal has been started yet.
        self.last_visited_vertices = None
        store.create_namespace(self.graph_name)

    def load_edgelist(self, edgelist_file_path, graph_name, predicate="none"):
        '''
        Delegate edge-list loading to the store, then refresh the predicate list.

        :param edgelist_file_path:
        :param graph_name:
        :param predicate:
        '''
        self.cog.load_edgelist(edgelist_file_path, graph_name, predicate)
        self.all_predicates = self.cog.list_tables()

    def load_triples(self, graph_data_path, graph_name):
        '''
        Loads a list of triples through the store, then refreshes the
        predicate list.

        :param graph_data_path:
        :param graph_name:
        :return: None
        '''
        self.cog.load_triples(graph_data_path, graph_name)
        self.all_predicates = self.cog.list_tables()

    def put(self, vertex1, predicate, vertex2):
        '''
        Add one edge and refresh the predicate list.

        :return: self, for chaining.
        '''
        namespace = self.cog.use_namespace(self.graph_name)
        namespace.use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        self.all_predicates = self.cog.list_tables()
        return self

    def list_predicate_tables(self, cog_dir, graph_name):
        '''
        Collect the distinct prefixes (text before the first "-") of the
        regular files found in <cog_dir>/<graph_name>.

        :return: set of prefixes; empty when the directory does not exist.
        '''
        graph_path = "/".join([cog_dir, graph_name])
        if not os.path.exists(graph_path):
            return set(())
        return set(
            entry.split("-")[0]
            for entry in listdir(graph_path)
            if isfile(join(graph_path, entry))
        )

    def v(self, vertex=None):
        '''
        Choose the starting vertices of a traversal.

        A truthy ``vertex`` becomes the only starting vertex; otherwise
        every record scanned from the node-set table is used.

        :return: self, for chaining.
        '''
        # TODO: need to check if node exists
        if vertex:
            self.last_visited_vertices = [Vertex(vertex)]
            return self

        self.last_visited_vertices = []
        self.cog.use_namespace(self.graph_name).use_table(
            self.config.GRAPH_NODE_SET_TABLE_NAME)
        self.last_visited_vertices.extend(
            Vertex(record) for record in self.cog.scanner())
        return self

    def out(self, predicates=None):
        '''
        Follow outgoing edges from the current vertices.

        :param predicates: List of string predicates; falsy means all.
        :return: self, for chaining.
        '''
        if predicates:
            assert type(predicates) == list
        self.__hop("out", predicates)
        return self

    def inc(self, predicates=None):
        '''
        Follow incoming edges to the current vertices.

        :param predicates: predicates to follow; falsy means all.
        :return: self, for chaining.
        '''
        self.__hop("in", predicates)
        return self

    def __hop(self, direction, predicates=None, tag=NOTAG):
        # Replace the current vertex set with everything adjacent to it along
        # the given predicates. Any direction other than "out" is treated as
        # incoming.
        self.cog.use_namespace(self.graph_name)
        if not predicates:
            predicates = self.all_predicates
        node_key = out_nodes if direction == "out" else in_nodes

        reached = []
        for predicate in predicates:
            for source in self.last_visited_vertices:
                table = self.cog.use_table(predicate)
                record = table.get(node_key(source.id))
                if not record:
                    continue
                # record[1][1] holds the adjacency list as a Python literal string.
                for neighbour_id in ast.literal_eval(record[1][1]):
                    neighbour = Vertex(neighbour_id)
                    # Tags collected so far travel along with the traversal.
                    neighbour.tags.update(source.tags)
                    reached.append(neighbour)
        self.last_visited_vertices = reached

    def tag(self, tag_name):
        '''
        Saves nodes with a tag name and returned in the result set.
        Primarily used to capture nodes while navigating the graph.

        :param tag_name: key under which each current vertex stores its own id.
        :return: self, for chaining.
        '''
        for vertex in self.last_visited_vertices:
            vertex.tags[tag_name] = vertex.id
        return self

    def count(self):
        '''Number of vertices in the current result set.'''
        return len(self.last_visited_vertices)

    def all(self):
        """
        returns all the nodes in the result.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md

        :return: {"result": [...]} with one dict per vertex holding its "id"
            plus its tags (a tag named "id" would replace the id entry).
        """
        rows = []
        for vertex in self.last_visited_vertices:
            row = {"id": vertex.id}
            row.update(vertex.tags)
            rows.append(row)
        return {"result": rows}
def test_db(self):
    """Store one Record and check the value read back by key matches."""
    record = Record('testKey','testVal')

    cogdb = Cog()
    cogdb.create_namespace("test")
    cogdb.create_table("db_test", "test")
    cogdb.put(record)

    fetched = cogdb.get("testKey")
    expected = Record('testKey', 'testVal')
    self.assertTrue(fetched.is_equal_val(expected))
    cogdb.close()
def test_list_tables(self):
    """Create three tables in one namespace and check exactly those are listed."""
    cogdb = Cog()
    cogdb.create_namespace("test_ns")
    for table_name in ("table1", "table2", "table3"):
        cogdb.create_table(table_name, "test_ns")

    # Order-insensitive comparison of the listed table names.
    listed = set(cogdb.list_tables())
    self.assertEqual(listed, {'table2', 'table3', 'table1'})
    cogdb.close()
def test_db(self):
    """Put one Record, scan the table and check the last scanned record's value."""
    data = Record('user100','{"firstname":"Hari","lastname":"seldon"}')

    cogdb = Cog()
    cogdb.create_namespace("test")
    cogdb.create_table("db_test", "test")
    cogdb.put(data)

    # Keep only the final record produced by the scan (None if it is empty).
    res = None
    for scanned in cogdb.scanner():
        res = scanned
    print(res)

    self.assertTrue(data.is_equal_val(res))
    cogdb.close()
def test_db(self):
    """Write several tuples, overwrite one key, and check the latest value wins."""
    cogdb = Cog('/tmp/cogtestdb2')
    # create a namespace
    cogdb.create_namespace("my_namespace")
    # create new table
    cogdb.create_or_load_table("new_db", "my_namespace")

    # put some data; 'key3' is written twice so the second value replaces the first
    for pair in (('A', 'val'), ('B', 'val'), ('key3', 'val'), ('key3', 'val_updated')):
        cogdb.put(pair)

    # The stored value is read from position [1][1] of the returned record.
    fetched = cogdb.get('key3')
    self.assertEqual(fetched[1][1], 'val_updated')
    cogdb.close()
class Graph:
    """
    Creates a graph object.

    Wraps a Cog store and offers a chainable traversal API: ``v()`` selects
    the starting vertices; ``out()``, ``inc()``, ``has()`` and ``hasr()`` move
    or filter the current vertex set; ``all()``, ``count()`` and ``view()``
    read the result.
    """

    def __init__(self, graph_name, cog_home="cog_home", cog_path_prefix=None):
        '''
        :param graph_name: name of the graph; also used as the Cog namespace.
        :param cog_home: Home directory name, for most use cases use default.
        :param cog_path_prefix: sets the root directory location for Cog db.
            Default: '/tmp' set in cog.Config. Change this to current directory
            when running in an IPython environment.
        '''
        # self.config is the module-level cfg object itself, so the settings
        # written below are visible through cfg as well.
        self.config = cfg
        self.config.COG_HOME = cog_home
        if cog_path_prefix:
            self.config.COG_PATH_PREFIX = cog_path_prefix
        self.graph_name = graph_name
        self.cache = {}
        dictConfig(self.config.logging_config)
        self.logger = logging.getLogger("torque")
        self.logger.debug("Torque init on graph: " + graph_name + " predicates: ")
        self.cog = Cog(self.cache)
        self.cog.create_or_load_namespace(self.graph_name)
        self.all_predicates = self.cog.list_tables()
        self.views_dir = self.config.cog_views_dir()
        if not os.path.exists(self.views_dir):
            os.mkdir(self.views_dir)
        self.logger.debug("predicates: " + str(self.all_predicates))
        # No traversal has been started yet; v() must be called first.
        self.last_visited_vertices = None

    @staticmethod
    def _check_func(func, predicates):
        # Shared argument validation for out() and inc().
        if func:
            assert callable(func), "func must be a lambda. Example: func = lambda d: int(d) > 5"
            assert not isinstance(predicates, list), "func cannot be used with a list of predicates"

    @staticmethod
    def _hash_predicates(predicates):
        # Wrap a single predicate in a list and pass every name through
        # hash_predicate; None is returned unchanged.
        if predicates is None:
            return None
        if not isinstance(predicates, list):
            predicates = [predicates]
        return list(map(hash_predicate, predicates))

    def refresh(self):
        '''Ask the store to refresh all of its state (delegates to Cog.refresh_all).'''
        self.cog.refresh_all()

    def load_triples(self, graph_data_path, graph_name=None):
        '''
        Loads a list of triples, then refreshes the predicate list.

        :param graph_data_path: path handed to the store's loader.
        :param graph_name: target graph; defaults to this graph's name.
        :return: None
        '''
        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_triples(graph_data_path, graph_name)
        self.all_predicates = self.cog.list_tables()
        return None

    def load_csv(self, csv_path, id_column_name, graph_name=None):
        """
        Loads CSV to a graph. One column must be designated as ID column.

        :param csv_path: path handed to the store's CSV loader.
        :param id_column_name: name of the ID column; must not be None.
        :param graph_name: target graph; defaults to this graph's name.
        :return: None
        :raises Exception: if id_column_name is None.
        """
        if id_column_name is None:
            raise Exception("id_column_name must not be None")
        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_csv(csv_path, id_column_name, graph_name)
        self.all_predicates = self.cog.list_tables()

    def close(self):
        '''Log and close the underlying store.'''
        self.logger.info("closing graph: "+self.graph_name)
        self.cog.close()

    def put(self, vertex1, predicate, vertex2):
        '''
        Add the edge ``vertex1 -predicate-> vertex2`` and refresh the
        predicate list.

        :return: self, for chaining.
        '''
        self.cog.use_namespace(self.graph_name).use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        self.all_predicates = self.cog.list_tables()
        return self

    def v(self, vertex=None, func=None):
        '''
        Choose the starting vertices of a traversal.

        :param vertex: a vertex id or a list of ids. When None, every key
            scanned from the node-set table is used.
        :param func: optional predicate on the key; only consulted when
            ``vertex`` is None, keys for which it returns falsy are skipped.
        :return: self, for chaining.
        '''
        if vertex is not None:
            if isinstance(vertex, list):
                self.last_visited_vertices = [Vertex(v) for v in vertex]
            else:
                self.last_visited_vertices = [Vertex(vertex)]
        else:
            self.last_visited_vertices = []
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)
            for r in self.cog.scanner():
                if func is not None and not func(r.key):
                    continue
                self.last_visited_vertices.append(Vertex(r.key))
        return self

    def out(self, predicates=None, func=None):
        '''
        Traverse forward through edges.

        :param predicates: A string or a List of strings. None means all
            known predicates.
        :param func: optional filter applied to each adjacent vertex id;
            cannot be combined with a list of predicates.
        :return: self, for chaining.
        '''
        self._check_func(func, predicates)
        predicates = self._hash_predicates(predicates)
        if predicates is None:
            predicates = self.all_predicates
        self.logger.debug("OUT: predicates: "+str(predicates))
        self.__hop("out", predicates=predicates, func=func)
        return self

    def inc(self, predicates=None, func=None):
        '''
        Traverse backward through edges.

        :param predicates: A string or a List of strings. None means all
            known predicates.
        :param func: optional filter applied to each adjacent vertex id;
            cannot be combined with a list of predicates.
        :return: self, for chaining.
        '''
        self._check_func(func, predicates)
        predicates = self._hash_predicates(predicates)
        if predicates is None:
            predicates = self.all_predicates
        self.__hop("in", predicates, func=func)
        return self

    def __adjacent_vertices(self, vertex, predicates, direction='out'):
        # Return the vertices adjacent to `vertex` along `predicates`, each
        # annotated with the predicate it was reached through. `direction`
        # is 'out' or 'in'; any other value yields an empty list.
        self.cog.use_namespace(self.graph_name)
        if direction == 'out':
            node_key = out_nodes
        elif direction == 'in':
            node_key = in_nodes
        else:
            return []

        adjacent_vertices = []
        for predicate in predicates:
            record = self.cog.use_table(predicate).get(node_key(vertex.id))
            # Expand only when a record exists. The 'in' branch used to test
            # `not in_record is not None`, i.e. "record is missing", which
            # skipped real records and failed on missing ones.
            if record is not None:
                for v_adj in record.value:
                    adjacent_vertices.append(Vertex(v_adj).set_edge(predicate))
        return adjacent_vertices

    def has(self, predicates, vertex):
        """
        Filters all outgoing edges from a vertex that matches a list of predicates.

        A current vertex is kept once for every outgoing edge (along the
        given predicates) that ends at ``vertex``.

        :param predicates: A string or a List of strings.
        :param vertex: id the adjacent vertex must have.
        :return: self, for chaining.
        """
        predicates = self._hash_predicates(predicates)

        has_vertices = []
        for lv in self.last_visited_vertices:
            adj_vertices = self.__adjacent_vertices(lv, predicates)
            for av in adj_vertices:
                if av.id == vertex:
                    has_vertices.append(lv)

        self.last_visited_vertices = has_vertices
        return self

    def hasr(self, predicates, vertex):
        """
        'Has' in reverse. Filters all incoming edges from a vertex that matches a list of predicates.

        A current vertex is kept once for every incoming edge (along the
        given predicates) that starts at ``vertex``.

        :param predicates: A string or a List of strings.
        :param vertex: id the adjacent vertex must have.
        :return: self, for chaining.
        """
        predicates = self._hash_predicates(predicates)

        has_vertices = []
        for lv in self.last_visited_vertices:
            adj_vertices = self.__adjacent_vertices(lv, predicates, 'in')
            for av in adj_vertices:
                if av.id == vertex:
                    has_vertices.append(lv)

        self.last_visited_vertices = has_vertices
        return self

    def scan(self, limit=10, scan_type='v'):
        '''
        Scans vertices or edges in a graph.

        :param limit: maximum number of entries returned.
        :param scan_type: 'v' for vertices or 'e' for edges.
        :return: {"result": [{"id": ...}, ...]}
        '''
        assert type(scan_type) is str, "Scan type must be either 'v' for vertices or 'e' for edges."
        if scan_type == 'e':
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME)
        else:
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)
        result = []
        for i, r in enumerate(self.cog.scanner()):
            if i >= limit:
                break
            # Vertices are identified by the record key, edges by its value.
            if scan_type == 'v':
                v = Vertex(r.key)
            else:
                v = Vertex(r.value)
            result.append({"id": v.id})
        return {"result": result}

    def __hop(self, direction, predicates=None, tag=NOTAG, func=None):
        # Replace the current vertex set with the vertices adjacent to it
        # along `predicates`. Any direction other than "out" is treated as
        # incoming. `func`, when given, filters adjacent vertex ids.
        self.logger.debug("__hop : direction: " + str(direction) + " predicates: " + str(predicates) + " graph name: "+self.graph_name)
        self.cog.use_namespace(self.graph_name)
        # A list is built so the ids themselves are logged; str(map(...))
        # only shows a map object on Python 3.
        self.logger.debug("hopping from vertices: " + str([x.id for x in self.last_visited_vertices]))
        self.logger.debug("direction: " + str(direction) + " predicates: "+str(self.all_predicates))
        node_key = out_nodes if direction == "out" else in_nodes
        traverse_vertex = []
        for predicate in predicates:
            self.logger.debug("__hop predicate: "+predicate + " of "+ str(predicates))
            for v in self.last_visited_vertices:
                record = self.cog.use_table(predicate).get(node_key(v.id))
                if record is None:
                    continue
                for v_adjacent in record.value:
                    if func is not None and not func(v_adjacent):
                        continue
                    v_adjacent_obj = Vertex(v_adjacent).set_edge(predicate)
                    # Tags collected so far travel along with the traversal.
                    v_adjacent_obj.tags.update(v.tags)
                    traverse_vertex.append(v_adjacent_obj)
        self.last_visited_vertices = traverse_vertex

    def tag(self, tag_name):
        '''
        Saves vertices with a tag name. Used to capture vertices while traversing a graph.

        :param tag_name: key under which each current vertex stores its own id.
        :return: self, for chaining.
        '''
        for v in self.last_visited_vertices:
            v.tags[tag_name] = v.id
        return self

    def count(self):
        '''Number of vertices in the current result set.'''
        return len(self.last_visited_vertices)

    def all(self, options=None):
        """
        Returns all the vertices that are resultant of the graph query. Options 'e' would include the edges that were traversed.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md

        :param options: when it contains 'e', each item with traversed edges
            also gets an 'edges' list looked up in the edge-set table.
        :return: {"result": [...]} with one dict per vertex holding its "id"
            and its tags.
        """
        result = []
        show_edge = options is not None and 'e' in options
        for v in self.last_visited_vertices:
            item = {"id": v.id}
            if show_edge and v.edges:
                item['edges'] = [self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME).get(edge).value for edge in v.edges]
            # Tags are merged last, so a tag with the same key replaces the
            # entry set above.
            item.update(v.tags)
            result.append(item)
        res = {"result": result}
        return res

    def view(self, view_name, js_src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"):
        """
        Returns html view of the resulting graph from a query.

        The page is assembled from the module's HTML templates with the
        current result embedded as JSON, and persisted as
        <views_dir>/<view_name>.html.

        :param view_name: name of the view; must not be None.
        :param js_src: URL substituted into the template as the script source.
        :return: the persisted View object.
        """
        assert view_name is not None, "a view name is required to create a view, it can be any string."
        result = self.all()
        view_html = script_part1 + graph_lib_src.format(js_src=js_src) + graph_template.format(plot_data_insert=json.dumps(result['result'])) + script_part2
        view = self.views_dir+"/{view_name}.html".format(view_name=view_name)
        view = View(view, view_html)
        view.persist()
        return view

    def getv(self, view_name):
        '''
        Load a previously persisted view by name.

        :param view_name: name given to view().
        :return: a View built from the stored HTML file.
        '''
        view = self.views_dir + "/{view_name}.html".format(view_name=view_name)
        assert os.path.isfile(view), "view not found, create a view by calling .view()"
        with open(view, 'r') as f:
            view_html = f.read()
        view = View(view, view_html)
        return view

    def lsv(self):
        '''List view names: the text before the first "." of every entry in the views directory.'''
        return [f.split(".")[0] for f in listdir(self.views_dir)]

    def get_new_graph_instance(self):
        '''Create another Graph for the same graph name and current config locations.'''
        return Graph(self.graph_name, self.config.COG_HOME, self.config.COG_PATH_PREFIX)
def __init__(self, db_path=None, config=cfg):
    """
    Create the Cog store this object works through.

    :param db_path: forwarded to Cog as db_path.
    :param config: forwarded to Cog as config; defaults to the module-level cfg.
    """
    store = Cog(db_path=db_path, config=config)
    self.cog = store