def __init__(self, graph, cat, var, di, json_dict, regex_dict):
    self.cat = cat
    self.var = var
    print(self.label_gen())
    self.di = di
    self.json_map = json_dict
    self.regex = regex_dict
    self.graph = graph
    self.schema = Schema(self.graph)

def open_spider(self, spider):
    # connect to mongo and create index
    self.client = pymongo.MongoClient(host=self.mongo_uri, port=self.mongo_port)
    self.db = self.client[self.mongo_db]
    self.db.authenticate(self.mongo_usr, self.mongo_pwd, mechanism='SCRAM-SHA-1')
    self.db[self.user_collection_name].create_index("puid", unique=True)
    self.db[self.topic_collection_name].create_index("tid", unique=True)
    # create index and constraint for neo4j
    graph = Graph(NEO4J_URI, password=NEO4J_PWD)
    Schema(graph).create_index('User', 'puid')
    Schema(graph).create_index('User', 'name')

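# Hypothetical companion to open_spider above (not present in the source): a Scrapy
# close_spider hook that releases the MongoDB connection opened there. The attribute
# name (self.client) mirrors what open_spider sets.
def close_spider(self, spider):
    self.client.close()
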
def test_hash_constraints(self):
    folder = os.path.dirname(os.path.abspath(__file__))
    test_driller = DefaultDriller(os.path.join(folder, 'cnfg_simple.yml'))
    db_init.create_hash_constraints(test_driller.graph)
    schm = Schema(test_driller.graph)
    labels = ["Developer", "Branch", "Commit", "File", "Method"]
    for l in labels:
        c = schm.get_uniqueness_constraints(l)
        assert len(c) == 1
    # clean
    for l in labels:
        schm.drop_uniqueness_constraint(l, 'hash')

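# Hypothetical sketch of the db_init.create_hash_constraints helper exercised by the
# test above; its real implementation is not shown here. It assumes one 'hash'
# uniqueness constraint per node label, which is what the test asserts and later drops.
def create_hash_constraints(graph):
    schema = graph.schema  # equivalent to the Schema(graph) pattern used in the test
    for label in ("Developer", "Branch", "Commit", "File", "Method"):
        schema.create_uniqueness_constraint(label, "hash")
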
def __init__(self, graph, cat, var, di, json_dict, regex_dict):
    self.graph = graph
    self.cat = cat
    self.var = var
    self.di = di
    self.json_map = json_dict
    self.regex = regex_dict
    self.schema = Schema(self.graph)
    self.PR_list = []
    self.clusters = {}
    self.cid = count(start=1, step=1)

def test_indices(self):
    folder = os.path.dirname(os.path.abspath(__file__))
    test_driller = DefaultDriller(os.path.join(folder, 'cnfg_simple.yml'))
    db_init.create_indices(test_driller.graph, hash_index=True)
    schm = Schema(test_driller.graph)
    index_authors = schm.get_indexes("Developer")
    assert len(index_authors) == 1
    index_branch = schm.get_indexes("Branch")
    assert len(index_branch) == 2
    index_commits = schm.get_indexes("Commit")
    assert len(index_commits) == 2
    index_files = schm.get_indexes("File")
    assert len(index_files) == 3
    index_methods = schm.get_indexes("Method")
    assert len(index_methods) == 3
    # clean
    schm.drop_index("Developer", "hash")
    schm.drop_index("Branch", "hash")
    schm.drop_index("Branch", "project_id")
    schm.drop_index("Commit", "hash")
    schm.drop_index("Commit", "project_id")
    schm.drop_index("File", "hash")
    schm.drop_index("File", "project_id")
    schm.drop_index("Method", "hash")
    schm.drop_index("Method", "project_id")

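# Hypothetical sketch of db_init.create_indices exercised by the test above; the real
# implementation is not shown. It assumes one 'project_id' index per label plus an
# optional 'hash' index, matching the properties the test drops during cleanup.
def create_indices(graph, hash_index=False):
    schema = graph.schema  # equivalent to Schema(graph) used in the tests
    for label in ("Branch", "Commit", "File", "Method"):
        schema.create_index(label, "project_id")
        if hash_index:
            schema.create_index(label, "hash")
    if hash_index:
        schema.create_index("Developer", "hash")
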
def create_indexes():
    graph = util.GraphConnector().connector
    db_schema = Schema(graph)
    db_schema.create_index("Project", "project_id")
    db_schema.create_index("Platform", "name")
    db_schema.create_index("License", "name")
    db_schema.create_index("Language", "name")
    db_schema.create_index("Status", "name")
    db_schema.create_index("Version", "id")
    db_schema.create_index("Version", "number")

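# Hypothetical verification helper (not part of the snippet above): after calling
# create_indexes(), report what the schema holds for each label. The label names are
# taken from the snippet; the helper itself is only an illustrative sketch.
def check_indexes(graph):
    schema = graph.schema  # equivalent to Schema(graph) used above
    for label in ("Project", "Platform", "License", "Language", "Status", "Version"):
        print(label, schema.get_indexes(label))
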
import json

from py2neo import Node
from py2neo.database import Schema  # import path may differ across py2neo versions


class GraphGenerator(Parse, ID_generator):
    """Builds labelled nodes and relationships in Neo4j from an edge-list file.

    Parse and ID_generator (providing label_gen, name_gen, uniq_id and create)
    are project-specific base classes defined elsewhere.
    """

    def __init__(self, graph, cat, var, di, json_dict, regex_dict):
        self.cat = cat
        self.var = var
        print(self.label_gen())
        self.di = di
        self.json_map = json_dict
        self.regex = regex_dict
        self.graph = graph
        self.schema = Schema(self.graph)

    def NodeInit(self, node_count, Index_gen=True):
        # Optionally create an index on uid/cid, then bulk-create the nodes
        # in a single transaction.
        if Index_gen:
            self.schema.create_index(self.label_gen(), 'uid', 'cid')
        tx = self.graph.begin()
        for i in range(node_count):
            tx.create(Node(self.label_gen(),
                           name=self.name_gen(i),
                           uid=self.uniq_id(i),
                           cid='|0|'))
        tx.commit()
        return self.graph

    def Relation(self, path, itr_limit=1000):
        # Consume adjacency batches from the edge-list file and create the
        # corresponding relationships, itr_limit edges at a time.
        gen = self.__giveout(path, itr_limit, overwrite=False)
        dict_nodes = next(gen)
        while True:
            for key, vals in dict_nodes.items():
                self.graph.run(self.create(what='relation',
                                           label=self.label_gen(),
                                           uid=key,
                                           sets=str(vals)))
            dict_nodes = next(gen)
            if dict_nodes is None:
                print("The Job is Complete")
                break
        return None

    def gen_adj_list(self, itr_limit, path):
        # Build the full adjacency list in one pass and dump it to a JSON file.
        gen = self.__giveout(path, itr_limit, overwrite=True)
        dict_nodes = next(gen)
        with open('json_files/Adj_list' + '_' + self.label_gen() + '.json', 'w') as fp:
            json.dump(dict_nodes, fp)
        return dict_nodes

    def __giveout(self, path, itr_limit, overwrite=False):
        # Stream the edge list: the first line holds "<node_count> <edge_count>",
        # each following line holds one "<src> <dst>" pair. Yields an undirected
        # adjacency dict every itr_limit edges (or the whole file when overwrite
        # is True), then an empty dict, then None as an end-of-stream marker.
        with open(path) as fp:
            elements = fp.readline().strip().split(" ")
            dict_nodes = dict()
            if overwrite:
                itr_limit = int(elements[1])
            for j in range(1, int(elements[1]) + 1):
                nodes = list(map(int, fp.readline().strip().split(" ")))
                nodes = [self.uniq_id(nodes[0]), self.uniq_id(nodes[1])]
                if nodes[0] in dict_nodes:
                    dict_nodes[nodes[0]].append(nodes[1])
                else:
                    dict_nodes[nodes[0]] = [nodes[1]]
                if nodes[1] in dict_nodes:
                    dict_nodes[nodes[1]].append(nodes[0])
                else:
                    dict_nodes[nodes[1]] = [nodes[0]]
                if j % itr_limit == 0:
                    yield dict_nodes
                    dict_nodes = dict()
        yield dict_nodes
        yield None

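# Hypothetical driver showing how GraphGenerator appears to be used; the cat/var/di
# arguments, the connection details, and the edge-list file layout (first line
# "<n_nodes> <n_edges>", then one "<src> <dst>" pair per line) are inferred from the
# class above, not confirmed by the source.
from py2neo import Graph

graph = Graph("bolt://localhost:7687", password="secret")  # assumed connection details
gen = GraphGenerator(graph, cat="demo", var=1, di={}, json_dict={}, regex_dict={})
gen.NodeInit(node_count=100)                               # create nodes plus a uid/cid index
gen.Relation(path="edges.txt")                             # stream relationships in batches
adj = gen.gen_adj_list(itr_limit=1000, path="edges.txt")   # dump the adjacency list to JSON
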
def getCurrentRelationshipsLabels(self):
    # Fetch once to avoid two round-trips to the database for the same property.
    rel_types = Schema(self.graph).relationship_types
    return rel_types, len(rel_types)

def getCurrentNodeLabels(self):
    # Fetch once to avoid two round-trips to the database for the same property.
    node_labels = Schema(self.graph).node_labels
    return node_labels, len(node_labels)

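# Minimal standalone sketch (assuming a local Neo4j instance reachable via py2neo)
# showing the two Schema properties the getters above expose.
from py2neo import Graph

graph = Graph("bolt://localhost:7687", password="secret")  # assumed credentials
schema = graph.schema  # equivalent to the Schema(graph) pattern used above
print("node labels:", list(schema.node_labels))
print("relationship types:", list(schema.relationship_types))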