示例#1
0
 def __init__(self, graph, cat, var, di, json_dict, regex_dict):
     """Store the constructor arguments and build a schema handle.

     Args:
         graph: Graph handle; kept as ``self.graph`` and passed to ``Schema``.
         cat: Stored as ``self.cat``.
         var: Stored as ``self.var``.
         di: Stored as ``self.di``.
         json_dict: Stored as ``self.json_map``.
         regex_dict: Stored as ``self.regex``.
     """
     self.cat, self.var = cat, var
     # The label is printed while only ``cat`` and ``var`` are set; the
     # remaining attributes are assigned afterwards.
     # NOTE(review): ``label_gen`` is defined outside this snippet --
     # presumably it only needs ``cat``/``var``; confirm.
     print(self.label_gen())
     self.di, self.json_map, self.regex = di, json_dict, regex_dict
     self.graph = graph
     self.schema = Schema(self.graph)
示例#2
0
    def open_spider(self, spider):
        """Connect to MongoDB and Neo4j and create the indexes used later.

        Opens a MongoDB client from ``self.mongo_uri`` / ``self.mongo_port``,
        selects ``self.mongo_db``, authenticates with SCRAM-SHA-1 and creates
        unique indexes on the user and topic collections.  Then connects to
        Neo4j using the ``NEO4J_URI`` / ``NEO4J_PWD`` names and creates
        indexes on the ``User`` label.

        Args:
            spider: Not used by this method.
        """
        # --- MongoDB: connection, authentication, unique indexes ---
        self.client = pymongo.MongoClient(
            host=self.mongo_uri,
            port=self.mongo_port,
        )
        self.db = self.client[self.mongo_db]
        self.db.authenticate(
            self.mongo_usr,
            self.mongo_pwd,
            mechanism='SCRAM-SHA-1',
        )
        user_collection = self.db[self.user_collection_name]
        user_collection.create_index("puid", unique=True)
        topic_collection = self.db[self.topic_collection_name]
        topic_collection.create_index("tid", unique=True)

        # --- Neo4j: indexes on the User label ---
        graph = Graph(NEO4J_URI, password=NEO4J_PWD)
        for user_property in ('puid', 'name'):
            Schema(graph).create_index('User', user_property)
示例#3
0
    def test_hash_constraints(self):
        """Check that each listed label has exactly one uniqueness constraint.

        Builds a driller from ``cnfg_simple.yml`` next to this file, asks
        ``db_init`` to create the hash constraints, verifies the constraint
        count per label, then drops the ``hash`` constraints again.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'cnfg_simple.yml')
        test_driller = DefaultDriller(config_path)

        db_init.create_hash_constraints(test_driller.graph)

        schema = Schema(test_driller.graph)

        constrained_labels = ["Developer", "Branch", "Commit", "File", "Method"]

        for label in constrained_labels:
            constraints = schema.get_uniqueness_constraints(label)
            assert len(constraints) == 1

        # Clean up: remove the constraints this test created.
        for label in constrained_labels:
            schema.drop_uniqueness_constraint(label, 'hash')
 def __init__(self, graph, cat, var, di, json_dict, regex_dict):
     """Store the constructor arguments and initialise bookkeeping state.

     Args:
         graph: Graph handle; kept as ``self.graph`` and passed to ``Schema``.
         cat: Stored as ``self.cat``.
         var: Stored as ``self.var``.
         di: Stored as ``self.di``.
         json_dict: Stored as ``self.json_map``.
         regex_dict: Stored as ``self.regex``.
     """
     # ``graph`` is assigned once; the original repeated this assignment.
     self.graph = graph
     self.cat = cat
     self.var = var
     self.di = di
     self.json_map = json_dict
     self.regex = regex_dict
     self.schema = Schema(self.graph)
     # Fresh mutable containers per instance.
     self.PR_list = []
     self.clusters = {}
     # Counter starting at 1 (presumably ``itertools.count`` -- the import
     # is outside this snippet).
     self.cid = count(start=1, step=1)
示例#5
0
    def test_indices(self):
        """Check the number of indexes created per label, then drop them.

        Builds a driller from ``cnfg_simple.yml`` next to this file, asks
        ``db_init`` to create indices with ``hash_index=True`` and compares
        the index count reported for each label with the expected value.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        test_driller = DefaultDriller(os.path.join(here, 'cnfg_simple.yml'))

        db_init.create_indices(test_driller.graph, hash_index=True)

        schema = Schema(test_driller.graph)

        # (label, expected number of indexes), checked in this order.
        expected_index_counts = (
            ("Developer", 1),
            ("Branch", 2),
            ("Commit", 2),
            ("File", 3),
            ("Method", 3),
        )
        for label, expected in expected_index_counts:
            found = schema.get_indexes(label)
            assert len(found) == expected

        # Clean up: drop indexes in the same order as they are listed.
        # NOTE(review): File and Method are asserted to have three indexes
        # but only two are dropped for each -- confirm whether a third
        # index should be cleaned up as well.
        indexes_to_drop = (
            ("Developer", "hash"),
            ("Branch", "hash"),
            ("Branch", "project_id"),
            ("Commit", "hash"),
            ("Commit", "project_id"),
            ("File", "hash"),
            ("File", "project_id"),
            ("Method", "hash"),
            ("Method", "project_id"),
        )
        for label, property_key in indexes_to_drop:
            schema.drop_index(label, property_key)
示例#6
0
def create_indexes():
    """Create one index per (label, property) pair listed below.

    The graph handle comes from ``util.GraphConnector().connector``; the
    indexes are created through a single ``Schema`` object, in list order.
    """
    index_specs = (
        ("Project", "project_id"),
        ("Platform", "name"),
        ("License", "name"),
        ("Language", "name"),
        ("Status", "name"),
        ("Version", "id"),
        ("Version", "number"),
    )

    db_schema = Schema(util.GraphConnector().connector)
    for label, property_key in index_specs:
        db_schema.create_index(label, property_key)
示例#7
0
class GraphGenerator(Parse, ID_generator):
    """Create labelled nodes and relationships in a graph from a pair file.

    ``label_gen``, ``name_gen``, ``uniq_id`` and ``create`` are called on
    ``self`` but are not defined in this class -- presumably they come from
    ``Parse`` / ``ID_generator``; confirm against those classes.
    """

    def __init__(self, graph, cat, var, di, json_dict, regex_dict):
        """Store the constructor arguments and build a schema handle.

        Args:
            graph: Graph handle; kept as ``self.graph`` and passed to
                ``Schema``.
            cat: Stored as ``self.cat``.
            var: Stored as ``self.var``.
            di: Stored as ``self.di``.
            json_dict: Stored as ``self.json_map``.
            regex_dict: Stored as ``self.regex``.
        """
        self.cat = cat
        self.var = var
        # Printed while only ``cat`` and ``var`` are set; order preserved.
        print(self.label_gen())
        self.di = di
        self.json_map = json_dict
        self.regex = regex_dict
        self.graph = graph
        self.schema = Schema(self.graph)

    def NodeInit(self, node_count, Index_gen=True):
        """Create ``node_count`` nodes in a single transaction.

        Args:
            node_count: Number of nodes to create (indices ``0..node_count-1``
                are passed to ``name_gen`` and ``uniq_id``).
            Index_gen: When true, first create an index on the generated
                label over the ``uid`` and ``cid`` properties.

        Returns:
            ``self.graph``.
        """
        if Index_gen:
            self.schema.create_index(self.label_gen(), 'uid', 'cid')
        tx = self.graph.begin()
        for i in range(node_count):
            tx.create(
                Node(self.label_gen(),
                     name=self.name_gen(i),
                     uid=self.uniq_id(i),
                     cid='|0|'))
        tx.commit()
        return self.graph

    def Relation(self, path, itr_limit=1000):
        """Run one relation-creating statement per node found in ``path``.

        The file is consumed in batches of ``itr_limit`` pair lines.  For
        every key of a batch, ``self.create(what='relation', ...)`` builds
        the statement that is passed to ``self.graph.run``.

        Args:
            path: Path of the pair file (format described on ``__giveout``).
            itr_limit: Number of pair lines per batch.

        Returns:
            None.
        """
        gen = self.__giveout(path, itr_limit, overrite=False)
        try:
            for dict_nodes in gen:
                # ``None`` is the end-of-data sentinel yielded last.
                if dict_nodes is None:
                    print("The Job is Complete")
                    break
                for key, vals in dict_nodes.items():
                    self.graph.run(
                        self.create(what='relation',
                                    label=self.label_gen(),
                                    uid=key,
                                    sets=str(vals)))
        finally:
            # Closing the generator exits its ``with`` block, so the input
            # file is released even if a statement above raised.
            gen.close()
        return None

    def gen_adj_list(self, itr_limit, path):
        """Build the full adjacency mapping and dump it as JSON.

        The mapping is written to
        ``json_files/Adj_list_<label>.json`` where ``<label>`` is the value
        of ``self.label_gen()``.

        Args:
            itr_limit: Ignored in practice -- the generator is called with
                ``overrite=True``, which replaces it by the pair count read
                from the file.
            path: Path of the pair file (format described on ``__giveout``).

        Returns:
            The first batch produced by ``__giveout``.
        """
        gen = self.__giveout(path, itr_limit, overrite=True)
        try:
            dict_nodes = next(gen)
        finally:
            # Only the first batch is needed; close the suspended generator
            # so the input file it holds open is released right away.
            gen.close()
        with open('json_files/Adj_list' + '_' + self.label_gen() + '.json',
                  'w') as fp:
            json.dump(dict_nodes, fp)
        return dict_nodes

    def __giveout(self, path, itr_limit, overrite=False):
        """Yield adjacency dicts built from a pair file, batch by batch.

        File format as read here: the first line is split on single spaces
        and its second token is the number of pair lines that follow; each
        pair line holds space-separated integers of which the first two are
        used.  Both integers are mapped through ``self.uniq_id`` and each
        is recorded as a neighbour of the other.

        Args:
            path: Path of the pair file.
            itr_limit: A batch is yielded whenever the 1-based line counter
                is a multiple of this value.
            overrite: When true, ``itr_limit`` is replaced by the pair count
                from the header.

        Yields:
            ``dict`` mapping id -> list of neighbour ids for each batch,
            then the (possibly empty) remainder, then ``None`` as an
            end-of-data sentinel.
        """
        with open(path) as fp:
            elements = fp.readline().strip().split(" ")
            pair_count = int(elements[1])
            dict_nodes = dict()
            if overrite:
                itr_limit = pair_count
            for j in range(1, pair_count + 1):
                nodes = list(map(int, fp.readline().strip().split(" ")))
                first = self.uniq_id(nodes[0])
                second = self.uniq_id(nodes[1])
                # Record the pair in both directions.
                dict_nodes.setdefault(first, []).append(second)
                dict_nodes.setdefault(second, []).append(first)

                if j % itr_limit == 0:
                    yield dict_nodes
                    dict_nodes = dict()
            yield dict_nodes
            yield None
 def getCurrentRelationshipsLabels(self):
     """Return the graph's relationship types together with their count.

     Returns:
         A ``(relationship_types, count)`` tuple, where
         ``relationship_types`` is the value of
         ``Schema(self.graph).relationship_types`` and ``count`` is its
         length.
     """
     schema = Schema(self.graph)
     # Read the attribute once so the collection and its count come from
     # the same lookup (presumably each access queries the database --
     # confirm against the Schema implementation).
     relationship_types = schema.relationship_types
     return relationship_types, len(relationship_types)
 def getCurrentNodeLabels(self):
     """Return the graph's node labels together with their count.

     Returns:
         A ``(node_labels, count)`` tuple, where ``node_labels`` is the
         value of ``Schema(self.graph).node_labels`` and ``count`` is its
         length.
     """
     schema = Schema(self.graph)
     # Read the attribute once so the collection and its count come from
     # the same lookup (presumably each access queries the database --
     # confirm against the Schema implementation).
     node_labels = schema.node_labels
     return node_labels, len(node_labels)