示例#1
0
 def graph_merge_nodes(self, nodes=None, toleration=10):
     """
     Merge a batch of nodes into the database, dropping the few bad ones.

     Nodes are normally written in bulk, but a handful of abnormal nodes can
     make the whole batch fail. When the bulk merge raises, the batch is cut
     into roughly ten chunks that are merged one at a time; a chunk that
     fails is handed back to this method so it is retried and subdivided
     again. A failing batch is abandoned once it holds fewer than
     ``toleration`` nodes or cannot be split any further.

     :param nodes: sliceable sequence of nodes to merge; ``None`` or an
         empty sequence is a no-op
     :param toleration: a failing batch with fewer nodes than this is
         discarded instead of being split again
     :return: None
     """
     if nodes is None or len(nodes) == 0:
         return
     try:
         tx = self.graph.begin()
         tx.merge(Subgraph(nodes=nodes))
         tx.commit()
         return
     except Exception as e:
         self.logger.debug('commit subgraph to database '
                           'raise ({})'.format(e))

     total = len(nodes)
     # A single node cannot be split further; without this guard a
     # toleration of 0 or 1 would recurse forever on the same failing node.
     if total < toleration or total <= 1:
         return
     chunk_size = total // 10 + 1  # number of nodes per chunk
     for start in range(0, total, chunk_size):
         stop = start + chunk_size
         chunk = nodes[start:stop]
         try:
             tx = self.graph.begin()
             tx.merge(Subgraph(nodes=chunk))
             tx.commit()
         except Exception as e:
             self.logger.debug(
                 'commit subgraph to database raise ({}) on '
                 '[{}:{}]'.format(e, start, stop))
             self.graph_merge_nodes(chunk, toleration)
示例#2
0
def _import_relation_csv(csv_path, rel_type, skip_self_loops=True):
    """Create ``rel_type`` relationships between the company pairs in a CSV.

    The CSV must provide ``eid1`` and ``eid2`` columns. Rows with a missing
    id are skipped, as are rows whose companies cannot be matched on ``eid``.

    :param csv_path: path of the CSV file to read
    :param rel_type: relationship type to create from eid1 to eid2
    :param skip_self_loops: when true, rows with eid1 == eid2 are ignored
    """
    df = pd.read_csv(csv_path)
    matcher = NodeMatcher(graph)
    # eid -> matched node (or None), so every company is queried only once.
    companies = {}

    def find_company(eid):
        if eid not in companies:
            companies[eid] = matcher.match('company', eid=eid).first()
        return companies[eid]

    relations = []
    for e1, e2 in tqdm(list(zip(df['eid1'].values, df['eid2'].values))):
        if pd.isna(e1) or pd.isna(e2):
            continue
        if skip_self_loops and e1 == e2:
            continue
        company1 = find_company(e1)
        company2 = find_company(e2)
        if company1 is not None and company2 is not None:
            relations.append(Relationship(company1, rel_type, company2))

    # Nothing to write: do not hand an empty subgraph to the database.
    if relations:
        graph.create(Subgraph(relationships=relations))


def import_company_relation():
    """Import supplier, guarantee and customer relations between companies.

    Each relation type is read from its own CSV under ``company_data/`` and
    written to the graph in one ``graph.create`` call per file.
    """
    _import_relation_csv('company_data/公司-供应商.csv', '供应商')
    print('import company-supplier relation succeeded')

    _import_relation_csv('company_data/公司-担保.csv', '担保')
    print('import company-guarantee relation succeeded')

    # The customer import never filtered rows where eid1 == eid2; that
    # behaviour is preserved here.
    _import_relation_csv('company_data/公司-客户.csv', '客户',
                         skip_self_loops=False)
    print('import company-customer relation succeeded')
    def get_shortest_path_to_name_in_subgraph(self,
                                              start_id,
                                              end_node_name,
                                              max_degree=6,
                                              limit=3,
                                              max_end_node_number=3):
        """
        Gather the shortest paths from one node to the nodes with a given name.

        :param start_id: id of the start node
        :param end_node_name: name used to look up candidate end nodes with
            the "entity" label
        :param max_degree: forwarded to get_shortest_path_in_subgraph
        :param limit: forwarded to get_shortest_path_in_subgraph
        :param max_end_node_number: maximum number of end nodes looked up
        :return: one Subgraph holding the nodes and relationships of every
            path found, or None when nothing was found
        """
        candidates = self.find_by_name_property("entity",
                                                name=end_node_name,
                                                limit=max_end_node_number)
        collected_nodes, collected_relations = [], []

        for candidate in candidates:
            path_graph = self.get_shortest_path_in_subgraph(
                start_id=start_id,
                end_id=self.get_id_for_node(candidate),
                max_degree=max_degree,
                limit=limit)
            if not path_graph:
                continue
            collected_nodes += path_graph.nodes()
            collected_relations += path_graph.relationships()

        if not collected_nodes:
            return None
        return Subgraph(collected_nodes, collected_relations)
示例#4
0
def import_company_r_guaranty(data_path):
    """
    Import company-to-company guaranty ("担保") relationships from a CSV file.

    The CSV must provide ``eid1`` and ``eid2`` columns. Rows whose two ids are
    equal, or whose companies cannot be matched on ``cid``, are skipped. A
    relationship is only created when the database holds no relationship of
    that type between the two nodes yet.

    :param data_path: path of the CSV file to read
    """
    df = pd.read_csv(data_path)
    logger.info(f'''处理公司-担保关系, 数据文件:{data_path}''')
    r_total = len(df.index)
    rel_type = "担保"
    c_g_relations = []
    exists_r = 0
    for c_id, guaranty_id in tqdm(
            list(zip(df['eid1'].values, df['eid2'].values))):
        if c_id == guaranty_id:
            continue
        # Property matching passes the value as a query parameter, so ids
        # containing quotes cannot break the generated Cypher.
        company = matcher.match("company", cid=str(c_id)).first()
        guaranty = matcher.match("company", cid=str(guaranty_id)).first()
        if company is None or guaranty is None:
            continue
        # Materialise the match once; counting it twice ran the query twice.
        existing = len(
            list(r_matcher.match(nodes=(company, guaranty), r_type=rel_type)))
        exists_r += existing
        if existing == 0:
            c_g_relations.append(Relationship(company, rel_type, guaranty))

    effective_relations = len(c_g_relations)
    if c_g_relations:
        graph.create(Subgraph(relationships=c_g_relations))
    # The summary previously reported the supplier ("供应商") import.
    logger.info(
        f'''导入公司-担保关系完成, 共需导入: {r_total} 个, 有效关系: {effective_relations} 个, 已存在关系:{exists_r}个'''
    )
示例#5
0
def _author_to_neo(info, graph):
    """Merge an Author node keyed on ``CellID`` and push its other fields.

    :param info: mapping describing the author; must contain ``CellID``
    :param graph: graph object used to open the transaction and to push
    """
    print(info)
    tx = graph.begin()
    author = Node("Author", CellID=info['CellID'])

    # NOTE: paper, paper-author and affiliation nodes (and the relationships
    # between them) are not written yet; only the author node is merged.
    author_only = Subgraph([author], None)
    tx.merge(author_only, primary_label=None, primary_key='CellID')
    tx.commit()

    # Copy across the entries of info whose key is listed in author_fields.
    author.update(**keyfilter(author_fields.__contains__, info))
    graph.push(author)
示例#6
0
def CreateNode(elements):
    """Build and store one group of nodes per key of ``elements``.

    The keys '主体代码' and '证券代码' only supply codes and produce no nodes.
    '主体名称' nodes carry the first '主体代码' value as ``code``; '证券产品'
    nodes carry the '证券代码' value at the same index and the first
    '主体名称' value as ``belonging``; every other key produces nodes that
    only carry ``belonging``.

    :param elements: mapping of label -> list of values
    :return: dict mapping each node-producing key to its list of nodes
    """
    code_only_keys = ('主体代码', '证券代码')
    nodes = {}

    for key, values in elements.items():
        if key in code_only_keys:
            continue
        if key == '主体名称':
            nodes[key] = [
                Node(key, name=value, code=elements['主体代码'][0])
                for value in values
            ]
        elif key == '证券产品':
            nodes[key] = [
                Node(key,
                     name=value,
                     code=elements['证券代码'][i],
                     belonging=elements['主体名称'][0])
                for i, value in enumerate(values)
            ]
        else:
            nodes[key] = [
                Node(key, name=value, belonging=elements['主体名称'][0])
                for value in values
            ]

    for group in nodes.values():
        # A key with no values has nothing to write; skip the empty subgraph.
        if group:
            graph.create(Subgraph(group))

    return nodes
    def get_shortest_path_in_subgraph(self,
                                      start_id,
                                      end_id,
                                      max_degree=6,
                                      limit=3):
        '''
        Run a shortestPath query between two "entity" nodes.

        :param start_id: id of the start node
        :param end_id: id of the end node
        :param max_degree: upper bound on the path length in the pattern
        :param limit: maximum number of path rows requested
        :return: Subgraph with the nodes and relationships of every returned
            path; None when no path row came back or an exception was raised
            (the exception is logged)
        '''
        try:
            cypher = (
                'Match path = shortestPath('
                '(n:entity)-[*..{max_degree}]-(m:entity)) '
                'where ID(n)={start_id} and ID(m)={end_id} '
                'RETURN distinct path limit {limit}'
            ).format(start_id=start_id,
                     end_id=end_id,
                     max_degree=max_degree,
                     limit=limit)

            path_nodes, path_relations = [], []
            for row in self.graph.run(cypher):
                found = row["path"]
                path_nodes += found.nodes()
                path_relations += found.relationships()

            if not path_nodes:
                return None
            return Subgraph(path_nodes, path_relations)
        except Exception:
            _logger.exception("----------")
            return None
    def search_nodes_by_keyword(self,
                                keyword,
                                label='api',
                                top_number=10,
                                index_name="api"):
        """
        Search an index for nodes matching the words of ``keyword``.

        Every whitespace-separated word gets a trailing ``*`` wildcard, and
        parentheses are replaced by spaces before the text is placed in the
        ``apoc.index.search`` call.

        :param keyword: search text
        :param label: label the returned nodes must carry
        :param top_number: maximum number of nodes requested
        :param index_name: name of the index to search
        :return: Subgraph of the matched nodes, or None when an exception was
            raised (the exception is logged)
        """
        try:
            terms = "".join(word + "* " for word in keyword.split())
            terms = terms.replace("(", " ").replace(")", " ")
            cypher = (
                "call apoc.index.search('{index_name}', '{name}') "
                "YIELD node match (node:`{label}`) "
                "return node limit {top_number}"
            ).format(index_name=index_name,
                     name=terms,
                     top_number=top_number,
                     label=label)
            matched = [row['node'] for row in self.graph.run(cypher)]
            return Subgraph(nodes=matched)
        except Exception:
            _logger.exception("-----------")
            return None
 def get_relations_between_two_nodes_in_subgraph(self, start_id, end_id):
     '''
     Get the relations between two nodes as a Subgraph.

     The records come from ``get_relations_between_two_nodes``; the "r"
     value of each record is collected as a relationship and the "n" and
     "m" values as nodes.

     :param start_id: id of the start node
     :param end_id: id of the end node
     :return: a Subgraph, or None when there is no result or an exception
         was raised (the exception is logged)
     '''
     try:
         result = self.get_relations_between_two_nodes(start_id, end_id)
         if not result:
             return None
         nodes = []
         relations = []
         for record in result:
             relations.append(record["r"])
             nodes.append(record["n"])
             nodes.append(record["m"])
         if not nodes:
             return None
         return Subgraph(nodes, relations)
     # "except Exception, error" only parses on Python 2 and the bound name
     # was never used; this form is valid on both Python 2 and 3.
     except Exception:
         _logger.exception("-----------")
         return None
示例#10
0
def import_company_r_person(data_path):
    """
    Import company-to-person relationships from a CSV file.

    The CSV must provide ``eid``, ``pid`` and ``post`` columns; the ``post``
    value is used as the relationship type. Rows whose company or person
    cannot be matched are skipped. A relationship is only created when the
    database holds no relationship of that type between the two nodes yet.

    :param data_path: path of the CSV file to read
    """
    df = pd.read_csv(data_path)
    logger.info(f'''处理公司-人物关系, 数据文件:{data_path}''')
    r_total = len(df.index)
    rows = list(zip(df['eid'].values, df['pid'].values, df['post'].values))
    c_p_relations = []
    exists_r = 0
    for c_id, p_id, c_r_p in tqdm(rows):
        # Property matching passes the value as a query parameter, so ids
        # containing quotes cannot break the generated Cypher.
        person = matcher.match("person", pid=str(p_id)).first()
        company = matcher.match("company", cid=str(c_id)).first()
        if person is None or company is None:
            continue
        # Materialise the match once; counting it twice ran the query twice.
        existing = len(
            list(r_matcher.match(nodes=(company, person), r_type=c_r_p)))
        exists_r += existing
        if existing == 0:
            c_p_relations.append(Relationship(company, c_r_p, person))

    effective_relations = len(c_p_relations)
    if c_p_relations:
        graph.create(Subgraph(relationships=c_p_relations))
    logger.info(
        f'''导入公司-人物关系完成, 共需导入: {r_total} 个, 有效关系: {effective_relations} 个, 已存在关系:{exists_r}个'''
    )
示例#11
0
def import_company(data_path):
    """
    Import company nodes from a CSV file, skipping companies already stored.

    The CSV must provide ``companyname``, ``eid`` and ``dishonesty`` columns.
    A company counts as existing when a "company" node with the same ``cid``
    is found. New nodes receive a randomly generated ``profit`` value.

    :param data_path: path of the CSV file to read
    """
    df = pd.read_csv(data_path)
    c_total = len(df.index)
    logger.info(f'''处理公司节点入库, 数据文件:{data_path}''')
    rows = list(
        zip(df['eid'].values, df['companyname'].values,
            df['dishonesty'].values))
    nodes = []
    exists_n = 0
    # "company_id" rather than "id" so the builtin is not shadowed.
    for company_id, name, dishonesty in tqdm(rows):
        # Randomly generated figure stored as the company's profit.
        profit = np.random.randint(100000, 10000000, 1)[0]
        # Property matching passes the value as a query parameter, so ids
        # containing quotes cannot break the generated Cypher.
        c_match = matcher.match("company", cid=str(company_id)).first()
        if c_match is not None:
            exists_n += 1
            continue
        nodes.append(
            Node("company",
                 name=name,
                 cid=str(company_id),
                 dishonesty=int(dishonesty),
                 profit=int(profit)))

    effective_node = len(nodes)
    if nodes:
        graph.create(Subgraph(nodes))
    logger.info(
        f'''共需导入公司节点: {c_total} 个, 有效节点: {effective_node} 个, 已存在节点:{exists_n}个'''
    )
示例#12
0
    def expand_node(self, node_id, limit=40):
        """
        Fetch the "entity" nodes directly connected to one node.

        :param node_id: id of the node to expand
        :param limit: maximum number of rows requested from the query
        :return: Subgraph of the matched nodes and relationships; None when
            nothing matched or an exception was raised (the traceback is
            printed)
        """
        cypher = (
            "Match (n:entity)-[r]-(m:entity) where ID(n)={start_id} "
            "return distinct r,n,m limit {limit}"
        ).format(start_id=node_id, limit=limit)
        try:
            found_nodes, found_relationships = [], []
            for row in self.graph.run(cypher):
                found_relationships.append(row["r"])
                found_nodes.append(row["n"])
                found_nodes.append(row["m"])

            if not found_nodes:
                return None
            return Subgraph(found_nodes, found_relationships)
        except Exception:
            traceback.print_exc()
            return None
示例#13
0
    def batch_relations(self, rela, relations):
        """
        Create ``rela`` relationships between pairs of named nodes.

        Each item of ``relations`` must provide "entityname1" and
        "entityname2". A pair is skipped when either node cannot be found by
        name or when a relationship of type ``rela`` already exists between
        the two nodes. New relationships get ``score=100`` and are written in
        a single transaction.

        :param rela: relationship type
        :param relations: iterable of mappings describing the node pairs
        """
        # The matchers do not depend on the row, so build them once.
        node_matcher = NodeMatcher(self.graph)
        relationship_matcher = RelationshipMatcher(self.graph)
        new_relationships = []
        for data in relations:
            node1 = node_matcher.match(name=data["entityname1"]).first()
            node2 = node_matcher.match(name=data["entityname2"]).first()
            if node1 is None or node2 is None:
                # py2neo documents a None endpoint as "any node" for the
                # relationship matcher, so a missing node would match
                # unrelated relationships and cannot anchor a new one.
                continue

            old_relationship = relationship_matcher.match(
                [node1, node2], r_type=rela).first()
            print("-------old_relationship", old_relationship)

            if old_relationship is None:
                relationship = Relationship(node1, rela, node2, score=100)
                print("-------relationship", relationship)
                new_relationships.append(relationship)

        # Only open a transaction when there is something to write.
        if new_relationships:
            print("new_relationships--------", new_relationships)
            tx = self.graph.begin()
            tx.create(Subgraph(relationships=new_relationships))
            tx.commit()
示例#14
0
文件: DBConc.py 项目: zjssss/FF-AID
 def deleteRelationships(self, rtype):
     """Separate every relationship of type ``rtype`` from the graph.

     A ValueError raised along the way is printed rather than propagated.

     :param rtype: relationship type to match
     """
     try:
         matched = self.graph.relationships.match(r_type=rtype)
         self.graph.separate(Subgraph(relationships=matched))
     except ValueError as err:
         print(err)
示例#15
0
 def to_subgraph(self):
     """ Convert a RecordList into a Subgraph.

     Only record values that are Node or Path instances are kept.
     """
     entities = [
         value
         for record in self.records
         for value in record
         if isinstance(value, (Node, Path))
     ]
     return Subgraph(*entities)
示例#16
0
    def import_harmonized_attribute(self, harmonized_attribute):
        """
        Create a HarmonizedAttribute with its code set and mapping links.

        Nothing is done when the harmonized attribute already exists.
        Otherwise a subgraph is built from the new attribute node, a code set
        node linked to it by HAS_MEANING, and one MAPS_TO relationship for
        each entry of ``node_attributes`` that resolves to an existing node
        attribute. The subgraph is created in a single transaction.

        :param harmonized_attribute: mapping with the keys 'system',
            'entity', 'attribute' and 'definition', and optionally
            'node_attributes' (strings of the form
            ``<system>:<entity>.<attribute>``)
        """
        system = harmonized_attribute['system']
        entity = harmonized_attribute['entity']
        attribute = harmonized_attribute['attribute']
        logger.info(
            f'Importing HarmonizedAttribute {system}.{entity}.{attribute} ...')

        ha_node = self.mdr_graph.get_harmonized_attribute(
            system, entity, attribute)

        if ha_node is not None:  # already exists. Skip
            # TODO: Update
            return
        ha_node = self.mdr_graph.create_harmonized_attribute(
            system, entity, attribute)
        ha_node['definition'] = harmonized_attribute['definition']

        subgraph = Subgraph([ha_node])

        # to-do: What's created here is empty. Is it updated later anywhere? If not,
        # ...is this just something that Dazhi never got to? - joeflack4 2021/11/30
        cs_node = self.mdr_graph.create_code_set()
        subgraph |= cs_node
        subgraph |= Relationship(ha_node, 'HAS_MEANING', cs_node)

        # node_attributes: Looks like will only be mappings, of the format:
        # ...<MODEL>:<ENTITY>.<ATTR> - joeflack4 2021/11/19
        if 'node_attributes' in harmonized_attribute:
            # TODO: Shouldn't exact_mapping be nested within node_attributes or
            #  ...node_attributes/mappings?= instead? (updated here and in
            #  ...CRDCH.import_harmonized_attributes()
            for node_attribute in harmonized_attribute['node_attributes']:
                # Separate names are used for the mapped attribute so the
                # harmonized attribute's own system/entity/attribute stay
                # intact for the success message logged at the end.
                try:
                    mapped_system, mapped_path = node_attribute.split(':')
                    mapped_entity, mapped_attribute = mapped_path.split('.')
                except ValueError as e:
                    logger.error(
                        f'Failed to parse the mapping attribute name: {node_attribute}'
                    )
                    logger.error(e)
                    continue
                na_node = self.mdr_graph.get_node_attribute(
                    mapped_system, mapped_entity, mapped_attribute)

                if na_node is None:
                    logger.warning(node_attribute + ' not found in database')
                else:
                    subgraph |= Relationship(na_node, 'MAPS_TO', ha_node)

        tx = self.graph.begin()
        tx.create(subgraph)
        self.graph.commit(tx)

        logger.info(
            f'Importing HarmonizedAttribute {system}.{entity}.{attribute} was successful'
        )
示例#17
0
def import_assign():
    """Create one 'assign' node per value of the ``schemetype`` column."""
    frame = pd.read_csv('company_data/分红.csv')
    assign_nodes = [
        Node('assign', name=value)
        for value in tqdm(frame['schemetype'].values)
    ]
    graph.create(Subgraph(assign_nodes))
示例#18
0
def import_violations():
    """Create one 'violations' node per value of the ``gooltype`` column."""
    frame = pd.read_csv('company_data/违规类型.csv')
    violation_nodes = [
        Node('violations', name=value)
        for value in tqdm(frame['gooltype'].values)
    ]
    graph.create(Subgraph(violation_nodes))
示例#19
0
def import_industry():
    """Create one 'industry' node per value of the ``orgtype`` column."""
    frame = pd.read_csv('company_data/行业.csv')
    industry_nodes = [
        Node('industry', name=value)
        for value in tqdm(frame['orgtype'].values)
    ]
    graph.create(Subgraph(industry_nodes))
示例#20
0
def import_bond():
    """Create one 'bond' node per value of the ``securitytype`` column."""
    frame = pd.read_csv('company_data/债券类型.csv')
    bond_nodes = [
        Node('bond', name=value)
        for value in tqdm(frame['securitytype'].values)
    ]
    graph.create(Subgraph(bond_nodes))
示例#21
0
 def create_node(self, names, label):
     """Create a ``label`` node for every name that has no match yet.

     :param names: iterable of node names
     :param label: label used both for the lookup and for the new nodes
     """
     missing = [
         Node(label, name=name)
         for name in tqdm(names)
         if not list(self.node_match.match(label, name=name))
     ]
     if not missing:
         return
     self.graf.create(Subgraph(missing))
     print('完成创建节点{}个'.format(len(missing)))
示例#22
0
 def add_boons(cls, id_, boons=None):
     """Link a guide to the given boons with HAS_BOON relationships.

     :param id_: id passed to ``Guide.find_by_id`` to locate the guide
     :param boons: ids of the "Boon" nodes to link; defaults to an empty list
     """
     wanted = [] if boons is None else boons
     _, _, guide = Guide.find_by_id(id_)
     boon_nodes = list(
         NodeMatcher(graph_).match("Boon").where(f"_.id in {wanted}"))
     links = [Relationship(guide, "HAS_BOON", node) for node in boon_nodes]
     graph_.create(Subgraph(boon_nodes + [guide], links))
 def __call__(self, data, indexes=None, *args, **kwargs):
     """Build one node per datum from the template and create them all.

     :param data: list of mappings; each key/value pair is set on a deep
         copy of ``self.node_template``
     :param indexes: forwarded unchanged to ``self._create``
     """
     assert isinstance(
         data, list), "except data to be list, but got %s" % type(data)

     def build(datum):
         # Copy the template so the nodes do not share state.
         node = deepcopy(self.node_template)
         for attr in datum:
             node[attr] = datum[attr]
         return node

     built = [build(datum) for datum in data]
     self._create(Subgraph(built), indexes)
示例#24
0
文件: adcp.py 项目: miaouPlop/pyadcp
    def __simplify_graph(self, graph: Subgraph) -> Subgraph:
        """
        Collapse parallel edges between the same pair of nodes.

        Edges are grouped by the ``name`` properties of their start and end
        nodes. Repeated edges of one type are reduced to a single edge; when
        a pair has several distinct types, all its edges are replaced by one
        new relationship whose type is the comma-joined list of types. The
        new relationship gets ``DENY = 1`` if any edge of the pair had a
        truthy ``DENY`` property.

        Edges that are dropped are marked in place with a ``__drop__``
        property on the input graph's relationship objects.

        :param graph: subgraph to simplify
        :return: new Subgraph with the same nodes and the simplified edges
        """
        # List edges. Pairs are keyed by a (source, target) tuple: joining
        # the two names into one string lets different pairs collide
        # ("ab" + "c" == "a" + "bc") and fails when a name is None.
        types_by_pair = {}
        deny_by_pair = {}
        for edge in graph.relationships():
            pair = (edge.start_node()['name'], edge.end_node()['name'])
            seen_types = types_by_pair.setdefault(pair, [])
            if edge.type() not in seen_types:
                seen_types.append(edge.type())
            else:
                edge['__drop__'] = 1

            deny_by_pair.setdefault(pair, False)
            deny_by_pair[pair] |= bool(edge['DENY'])

        # Group edges. merged_pairs records which pairs already have their
        # combined relationship, so only one is built per pair.
        merged_pairs = set()
        new_edges = []
        for edge in graph.relationships():
            pair = (edge.start_node()['name'], edge.end_node()['name'])
            edge_types = types_by_pair.get(pair, [])
            if len(edge_types) <= 1:
                continue
            edge['__drop__'] = 1
            if pair in merged_pairs:
                continue
            merged_pairs.add(pair)

            new_rel = Relationship(edge.start_node(), ','.join(edge_types),
                                   edge.end_node())
            if deny_by_pair.get(pair):
                new_rel['DENY'] = 1
            new_edges.append(new_rel)

        # Build the simplified graph
        kept_edges = [x for x in graph.relationships() if not x['__drop__']]
        return Subgraph(nodes=list(graph.nodes()),
                        relationships=kept_edges + new_edges)
示例#25
0
def import_company():
    """Create one 'company' node per row of 公司.csv.

    Each node carries the row's ``eid`` and ``companyname`` plus a randomly
    generated ``profit`` value.
    """
    frame = pd.read_csv('company_data/公司.csv')
    rows = list(zip(frame['eid'].values, frame['companyname'].values))

    company_nodes = []
    for company_eid, company_name in tqdm(rows):
        profit = np.random.randint(100000, 100000000, 1)[0]
        company_nodes.append(
            Node('company',
                 name=company_name,
                 profit=int(profit),
                 eid=company_eid))

    graph.create(Subgraph(company_nodes))
示例#26
0
def import_person():
    """Create one 'person' node per row of 人物.csv.

    Each node carries the row's ``personname``, its ``personcode`` as a
    string ``pid``, and a randomly generated ``age`` value.
    """
    frame = pd.read_csv('company_data/人物.csv')
    rows = list(zip(frame['personcode'].values, frame['personname'].values))

    person_nodes = []
    for person_code, person_name in tqdm(rows):
        age = np.random.randint(20, 70, 1)[0]
        person_nodes.append(
            Node('person',
                 name=person_name,
                 age=int(age),
                 pid=str(person_code)))

    graph.create(Subgraph(person_nodes))
    def expand_node_for_adjacent_nodes_to_subgraph(self, node_id, limit=40):
        """
        Fetch the nodes directly connected to one "entity" node.

        Three queries are run in order, for neighbours labelled "wikidata",
        "api" and "entity"; ``limit`` applies to each query separately.

        :param node_id: id of the node to expand
        :param limit: maximum number of rows requested from each query
        :return: Subgraph of all matched nodes and relationships; None when
            nothing matched or an exception was raised (the traceback is
            printed)
        """
        template = ("Match (n:entity)-[r]-(m:{label}) where ID(n)={start_id} "
                    "return distinct r,n,m limit {limit}")
        # Neighbour labels in the order the queries are run.
        queries = [
            template.format(label=neighbour_label,
                            start_id=node_id,
                            limit=limit)
            for neighbour_label in ("wikidata", "api", "entity")
        ]
        try:
            found_nodes, found_relationships = [], []
            # todo speed up this by multiple thread
            for cypher in queries:
                for row in self.graph.run(cypher):
                    found_relationships.append(row["r"])
                    found_nodes.append(row["n"])
                    found_nodes.append(row["m"])

            if not found_nodes:
                return None
            return Subgraph(found_nodes, found_relationships)
        except Exception:
            traceback.print_exc()
            return None
示例#28
0
文件: io.py 项目: pyz2020/comptox_ai
    def add_edges(self, edges: List[tuple]):
        """
        Add a list of edges to the graph and synchronize them to the remote
        database.

        :param edges: list of ``(u, rel_type, v, props)`` tuples, where
            ``props`` is a mapping of relationship properties (or None)
        """
        # Since we have to synchronize changes as a single chunk, it's not as
        # simple as calling add_edge() for every element of `edges`.
        es = [
            # Properties are keyword arguments of Relationship; a fourth
            # positional argument is not read as a property mapping.
            Relationship(u, rel_type, v, **(props or {}))
            for u, rel_type, v, props in edges
        ]
        if not es:
            return

        # The first positional parameter of Subgraph is its nodes, so the
        # relationships have to be passed by keyword.
        self._graph.create(Subgraph(relationships=es))
示例#29
0
 def bacth_node_label(self, label, entity_labes):
     """Give ``label`` to every named node, creating the missing ones.

     Nodes found by name get the label added and are pushed; names without
     a match become new nodes carrying the label and the name. Both groups
     are written in one transaction.

     :param label: label to add
     :param entity_labes: iterable of node names
     """
     tx = self.graph.begin()
     matcher = NodeMatcher(self.graph)
     to_create, to_update = [], []
     for entity_name in entity_labes:
         existing = matcher.match(name=entity_name).first()
         if existing is not None:
             existing.add_label(label=label)
             to_update.append(existing)
             continue
         fresh = Node()
         fresh.add_label(label=label)
         fresh["name"] = entity_name
         to_create.append(fresh)
     if to_create:
         created_graph = Subgraph(to_create)
         print("newnodelist----", to_create)
         tx.create(created_graph)
     if to_update:
         updated_graph = Subgraph(to_update)
         print("oldnodelist----", to_update)
         tx.push(updated_graph)
     tx.commit()
示例#30
0
 def batch_node(self, entitys_items):
     """Create a node for every name that has no matching node yet.

     :param entitys_items: iterable of node names
     """
     tx = self.graph.begin()
     matcher = NodeMatcher(self.graph)
     fresh_nodes = []
     for entity_name in entitys_items:
         if matcher.match(name=entity_name).first() is not None:
             continue
         fresh = Node()
         fresh["name"] = entity_name
         fresh_nodes.append(fresh)
     if fresh_nodes:
         print("newnodes---------", fresh_nodes)
         tx.create(Subgraph(fresh_nodes))
     tx.commit()