def load_industry_category_layer4(sheet):
    """Link every listed company in ``sheet`` to its layer-4 industry categories.

    Rows are read from index 1 onwards (row 0 is skipped, presumably a
    header).  Column 1 holds the stock name and column 13 a ';'-separated
    list of category names.  The company is looked up under the "上市公司"
    label; when no match exists a new local ``Node`` is built.  Each
    category is looked up under ``industry_category_layer4_label.name``,
    created in the graph if missing, otherwise recorded in the
    ``common_category`` list defined outside this function.  A relationship
    of type ``industry_category_relation`` is then created from the company
    to the category.

    Args:
        sheet: Worksheet exposing ``nrows`` and ``row_values(rowx)``
            (presumably an xlrd sheet -- confirm against the caller).
    """
    # The raw data contains bracketed annotations that make
    # node_matcher.match(label, name=...) fail, so they are stripped.
    # NOTE(review): the "(...)" alternative is greedy, unlike the other two:
    # everything between the first "(" and the last ")" of a cell is removed,
    # including any ';'-separated entries in between -- confirm intended.
    bracket_pattern = re.compile(r"\(.*\)|\{.*?}|\[.*?]")

    row_count = 0
    new_industry_category_node_count = 0
    common_category_count = 0
    node_matcher = NodeMatcher(graph)

    for rowx in range(1, sheet.nrows):
        row = sheet.row_values(rowx)  # fetch the row once for both columns
        stock_name = row[1]
        listed_company_node = node_matcher.match("上市公司", name=stock_name).first()
        if listed_company_node is None:
            listed_company_node = Node("上市公司", name=stock_name)

        industry_category_layer4 = bracket_pattern.sub("", row[13]).split(';')
        row_count += 1
        print(row_count, ': ', listed_company_node, industry_category_layer4)

        for industry_category in industry_category_layer4:
            # Several companies may share a category: only create the
            # category node when it does not exist yet.
            industry_category_node = node_matcher.match(
                industry_category_layer4_label.name, name=industry_category
            ).first()
            if industry_category_node is None:
                industry_category_node = Node(
                    industry_category_layer4_label.name, name=industry_category
                )
                graph.create(industry_category_node)
                new_industry_category_node_count += 1
                print(industry_category_node, new_industry_category_node_count)
            else:
                # The suffix is the three characters backslash, slash, n,
                # exactly as before; only the escape is now spelled validly.
                # NOTE(review): "\n" may have been intended -- confirm.
                common_category.append(industry_category + "\\/n")
                common_category_count += 1
                print("重合类型: ", industry_category, common_category_count)

            # TODO: when the script is re-run after a failure, relationships
            # created by the earlier run may be duplicated here; an
            # existence check should be added.
            relationship = Relationship(
                listed_company_node, industry_category_relation, industry_category_node
            )
            graph.create(relationship)
            print(relationship)
def load_category_and_relationship(sheet, column_num, label_name, relation_name):
    """Link every listed company in ``sheet`` to the categories of one column.

    Rows are read from index 1 onwards (row 0 is skipped, presumably a
    header).  Column 1 holds the stock name; column ``column_num`` holds a
    ';'-separated list of category names.  The company is looked up under
    the "上市公司" label (a new local ``Node`` is built when there is no
    match).  Each category is looked up under ``label_name`` and created in
    the graph if missing; already existing category nodes are appended to
    the ``common_category`` list defined outside this function.  A
    relationship of type ``relation_name`` is then created from the company
    to the category.

    Args:
        sheet: Worksheet exposing ``nrows`` and ``row_values(rowx)``
            (presumably an xlrd sheet -- confirm against the caller).
        column_num: Index of the column holding the category list.
        label_name: Node label used to match and create category nodes.
        relation_name: Relationship type linking company to category.
    """
    # The raw data contains bracketed annotations that make
    # node_matcher.match(label, name=...) fail, so they are stripped.
    # NOTE(review): the "(...)" alternative is greedy, unlike the other two:
    # everything between the first "(" and the last ")" of a cell is removed,
    # including any ';'-separated entries in between -- confirm intended.
    bracket_pattern = re.compile(r"\(.*\)|\{.*?}|\[.*?]")

    row_count = 0
    node_matcher = NodeMatcher(graph)

    for rowx in range(1, sheet.nrows):
        row = sheet.row_values(rowx)  # fetch the row once for both columns
        stock_name = row[1]
        listed_company_node = node_matcher.match("上市公司", name=stock_name).first()
        if listed_company_node is None:
            listed_company_node = Node("上市公司", name=stock_name)

        row_content_tmp = row[column_num]
        if not row_content_tmp:
            print("内容没有,需要补全")
            # NOTE(review): this stops the whole load at the first empty
            # cell, leaving later rows unprocessed -- confirm that aborting
            # (rather than skipping the row) is intended.
            break

        row_content_list = bracket_pattern.sub("", row_content_tmp).split(';')
        row_count += 1
        print(row_count, ': ', listed_company_node, row_content_list)

        for row_content in row_content_list:
            # Several companies may share a category: only create the
            # category node when it does not exist yet.
            category_node = node_matcher.match(label_name, name=row_content).first()
            if category_node is None:
                category_node = Node(label_name, name=row_content)
                graph.create(category_node)
                print(category_node)
            else:
                print("分类已存在,无需重复创建")
                common_category.append(category_node)

            # TODO: when the script is re-run after a failure, relationships
            # created by the earlier run may be duplicated here; an
            # existence check should be added.
            relationship = Relationship(listed_company_node, relation_name, category_node)
            graph.create(relationship)
            print(relationship)
def add_propterty():
    """Attach stock codes to stock-name nodes and link each to its company.

    Reads the ``sheet`` defined outside this function from row index 1
    onwards; columns 0, 1 and 2 hold the stock code, the stock name and the
    company name.  For every row the stock name is matched under the
    '股票名称' label:

    * no match    -- the row is reported and a new local stock node is used;
    * one match   -- the stored node is used;
    * many matches -- the duplicates are reported and the row is skipped.

    The stock code is then set as the '股票代码' property, a '公司名称' node is
    created for the company, and a '股票名称是' relationship is created from
    the company to the stock node.

    NOTE(review): the linking step is assumed to apply to both new and
    uniquely matched stocks, with ambiguous (duplicate) names skipped --
    confirm against the intended data flow.
    """
    STOCK_NAME_LABEL = '股票名称'
    COMPANY_NAME_LABEL = '公司名称'
    STOCK_CODE_PROPERTY = '股票代码'
    COMPANY_STOCK_RELATIONSHIP = '股票名称是'

    node_matcher = NodeMatcher(graph)
    for row_count, rowx in enumerate(range(1, sheet.nrows), start=1):
        row = sheet.row_values(rowx)
        stock_code = row[0]
        stock_name = row[1]
        company_name = row[2]

        # Materialise the match once so the same query is not re-executed by
        # every len() call, the iteration and the final lookup.
        matches = list(node_matcher.match(STOCK_NAME_LABEL, name=stock_name))

        if not matches:
            # Check 1: a stock that does not appear in the graph yet.
            print("没有此股票名称")
            stock_node = Node(STOCK_NAME_LABEL, name=stock_name)
            already_stored = False
            print(row_count, stock_code, stock_name, company_name)
        elif len(matches) > 1:
            # Check 2: duplicate stock names in the graph (data cleaning).
            for data in matches:
                print(data)
            print(row_count, stock_code, stock_name, company_name)
            # Do not link a company to an ambiguous stock node; without this
            # the node left over from a previous row would be reused.
            continue
        else:
            stock_node = matches[0]
            already_stored = True

        stock_node[STOCK_CODE_PROPERTY] = stock_code
        if already_stored:
            # A property set on a stored node only changes the local copy
            # until it is pushed back to the graph.
            graph.push(stock_node)

        company_node = Node(COMPANY_NAME_LABEL, name=company_name)
        graph.create(company_node)
        company_stock_relation = Relationship(
            company_node, COMPANY_STOCK_RELATIONSHIP, stock_node
        )
        graph.create(company_stock_relation)
        print(row_count, company_node)
def __init__(self, *args, **kwargs):
    """Initialise the test case and cache graph handles on the instance.

    All positional and keyword arguments are forwarded to the parent
    initialiser.  A ``Graph()`` is then built with no arguments and stored
    together with a ``NodeMatcher`` for it, its ``database`` and ``schema``
    attributes, and the result of ``self.unique_string_generator()``.
    """
    super(IntegrationTestCase, self).__init__(*args, **kwargs)
    graph = Graph()
    self.graph = graph
    self.node_matcher = NodeMatcher(graph)
    self.db = graph.database
    self.schema = graph.schema
    self.unique_string = self.unique_string_generator()
def match():
    """Look up a "Person" node and the FRIENDS relationships.

    Notes on a node that is otherwise fine and carries two properties,
    name ('Liz') and age (34):

        match("Person").where(age=34).first()                works
        match("Person").where(name='Liz').first()            works
        match("Person", name="Liz").first()                  works
        match("Person", age=34).first()                      works
        match("Person", age=34).where(name="Liz").first()    None
        match("Person", name="Liz").where(age=34).first()    None

    Returns:
        A 3-tuple of: the FRIENDS relationship match expanded into a list,
        the node lookup result, and the type of the relationship match
        object.
    """
    node_finder = NodeMatcher(graph)
    relationship_finder = RelationshipMatcher(graph)
    # TODO: using the age property here makes the lookup return None
    person = node_finder.match("Person", name="Liz").where(age=34).first()
    friendships = relationship_finder.match(r_type='FRIENDS')
    return list(friendships), person, type(friendships)
def get_command_last_run(course_key, graph):
    """
    Look up when this command last ran for a course.

    The value is read from the 'time_last_dumped_to_neo4j' property of the
    "course" node whose ``course_key`` property equals ``str(course_key)``.

    Args:
        course_key: a CourseKey
        graph: a py2neo Graph

    Returns: The stored 'time_last_dumped_to_neo4j' value of the course
        node, or None when no (truthy) course node is found.
    """
    course_node = NodeMatcher(graph).match(
        "course", course_key=str(course_key)
    ).first()
    if not course_node:
        return None
    return course_node['time_last_dumped_to_neo4j']
def nodes(self):
    """ Return a :class:`.NodeMatcher` constructed for this graph.

    The matcher can look nodes up by identity or by criteria:

        >>> graph = Graph()
        >>> graph.nodes[1234]
        (_1234:Person {name: 'Alice'})
        >>> graph.nodes.get(1234)
        (_1234:Person {name: 'Alice'})
        >>> graph.nodes.match("Person", name="Alice").first()
        (_1234:Person {name: 'Alice'})

    It can also be used to count nodes efficiently:

        >>> len(graph.nodes)
        55691
        >>> len(graph.nodes.match("Person", age=33))
        12

    """
    node_matcher = NodeMatcher(self)
    return node_matcher
#coding: utf-8 from py2neo import Graph, Node, Relationship from py2neo.matching import NodeMatcher, RelationshipMatcher from py2neo.ogm import Label from com.wdk.stock.knowledgegraph.py2neo_advanced_tools import create_node # 参考: https://blog.csdn.net/sinat_26917383/article/details/79901207 # 连接neo4j数据库 graph = Graph("http://127.0.0.1:7474", username="******", password="******") node_matcher = NodeMatcher(graph) create_node(graph, "自然人", "王麻子") create_node(graph, "美食", "鸡翅") """ # *********START:Label操作集及示例************ # node关于label的API: # labels() 返回node的标签的集合 # has_label(label) node是否有这个标签 # add_label(label) 给node添加标签 # remove_label(label) 删除node的标签 # clear_labels() 清楚node的所有标签 # update_labels(labels) 添加多个标签,注labels为可迭代的 # START: 创建Label,可以Label(),但似乎也要和node在一起操作,否则没办法push到graph # 一个Node上叠加多个Label:node.add_label('labelname_x') # 删除graph的label暂时无解,只能把所有节点的该label删除
def new_keanu_acted_in(self):
    """Build a ``RelatedObjects`` for Keanu Reeves' outgoing ACTED_IN links.

    The "Person" node named "Keanu Reeves" is fetched from ``self.graph``
    and wrapped together with ``OUTGOING``, "ACTED_IN" and ``Film``.
    """
    matcher = NodeMatcher(self.graph)
    keanu = matcher.match("Person", name="Keanu Reeves").first()
    return RelatedObjects(keanu, OUTGOING, "ACTED_IN", Film)
def movie_matcher(movie_graph):
    """Return a ``NodeMatcher`` constructed for ``movie_graph``."""
    matcher = NodeMatcher(movie_graph)
    return matcher
def __init__(self):
    """Open a transaction on ``g`` and prepare schema and matcher helpers.

    ``g`` is a name defined outside this method; the result of
    ``g.begin()`` is kept as ``self.tx``, and a ``Schema``, a
    ``NodeMatcher`` and a ``RelationshipMatcher`` are built for it.
    """
    graph = g
    self.tx = graph.begin()
    self.schema = Schema(graph)
    self.n_matcher = NodeMatcher(graph)
    self.r_matcher = RelationshipMatcher(graph)
def __post_init__(self):
    """Create the node and relationship matchers for ``self.graph``."""
    graph = self.graph
    self.nmatcher = NodeMatcher(graph)
    self.rmatcher = RelationshipMatcher(graph)
def setUp(self):
    """Empty the graph, load the movies Cypher script and build a matcher.

    The script is read from ``../resources/movies.cypher`` relative to this
    file's directory and executed with ``self.graph.run``.
    """
    self.graph.delete_all()
    script_path = path_join(dirname(__file__), "..", "resources", "movies.cypher")
    with open(script_path) as script_file:
        movie_script = script_file.read()
    self.graph.run(movie_script)
    self.matcher = NodeMatcher(self.graph)