class Neo4j():
    """Small convenience wrapper around a py2neo ``Graph``.

    ``connectDB`` must be called before any lookup; until then ``graph`` is
    ``None``.

    Example::

        test = Neo4j()
        test.connectDB()
        items = test.getLabeledHudongItem('labels.txt')
    """

    # Connection handle; stays None until connectDB() has been called.
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Connect to the Neo4j server on localhost and keep the handle on ``self.graph``."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return whatever ``match_one`` yields for an ``Item`` whose ``title`` equals *value*."""
        return self.graph.match_one(label="Item", property_key="title", property_value=value)

    def matchHudongItembyTitle(self, value):
        """Return whatever ``match_one`` yields for a ``HudongItem`` whose ``title`` equals *value*."""
        return self.graph.match_one(label="HudongItem", property_key="title", property_value=value)

    def getLabeledHudongItem(self, filename):
        """Return a ``HudongItem`` for every labelled row of *filename* found in the graph.

        Rows are read with ``readCSV2``. Column 0 of a row is looked up as the
        item title and column 1 is stored on the item as ``label``. Rows whose
        title has no match in the graph are skipped.
        """
        labeled_items = []
        for line in readCSV2(filename):
            ctx = self.graph.match_one(label="HudongItem", property_key="title", property_value=line[0])
            # Identity test: a graph entity may define its own ``==``.
            if ctx is None:
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            labeled_items.append(cur)
        print('load LabeledHudongItem over ...')
        return labeled_items

    def getAllHudongItem(self, limitnum):
        """Return at most *limitnum* ``HudongItem`` objects built from ``HudongItem`` nodes."""
        items = [HudongItem(g) for g in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return items
class DiseasePipeline(object):
    """Item pipeline that stores scraped diseases and their links in Neo4j.

    For every item it upserts a ``disease`` node, then links it to its related
    diseases (``relate``) and to its typical symptoms (``have``).
    """

    def __init__(self):
        self.graph = Graph(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
        # Every run starts from an empty database.
        self.graph.delete_all()

    def _get_or_create_node(self, label, name):
        """Return the node with *label* and *name*, creating a bare one if it is missing."""
        node = self.graph.nodes.match(label, name=name).first()
        if node is None:
            self.graph.create(Node(label, name=name))
            # Re-read so the caller works with the stored node.
            node = self.graph.nodes.match(label, name=name).first()
        return node

    def _ensure_relationship(self, start, rel_type, end):
        """Create ``start -[rel_type]-> end`` unless a relationship of that type already exists."""
        if self.graph.match_one((start, end), r_type=rel_type) is None:
            self.graph.create(Relationship(start, rel_type, end))

    def process_item(self, item, spider):
        """Upsert *item* as a disease node and link its related diseases and symptoms.

        Reads ``item['name']``, ``item['relatedDisease']`` (an iterable of
        names) and ``item['typicalSymptom']`` (names separated by '、').
        ``item['name']`` is stripped in place. Returns *item* so that later
        pipeline stages still receive it.
        """
        item['name'] = item['name'].strip()
        node = self.graph.nodes.match('disease', name=item['name']).first()
        if node is None:
            # Unknown disease: create it with every scraped field.
            self.graph.create(Node('disease', **item))
            node = self.graph.nodes.match('disease', name=item['name']).first()
        else:
            # Known disease (possibly a bare placeholder): refresh its fields.
            node.update(item)
            self.graph.merge(node, 'disease', 'name')

        # Links to related diseases. The relationship used to be created with
        # the misspelt type "ralate" while the existence check looked for
        # 'relate', so the check never matched and duplicates piled up.
        for disease in item['relatedDisease']:
            related = self._get_or_create_node('disease', disease.strip())
            self._ensure_relationship(node, 'relate', related)

        # Links from the disease to each of its typical symptoms.
        for symptom in item['typicalSymptom'].split('、'):
            symptom_node = self._get_or_create_node('symptom', symptom.strip())
            self._ensure_relationship(node, 'have', symptom_node)

        return item
class UserGraphService(UserGraph):
    """Graph lookups on ``Account`` nodes, backed by the user/address/transaction DAOs."""

    def __init__(self):
        # The connection is opened lazily by get_connect().
        self.graph = None
        self.label = "Account"
        self.user_dao = UserDAO()
        self.address_dao = AddressDAO()
        self.transaction_dao = TransactionDAO()

    def get_connect(self):
        """Open the Neo4j connection and keep it on ``self.graph``."""
        self.graph = Graph("http://127.0.0.1:7474", username="******", password="******")

    def get_pay_chain_for_two_address(self, source_address, destination_address):
        """Print one relationship going from the source address owner to the destination owner.

        Each address is resolved to its owning user's code through the address
        DAO, each code to a user node, and a single directed relationship
        between the two nodes is matched and printed. Nothing is returned.

        To view the same thing in the Neo4j browser::

            MATCH (n:Account{name:'cdda6a23-3280-43fe-a159-07385c12b9ca'})-->(m:Account{name:'4f4cf283-db85-490f-9b95-ad87ac21e7c6'}) RETURN n, m;
        """
        source_record = self.address_dao.get_address_by_address(source_address)
        source_code = source_record['user']['code']
        destination_record = self.address_dao.get_address_by_address(destination_address)
        dest_code = destination_record['user']['code']

        start = self.get_a_user_node_by_code(source_code)
        end = self.get_a_user_node_by_code(dest_code)

        payment = self.graph.match_one(start_node=start, end_node=end, bidirectional=False)
        print(payment)
def insertDiseaseAndDrug(disease_name, drug_name):
    """Record one more co-occurrence of a disease and a drug in the graph.

    Both nodes are created when missing. The first time the pair is seen a
    ``CALL`` relationship with ``count = 1`` is created between them; on later
    calls the existing relationship's ``count`` is incremented and pushed.
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")

    def get_or_create(label, name):
        # The kind of node is stored and matched as an ordinary ``label``
        # property (see the query), not as a Neo4j node label.
        node = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *", x=name, y=label).evaluate()
        if node is None:
            node = Node(label=label, name=name)
            graph.create(node)
        return node

    disease = get_or_create("Disease", disease_name)
    drug = get_or_create("Drug", drug_name)

    disease2drug = graph.match_one(start_node=disease, end_node=drug)
    if disease2drug is None:
        disease2drug = Relationship(disease, "CALL", drug)
        disease2drug['count'] = 1
        graph.create(disease2drug)
    else:
        disease2drug['count'] += 1
        graph.push(disease2drug)
def insertSympAndDisease(symptom_name, disease_name):
    """Record one more co-occurrence of a symptom and a disease in the graph.

    Both nodes are created when missing. The first time the pair is seen a
    ``CALL`` relationship with ``count = 1`` is created from the symptom to the
    disease; on later calls the existing relationship's ``count`` is
    incremented and pushed.
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")

    def get_or_create(label, name):
        # The kind of node is stored and matched as an ordinary ``label``
        # property (see the query), not as a Neo4j node label.
        node = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *", x=name, y=label).evaluate()
        if node is None:
            node = Node(label=label, name=name)
            graph.create(node)
        return node

    symptom = get_or_create("Symptom", symptom_name)
    disease = get_or_create("Disease", disease_name)

    symptom2disease = graph.match_one(start_node=symptom, end_node=disease)
    if symptom2disease is None:
        symptom2disease = Relationship(symptom, "CALL", disease)
        symptom2disease['count'] = 1
        graph.create(symptom2disease)
    else:
        symptom2disease['count'] += 1
        graph.push(symptom2disease)
def main():
    """Interactive py2neo demo: create two ``Person`` nodes joined by ``KNOWS`` and query them back."""
    # Connection settings are prompted for; an empty answer falls back to the default.
    bolt_url = default(prompt('bolt url(default=bolt://localhost:7687): '), 'bolt://localhost:7687')
    username = default(prompt('username(defalt=neo4j): '), 'neo4j')
    # NOTE(review): the next statement is not valid Python. It looks as if
    # credential masking swallowed the end of the password prompt, the creation
    # of ``graph`` and the start of a ``graph.run(...)`` call that wipes the
    # database. ``graph`` is therefore never bound in what is visible here;
    # restore these lines from the original source.
    password = prompt('password: '******'MATCH (n) DETACH DELETE n')
    # Create Alice -[KNOWS]-> Bob inside a single transaction.
    tx = graph.begin()
    alice = Node("Person", name="Alice")
    tx.create(alice)
    bob = Node("Person", name="Bob")
    relationship = Relationship(alice, "KNOWS", bob)
    tx.create(relationship)
    tx.commit()
    # Print the name of everyone Alice knows.
    for rel in graph.match(start_node=alice, rel_type="KNOWS"):
        print(rel.end_node()["name"])
    print(graph.exists(relationship))
    # Same query again, this time starting from a node looked up by property.
    a = graph.find_one(label="Person", property_key="name", property_value="Alice")
    r = graph.match_one(start_node=a, rel_type="KNOWS")
    print(r.end_node()['name'])
class NeoAccessor():
    """Look up, create and relate nodes of a single entity type in Neo4j."""

    # Node label used for every lookup.
    EntityType = 'Person'
    # Property name used to decide whether two nodes are the same entity.
    MatchKey = 'name'

    def __init__(self):
        self.graph = Graph(host='localhost', port=7687, password='')
        self.nodeMatcher = NodeMatcher(self.graph)

    def matchNode(self, key, value):
        """Return the match of all ``EntityType`` nodes whose property *key* equals *value*.

        NOTE(review): *key* and *value* are interpolated into the where-clause
        as text and *value* is always quoted as a string; callers must not pass
        untrusted input.
        """
        return self.graph.nodes.match(self.EntityType).where(
            ' _.{} = "{}" '.format(key, value))

    def nodeMap(self, node):
        """Return the stored node that *node* maps to, adding *node* when none exists.

        Nodes are compared on the ``MatchKey`` property.
        """
        key = self.MatchKey
        node_list = self.matchNode(key, node[key])
        if len(node_list) > 0:
            # The source entity already has a counterpart in Neo4j.
            return node_list.first()
        # No counterpart yet: store the node and return the stored version.
        return self.addNode(node)

    def findNode(self, uuid):
        """Return the single node whose ``uuid`` property equals *uuid*.

        Returns ``None`` when there is no such node, and also (after writing a
        message to stderr) when more than one node carries that uuid.
        """
        this_node = self.matchNode('uuid', uuid)
        if not this_node:
            return None
        if len(this_node) != 1:
            # Was ``print sys.stderr << ...``, which is a syntax error on
            # Python 3 and a TypeError on Python 2 (``<<`` instead of ``>>``).
            sys.stderr.write('multiple node has same uuid.\n')
            return None
        return this_node.first()

    def findNodeOne(self, key, value):
        """Return the (unevaluated) match of nodes whose property *key* equals *value*."""
        return self.matchNode(key, value)

    def addNode(self, node):
        """Create *node* in the graph and return it as re-read by its ``__uuid__``."""
        self.graph.create(node)
        return self.findNode(node.__uuid__)

    def findRelationOne(self, start_node, end_node, r_type):
        """Return one relationship of type *r_type* from *start_node* to *end_node*, if any."""
        return self.graph.match_one(nodes=[start_node, end_node], r_type=r_type)
def locus_list_to_locations(cls, graph: Graph, locus_df: pd.DataFrame, column_name: str):
    """Look up the location of every locus named in *locus_df*.

    graph - py2neo Graph object
    locus_df - pandas DataFrame with names of loci
    column_name - name of column in locus_df to use for locus name

    Each name is tried as a Gene (by ``uniquename``), then a PseudoGene (by
    ``uniquename``), then an RRna (by ``name``). The function asserts that
    every locus was found, then follows the ``LOCATED_AT`` relationship of each
    match and returns one ``Location`` per locus, in input order.
    """
    matcher = NodeMatcher(graph)

    info = []
    for _, row in locus_df.iterrows():
        locus = row[column_name]
        # Tried in this order; the first non-empty match wins.
        candidates = (
            matcher.match("Gene", uniquename=locus),
            matcher.match("PseudoGene", uniquename=locus),
            matcher.match("RRna", name=locus),
        )
        for candidate in candidates:
            if candidate:
                info.append(candidate.first())
                break
        else:
            print("not found", locus)

    assert len(info) == len(
        locus_df
    ), "Failed to find all the loci in question {} vs {}".format(
        len(locus_df), len(info))

    def to_location(item):
        # The end node of the LOCATED_AT relationship carries the coordinates.
        location = graph.match_one((item, ), r_type="LOCATED_AT").end_node
        return Location(
            locus=item["uniquename"],
            start=location["fmin"],
            end=location["fmax"],
            strand=location["strand"],
        )

    return [to_location(item) for item in info]
class PopItToNeo(object):
    """Copy persons, organizations, posts and memberships from a PopIt API into Neo4j.

    Nodes that were already fetched or found are cached per instance so each
    PopIt id is resolved at most once per run.
    """

    def __init__(self):
        # safe_load: the config is plain data, and the file is closed again.
        with open("config.yaml") as config_file:
            config = yaml.safe_load(config_file)
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"

        # Collection names are attributes so that they can be overridden.
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:  # noqa: E712 - keep the original comparison
            self.graph.delete_all()

        # In-memory caches, keyed by PopIt id.
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    def _create_relationship_once(self, start, rel_type, end, properties):
        """Create ``start -[rel_type]-> end`` with *properties* unless it already exists."""
        if self.graph.match_one(start, rel_type, end):
            logging.warning("Already exist, skipping")
            return
        self.graph.create(Relationship(start, rel_type, end, **properties))

    @staticmethod
    def _entity_name(entity):
        """Return ``entity["name"]``, or its first element when it is a list."""
        name = entity["name"]
        return name[0] if isinstance(name, list) else name

    def process_membership(self):
        """Walk every page of memberships and link each person to its post and organization.

        A membership without both ``person_id`` and ``organization_id`` is
        skipped, as is one whose person, post or organization cannot be
        fetched. The relationship type is the membership's role ("member" when
        empty). Stops when a page has no ``next_url`` or cannot be fetched.
        """
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")

            # A failed fetch yields {}; treat it as an empty last page.
            for entry in data.get("result", []):
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue

                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue
                role = entry.get("role") or "member"
                logging.warning("Role: %s" % role)

                kwparams = {"popit_id": entry["id"]}
                start_date = get_timestamp(entry.get("start_date"))
                if start_date:
                    kwparams["start_date"] = start_date
                end_date = get_timestamp(entry.get("end_date"))
                if end_date:
                    kwparams["end_date"] = end_date

                # Only link a post when the membership names one; previously a
                # missing post_id reused an unbound or stale ``post``.
                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    self._create_relationship_once(person, role, post, kwparams)

                organization = self.fetch_organization(entry["organization_id"])
                if not organization:
                    continue
                self._create_relationship_once(person, role, organization, kwparams)

            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        """Return the ``Persons`` node for *person_id*, creating it from the API if needed.

        Returns ``None`` when the API has no such person.
        """
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]

        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node

        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Not cached: the id may start to exist later.
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")

        entity = data["result"]
        name = self._entity_name(entity)
        logging.warning("Name: %s" % name)

        kwparam = {}
        birth_date = get_timestamp(entity.get("birth_date"))
        if birth_date:
            kwparam["birth_date"] = birth_date
        death_date = get_timestamp(entity.get("death_date"))
        if death_date:
            kwparam["death_date"] = death_date
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]

        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        """Return the ``Organization`` node for *organization_id*, creating it from the API if needed.

        Returns ``None`` when the API has no such organization.
        """
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]

        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node

        organization_url = "%s/%s/%s" % (self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")

        entity = data["result"]
        name = self._entity_name(entity)

        kwparams = {}
        logging.warning("Name: %s" % name)
        kwparams["name"] = name
        kwparams["popit_id"] = entity["id"]
        founding_date = get_timestamp(entity.get("founding_date"))
        if founding_date:
            kwparams["founding_date"] = founding_date
        dissolution_date = get_timestamp(entity.get("dissolution_date"))
        if dissolution_date:
            kwparams["dissolution_date"] = dissolution_date

        if "classification" in entity:
            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]

        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        """Return the ``Posts`` node for *post_id*, creating it from the API if needed.

        A newly created post is also linked to its organization with an ``of``
        relationship when that organization can be fetched. Returns ``None``
        when the API has no such post.
        """
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]

        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node

        post_url = "%s/%s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")

        entity = data["result"]
        # A post belongs to an organization; failing to resolve it must not
        # prevent the post itself from being stored.
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            # str(e): exceptions have no ``.message`` attribute on Python 3.
            logging.warning(str(e))
            organization = None

        logging.warning("Label: %s" % entity["label"])
        kwparams = {}
        kwparams["name"] = entity["label"]
        kwparams["popit_id"] = entity["id"]
        start_date = get_timestamp(entity.get("start_date"))
        if start_date:
            kwparams["start_date"] = start_date
        end_date = get_timestamp(entity.get("end_date"))
        if end_date:
            kwparams["end_date"] = end_date

        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node

        if organization:
            # The relationship carries the dates only. These were collected in
            # ``temp_param`` before, but the node properties (name, popit_id)
            # were passed to the relationship by mistake.
            temp_param = {}
            if start_date:
                temp_param["start_date"] = start_date
            if end_date:
                temp_param["end_date"] = end_date
            relation = Relationship(node, "of", organization, **temp_param)
            self.graph.create(relation)

        return node

    def process_parent_company(self):
        """Walk every page of organizations and link each one to its parent.

        Creates ``parent_of`` (parent to child) and ``child_of`` (child to
        parent) where missing. When ``parent_of`` already exists the entry is
        skipped without checking ``child_of``.
        """
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)

        while True:
            data = self.fetch_entity(organizations_url)

            for entry in data.get("result", []):
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                logging.warning(entry.get("parent_id"))

                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                if not child_node:
                    # Cannot relate to an organization that could not be fetched.
                    continue

                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(parent_node, "parent_of", child_node))

                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(child_node, "child_of", parent_node))

            # .get(): a present-but-empty next_url also ends the walk.
            if data.get("next_url"):
                organizations_url = data["next_url"]
                logging.warning(organizations_url)
            else:
                break

    def process_posts(self):
        """Walk every page of posts and make sure each one is stored.

        Linking a post to its organization happens inside ``fetch_post``.
        """
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            for entry in data.get("result", []):
                node = self.fetch_post(entry["id"])
                if node:
                    self.graph.create(node)
            if data.get("next_url"):
                post_url = data["next_url"]
                logging.warning(post_url)
            else:
                break

    def fetch_entity(self, url):
        """GET *url* and return its decoded JSON, or ``{}`` for any non-200 response.

        Sleeps 0.1 s after each request.
        """
        r = requests.get(url)
        time.sleep(0.1)
        if r.status_code != 200:
            # An empty dict keeps the return type consistent for callers.
            return {}
        return r.json()
# Demo of node selection and relationship matching; ``selector``, ``graph``,
# ``a`` and ``b`` are defined earlier in the script (not visible here).
print("------------> selector.select('Person')")
persons = selector.select('Person')
print(list(persons))
# error
print(
    "----------*****--> selector.select('Person').where('_.name =~ \"A.*\"').first()"
)
# First Person whose name matches the regular expression A.*
persons2 = selector.select('Person').where('_.name =~\'A.*\'').first()
print(persons2)
print("------------> KNOWS")
# persons2 = selector.select().where(rel_type='KNOWS')
# print(persons2)
# match_one with progressively narrower filters: type only, start node + type,
# then start node + type + end node.
relationship = graph.match_one(rel_type='KNOWS')
print(relationship)
relationship = graph.match_one(a, rel_type='KNOWS')
print(relationship)
relationship = graph.match_one(a, 'KNOWS', b)
print(relationship)
# Everyone that ``a`` knows.
for rel in graph.match(start_node=a, rel_type="KNOWS"):
    print(rel.end_node()["name"])
# Everyone who knows ``b``.
for rel in graph.match(rel_type="KNOWS", end_node=b):
    print('--------> start_node', rel.start_node()["name"])
# NOTE(review): the body of this loop is cut off in the visible source.
for rel in graph.match(start_node=a, end_node=b):
class Cq(object):
    """A CQ message stored in the graph, with helpers to read its responses."""

    def __init__(self):
        self.id = ''
        self.subject = ''
        self.message = ''
        self.created_date = ''
        self._graph_db = Graph(settings.DATABASE_URL)

    @property
    def cq_properties(self):
        """Return the instance attributes as a dict, without the graph handle."""
        properties_dict = dict(self.__dict__)
        properties_dict.pop('_graph_db', None)
        return properties_dict

    @property
    def cq_node(self):
        """Return the graph node whose ``id`` equals ``self.id``, or ``None`` when ``id`` is empty."""
        if self.id != '':
            return self._graph_db.find_one(GraphLabel.CQ,
                                           property_key='id',
                                           property_value=self.id)
        return None

    @property
    def response_list(self):
        """Return the responses to this CQ as a list of property dicts.

        Each dict gets an extra ``by`` entry, "<name> / <call_sign>", naming
        the user who made that response.
        """
        cq_response_relationship = self._graph_db.match(start_node=self.cq_node,
                                                        rel_type=GraphRelationship.TO,
                                                        end_node=None)
        response_list = []
        for rel in cq_response_relationship:
            response_node = rel.end_node
            # Copy, so that adding 'by' does not alter the node's own properties.
            response = dict(response_node.properties)
            # The responder is whoever RESPONDED to *this response* (the same
            # lookup as in response()); matching against the CQ node returned
            # one and the same user for every response.
            user_response_relationship = self._graph_db.match_one(start_node=None,
                                                                  rel_type=GraphRelationship.RESPONDED,
                                                                  end_node=response_node)
            user_node = user_response_relationship.start_node
            response['by'] = '%s / %s' % (user_node.properties['name'],
                                          user_node.properties['call_sign'])
            response_list.append(response)
        return response_list

    @staticmethod
    def create_cq(user_node, cq_dict):
        """Create a CQ node from *cq_dict* and link *user_node* to it with ``SENT``.

        *cq_dict* is modified in place: ``id`` (a new uuid4 string) and
        ``created_date`` (today's date) are set before the node is created.
        """
        cq_dict['id'] = str(uuid.uuid4())
        cq_dict['created_date'] = datetime.date.today()
        # One connection for both writes instead of opening two.
        graph_db = Graph(settings.DATABASE_URL)
        cq_node = Node.cast(GraphLabel.CQ, cq_dict)
        cq_node, = graph_db.create(cq_node)
        cq_relationship = Relationship(user_node, GraphRelationship.SENT, cq_node)
        graph_db.create_unique(cq_relationship)

    @staticmethod
    def most_recent_cqs():
        """Return ``{'cqs': [...]}`` with one dict (id, subject, message, created_date) per row.

        NOTE(review): the Cypher statement is an empty string here, so no query
        is actually defined; supply the intended statement.
        """
        params = {}
        cypher_str = ""
        match_results = Graph(settings.DATABASE_URL).cypher.execute(statement=cypher_str,
                                                                    parameters=params)
        # A fresh dict per row: one shared dict made every list entry alias the
        # last row.
        cq_list = [
            {
                'id': item.id,
                'subject': item.subject,
                'message': item.message,
                'created_date': item.created_date,
            }
            for item in match_results
        ]
        return {'cqs': cq_list}

    def response(self, response_id):
        """Return the details of one response together with the user who made it.

        :param response_id: value of the response node's ``id`` property
        :return: dict with ``response`` (the response details) and ``user``
            (properties of the user who made the response)
        """
        response_node = self._graph_db.find_one(GraphLabel.RESPONSE,
                                                property_key='id',
                                                property_value=response_id)
        response_user_relationship = self._graph_db.match_one(start_node=None,
                                                              rel_type=GraphRelationship.RESPONDED,
                                                              end_node=response_node)
        return {
            'response': response_node.auto_sync_properties,
            'user': response_user_relationship.start_node.properties,
        }
tempUser = row[0] userNeo = neo4jGraph.find_one("User", property_key="userID", property_value=row[0]) #Caching persona for quick retreival if row[1] not in personaDict: personaNeo = neo4jGraph.find_one("Persona", property_key="personaID", property_value=row[1]) personaDict[row[1]] = personaNeo else: personaNeo = personaDict[row[1]] #avoiding having empty user or persona as not all users come in if not userNeo or not personaNeo: continue #Find if relationship already exists relationshipNeo = neo4jGraph.match_one(start_node=userNeo, rel_type="IS_A", end_node=personaNeo) #makes relationship if not if not relationshipNeo: relationship = Relationship(userNeo, "IS_A", personaNeo) if args.verbose: printToLog("Creating", relationship) neo4jGraph.create(relationship) ##Users_Friends if not mysqlConnection.is_connected(): mysqlConnection.reconnect(attempts=2, delay=1) cursor.execute("select user_id, friend_id from user_friends where friend_id in (select user_id from user) and user_id in (select user_id from user where join_date between %s and %s)", (twodays, yesterday)) if args.verbose:
class DBConc(object):
    """Access layer for the disease/symptom knowledge graph in Neo4j."""

    def __init__(self):
        self.graph = Graph("http://liublack.cn:7474", auth=("neo4j", "200001"))

    def search_one(self, label, name):
        """Return the node with *label* and *name* wrapped as ``Disease`` or ``Symptom``.

        Returns ``None`` for any other label.
        """
        node = self.graph.nodes.match(label, name=name).first()
        if label == 'disease':
            # Wrap the matched node; the ``Node`` class itself was passed before.
            return Disease(node)
        elif label == 'symptom':
            return Symptom(node)
        return None

    def search(self, label, **keys):
        """Not implemented."""
        pass

    def exist(self, label, name):
        """Return True when a node with *label* and *name* is stored."""
        node = self.graph.nodes.match(label, name=name).first()
        return node is not None

    def insertDisease(self, disease):
        """Store *disease* as a ``disease`` node unless one with the same name exists.

        Raises ``Exception`` when *disease* is not a ``Disease``.
        """
        if self.exist('disease', disease['name']):
            return
        if not isinstance(disease, Disease):
            raise Exception('type(disease) not equals Disease')
        diseaseNode = Node('disease', **disease.data)
        self.graph.create(diseaseNode)

    def insertSymptom(self, symptom):
        """Store *symptom* as a ``symptom`` node unless one with the same name exists.

        Raises ``Exception`` when *symptom* is not a ``Symptom``.
        """
        if self.exist('symptom', symptom['name']):
            return
        if not isinstance(symptom, Symptom):
            raise Exception('type(symptom) not equals Symptom')
        symptomNode = Node('symptom', **symptom.data)
        self.graph.create(symptomNode)

    def deleteRelationships(self, rtype):
        """Remove every relationship of type *rtype*; a ``ValueError`` is printed, not raised."""
        try:
            subG = Subgraph(relationships=self.graph.relationships.match(
                r_type=rtype))
            self.graph.separate(subG)
        except ValueError as e:
            print(e)

    def establishRelationship(self,
                              left,
                              right,
                              rtype,
                              pname,
                              correlation=None):
        """Rebuild all *rtype* relationships from *left* nodes to *right* nodes.

        Existing *rtype* relationships are deleted first. For every *left*
        node, its *pname* property is read as a list of names; each name that
        exists as a *right* node gets one relationship with a ``value``
        property. ``value`` is ``(similarity + 1) / 2`` when *correlation*
        (an object with ``similarity(a, b)``) is given and knows both names,
        otherwise 1.
        """
        self.deleteRelationships(rtype)
        nodes = self.graph.nodes.match(left)
        for lnode in nodes:
            print(type(lnode))
            # A node without the property yields None; treat it as "no names".
            names = lnode[pname] or []
            for name in names:
                rnode = self.graph.nodes.match(right, name=name).first()
                if rnode is None:
                    continue
                if self.graph.match_one((lnode, rnode), r_type=rtype) is not None:
                    continue
                value = 1
                if correlation is not None:
                    # Compute the correlation between the two names.
                    try:
                        value = correlation.similarity(lnode['name'],
                                                       rnode['name'])
                        value = (value + 1) / 2
                    except KeyError as e:
                        print(e)
                        value = 1
                r = Relationship(lnode, rtype, rnode, value=value)
                self.graph.create(r)

    def establishAllRelationship(self, correlation=None):
        """Rebuild the disease-disease, disease-symptom and symptom-symptom relationships."""
        self.establishRelationship('disease',
                                   'disease',
                                   'd-d',
                                   'relatedDiseases',
                                   correlation=correlation)
        self.establishRelationship('disease',
                                   'symptom',
                                   'd-s',
                                   'typicalSymptoms',
                                   correlation=correlation)
        self.establishRelationship('symptom',
                                   'symptom',
                                   's-s',
                                   'relatedSymptoms',
                                   correlation=correlation)

    def getDSCorrelation(self,
                         label='correlate',
                         alpha=0.3,
                         maxDepth=5,
                         wvmodel=None):
        """Compute a correlation for every (disease, symptom) pair and write it to disk.

        Names come from the first space-separated field of each line of the
        symptom and disease dictionary files. For each pair, in order:

        * if a shortest path of at most *maxDepth* hops exists, the sum of the
          relationships' ``value`` divided by the sum of ``(1 + alpha) ** n``
          over the path's relationships (n = 0, 1, ...), tagged
          'shortest path';
        * else, if *wvmodel* is given and knows both words,
          ``(similarity + 1) / 4``, tagged 'w2vModel';
        * else 0.1, tagged 'cannot compute'.

        One line per disease is written to ``Configurer.DS_CORRELATION_PATH``,
        followed by the complete result. *label* is currently unused.

        Returns ``{disease: {symptom: (value, source)}}``.
        """
        result = {}
        with open(Configurer.SYMPTOM_DICT_PATH, 'r') as f:
            symptomSet = {line.split(' ')[0] for line in f}
        logging.info('症状集加载完毕')
        with open(Configurer.DISEASE_DICT_PATH, 'r') as f:
            diseaseSet = {line.split(' ')[0] for line in f}
        logging.info('疾病集合加载完毕')

        # ``with`` guarantees the output file is closed (it never was before).
        with open(Configurer.DS_CORRELATION_PATH, 'w') as f:
            for disease in diseaseSet:
                result[disease] = {}
                for symptom in symptomSet:
                    result[disease][symptom] = ''
                    try:
                        statement = 'match (p1:disease {name: "%s"}), (p2:symptom {name:"%s"}), p = shortestpath((p1)-[*..%d]-(p2)) return p' % (
                            disease, symptom, maxDepth)
                        cursor = self.graph.run(statement)
                        path = cursor.current['p'] if cursor.forward() else None
                    except Exception:
                        # Best effort: a failed query counts as "no path".
                        path = None

                    if path:
                        value, frac, n = (0, 0, 0)
                        for entity in walk(path):
                            if isinstance(entity, Relationship):
                                value += entity['value']
                                frac += (1 + alpha)**n
                                n += 1
                        value /= frac
                        result[disease][symptom] = (value, 'shortest path')
                    elif wvmodel:
                        try:
                            value = wvmodel.similarity(disease, symptom)
                            value = (value + 1) / 4
                            result[disease][symptom] = (value, 'w2vModel')
                        except KeyError as e:
                            logging.warning(str(e))

                    if result[disease][symptom] == '':
                        result[disease][symptom] = (0.1, 'cannot compute')
                    logging.info('%s - %s - %s by %s\n' %
                                 (disease, result[disease][symptom][0], symptom,
                                  result[disease][symptom][1]))
                f.write(str(result[disease]) + '\n')
                f.flush()
            f.write('\n\n\n\n' + str(result))
        return result

    def clearDB(self):
        """Delete everything in the graph."""
        self.graph.delete_all()

    def getSymptomsFromDisease(self, diseaseList):
        """Return the names of all symptoms that the given diseases ``have``.

        Unknown disease names are ignored.
        """
        symptomSet = set()
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            if node is None:
                # A None start node would match 'have' relationships of every disease.
                continue
            rels = self.graph.match((node, ), r_type='have')
            for r in rels:
                symptomSet.add(r.end_node['name'])
        return symptomSet

    def getDiseaseFromSymptoms(self, symptomList):
        """Return the names of all diseases that ``have`` any of the given symptoms.

        Unknown symptom names are ignored.
        """
        diseaseSet = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            if node is None:
                # (None, None) would match every 'have' relationship.
                continue
            rels = self.graph.match((None, node), r_type='have')
            for r in rels:
                diseaseSet.add(r.start_node['name'])
        return diseaseSet

    def existHaveRelationship(self, disease, symptom):
        """Return True when *disease* is linked to *symptom* by a ``have`` relationship."""
        ndisease = self.graph.nodes.match('disease', name=disease).first()
        nsymptom = self.graph.nodes.match('symptom', name=symptom).first()
        if ndisease is None or nsymptom is None:
            return False
        rel = self.graph.match_one((ndisease, nsymptom), r_type='have')
        return rel is not None

    def getDiseaseDetails(self, diseaseList):
        """Return one dict of properties per known disease in *diseaseList*.

        NOTE(review): ``self.diseaseItemProerties`` is not set anywhere in this
        class as visible here; confirm where it is meant to come from.
        """
        details = []
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            if node is None:
                continue
            diseaseItem = dict()
            for pname in self.diseaseItemProerties:
                diseaseItem[pname] = node[pname]
            details.append(diseaseItem)
        return details

    def getRelatedSymptoms(self, symptomList):
        """Return the names of symptoms reached by ``relate`` from the given symptoms.

        Unknown symptom names are ignored.
        """
        symptoms = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            if node is None:
                continue
            rels = self.graph.match((node, None), r_type='relate')
            symptoms.update(rel.end_node['name'] for rel in rels)
        return list(symptoms)
# For every author of the current paper: make sure the author exists, link the
# author to ``paper`` with PUBLISH, then link the author to every later author
# in the list with a CO relationship in both directions.
# ``authors``, ``paper`` and ``graph`` are defined outside this fragment.
for i in range(len(authors)):  # current paper author list
    # j walks over the authors that come after position i.
    j = i + 1
    fromAuthor = graph.find_one("Author", "name", authors[i])
    if fromAuthor is None:
        fromAuthor = Node("Author", name=authors[i])
    author_publish_paper = Relationship(fromAuthor, "PUBLISH", paper)
    graph.create(author_publish_paper)
    while (j < len(authors)):
        toAuthor = graph.find_one("Author", "name", authors[j])
        if toAuthor is None:
            toAuthor = Node("Author", name=authors[j])
        # Advance before any early exit below so the loop always terminates.
        j = j + 1
        graph.create(toAuthor)
        # Only the fromAuthor -> toAuthor direction is checked before both
        # directions are created.
        ifExist = graph.match_one(fromAuthor, "CO", toAuthor)
        if (ifExist is None):
            fromAuthor_To_toAuthor = Relationship(
                fromAuthor, "CO", toAuthor)
            toAuthor_To_fromAuthor = Relationship(
                toAuthor, "CO", fromAuthor)
            graph.create(fromAuthor_To_toAuthor)
            graph.create(toAuthor_To_fromAuthor)
        else:
            continue
class NeoRepo():
    """User/repository graph in Neo4j, with a simple co-occurrence recommender."""

    def __init__(self):
        self._host = "140.82.17.30"
        self.g = Graph("http://140.82.17.30", username="******", password="******")

    def add_user(self, user):
        """Merge a ``User`` node named *user* into the graph."""
        self.g.merge(Node("User", name=user))

    def add_repo(self, repo):
        """Merge a ``Repo`` node named *repo* into the graph."""
        self.g.merge(Node("Repo", name=repo))

    def get_user(self, user):
        """Return the ``User`` node named *user*, as given by ``find_one``."""
        return self.g.find_one("User", property_key='name', property_value=user)

    def get_repo(self, repo):
        """Return the ``Repo`` node named *repo*, as given by ``find_one``."""
        return self.g.find_one("Repo", property_key='name', property_value=repo)

    def add_rel(self, user, repo, rel_type):
        """Merge a *rel_type* relationship from user *user* to repo *repo* (both given by name)."""
        start = Node("User", name=user)
        end = Node("Repo", name=repo)
        self.g.merge(Relationship(start, rel_type, end))

    def match_user(self, user, rel_type='star'):
        """Return the *rel_type* relationships leaving *user* (a name or a node).

        Returns ``[]`` when the user resolves to ``None``.
        """
        node = self.get_user(user) if isinstance(user, str) else user
        if node is None:
            return []
        return self.g.match(start_node=node, bidirectional=False, rel_type=rel_type)

    def match_repo(self, repo, rel_type='star'):
        """Return the *rel_type* relationships arriving at *repo* (a name or a node).

        Returns ``[]`` when the repo resolves to ``None``.
        """
        node = self.get_repo(repo) if isinstance(repo, str) else repo
        if node is None:
            return []
        return self.g.match(end_node=node, bidirectional=False, rel_type=rel_type)

    def match_one(self, user, repo, rel_type='star'):
        """Return one *rel_type* relationship from *user* to *repo*, or ``None``.

        Either argument may be a name or a node; ``None`` is returned when
        either one resolves to ``None``.
        """
        # Both names are resolved before either result is checked.
        start = self.get_user(user) if isinstance(user, str) else user
        end = self.get_repo(repo) if isinstance(repo, str) else repo
        if start is None or end is None:
            return None
        return self.g.match_one(start_node=start, end_node=end,
                                bidirectional=False, rel_type=rel_type)

    def suggest(self, repo):
        """Return up to 50 ``(repo_name, count)`` pairs related to *repo*.

        ``count`` is the number of users related to *repo* who are also related
        to that repository. Pairs are sorted by descending count and the first
        one is dropped.
        """
        tally = {}
        for incoming in self.match_repo(repo):
            fan = incoming.start_node()
            for outgoing in self.match_user(fan):
                name = outgoing.end_node()['name']
                tally[name] = tally.get(name, 0) + 1
        if not tally:
            return []
        ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[1:51]
class BayesDiagnoser():
    """Rank likely diseases for a free-text description and/or a list of symptoms."""

    def __init__(self, description=None, symptoms=None, age=None, gender=None):
        self.description = description
        self.symptoms = symptoms
        self.age = age
        self.gender = gender
        self.correlation = None
        # Every symptom word that can be recognised in a description.
        self.symptomSet = self.getSymptomSet()
        self.graph = Graph("http://liublack.cn:7474", auth=("neo4j", "200001"))
        self.diseaseItemProerties = ['name']
        # How many top-ranked diseases are used to propose further symptoms.
        self.diseasesForMoreSymptoms = 3
        jieba.set_dictionary(SYMPTOM_DICT_PATH)

    def diagnose(self, filteNum=None):
        """Run the whole diagnosis and return the result as a JSON string.

        The JSON object has ``disease`` (diseases ordered by descending score,
        cut to the first *filteNum* when given) and ``symptoms`` (symptoms of
        the top-ranked diseases that were not already reported).
        """
        # Step 1: collect the symptoms (given ones plus those in the description).
        symptomList = self.getSymptoms()
        print('识别到的症状:', symptomList)

        # Step 2: every disease linked to any of those symptoms.
        diseaseList = list(self._getDiseaseFromSymptoms(symptomList))
        print('可能的疾病:', diseaseList)

        # Step 3: score each candidate disease.
        scores = self._calDiseaseScore(diseaseList, symptomList)

        # Step 4: rank the diseases by score, best first.
        diseaseList = list(zip(diseaseList, scores))
        diseaseList.sort(key=lambda x: x[1], reverse=True)
        print('疾病的置信度为:', diseaseList)

        # Step 5: symptoms of the top diseases that the user did not mention.
        relatedSymptoms = self._getSymptomsFromDisease(
            diseaseList[0:self.diseasesForMoreSymptoms])
        relatedSymptoms = list(relatedSymptoms - set(symptomList))

        # Step 6: build the final result.
        if filteNum is not None:
            diseaseList = diseaseList[0:filteNum]
        diseaseDetials = self._getDiseaseDetails(diseaseList)
        result = {"disease": diseaseDetials, "symptoms": relatedSymptoms}
        result = json.dumps(result, ensure_ascii=False)
        print('最终结果:', result)
        return result

    def loadCorrelation(self):
        """Load the correlation table from ``DS_CORRELATION_PATH`` into ``self.correlation``.

        Every line is evaluated to a dict with ``name`` and ``data`` entries;
        the result maps ``name`` to ``data``.
        """
        correlation = dict()
        with open(DS_CORRELATION_PATH, 'r') as f:
            for line in f:
                # SECURITY NOTE(review): eval() executes whatever the file
                # contains. Acceptable only while the file is produced by
                # trusted code; consider ast.literal_eval.
                d = eval(line)
                correlation[d['name']] = d['data']
        self.correlation = correlation
        return correlation

    def getSymptomSet(self):
        """Return the set of symptom words: the first field of each line of the symptom dictionary."""
        with open(SYMPTOM_DICT_PATH, 'r') as f:
            return {line.strip().split(' ')[0] for line in f}

    def getSymptoms(self):
        """Return all symptoms: the given ones, stripped, plus those found in the description.

        ``self.symptoms`` is extended in place, so calling this more than once
        appends the description's symptoms again.
        """
        if self.symptoms is None:
            self.symptoms = []
        else:
            # The previous ``map(...)`` result was discarded, so nothing was
            # ever stripped. Slice assignment keeps the same list object.
            self.symptoms[:] = [s.strip() for s in self.symptoms]
        self.symptoms.extend(self.processDesc())
        return self.symptoms

    def processDesc(self):
        """Return the known symptom words found in the description, in order of appearance.

        Spaces are removed from ``self.description`` (in place) before it is
        segmented with jieba. Returns ``[]`` when there is no description.
        """
        if self.description is None:
            return []
        self.description = ''.join(self.description.split(' '))
        words = list(jieba.cut(self.description))
        print('分词结果:', words)
        return [word for word in words if word in self.symptomSet]

    def _calDiseaseScore(self, diseaseList, symptomList):
        """Return one score per disease: the product of its correlation with every symptom.

        A pair missing from the correlation table contributes a factor of 0.1.
        The table is loaded on first use.
        """
        if self.correlation is None:
            self.loadCorrelation()
        scores = []
        for disease in diseaseList:
            score = 1.0
            for symptom in symptomList:
                try:
                    score *= self.correlation[disease][symptom][0]
                except Exception:
                    # Unknown pair: fall back to a small default factor.
                    score *= 0.1
            scores.append(score)
        return scores

    def _getSymptomsFromDisease(self, diseaseList):
        """Return the names of the symptoms linked from the given diseases.

        *diseaseList* holds ``(name, score)`` pairs; only the name is used.
        Query errors are printed and skipped.
        """
        symptomSet = set()
        # The name is passed as a query parameter instead of being pasted into
        # the Cypher text, so quotes in a name cannot alter the query.
        statement = 'MATCH (disease {name: $name})-[]->(n:symptom) RETURN n.name as name'
        for disease in diseaseList:
            try:
                cursor = self.graph.run(statement, name=disease[0])
                while cursor.forward():
                    symptomSet.add(cursor.current['name'])
            except Exception as e:
                print(e)
        return symptomSet

    def _getDiseaseFromSymptoms(self, symptomList):
        """Return the names of the diseases linked to any of the given symptoms.

        Query errors are printed and skipped.
        """
        diseaseSet = set()
        # Parameterised for the same reason as in _getSymptomsFromDisease; the
        # symptom text can come straight from the user.
        statement = 'MATCH (n:disease)-[]->(symptom {name: $name}) RETURN n.name as name'
        for symptom in symptomList:
            try:
                cursor = self.graph.run(statement, name=symptom)
                while cursor.forward():
                    diseaseSet.add(cursor.current['name'])
            except Exception as e:
                print(e)
        return diseaseSet

    def _existHaveRelationship(self, disease, symptom):
        """Return True when *disease* is linked to *symptom* by a ``d-s`` relationship."""
        ndisease = self.graph.nodes.match('disease', name=disease).first()
        nsymptom = self.graph.nodes.match('symptom', name=symptom).first()
        if ndisease is None or nsymptom is None:
            return False
        rel = self.graph.match_one((ndisease, nsymptom), r_type='d-s')
        return rel is not None

    def _getDiseaseDetails(self, diseaseList):
        """Return ``[{'name': <disease name>}, ...]`` for the ``(name, score)`` pairs given."""
        return [{'name': d[0]} for d in diseaseList]

    def _getRelatedSymptoms(self, symptomList):
        """Return the names of symptoms reached by ``s-s`` from the given symptoms.

        Unknown symptom names are ignored.
        """
        symptoms = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            if node is None:
                # (None, None) would match every 's-s' relationship.
                continue
            rels = self.graph.match((node, None), r_type='s-s')
            symptoms.update(rel.end_node['name'] for rel in rels)
        return list(symptoms)
# Upsert the Director -[Cooperate]-> Actor relationship for one input row.
# ``line[0]`` is the director name and ``line[1]`` the actor name; ``graph``
# and ``line`` come from the surrounding code.
matcher = NodeMatcher(graph)
a = matcher.match("Director", name=line[0]).first()
b = matcher.match("Actor", name=line[1]).first()

relation = None
if a is not None and b is not None:
    # Both nodes already exist, so a relationship between them may exist too.
    relation = graph.match_one([a, b])
    print(a)
    print(b)

# Create whichever endpoint is missing; identity checks avoid relying on the
# entity classes' ``__eq__``.
if a is None:
    a = Node('Director', name=line[0])
if b is None:
    b = Node('Actor', name=line[1])

if relation is None:
    # First cooperation: create the relationship (and any new node) at once.
    r = Relationship(a, 'Cooperate', b)
    r['times'] = 1
    s = a | b | r
    print(s)
    graph.create(s)
else:
    # Seen before: bump the counter on the existing relationship.
    relation['times'] += 1
    graph.push(relation)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Increment the ``value`` counter on the relationship between two entities."""
from pymongo import MongoClient
from bson.son import SON
from py2neo import Graph, Node, Relationship

test_graph = Graph("http://127.0.0.1:7474", username="******", password="******")
#test_graph.delete_all()

conn = MongoClient('127.0.0.1', 27017)
db = conn.Diplomaticdata
db.authenticate("Diplomaticer", "77777")
posts = db.EventNews
#cursor = posts.find({});
cnt = 0

#find_node_1 = Node("Entity", name='习近平')
#find_node_2 = Node("Entity", name='李克强')
find_node_1 = test_graph.find_one('Entity', 'name', '李克')
find_node_2 = test_graph.find_one('Entity', 'name', '习近')

rel = None
if find_node_1 is not None and find_node_2 is not None:
    # Only look up the relationship when both endpoints exist: a None node is
    # a wildcard for match_one and would select an unrelated relationship.
    rel = test_graph.match_one(start_node=find_node_1, end_node=find_node_2,
                               bidirectional=True)
print(rel)

if rel is not None:
    # A missing ``value`` property reads as None; treat it as zero.
    rel['value'] = (rel['value'] or 0) + 1
    test_graph.push(rel)
class PopItToNeo(object):
    """Copy persons, organizations, posts and memberships from PopIt into Neo4j.

    Nodes are looked up by ``popit_id`` and cached per instance so that each
    PopIt entity is fetched and created at most once per run.
    """

    def __init__(self):
        # safe_load: the config is plain data and must not construct objects.
        with open("config.yaml") as config_file:
            config = yaml.safe_load(config_file)
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"

        # Overridable collection names appended to the endpoint.
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:
            self.graph.delete_all()

        # In-memory caches (popit id -> node) to avoid repeated graph lookups.
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    def process_membership(self):
        """Walk every membership page and link persons to posts/organizations."""
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")

            # fetch_entity returns {} on a failed request, so "result" may be absent.
            for entry in data.get("result", []):
                # A membership is only usable with both a person and an organization.
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue

                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue

                role = entry.get("role") or "member"
                logging.warning("Role: %s" % role)

                kwparams = {"popit_id": entry["id"]}
                start_date = get_timestamp(entry.get("start_date"))
                if start_date:
                    kwparams["start_date"] = start_date
                end_date = get_timestamp(entry.get("end_date"))
                if end_date:
                    kwparams["end_date"] = end_date

                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    if self.graph.match_one(person, role, post):
                        logging.warning("Already exist, skipping")
                    else:
                        relationship = Relationship(person, role, post, **kwparams)
                        self.graph.create(relationship)

                if entry.get("organization_id"):
                    organization = self.fetch_organization(entry["organization_id"])
                    if not organization:
                        continue
                    if self.graph.match_one(person, role, organization):
                        logging.warning("Already exist, skipping")
                    else:
                        relationship = Relationship(person, role, organization, **kwparams)
                        self.graph.create(relationship)

            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        """Return the "Persons" node for ``person_id``, creating it if needed.

        Returns None when PopIt has no such person.
        """
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]

        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node

        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Not cached: the id may exist on a later run.
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")

        entity = data["result"]
        name = entity["name"]
        if isinstance(name, list):
            name = name[0]
        logging.warning("Name: %s" % name)

        kwparam = {}
        birth_date = get_timestamp(entity.get("birth_date"))
        if birth_date:
            kwparam["birth_date"] = birth_date
        death_date = get_timestamp(entity.get("death_date"))
        if death_date:
            kwparam["death_date"] = death_date
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]

        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        """Return the "Organization" node for ``organization_id``, creating it if needed.

        Returns None when PopIt has no such organization.
        """
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]

        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node

        organization_url = "%s/%s/%s" % (
            self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")

        entity = data["result"]
        name = entity["name"]
        if isinstance(name, list):
            name = name[0]

        kwparams = {}
        logging.warning("Name: %s" % name)
        kwparams["name"] = name
        kwparams["popit_id"] = entity["id"]
        founding_date = get_timestamp(entity.get("founding_date"))
        if founding_date:
            kwparams["founding_date"] = founding_date
        dissolution_date = get_timestamp(entity.get("dissolution_date"))
        if dissolution_date:
            kwparams["dissolution_date"] = dissolution_date

        if "classification" in entity:
            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]

        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        """Return the "Posts" node for ``post_id``, creating it if needed.

        A newly created post is linked to its organization with an "of"
        relationship carrying the post's start/end dates. Returns None when
        PopIt has no such post.
        """
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]

        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node

        post_url = "%s/%s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")

        entity = data["result"]
        # A post belongs to an organization; a failure to resolve it must not
        # prevent the post node itself from being created.
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            logging.warning(e)
            organization = None

        logging.warning("Label: %s" % entity["label"])
        kwparams = {}
        kwparams["name"] = entity["label"]
        kwparams["popit_id"] = entity["id"]
        start_date = get_timestamp(entity.get("start_date"))
        if start_date:
            kwparams["start_date"] = start_date
        end_date = get_timestamp(entity.get("end_date"))
        if end_date:
            kwparams["end_date"] = end_date

        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node

        if organization:
            # Only the dates belong on the relationship, not the post's own
            # name/popit_id.
            temp_param = {}
            if start_date:
                temp_param["start_date"] = start_date
            if end_date:
                temp_param["end_date"] = end_date
            relation = Relationship(node, "of", organization, **temp_param)
            self.graph.create(relation)

        return node

    def process_parent_company(self):
        """Create parent_of / child_of relationships between organizations."""
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)
        while True:
            data = self.fetch_entity(organizations_url)

            for entry in data.get("result", []):
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                logging.warning(entry.get("parent_id"))

                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                if not child_node:
                    # The child could not be resolved; nothing to link.
                    continue

                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(parent_node, "parent_of", child_node))

                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(child_node, "child_of", parent_node))

            if data.get("next_url"):
                organizations_url = data["next_url"]
                logging.warning(organizations_url)
            else:
                break

    def process_posts(self):
        """Make sure every PopIt post has a node (and its organization link)."""
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            for entry in data.get("result", []):
                node = self.fetch_post(entry["id"])
                if node is None:
                    continue
                # fetch_post already links the post to its organization.
                self.graph.create(node)
            if data.get("next_url"):
                post_url = data["next_url"]
                logging.warning(post_url)
            else:
                break

    def fetch_entity(self, url):
        """GET ``url`` and return the decoded JSON, or ``{}`` on a non-200 reply."""
        r = requests.get(url)
        # Small pause between requests to the PopIt API.
        time.sleep(0.1)
        if r.status_code != 200:
            # Keep the return type consistent so callers can test for emptiness.
            return {}
        return r.json()
# -*- coding: utf-8 -*- from py2neo import Graph, Node, Relationship, NodeSelector graph = Graph("http://139.224.129.150:7474/browser/", username="******", password="******") # 用CQL进行查询,返回的结果是list data1 = graph.data('MATCH(p:Tag) return p') print("data1 = ", data1, type(data1)) # 用find_one()方法进行node查找,返回的是查找node的第一个node data2 = graph.find_one(label='Form') print("data2 = ", data2, type(data2)) # 用find()方法进行node查找,需要遍历输出,类似于mongodb data3 = graph.find(label='Form') for data in data3: print("data3 = ", data) # Relationship查询 relationship = graph.match_one(rel_type='Sub') print(relationship, type(relationship))
class User(object):
    """A user profile backed by a USER node in the graph database.

    Public instance attributes mirror the node's properties; ``_graph_db``
    holds the py2neo connection and is never persisted.
    """

    def __init__(self, graph_db=None):
        """Create an empty user.

        :param graph_db: optional existing graph connection to reuse; when
            omitted a new ``Graph(settings.DATABASE_URL)`` is opened.
        """
        self.name = ''
        self.call_sign = ''
        self.first_name = ''
        self.last_name = ''
        self.id = ''
        self.mission_statement = ''
        self.about = ''
        self.email = ''
        self.is_mentor = False
        self.is_tutor = False
        self.is_visible = True
        self.is_available_for_in_person = True
        self.join_date = None
        self.last_active_date = ''
        if graph_db is None:
            graph_db = Graph(settings.DATABASE_URL)
        self._graph_db = graph_db

    @property
    def user_properties(self):
        """
        setup user properties
        :return: dictionary of properties (every attribute except the connection)
        """
        properties_dict = dict(self.__dict__)
        properties_dict.pop('_graph_db', None)
        return properties_dict

    def set_user_properties(self, user_properties):
        """
        Copy every key/value of the mapping onto this object as attributes.
        :param user_properties: mapping of attribute name to value
        :return: None
        """
        for key, value in user_properties.items():
            setattr(self, key, value)

    def get_user(self):
        """Load this user's attributes from the graph.

        :return: True when a matching node was found, False otherwise
        """
        user_node = self.user_node
        if user_node is None:
            return False
        for key, value in dict(user_node.properties).items():
            setattr(self, key, value)
        return True

    def create_user(self, user_properties=None):
        """
        create a new user based on the attributes
        :param user_properties: optional mapping applied before the node is built
        :return: node
        """
        self.join_date = datetime.date.today()
        self.last_active_date = self.join_date
        self.id = str(uuid.uuid4())
        if user_properties is not None:
            self.set_user_properties(user_properties)
        new_user_node = Node.cast(GraphLabel.USER, self.user_properties)
        try:
            self._graph_db.create(new_user_node)
        except Exception:
            # Best effort: creation failures (e.g. the node already exists)
            # are ignored and the unsaved node is still returned.
            pass
        return new_user_node

    @property
    def user_node(self):
        """
        get a user Node, looked up by email when set, otherwise by id
        :return: py2neo Node, or None when neither email nor id is set
        """
        if self.email != '':
            return self._graph_db.find_one(GraphLabel.USER,
                                           property_key='email',
                                           property_value=self.email)
        elif self.id != '':
            return self._graph_db.find_one(GraphLabel.USER,
                                           property_key='id',
                                           property_value=self.id)
        return None

    @property
    def user_interests(self):
        """
        get user interests
        :return: list of the property dictionaries of the interest nodes
        """
        user_interests = self._graph_db.match(start_node=self.user_node,
                                              rel_type=GraphRelationship.INTERESTED_IN,
                                              end_node=None)
        return [dict(rel.end_node.properties) for rel in user_interests]

    @property
    def user_goals(self):
        """
        get user goals
        :return: list of goal property dictionaries, each with an
            'interests' list of interest names
        """
        user_goals = self._graph_db.match(start_node=self.user_node,
                                          rel_type=GraphRelationship.HAS_GOAL,
                                          end_node=None)
        goals_list = []
        for rel in user_goals:
            goal_properties = dict(rel.end_node.properties)
            goal = Goal()
            goal.id = goal_properties['id']
            goal_properties['interests'] = [
                interest['name'] for interest in goal.goal_interests]
            goals_list.append(goal_properties)
        return goals_list

    @property
    def user_groups(self):
        """
        :return: list of group property dictionaries, each with an
            'interests' list of interest names
        """
        user_groups = self._graph_db.match(start_node=self.user_node,
                                           rel_type=GraphRelationship.STUDIES_WITH,
                                           end_node=None)
        groups_list = []
        for rel in user_groups:
            group_properties = dict(rel.end_node.properties)
            group = Group()
            group.id = group_properties['id']
            group_properties['interests'] = [
                interest['name'] for interest in group.group_interests]
            groups_list.append(group_properties)
        return groups_list

    @property
    def user_orgs(self):
        """
        :return: list of organization property dictionaries, each with an
            'interests' list of interest names
        """
        user_orgs = self._graph_db.match(start_node=self.user_node,
                                         rel_type=GraphRelationship.MEMBER_OF,
                                         end_node=None)
        orgs_list = []
        for rel in user_orgs:
            org_properties = dict(rel.end_node.properties)
            org = Organization()
            org.id = org_properties['id']
            org_properties['interests'] = [
                interest['name'] for interest in org.org_interests]
            orgs_list.append(org_properties)
        return orgs_list

    @property
    def user_locations(self):
        """
        :return: list of the properties of the locations the user is in
        """
        user_locations = self._graph_db.match(start_node=self.user_node,
                                              rel_type=GraphRelationship.LOCATED_IN,
                                              end_node=None)
        return [rel.end_node.properties for rel in user_locations]

    def get_local_users_shared_interests_near_location(self):
        """
        get a dictionary of users with shared interests with this user,
        restricted to users located in the same location
        :return: dictionary of {interest name: [user names]}
        """
        # Query for this user on this user's connection.
        params = {
            'email': self.email
        }
        cypher_str = (
            "MATCH (u:USER {email:{email}})-[url:LOCATED_IN]->(l:LOCATION)"
            "<-[orl:LOCATED_IN]-(o:USER) "
            "WITH u, o, l, url, orl "
            "MATCH (u)-[ru:INTERESTED_IN]->"
            "(i:INTEREST)<-[ro:INTERESTED_IN]-(o) "
            "RETURN i.name as interest_name, i.id as interest_id, "
            "o.name as user_name, o.id as user_id"
        )
        results = self._graph_db.cypher.execute(cypher_str, params)
        interest_users_dict = {}
        print(results)
        for item in results:
            interest = item['interest_name']
            user = item['user_name']
            interest_users_dict.setdefault(interest, []).append(user)
        return interest_users_dict

    def add_interest(self, interest_id, experience_properties_dict=None):
        """
        Add interest to user
        :param interest_id: string uuid
        :param experience_properties_dict: optional properties stored on the
            relationship
        :return: List of interests
        """
        interest = Interest()
        interest.id = interest_id
        interest_node = interest.interest_node_by_id
        user_interest_relationship = Relationship(self.user_node,
                                                  GraphRelationship.INTERESTED_IN,
                                                  interest_node)
        # The properties are optional; treat a missing dict as empty.
        for key, value in (experience_properties_dict or {}).items():
            user_interest_relationship[key] = value
        try:
            self._graph_db.create_unique(user_interest_relationship)
        except Exception:
            # Best effort: failures to create the relationship are ignored.
            pass
        return self.user_interests

    def update_interest(self, interest_id, experience_properties_dict):
        """Overwrite properties on the user's relationship to an interest.

        Does nothing when the user is not linked to that interest.
        """
        interest = Interest()
        interest.id = interest_id
        interest_node = interest.interest_node_by_id
        user_interest_relationship = self._graph_db.match_one(
            start_node=self.user_node,
            rel_type=GraphRelationship.INTERESTED_IN,
            end_node=interest_node)
        if user_interest_relationship is None:
            return
        for key, value in experience_properties_dict.items():
            user_interest_relationship.properties[key] = value
        user_interest_relationship.push()

    def delete_interest(self, interest_id):
        """
        drop interest relationship from user given the interest_id
        :param interest_id: str(uuid.uuid4())
        :return: None
        """
        interest = Interest()
        interest.id = interest_id
        interest_node = interest.interest_node_by_id
        user_interest_relationship = self._graph_db.match_one(
            start_node=self.user_node,
            rel_type=GraphRelationship.INTERESTED_IN,
            end_node=interest_node)
        if user_interest_relationship is not None:
            self._graph_db.delete(user_interest_relationship)

    def update_user(self):
        """Push every current attribute of this object to the user's node."""
        user_node = self.user_node
        for key, value in dict(self.user_properties).items():
            user_node[key] = value
        user_node.push()

    def add_goal(self, goal_properties):
        """
        Create a goal and link it to the user
        :param goal_properties: properties of the new goal
        :return: List of user goals
        """
        goal = Goal()
        goal.set_goal_properties(goal_properties=goal_properties)
        goal.create_goal()
        user_goal_relationship = Relationship(self.user_node,
                                              GraphRelationship.HAS_GOAL,
                                              goal.goal_node)
        self._graph_db.create_unique(user_goal_relationship)
        return self.user_goals

    def delete_goal(self, goal_id):
        """Delete a goal node together with its link to the user."""
        user_node = self.user_node
        goal = Goal()
        goal.id = goal_id
        # All relationships must be removed before the node can be deleted.
        goal.delete_all_interests()
        goal_node = goal.goal_node
        user_goal_rel = self._graph_db.match_one(start_node=user_node,
                                                 rel_type=GraphRelationship.HAS_GOAL,
                                                 end_node=goal_node)
        self._graph_db.delete(user_goal_rel)
        self._graph_db.delete(goal_node)

    def join_group(self, group_id, group_relationship_properties=None):
        """
        Add user as member of group
        :param group_id: string uuid
        :param group_relationship_properties: currently unused
        :return: None
        """
        group = Group()
        group.id = group_id
        join_properties = {
            'join_date': datetime.date.today()
        }
        user_group_relationship = Relationship(self.user_node,
                                               GraphRelationship.STUDIES_WITH,
                                               group.group_node)
        for key, value in join_properties.items():
            user_group_relationship[key] = value
        try:
            self._graph_db.create_unique(user_group_relationship)
        except Exception:
            # Best effort: failures to create the relationship are ignored.
            pass

    def leave_group(self, group_id):
        """
        remove relationship between user and study group
        :param group_id: string uuid
        :return: None
        """
        group = Group()
        group.id = group_id
        # join_group links with STUDIES_WITH, so that is the type to remove.
        user_group_relationship = self._graph_db.match_one(
            start_node=self.user_node,
            rel_type=GraphRelationship.STUDIES_WITH,
            end_node=group.group_node)
        if user_group_relationship is not None:
            self._graph_db.delete(user_group_relationship)

    def delete_group(self, group_id):
        """Not implemented."""
        pass

    def join_organization(self, organization_id):
        """
        add user to organization
        :param organization_id: string uuid
        :return: None
        """
        org = Organization()
        org.id = organization_id
        user_org_relationship = Relationship(self.user_node,
                                             GraphRelationship.MEMBER_OF,
                                             org.org_node)
        try:
            self._graph_db.create_unique(user_org_relationship)
        except Exception:
            print(sys.exc_info()[0])

    def leave_organization(self, organization_id):
        """
        remove relationship between user and organization
        :param organization_id: string uuid
        :return: None
        """
        org = Organization()
        org.id = organization_id
        user_org_relationship = self._graph_db.match_one(
            start_node=self.user_node,
            rel_type=GraphRelationship.MEMBER_OF,
            end_node=org.org_node)
        if user_org_relationship is not None:
            self._graph_db.delete(user_org_relationship)

    def add_location(self, location_json):
        """
        link user to a location node, creating the location when missing
        :param location_json: mapping with at least an 'id' (place id)
        :return: None
        """
        location_place_id = location_json['id']
        location = Location()
        location.id = location_place_id
        location_node = location.location_node_by_place_id
        if not location_node:
            location.set_location_properties(location_json)
            location.create_location()
            # Same attribute access as above; it is read, not called.
            location_node = location.location_node_by_place_id

        user_location_relationship = Relationship(self.user_node,
                                                  GraphRelationship.LOCATED_IN,
                                                  location_node)
        self._graph_db.create_unique(user_location_relationship)

    def create_cq(self, cq_dict, cq_interests_dict=None):
        """Create a CQ owned by this user (``cq_interests_dict`` is unused)."""
        Cq.create_cq(user_node=self.user_node, cq_dict=cq_dict)

    def create_converation_between_users(self, user_id_started, user_id_with,
                                         conversation_properties):
        """Create a conversation node linking two users.

        A fresh uuid is stored in ``conversation_properties['id']``.

        :return: id of the conversation, or None when creation failed
        """
        conversation_properties['id'] = str(uuid.uuid4())
        new_convo_node = Node.cast(GraphLabel.CONVERSATION, conversation_properties)
        try:
            convo_node, = self._graph_db.create(new_convo_node)
            user_started = User()
            user_started.id = user_id_started
            user_with = User()
            user_with.id = user_id_with
            # (starter)-[STARTED]->(conversation)
            user_started_relationship = Relationship(user_started.user_node,
                                                     GraphRelationship.STARTED,
                                                     convo_node)
            self._graph_db.create(user_started_relationship)
            # (conversation)-[WITH]->(other user)
            convo_with_relationship = Relationship(convo_node,
                                                   GraphRelationship.WITH,
                                                   user_with.user_node)
            self._graph_db.create(convo_with_relationship)
            return convo_node.properties['id']
        except Exception:
            # Best effort: any failure yields None instead of propagating.
            return None

    def matched_users(self, match_string, limit):
        """
        Case-insensitive prefix search on user names.
        :param match_string: prefix to look for
        :param limit: maximum number of results
        :return: dictionary {'count': n, 'users': [{'id': ..., 'name': ...}]}
        """
        params = {
            'match': '(?i)%s.*' % match_string,
            'limit': limit
        }
        cypher_str = "MATCH (user:USER ) " \
            "WHERE user.name =~ {match} " \
            "RETURN user.name as name, user.id as id " \
            "LIMIT {limit}"
        match_results = self._graph_db.cypher.execute(statement=cypher_str,
                                                      parameters=params)
        users_list = [{'id': item.id, 'name': item.name} for item in match_results]
        root = {}
        root['count'] = len(users_list)
        root['users'] = users_list
        return root

    def register_user(self, email):
        """Send an activation email containing a signed, timed token."""
        verification_email = notifications.Notifications()
        verification_email.recipients = [email]
        s = URLSafeTimedSerializer(secret_key=settings.TOKEN_SECRET_KEY)
        payload = s.dumps(email)
        verification_email.subject = settings.ACTIVATION_SUBJECT
        verification_email.message = settings.ACTIVATION_MESSAGE
        verification_email.url = self.construct_verification_url(payload=payload)
        verification_email.send_by_gmail()

    def activate_user(self, payload, email):
        """Verify an activation token and create or update the user.

        :raises BadSignature: when the token does not carry ``email``
        """
        s = URLSafeTimedSerializer(secret_key=settings.TOKEN_SECRET_KEY)
        payload_email = s.loads(payload, max_age=settings.TOKEN_EXPIRES_IN)
        if email != payload_email:
            raise BadSignature('bad email')
        self.email = email
        self.get_user()
        # NOTE(review): for a brand-new user the token is derived from the
        # still-empty id, before create_user assigns one -- confirm intended.
        self.permanent_web_token = self.create_web_token()
        if self.id == '':
            self.create_user()
        else:
            self.update_user()

    def update_last_active_date(self):
        """Set the last-active date to today and push it to the node."""
        self.last_active_date = datetime.date.today()
        user_node = self.user_node
        user_node['last_active_date'] = self.last_active_date
        user_node.push()

    def construct_verification_url(self, payload):
        """Return the activation URL for a signed payload."""
        return settings.SITE_URL + settings.ACTIVATION_ROUTE + "/%s" % payload

    def create_web_token(self):
        """Return a signed (non-expiring) token carrying this user's id."""
        s = URLSafeSerializer(secret_key=settings.TOKEN_SECRET_KEY)
        return s.dumps(self.id)

    def user_relationships_for_json(self, auth_id):
        """Return the profile plus all related collections and permissions."""
        root = self.user_profile_for_json()
        root['__class'] = self.__class__.__name__
        root['interests'] = self.user_interests
        root['locations'] = self.user_locations
        root['goals'] = self.user_goals
        root['groups'] = self.user_groups
        root['organizations'] = self.user_orgs
        root['is_owner'] = (auth_id == self.id)
        root['allow_edit'] = (auth_id == self.id)
        root['allow_message'] = (auth_id is not None)
        return root

    def user_profile_for_json(self):
        """Return the user's properties as a dictionary."""
        return self.user_properties

    def _identity_for_json(self):
        """Return the '__class' / 'id' / 'email' header shared by JSON payloads."""
        root = {}
        root['__class'] = self.__class__.__name__
        root['id'] = self.id
        root['email'] = self.email
        return root

    def user_interests_for_json(self):
        """Return identity fields plus the user's interests."""
        root = self._identity_for_json()
        root['interests'] = self.user_interests
        return root

    def user_goals_for_json(self):
        """Return identity fields plus the user's goals."""
        root = self._identity_for_json()
        root['goals'] = self.user_goals
        return root

    def user_groups_for_json(self):
        """Return identity fields plus the user's groups."""
        root = self._identity_for_json()
        root['groups'] = self.user_groups
        return root

    def user_locations_for_json(self, auth_id):
        """Return class and id, plus 'allow_edit' when ``auth_id`` is the owner."""
        root = {}
        root['__class'] = self.__class__.__name__
        root['id'] = self.id
        if self.id == auth_id:
            root['allow_edit'] = True
        return root

    def local_users_with_shared_interests_for_json(self):
        """Return identity fields plus nearby users grouped by shared interest."""
        root = self._identity_for_json()
        root['users'] = self.get_local_users_shared_interests_near_location()
        return root

    def activated_user_for_json(self):
        """Return the class name and the permanent web token."""
        root = {}
        root['__class'] = self.__class__.__name__
        root['x_auth_key'] = self.permanent_web_token
        return root
# | >> > selected = selector.select("Person", name="Keanu Reeves") # | >> > list(selected) # | [(f9726ea:Person{born: 1964, name: "Keanu Reeves"})] # | >> > selected = selector.select("Person").where("_.name =~ 'J.*'", "1960 <= _.born < 1970") # | >> > list(selected) # | [(a03f6eb:Person{born: 1967, name: "James Marshall"}), # | (e59993d:Person{born: 1966, name: "John Cusack"}), # | (c44901e:Person{born: 1960, name: "John Goodman"}), # | (b141775:Person{born: 1965, name: "John C. Reilly"}), # | (e40244b:Person{born: 1967, name: "Julia Roberts"})] # 六、通过节点/关系查找相关联的节点/关系 # 如果已经确定了一个节点或者关系,想找到和它相关的关系和节点,就可以使用match和match_one。 find_relationship = test_graph.match_one(start_node=find_code_1, end_node=find_code_2, bidirectional=False) print(find_relationship) # 如以上代码所示,match和match_one的参数包括start_node,Relationship,end_node中的至少一个。 # bidirectional参数的意义是指关系是否可以双向。 # 如果为False,则起始节点必须为start_node,结束节点必须为end_node。如果有Relationship参数,则一定按照Relationship对应的方向。 # 如果为True,则不需要关心方向问题,会把两个方向的数据都返回。 match_relation = test_graph.match(start_node=find_code_1, bidirectional=True) for i in match_relation: print(i) i['count'] += 1 test_graph.push(i) # 如以上代码所示,查找和find_code_1相关的关系。 # match里面的参数只写了start_node,bidirectional的值为True,则不会考虑方向问题,返回的是以find_code_1为起始节点和结束节点的所有关联关系。 # 如果,bidirectional的值为False,则只会返回以find_code_1为起始节点的所有关联关系。
class UserGraph:
    """Build a graph of accounts and the payments made between them.

    Each account is a node labelled ``self.label`` with ``name`` (the user
    code) and ``btc`` (running balance); payments are "Pay" relationships
    whose ``btc`` property accumulates the amount transferred.
    """

    def __init__(self):
        self.graph = None  # set by init_connect()
        self.label = "Account"
        self.user_dao = UserDAO()
        self.address_dao = AddressDAO()
        self.transaction_dao = TransactionDAO()

    def init_connect(self):
        """Open the connection to the local Neo4j server."""
        self.graph = Graph("http://127.0.0.1:7474", username="******", password="******")

    def clear_data(self):
        """Delete every node and relationship in the graph."""
        self.graph.delete_all()

    def create_a_user_node(self, name_code, btc):
        """Create an account node and return the result of ``graph.create``."""
        a_user = Node(self.label, name=name_code, btc=btc)
        return self.graph.create(a_user)

    def get_a_user_node_by_code(self, name_code):
        """Return the account node whose ``name`` equals ``name_code``."""
        user_node = self.graph.find_one(self.label, property_key='name',
                                        property_value=name_code)
        return user_node

    def _get_or_create_address(self, address):
        """Return the address record, creating a new user and record if absent."""
        record = self.address_dao.get_address_by_address(address)
        if not record:
            new_user = self.user_dao.create_user()
            record = self.address_dao.create_address(new_user['id'], address)
        return record

    def _adjust_balance(self, code, delta):
        """Add ``delta`` to the account's balance, creating the node if needed.

        Returns the existing node after pushing the update, or None when the
        node had to be created (callers re-fetch it if they need it).
        """
        node = self.get_a_user_node_by_code(code)
        if node:
            node['btc'] += delta
            self.graph.push(node)
            return node
        self.create_a_user_node(code, delta)
        return None

    def add_a_transaction(self, transaction_dict):
        """Apply one transaction to the graph.

        :param transaction_dict: mapping with 'source', 'destination' and
            'value'. With a falsy source only the destination account is
            credited; otherwise the source is debited, the destination is
            credited and the "Pay" relationship between them is updated.
        """
        value = transaction_dict['value']
        destination = transaction_dict['destination']

        if not transaction_dict['source']:
            # No source: just credit the destination account.
            dest_record = self._get_or_create_address(destination)
            account_dest = self.user_dao.get_user_by_id(dest_record['user_id'])
            self._adjust_balance(account_dest['code'], value)
            return

        source = transaction_dict['source']
        # The source address is assumed to exist already, so it is not created.
        temp_source_result = self.address_dao.get_address_by_address(source)
        temp_dest_result = self._get_or_create_address(destination)

        # Debit the source account.
        account_source = self.user_dao.get_user_by_id(temp_source_result['user_id'])
        account_source_node = (
            self._adjust_balance(account_source['code'], -value)
            or self.get_a_user_node_by_code(account_source['code']))

        # Credit the destination account.
        account_destination = self.user_dao.get_user_by_id(temp_dest_result['user_id'])
        account_destination_node = (
            self._adjust_balance(account_destination['code'], value)
            or self.get_a_user_node_by_code(account_destination['code']))

        # Accumulate the amount on the source -> destination relationship.
        pay_relationship = self.graph.match_one(
            start_node=account_source_node,
            end_node=account_destination_node,
            bidirectional=False)
        if pay_relationship:
            pay_relationship['btc'] += value
            self.graph.push(pay_relationship)
        else:
            pay_relationship = Relationship(
                account_source_node, "Pay", account_destination_node)
            pay_relationship['btc'] = value
            self.graph.create(pay_relationship)

    def generate_user_graph(self, page_total=100):
        """Feed ``page_total`` pages of stored transactions into the graph.

        :param page_total: number of pages to process (pages 0..page_total-1)
        """
        for page_num in range(page_total):
            print("====Generate User Graph 处理第 " + str(page_num) + " / " +
                  str(page_total) + " 页交易(100/page)")
            transaction_list = self.transaction_dao.paginate_list_resource(
                models.Transaction, page_num)
            for item_transaction in transaction_list:
                transaction_dict = {
                    'source': item_transaction['source'],
                    'destination': item_transaction['destination'],
                    'value': item_transaction['value']
                }
                self.add_a_transaction(transaction_dict)

#user_graph = UserGraph()
#user_graph.init_connect()
#user_graph.generate_user_graph()
class TwitterGraph():
    """Neo4j-backed graph of app users, Twitter accounts, follows and retweets.

    ``User`` nodes are application users, ``TwitterUser`` nodes are Twitter
    accounts. ``FOLLOWS`` links a ``User`` to a ``TwitterUser`` and
    ``RETWEETED`` links two ``TwitterUser`` nodes and carries a ``count``.
    """

    def __init__(self):
        self.graph = Graph("http://*****:*****@54.191.171.209:7474/db/data/")
        # Heap-ordered list of (incoming_relationship_count, screen_name).
        self.popularity_heap = []
        self.reassess_popularity()

    # -- private lookup helpers -------------------------------------------

    def _find_user(self, user):
        """Return the ``User`` node for *user*, or None."""
        return self.graph.find_one("User", 'user_id', user.id)

    def _find_twitter_user(self, screen_name):
        """Return the ``TwitterUser`` node for *screen_name*, or None."""
        return self.graph.find_one("TwitterUser", 'screen_name', screen_name)

    def _ensure_user(self, user):
        """Return the ``User`` node for *user*, creating it when missing."""
        node = self._find_user(user)
        if node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            node = self._find_user(user)
        return node

    def _ensure_twitter_user(self, screen_name):
        """Return the ``TwitterUser`` node, creating it when missing."""
        node = self._find_twitter_user(screen_name)
        if node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            node = self._find_twitter_user(screen_name)
        return node

    # -- public API ---------------------------------------------------------

    def add_user(self, user):
        """Create a ``User`` node from *user* and return the create() result."""
        new_user = Node("User", token=user.token.session_id, user_id=user.id)
        return self.graph.create(new_user)

    def is_cached(self, screen_name):
        """Return True when a ``TwitterUser`` with *screen_name* exists."""
        # Always return a bool; the original fell through and returned None.
        return self._find_twitter_user(screen_name) is not None

    def get_RT_recommendations(self, user):
        """Return up to 10 screen names most retweeted by accounts *user* follows.

        Only the first 5 follows and the first 5 retweets of each are sampled.
        An unknown user yields an empty list.
        """
        user_node = self._find_user(user)
        if user_node is None:
            return []
        recommendations = Counter()
        for rel in user_node.match_outgoing("FOLLOWS", limit=5):
            for r in rel.end_node.match_outgoing("RETWEETED", limit=5):
                recommendations[r.end_node.properties['screen_name']] += 1
        return [name for (name, _count) in recommendations.most_common(10)]

    def get_generic_recommendations(self):
        """Return the screen names of the 10 most popular Twitter accounts."""
        return [screen_name for (_count, screen_name)
                in heapq.nlargest(10, self.popularity_heap)]

    def reassess_popularity(self):
        """Rebuild ``popularity_heap`` from the current graph state.

        NOTE: expensive calculation, to be run threaded at multiples of x
        actions to graph or hourly/daily job.
        """
        # Build a fresh heap instead of pushing onto the old one: the method
        # runs after every follow, and appending would duplicate every account.
        fresh = [
            (sum(1 for _ in tu.match_incoming()), tu.properties['screen_name'])
            for tu in self.graph.find("TwitterUser")
        ]
        heapq.heapify(fresh)
        self.popularity_heap = fresh

    def add_twitter_user(self, screen_name):
        """Create a ``TwitterUser`` node for *screen_name* unless it exists."""
        if self._find_twitter_user(screen_name) is None:
            self.graph.create(Node("TwitterUser", screen_name=screen_name))

    def add_follow(self, screen_name, user):
        """Record that *user* follows *screen_name* and refresh popularity."""
        user_node = self._ensure_user(user)
        twitter_user = self._ensure_twitter_user(screen_name)
        self.graph.create(Relationship(user_node, "FOLLOWS", twitter_user))
        self.reassess_popularity()

    def remove_follow(self, screen_name, user):
        """Delete the ``FOLLOWS`` relationship from *user* to *screen_name*, if any.

        Missing endpoint nodes are created first, as in the original code.
        """
        user_node = self._ensure_user(user)
        twitter_user = self._ensure_twitter_user(screen_name)
        follow_relationship = self.graph.match_one(user_node, "FOLLOWS", twitter_user)
        if follow_relationship is not None:
            self.graph.delete(follow_relationship)

    def add_retweet(self, screen_name, retweeted_screen_name):
        """Record one retweet of *retweeted_screen_name* by *screen_name*.

        Creates the ``RETWEETED`` relationship with ``count`` 1 on first use
        and increments ``count`` afterwards.
        """
        twitter_user = self._ensure_twitter_user(screen_name)
        retweeted_twitter_user = self._ensure_twitter_user(retweeted_screen_name)
        retweet = self.graph.match_one(twitter_user, "RETWEETED", retweeted_twitter_user)
        if retweet is None:
            retweet_relationship = Relationship(twitter_user, "RETWEETED", retweeted_twitter_user)
            retweet_relationship.properties['count'] = 1
            self.graph.create(retweet_relationship)
        elif retweet.properties['count'] is None:
            # this shouldn't happen, just for testing while transitioning db
            retweet.properties['count'] = 1
            retweet.push()
        else:
            retweet.properties['count'] = retweet.properties['count'] + 1
            retweet.push()
class GraphDB():
    """Wrapper around a py2neo graph of Twitter ``user`` and ``topic_name`` nodes."""

    def __init__(self, user=NEO4J_USER, pwd=NEO4J_PWD, host=NEO4J_HOST):
        self.graph = Graph("http://%s:%s@%s/db/data/" % (user, pwd, host))

    def query(self, query_str, stream=False, params=None):
        """Run a Cypher statement.

        :param query_str: Cypher text; use ``{name}`` placeholders for values.
        :param stream: when true use ``cypher.stream``, else ``cypher.execute``.
        :param params: optional dict of parameter values for the placeholders.
        :return: whatever the chosen py2neo call returns.
        """
        runner = self.graph.cypher.stream if stream else self.graph.cypher.execute
        if params:
            return runner(query_str, params)
        return runner(query_str)

    # -- private helpers ----------------------------------------------------

    def _get_or_create_user_node(self, user):
        """Return the ``user`` node for *user*, creating it when not found."""
        user_node = self.graph.find_one("user", 'id', user.id_str)
        if not user_node:
            user_node = self.create_node_from_user(user)
            self.graph.create(user_node)
        return user_node

    def _bump_relationship(self, start_node, relation, end_node):
        """Create the relationship with ``count`` 1, or increment its ``count``."""
        relationship = self.graph.match_one(start_node, relation, end_node)
        if not relationship:
            relationship = Relationship(start_node, relation, end_node, count=1)
            self.graph.create(relationship)
        else:
            # A relationship stored without ``count`` is treated as 0 rather
            # than failing on ``None + 1``.
            relationship.properties['count'] = (relationship.properties['count'] or 0) + 1
            relationship.push()

    # -- public API ---------------------------------------------------------

    def create_relation_user_to_topic(self, user, relation, topic_name):
        """Link *user* to the topic *topic_name*, counting repeated links."""
        user_node = self._get_or_create_user_node(user)
        topic_node = self.graph.find_one("topic_name", 'name', topic_name)
        if not topic_node:
            topic_node = Node("topic_name", name=topic_name)
            self.graph.create(topic_node)
        self._bump_relationship(user_node, relation, topic_node)

    # Relations: follows, possibly favourites, retweets
    def create_relation_user_to_user(self, userA, relation, userB):
        """Link *userA* to *userB* with *relation*, counting repeated links."""
        # Both lookups happen before either node is created, as in the original.
        node_a = self.graph.find_one("user", 'id', userA.id_str)
        node_b = self.graph.find_one("user", 'id', userB.id_str)
        if not node_a:
            node_a = self.create_node_from_user(userA)
            self.graph.create(node_a)
        if not node_b:
            node_b = self.create_node_from_user(userB)
            self.graph.create(node_b)
        self._bump_relationship(node_a, relation, node_b)

    def increment_user_counter(self, user, counter, n):
        """Add *n* to property *counter* on the user's node (starting at *n*)."""
        user_node = self._get_or_create_user_node(user)
        if counter in user_node.properties:
            user_node.properties[counter] += n
        else:
            user_node.properties[counter] = n
        user_node.push()

    def get_all_users(self):
        """Return ``[{'name': ..., 'id_str': ...}, ...]`` for every ``user`` node."""
        users = []
        for u in self.graph.find('user'):
            props = u.properties
            # create_node_from_user stores the screen name under ``name``;
            # ``screen_name`` is kept only as a fallback for other writers.
            users.append({'name': props['name'] or props['screen_name'],
                          'id_str': props['id']})
        return users

    def create_node_from_user(self, user):
        """Build (but do not persist) a ``user`` node from a Twitter user object."""
        return Node("user",
                    name=user.screen_name,
                    id=user.id_str,
                    followers_count=user.followers_count,
                    friends_count=user.friends_count,
                    statuses_count=user.statuses_count,
                    favourites_count=user.favourites_count)

    def quicksearch(self, username, limit=10):
        """Return up to *limit* distinct user names starting with *username*.

        *username* is still interpreted as a regular expression prefix, but it
        is passed as a query parameter so it cannot alter the Cypher statement.
        """
        cql_query = ("match(u:user) WHERE u.name =~ {pattern} "
                     "RETURN DISTINCT u.name LIMIT {limit};")
        return self.query(cql_query,
                          params={'pattern': '%s.*' % username, 'limit': int(limit)})

    def get_user_count(self):
        """Return the number of distinct ``user`` nodes (0 when no row comes back)."""
        cql_query = "match(u:user) RETURN count(DISTINCT u) AS c;"
        for row in self.query(cql_query):
            return row['c']
        return 0
class NeoFourJ(object):
    """Friendship graph stored in Neo4j.

    - Add to friends
    - confirm friends
    - Get all friends for user_id
    - checking_friendship

    A friendship is mutual when ``FRIENDS`` relationships exist in both
    directions between two ``Person`` nodes.
    """

    def __init__(self, neo4j_url=settings.NEO4J_URL):
        self.graph = Graph(neo4j_url)

    @staticmethod
    def person(user):
        """Build an unsaved ``Person`` node from a user object."""
        return Node("Person", user_id=user.pk,
                    name=u'{} {}'.format(user.first_name, user.last_name))

    @staticmethod
    def _user_ids(records):
        """Collect the ``user_id`` column of a Cypher result into a list."""
        return [record.user_id for record in records]

    def create_person(self, node):
        """Persist *node* and return the created entity."""
        return self.graph.create(node)[0]

    def get_person(self, user):
        """Return the ``Person`` node for a user object or an integer user id."""
        user_id = user if isinstance(user, int) else user.id
        return self.graph.find_one('Person', property_key='user_id',
                                   property_value=user_id)

    def add_to_friends(self, node1, node2):
        """Create ``node1 -FRIENDS-> node2``.

        When the reverse relationship already exists the friendship has just
        become mutual, so both users are notified through redis first.
        """
        rel = Relationship(node1, "FRIENDS", node2, since=now(), seen=False)
        if self.check_friendship_rel(node2['user_id'], node1['user_id']):
            self._publish_to_redis_channel(node1['user_id'], node2['user_id'])
        self.graph.create_unique(rel)
        update_index_delay()

    def pass_friend(self, node1, node2):
        """Create ``node1 -PASSES-> node2``."""
        rel = Relationship(node1, "PASSES", node2, since=now(), seen=False)
        self.graph.create_unique(rel)
        update_index_delay()

    def remove_from_friends(self, user_id1, user_id2):
        """Delete the ``FRIENDS`` relationship from user 1 to user 2."""
        result = self.graph.cypher.execute(
            """
            MATCH (n)-[rel:FRIENDS]->(r)
            WHERE n.user_id={USER_ID1} AND r.user_id={USER_ID2}
            DELETE rel
            """,
            {'USER_ID1': user_id1, 'USER_ID2': user_id2})
        update_index_delay()
        return result

    def get_my_friends(self, user_id):
        """Return the mutual friends of *user_id* (id, node_name, user_id rows)."""
        try:
            user_id = int(user_id)
        except (ValueError, TypeError) as err:
            # Best effort: the query still runs with the unconverted value.
            logger.debug(err)
        return self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID} })-[:FRIENDS]->(n)
            -[:FRIENDS]->(Person { user_id:{USER_ID} })
            return ID(n) AS id, n.name AS node_name, n.user_id AS user_id
            """,
            {'USER_ID': user_id})

    def get_my_friends_icontains_name(self, user_id, name):
        """Return user ids of mutual friends whose name contains *name*.

        Matching is case-insensitive. *name* is sent as a query parameter so
        user input can no longer break out of the Cypher string literal.
        """
        if not name:
            return []
        result = self.graph.cypher.execute(
            "MATCH (Person { user_id:{USER_ID} })-[:FRIENDS]->(n)"
            "-[:FRIENDS]->(Person { user_id:{USER_ID} }) "
            "WHERE n.name =~ {NAME_PATTERN} "
            "return ID(n) AS id, n.name AS node_name, n.user_id AS user_id",
            {'USER_ID': user_id,
             'NAME_PATTERN': '(?i).*' + name.lower() + '.*'})
        if result is None:
            return []
        return self._user_ids(result)

    def get_my_thumbed_up(self, user_id):
        """Return everyone *user_id* has a ``FRIENDS`` or ``PASSES`` link to."""
        return self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID} })-[:FRIENDS|PASSES]->(n)
            return ID(n) AS id, n.name AS node_name, n.user_id AS user_id
            """,
            {'USER_ID': user_id})

    def get_my_passes(self, user_id):
        """Return everyone *user_id* has a ``PASSES`` link to."""
        return self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID} })-[:PASSES]->(n)
            return ID(n) AS id, n.name AS node_name, n.user_id AS user_id
            """,
            {'USER_ID': user_id})

    def check_friendship_rel(self, user_id1, user_id2):
        """
        Check :FRIENDS rel in one direction
        :return: True when ``user_id1 -FRIENDS-> user_id2`` exists
        """
        result = self.graph.cypher.execute(
            """
            MATCH (n1:Person { user_id:{USER_ID1} })-[:FRIENDS]->
            (n2:Person { user_id:{USER_ID2} })
            return n1.user_id AS user_id1, n2.user_id AS user_id2
            """,
            {'USER_ID1': user_id1, 'USER_ID2': user_id2})
        first = result.one
        if first is None:
            return False
        return first.user_id1 == user_id1 and first.user_id2 == user_id2

    def get_my_friends_ids(self, user_id):
        """Return the user ids of the mutual friends of *user_id*."""
        return self._user_ids(self.get_my_friends(user_id))

    def get_my_thumbed_up_ids(self, user_id):
        """Return the user ids *user_id* has friended or passed."""
        return self._user_ids(self.get_my_thumbed_up(user_id))

    def update_rel_seen(self, user_id1, user_id2):
        """Mark the ``FRIENDS`` relationship from user 1 to user 2 as seen."""
        n1 = self.get_person(user_id1)
        n2 = self.get_person(user_id2)
        rel = self.graph.match_one(n1, 'FRIENDS', n2)
        if rel is None:
            # Nothing to mark; previously this raised TypeError on None.
            logger.debug('no FRIENDS relationship from %s to %s',
                         user_id1, user_id2)
            return
        rel['seen'] = True
        rel.push()
        update_index_delay()

    def get_new_friends_count(self, user_id):
        """Return how many mutual friendships of *user_id* are still unseen."""
        result = self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID} })-[r1:FRIENDS]->
            (n)-[r2:FRIENDS]->(Person { user_id:{USER_ID} })
            where r1.seen = FALSE
            return count(n.user_id) AS new_friend_count
            """,
            {'USER_ID': user_id})
        return result.one or 0

    def check_friendship(self, user_id1, user_id2):
        """Return the rows proving a mutual friendship between the two users."""
        return self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID1} })-[:FRIENDS]->
            (n:Person { user_id:{USER_ID2} })-[:FRIENDS]->
            (Person { user_id:{USER_ID1} })
            return n.name, n.user_id
            """,
            {'USER_ID1': user_id1, 'USER_ID2': user_id2})

    def get_seen(self, user_id1, user_id2):
        """Return the ``seen`` flag of ``user_id1 -FRIENDS-> user_id2``."""
        return self.graph.cypher.execute(
            """
            MATCH (Person { user_id:{USER_ID1} })-[r:FRIENDS]->
            (n:Person { user_id:{USER_ID2} })
            return r.seen
            """,
            {'USER_ID1': user_id1, 'USER_ID2': user_id2})

    def get_or_create_node(self, user_id):
        """
        This function return new person or get existing
        also return created flag
        :param user_id:
        :return: ``(person, created)``
        """
        # NOTE(review): when the node is missing, person() reads .pk,
        # .first_name and .last_name, so *user_id* must be a user object on
        # that path despite its name - confirm against callers.
        person = self.get_person(user_id)
        if person:
            return person, False
        return self.create_person(self.person(user_id)), True

    def _publish_to_redis_channel(self, user_id1, user_id2):
        """Tell each user, over ``connection.<id>``, who their new friend is."""
        user1 = FacebookCustomUser.objects.get(pk=user_id1)
        user2 = FacebookCustomUser.objects.get(pk=user_id2)
        r = redis.StrictRedis(host='localhost', port=6379, db=0)
        for recipient, friend in ((user1, user2), (user2, user1)):
            payload = {
                'friend_name': friend.first_name,
                'friend_id': friend.id,
                'friend_username': friend.username,
            }
            r.publish('connection.{}'.format(recipient.id), json.dumps(payload))

    def create_friendship(self, user1, user2):
        """Create both persons and a mutual friendship between them."""
        n1 = self.create_person(self.person(user1))
        n2 = self.create_person(self.person(user2))
        self.add_to_friends(n1, n2)
        self.add_to_friends(n2, n1)
        update_index_delay()

    def get_mutual_friends(self, user_id1, user_id2):
        """Return user ids that are mutual friends of both given users."""
        mutual_friends = self.graph.cypher.execute(
            """
            MATCH (p1:Person{user_id:{USER_ID1}})-[:FRIENDS]->(n)
            -[:FRIENDS]->(p1),
            (p2:Person{user_id:{USER_ID2}})-[:FRIENDS]->(n)
            -[:FRIENDS]->(p2)
            RETURN n.user_id AS user_id;
            """,
            {'USER_ID1': user_id1, 'USER_ID2': user_id2})
        return self._user_ids(mutual_friends)
test_graph.create(node_2_call_node_1) # 节点/关系的属性赋值以及属性值的更新 node_1_call_node_2['count'] += 1 test_graph.push(node_1_call_node_2) # 通过属性值来查找节点和关系(find,find_one) find_code_1 = test_graph.find_one(label="Person", property_key="name", property_value="test_node_1") find_code_3 = test_graph.find_one(label="Person", property_key="name", property_value="test_node_2") print(find_code_1['name']) # 通过节点/关系查找相关联的节点/关系 find_relationship = test_graph.match_one(start_node=find_code_1, end_node=find_code_3, bidirectional=False) print(find_relationship) # match和match_one的参数包括start_node,Relationship,end_node中的至少一个。 match_relation = test_graph.match(start_node=find_code_1, bidirectional=True) for i in match_relation: print(i) i['count'] += 1 test_graph.push(i)
# NOTE(review): this is the body of a loop over `authors` whose header is not
# visible here; the nesting below is reconstructed from a whitespace-collapsed
# source - confirm against the original file.
j = i + 1
# Reuse the existing Author node for authors[i], or build a new one.
fromAuthor = graph.find_one("Author", "name", authors[i])
if fromAuthor is None:
    fromAuthor = Node("Author", name=authors[i])
# Creating the PUBLISH relationship is the only place fromAuthor is persisted.
author_publish_paper = Relationship(fromAuthor, "PUBLISH", paper)
graph.create(author_publish_paper)
# Pair authors[i] with every later author and link them with CO both ways.
while (j < len(authors)):
    toAuthor = graph.find_one("Author", "name", authors[j])
    if toAuthor is None:
        toAuthor = Node("Author", name=authors[j])
    # Advance before the `continue` below so the loop always makes progress.
    j = j + 1
    graph.create(toAuthor)
    # Only the fromAuthor -> toAuthor direction is checked for an existing CO.
    ifExist = graph.match_one(fromAuthor, "CO", toAuthor)
    if(ifExist is None):
        fromAuthor_To_toAuthor = Relationship(fromAuthor, "CO", toAuthor)
        toAuthor_To_fromAuthor = Relationship(toAuthor, "CO", fromAuthor)
        graph.create(fromAuthor_To_toAuthor)
        graph.create(toAuthor_To_fromAuthor)
    else:
        continue
class Diagnoser():
    """Rank candidate diseases from a free-text description and/or listed symptoms.

    Symptoms are recognised by segmenting the description with jieba and
    keeping the words found in the symptom dictionary. Diseases are looked up
    in Neo4j through ``d-s`` (disease -> symptom) relationships.
    """

    def __init__(self, description=None, symptoms=None, age=None, gender=None):
        self.description = description
        self.symptoms = symptoms
        self.age = age
        self.gender = gender
        self.symptomSet = self.getSymptomSet()  # every symptom word in the dictionary
        self.graph = Graph("http://liublack.cn:7474", auth=("neo4j", "200001"))
        # Node properties copied into each disease detail record.
        self.diseaseItemProerties = [
            'name', 'otherName', 'department', 'description', 'position',
            'reason', 'symptom', 'examination', 'treatment', 'complication',
            'prevention', 'care', 'typicalSymptom', 'relatedDisease'
        ]
        # How many top-ranked diseases are used to suggest further symptoms.
        self.diseasesForMoreSymptoms = 3
        jieba.set_dictionary(SYMPTOM_DIC_PATH)

    def diagnose(self, filteNum=None):
        """Run the full diagnosis and return it as a JSON string.

        :param filteNum: keep only the top *filteNum* diseases (all when None).
        :return: JSON with ``disease`` (detail dicts, best first) and
            ``symptoms`` (symptoms of the top diseases not yet reported).
        """
        # Step 1: derive the symptom list from the description.
        symptomList = self.getSymptoms()
        print('识别到的症状:', symptomList)
        # Step 2: collect every disease related to those symptoms.
        diseaseList = list(self._getDiseaseFromSymptoms(symptomList))
        print('可能的疾病:', diseaseList)
        # Step 3: compute a confidence score per disease.
        scores = self._calDiseaseScore(diseaseList, symptomList)
        # Step 4: pair diseases with their scores.
        ranked = list(zip(diseaseList, scores))
        print('疾病的置信度为:', ranked)
        # Step 5: rank, then look up further symptoms of the top diseases.
        ranked.sort(key=lambda x: x[1], reverse=True)
        # The graph helpers match on disease *names*, so the (name, score)
        # pairs are unpacked before being passed on.
        topNames = [name for name, _ in ranked[0:self.diseasesForMoreSymptoms]]
        relatedSymptoms = self._getSymptomsFromDisease(topNames)
        relatedSymptoms = list(relatedSymptoms - set(symptomList))
        # Step 6: assemble the final result.
        if filteNum is not None:
            ranked = ranked[0:filteNum]
        diseaseDetials = self._getDiseaseDetails([name for name, _ in ranked])
        result = {"disease": diseaseDetials, "symptoms": relatedSymptoms}
        result = json.dumps(result, ensure_ascii=False)
        print('最终结果:', result)
        return result

    def getSymptomSet(self):
        """Return the first space-separated field of every dictionary line."""
        # jieba loads this same file as UTF-8, so read it that way explicitly
        # instead of relying on the platform default encoding.
        with open(SYMPTOM_DIC_PATH, 'r', encoding='utf-8') as f:
            return {line.strip().split(' ')[0] for line in f}

    def getSymptoms(self):
        """Return the stripped, de-duplicated symptoms from both inputs.

        Explicitly supplied symptoms come first, followed by those recognised
        in the description. The result is stored back on ``self.symptoms``;
        calling the method again returns the same list.
        """
        merged = []
        for symptom in (self.symptoms or []):
            symptom = symptom.strip()
            if symptom not in merged:
                merged.append(symptom)
        for symptom in self.processDesc():
            if symptom not in merged:
                merged.append(symptom)
        self.symptoms = merged
        return merged

    def processDesc(self):
        """Return the dictionary symptoms found in the description, in order."""
        if self.description is None:
            return []
        # Remove spaces so they do not split words during segmentation.
        self.description = ''.join(self.description.split(' '))
        words = list(jieba.cut(self.description))
        print('分词结果:', words)
        return [word for word in words if word in self.symptomSet]

    def _calDiseaseScore(self, diseaseList, symptomList):
        """Return one score per disease, aligned with *diseaseList*.

        Each matching symptom contributes ``1 / (number of diseases showing
        that symptom)``, so rarer symptoms weigh more.
        """
        # Number of diseases associated with each symptom.
        scoreSymptom = [len(self._getDiseaseFromSymptoms([symptom]))
                        for symptom in symptomList]
        scores = []
        for disease in diseaseList:
            score = 0
            for idx, symptom in enumerate(symptomList):
                if self._existHaveRelationship(disease, symptom):
                    score += 1.0 / scoreSymptom[idx]
            scores.append(score)
        return scores

    def _getSymptomsFromDisease(self, diseaseList):
        """Return the set of symptom names linked from the named diseases."""
        symptomSet = set()
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            if node is None:
                # A None node would act as a wildcard and match every d-s edge.
                continue
            for r in self.graph.match((node, ), r_type='d-s'):
                symptomSet.add(r.end_node['name'])
        return symptomSet

    def _getDiseaseFromSymptoms(self, symptomList):
        """Return the set of disease names linked to the named symptoms."""
        diseaseSet = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            if node is None:
                # Unknown symptom: skip instead of matching every d-s edge.
                continue
            for r in self.graph.match((None, node), r_type='d-s'):
                diseaseSet.add(r.start_node['name'])
        return diseaseSet

    def _existHaveRelationship(self, disease, symptom):
        """Return True when a ``d-s`` edge links *disease* to *symptom*."""
        ndisease = self.graph.nodes.match('disease', name=disease).first()
        nsymptom = self.graph.nodes.match('symptom', name=symptom).first()
        if ndisease is None or nsymptom is None:
            return False
        rel = self.graph.match_one((ndisease, nsymptom), r_type='d-s')
        return rel is not None

    def _getDiseaseDetails(self, diseaseList):
        """Return one property dict per named disease found in the graph."""
        details = []
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            if node is None:
                continue
            details.append({pname: node[pname]
                            for pname in self.diseaseItemProerties})
        return details

    def _getRelatedSymptoms(self, symptomList):
        """Return symptom names reachable through ``s-s`` edges."""
        symptoms = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            if node is None:
                continue
            rels = self.graph.match((node, None), r_type='s-s')
            symptoms.update(rel.end_node['name'] for rel in rels)
        return list(symptoms)
print() # 用find_one()方法进行node查找,返回的是查找node的第一个node data2 = graph.find_one(label='PersonTest', property_key='name', property_value="李四") print("data2 = ", data2, type(data2)) print() # 用find()方法进行node查找 data3 = graph.find(label='PersonTest') for data in data3: print("data3 = ", data) print() ''' 3 —— Relationship查询 ''' relationship = graph.match_one(rel_type='KNOWNS') print(relationship, type(relationship)) print() ''' 4 —— 更新Node的某个属性值,若node没有该属性,则新增该属性 ''' node1 = graph.find_one(label='PersonTest', property_key='name', property_value="张三") node1['age'] = 21 graph.push(node1) data4 = graph.find(label='PersonTest') for data in data4: print("data4 = ", data) print()
class NeoManager:
    """Convenience wrapper for creating and querying nodes and relations in Neo4j."""

    def __init__(self, host, port, username, password):
        self.username = username
        self.host = host
        self.port = port
        self.password = password
        # Populated by connect(); defined here so the attributes always exist.
        self.graph = None
        self.selector = None

    def connect(self):
        """Open the connection and prepare a NodeSelector for label queries."""
        url = "http://" + self.host + ":" + str(self.port)
        # Never echo the password: stdout usually ends up in logs.
        print(url, self.username)
        self.graph = Graph(url, username=self.username, password=self.password)
        if self.graph is not None:
            print("Neo4j Database Connected.")
        self.selector = NodeSelector(self.graph)

    def createNode(self, nodelabel, nodename):
        """Create and return a node labelled *nodelabel* with ``name`` *nodename*."""
        node = Node(str(nodelabel), name=str(nodename))
        self.graph.create(node)
        return node

    def createRelation(self, nodeSrc, nodeDst, relationName):
        """Create ``nodeSrc -[relationName]-> nodeDst``.

        :return: the new relationship, or None when either endpoint is None.
        """
        relationName = str(relationName)
        if nodeSrc is None or nodeDst is None:
            return None
        relationship = Relationship(nodeSrc, relationName, nodeDst)
        print(relationship)
        self.graph.create(relationship)
        return relationship

    def setRelationAttribute(self, relationship, attribute, val):
        """Set *attribute* on *relationship* and return the stored value.

        The change is made on the local object only; nothing is pushed here.
        """
        relationship[attribute] = val
        return relationship[attribute]

    def getRelationAttribute(self, relationship, attribute):
        """Return the value of *attribute* on *relationship*."""
        return relationship[attribute]

    def findByName(self, findName):
        """Look a node up by name among trusted, then untrusted, labels.

        :return: ``(True, node)`` for a ``labelHolder`` hit, otherwise
            ``(False, node_or_None)`` from the ``Creditless`` label.
        """
        findName = str(findName)
        trustable = self.graph.find_one(property_key="name",
                                        property_value=findName,
                                        label='labelHolder')
        if trustable is not None:
            return True, trustable
        untrustable = self.graph.find_one(property_key="name",
                                          property_value=findName,
                                          label='Creditless')
        return False, untrustable

    def findAllByLabel(self, findLabel):
        """Print and return the selection of all nodes labelled *findLabel*."""
        selected = self.selector.select(str(findLabel))
        print(selected)
        # Returned as well so callers can use the result.
        return selected

    def findNodeRelation(self, node):
        """Return one relationship touching *node*, in either direction."""
        return self.graph.match_one(start_node=node, bidirectional=True)

    def hasStartToRelation(self, node, relstr):
        """Return relationships of type *relstr* that start at *node*."""
        return self.graph.match(start_node=node, rel_type=relstr)

    def hasEndWithRelation(self, node, relstr):
        """Return relationships of type *relstr* that end at *node*."""
        return self.graph.match(end_node=node, rel_type=relstr)

    def getRelationBetween(self, nodeA, nodeB):
        """Return relationships between the two nodes in either direction.

        :return: the match result, or None when either node is None.
        """
        if nodeA is None or nodeB is None:
            return None
        return self.graph.match(start_node=nodeA, end_node=nodeB,
                                bidirectional=True)


# Example usage (kept for reference):
# neo = NeoManager('localhost', 7474, 'neo4j', '123')
# neo.connect()
# with open('../Datasets/TrainSetUnique.csv', 'r', encoding = 'utf-8') as input:
#     reader = csv.reader(input)
#     # row: [0] entity1 [1] entity2 [2] relation [3] example
#     for row in reader:
#         node0 = neo.findByName(row[0])
#         node1 = neo.findByName(row[1])
#         print(node0)
#         if node0 == None:
#             node0 = neo.createNode("labelHolder", row[0])
#         if node1 == None:
#             node1 = neo.createNode("labelHolder", row[1])
#         relation = neo.getRelationBetween(node0, node1)
#         print(relation)
#         if relation == None or relation != row[2]:
#             print(row[2])
#             relation = neo.createRelation(node0, node1, row[2])
class DBO(object):
    """Data-access object for the Project/Department/Application/Host/HostVul graph."""

    # Initialise and connect to the backing database.
    def __init__(self):
        self.graph = Graph(user='******', password='******')

    def list_organization_structure(self, Application=None, HostIP=None):
        """Return Project/Department/Application rows, optionally filtered.

        :param Application: keep only rows for this application name.
        :param HostIP: keep only rows whose host has this IP.
        """
        conditions = []
        params = {}
        if Application:
            # Nodes are written with a lower-case ``name`` key (see add_app);
            # the previous ``a.Name`` filter never matched.
            conditions.append('a.name={app}')
            params['app'] = Application
        if HostIP:
            conditions.append('n.IP={host_ip}')
            params['host_ip'] = HostIP
        where_clause = ('where ' + ' and '.join(conditions)) if conditions else ''
        cypher = ('MATCH (p:Project)-[]-(d:Department)-[]-(a:Application)-[]-(n:Host) '
                  '%s RETURN p.name as Project,d.name as Department,'
                  'a.name as Application' % where_clause)
        return self.graph.data(cypher, params)

    def enum_vul(self, TaskID, Cypher_Conditions=None):
        """Yield the ``HostVul`` nodes of a scan task.

        :param TaskID: task identifier stored on the nodes.
        :param Cypher_Conditions: optional raw Cypher appended to the WHERE
            clause (for example ``and n.Level="high"``).
        """
        if Cypher_Conditions:
            # selector.select().where() does not cope with non-ASCII text, so
            # plain Cypher is used here.
            # SECURITY: Cypher_Conditions is inserted verbatim and must never
            # come from untrusted input.
            cypher = 'MATCH (n:HostVul) where n.TaskID={task_id} %s RETURN n ' % Cypher_Conditions
            for data in self.graph.data(cypher, {'task_id': TaskID}):
                yield data["n"]
        else:
            selector = NodeSelector(self.graph)
            for data in selector.select("HostVul", TaskID=TaskID):
                yield data

    def add_vul(self, Vul_Data):
        """Attach a new ``HostVul`` node to its host unless it already exists."""
        if not self.HostVul_exists(Vul_Data):
            # NOTE(review): find_one may return None when the host has not
            # been added yet - confirm callers always add the host first.
            Host = self.graph.find_one("Host", "IP", Vul_Data[u"IP"])
            vul = Node("HostVul")
            vul.update(Vul_Data)
            self.graph.create(Relationship(Host, "have", vul))

    def HostVul_exists(self, Vul_Data):
        """Return a non-empty result when an identical vulnerability is stored.

        Identity is the combination of TaskID, Scanner, IP, Port and ID.
        """
        # A NodeSelector-based lookup was too slow, so a plain parameterised
        # Cypher query is used instead.
        cypher = ("Match (n:HostVul) where n.TaskID={task_id} and n.Scanner={scanner} "
                  "and n.IP={ip} and n.Port={port} and n.ID={vul_id} "
                  "return n.IP limit 1 ")
        params = {
            'task_id': Vul_Data[u"TaskID"],
            'scanner': Vul_Data[u"Scanner"],
            'ip': Vul_Data[u"IP"],
            'port': Vul_Data[u"Port"],
            'vul_id': Vul_Data[u"ID"],
        }
        return self.graph.data(cypher, params)

    def add_host(self, Application, host):
        """Ensure the host exists and is owned by *Application*."""
        self.node_simple_add("Host", "IP", host)
        host = self.graph.find_one("Host", "IP", host)
        app = self.graph.find_one("Application", "name", Application)
        self.rel_simple_add(app, "own", host)

    def add_department(self, Project, Department):
        """Ensure project and department exist and are linked by ``own``."""
        self.node_simple_add("Project", "name", Project)
        self.node_simple_add("Department", "name", Department)
        pro = self.graph.find_one("Project", property_key="name", property_value=Project)
        dep = self.graph.find_one("Department", property_key="name", property_value=Department)
        self.rel_simple_add(pro, "own", dep)

    def add_app(self, Project, Department, Application):
        """Ensure the Project -> Department -> Application chain exists."""
        self.node_simple_add("Project", "name", Project)
        self.node_simple_add("Department", "name", Department)
        self.node_simple_add("Application", "name", Application)
        pro = self.graph.find_one("Project", property_key="name", property_value=Project)
        dep = self.graph.find_one("Department", property_key="name", property_value=Department)
        app = self.graph.find_one("Application", property_key="name", property_value=Application)
        self.rel_simple_add(pro, "own", dep)
        self.rel_simple_add(dep, "own", app)

    ### meta operations

    def node_exists(self, label, Key, Value):
        """Return 2 (and report it) when a matching node exists, else 0."""
        Find = self.graph.find_one(label, property_key=Key, property_value=Value)
        if Find:
            print("Node already exists: [%s: %s]" % (label, Find[Key]))
            return 2
        return 0

    def node_simple_add(self, label, Key, Value):
        """Create the node unless it exists; return 1 if created, 2 if present."""
        if self.node_exists(label, Key, Value):
            return 2
        n = Node(label)
        n.update({Key: Value})
        self.graph.create(n)
        return 1

    def rel_exists(self, start_node, rel, end_node):
        """Return 2 (and report it) when the relationship exists, else 0."""
        Find = self.graph.match_one(start_node=start_node, rel_type=rel, end_node=end_node)
        if isinstance(Find, Relationship):
            print("Relationship already exists")
            return 2
        return 0

    def rel_simple_add(self, start_node, rel_type, end_node):
        """Create the relationship unless it exists; return 1 if created, 2 if present."""
        if self.rel_exists(start_node, rel_type, end_node):
            return 2
        self.graph.create(Relationship(start_node, rel_type, end_node))
        return 1
# Build two CALL relationships (one per direction) with an initial count of 1
# and persist them.
node_1_call_node_2 = Relationship(test_node_1, 'CALL', test_node_2)
node_1_call_node_2['count'] = 1
node_2_call_node_1 = Relationship(test_node_2, 'CALL', test_node_1)
node_2_call_node_1['count'] = 1
graph.create(node_1_call_node_2)
graph.create(node_2_call_node_1)

# Create two standalone 'human' nodes.
node_1 = Node('human', type="Asian", name="zhangsan")
node_2 = Node('human', type="American", name="Merry")
graph.create(node_1)
graph.create(node_2)

# Look up nodes by label and property.
find_code_one = graph.find_one(label="human", property_key="type", property_value="Asian")
# NOTE(review): these lookups assume test_node_1 / test_node_2 carry the
# 'human' label; their creation is not visible here - confirm.
start_one = graph.find_one(label='human', property_key='name', property_value='test_node_1')
end_one = graph.find_one(label='human', property_key='name', property_value='test_node_2')
# Find the relationship going from start_one to end_one (one direction only).
find_relationship = graph.match_one(start_node=start_one, end_node=end_one, bidirectional=False)
print(find_code_one['type'], find_code_one['name'])
print(find_relationship)

# Load the names of all 'human' nodes into a DataFrame.
data = pd.DataFrame(graph.data('match(a:human) return a.name'))
print(data)
print(node)
print("------------------------")
# Select Person nodes by name through a NodeSelector.
so=NodeSelector(graph)
find_man=so.select("Person",name="李泽楷")
print(list(find_man))
# Update a node property and push it to the server.
node1 = graph.find_one(label='Person', property_key='name', property_value="李泽钜")
node1['age'] = 54
graph.push(node1)
# Delete a single relationship with run().
graph.run('MATCH (a)-[r:兄弟]-(b) where a.name="李泽钜" and b.name="李泽楷" delete r;')
# "Modify" a relationship: merge a directed 弟弟 relationship between the pair.
graph.run('MATCH (a)-[:兄弟]-(b) where a.name="李泽钜" and b.name="李泽楷" merge(a)-[:弟弟]->(b);')
# Delete a node together with its relationships with run().
graph.run('match(nonde2:Person{name:"李长治"}) detach delete nonde2')
# List every Person node.
# NOTE(review): `sum` shadows the builtin for the rest of the module.
print("------------------------------")
sum=graph.find(label="Person")
for i in sum:
    print(i)
# Look up the relationship between two nodes.
print("----------------------------------")
fr = graph.match_one(start_node=node,end_node=node1)
print(fr)
class GraphAccessor():
    """Helper for looking up, adding and deleting nodes and relations in Neo4j.

    ``nodeMap`` and ``findNode`` read ``self.EntityType`` (node label) and
    ``self.MatchKey`` (property used to match nodes). Neither is defined here.
    NOTE(review): presumably subclasses provide them - confirm. The
    commented-out defaults were:
    """
    ## tool type
    #EntityType = 'Person'
    ## key name used when matching nodes
    #MatchKey = 'name'

    def __init__(self):
        self.graph = Graph(host='localhost', port=7687, password='')
        self.nodeMatcher = NodeMatcher(self.graph)

    def _match_property(self, entity_type, key, value):
        """Return the NodeMatch of *entity_type* nodes whose *key* equals *value*.

        The value is passed as an exact-match property instead of being
        formatted into the WHERE text, so quotes in it cannot break the query.
        """
        return self.graph.nodes.match(entity_type).where(**{key: value})

    def matchNode(self, entity_type, key, value):
        """Return the nodes matching entity_type, key and value."""
        return self._match_property(entity_type, key, value)

    def nodeMap(self, node):
        """Map an input entity to its graph node, adding it when absent."""
        key = self.MatchKey
        value = node[key]
        # matchNode needs the label as its first argument; the previous
        # two-argument call raised TypeError.
        node_list = self.matchNode(self.EntityType, key, value)
        if len(node_list) > 0:
            # The source entity already has a counterpart in Neo4j.
            return node_list.first()
        # No counterpart yet: add the entity as a new node.
        return self.addNode(node)

    def findNode(self, uuid):
        """Return the single node whose ``uuid`` property equals *uuid*.

        :return: the node, or None when there is no match or more than one.
        """
        matches = self._match_property(self.EntityType, 'uuid', uuid)
        count = len(matches)
        if count == 0:
            return None
        if count != 1:
            # Was ``print sys.stderr << ...``, which raises TypeError.
            sys.stderr.write('multiple node has same uuid.\n')
            return None
        return matches.first()

    def findNodes(self, entity_type, key, value):
        """Return all nodes matching entity_type, key and value."""
        return self._match_property(entity_type, key, value)

    def findNodeOne(self, entity_type, key, value):
        """Return the matching node (or None); at most one match is allowed."""
        this_node = self._match_property(entity_type, key, value)
        assert len(this_node) <= 1
        return this_node.first()

    def addNode(self, node):
        """Add *node* to the graph and return it."""
        self.graph.create(node)
        # The old lookup used py2neo's in-memory __uuid__, which is not the
        # stored ``uuid`` property; the node passed to create() is returned
        # directly instead.
        return node

    def findRelation(self, start_node, end_node):
        """Return one relationship from start to end of any type, or None."""
        return self.graph.match_one(nodes=[start_node, end_node])

    def findRelationOne(self, start_node, end_node, r_type):
        """Return one relationship of type *r_type* from start to end, or None."""
        return self.graph.match_one(nodes=[start_node, end_node], r_type=r_type)

    def deleteNodes(self, nodes):
        """Delete every node in *nodes* (an iterable or a single entity)."""
        try:
            for n in nodes:
                self.graph.delete(n)
        except TypeError:
            # Not iterable: try deleting it as a single entity.
            try:
                self.graph.delete(nodes)
            except TypeError:
                print("deleteNodes nodes type error.")

    def displyNodes(self, nodes):
        """Print each node as JSON, keeping non-ASCII characters readable."""
        for n in nodes:
            print(json.dumps(n, ensure_ascii=False))
# Import task -> indicator relationships from the type_indicator table.
print("indicator relationships...")
sql = "select * from type_indicator"
cursor.execute(sql)
indicators = cursor.fetchall()
for indicator in indicators:
    # Columns are read by position: [2] is matched against task.id and [3]
    # against indicator.type; [0], [1] and [4] become relationship properties.
    start_node = graph.find_one("task", property_key="id", property_value=indicator[2])
    end_node = graph.find_one("indicator", property_key="type", property_value=indicator[3])
    match = graph.match_one(start_node=start_node, rel_type="HASINDICATOR", end_node=end_node)
    # Create the relationship only when both endpoints exist and it is new.
    if start_node and end_node and not match:
        gret = graph.create(
            Relationship(start_node, "HASINDICATOR", end_node,
                         id=indicator[0], id_user=indicator[1], event_stamp=indicator[4]))
        #print(str.encode(str(gret), 'utf-8'))

# import external datasets
print("external datasets...")
sql = "select * from external_datasets"
cursor.execute(sql)