class TwitterGraph(object):
    """Interface to a Neo4j database holding a Twitter follower network.

    Nodes are labelled ``User`` and keyed by a unique ``id`` property.
    FOLLOWS relationships carry a ``rec`` property preserving the order
    in which Twitter returned the ids (1 = most recent).
    """

    # -- Initial setup and linking into the database --------------------------

    def __init__(self, host_port, user, password):
        """Authenticate and connect to the Neo4j database at ``host_port``."""
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except Exception:  # ConstraintViolationException: constraint exists
            print('Unique id on Node User already exists')

    # -- Functions to add data to the database --------------------------------

    def add_following(self, user_id, following_ids, rec_count):
        """Add FOLLOWS relationships from ``user_id`` to each followed id.

        Merges User nodes as needed (creating them if absent), numbers each
        relationship with ``rec`` starting at ``rec_count + 1`` to preserve
        ordering, and flags the user with ``following_added`` when done.
        """
        user = Node('User', id=user_id)
        self.graph.merge(user)  # important to merge before doing anything
        rec = 1 + rec_count  # preserve order of the following; 1 = most recent
        for fid in following_ids:
            followed = Node('User', id=fid)
            self.graph.merge(followed)
            self.graph.merge(Relationship(user, 'FOLLOWS', followed, rec=rec))
            rec += 1
        user['following_added'] = True
        self.graph.push(user)

    def add_followers(self, user_id, follower_ids, rec_count):
        """Add FOLLOWS relationships from each follower id to ``user_id``.

        Mirror image of :meth:`add_following`; flags the user with
        ``followers_added`` when done.
        """
        user = Node('User', id=user_id)
        self.graph.merge(user)
        rec = 1 + rec_count
        for fid in follower_ids:
            follower = Node('User', id=fid)
            self.graph.merge(follower)
            self.graph.merge(Relationship(follower, 'FOLLOWS', user, rec=rec))
            rec += 1
        user['followers_added'] = True
        self.graph.push(user)

    def add_user_properties(self, user):
        """Copy the whitelisted properties of a Twitter user object onto the
        matching User node.

        ``user`` is either an API user object (attributes read via
        ``user.__dict__``) or, for invalid ids, a dict with ``user_id`` and
        ``error`` keys; invalid users are stored with screen_name 'INVALID'.
        """
        try:
            user_id = user.id
            existing_user = Node('User', id=user_id)
            clean_prop_dict = self.__clean_user_dict(user.__dict__)
            self.graph.merge(existing_user)
            for k, v in clean_prop_dict.items():
                existing_user[k] = v
            # add additional label to verified accounts
            if clean_prop_dict['verified']:
                print(True)
                existing_user.add_label('Verified')
        except AttributeError:  # bad user id: a dict, not an API user object
            user_id = user['user_id']
            error = user['error']
            existing_user = Node('User', id=user_id)
            self.graph.merge(existing_user)
            existing_user['screen_name'] = 'INVALID'
            existing_user['error'] = error
            print('Found invalid user id')
        self.graph.push(existing_user)

    def __clean_user_dict(self, user_prop_dict):
        """Return a sanitized property dict suitable for Neo4j insertion.

        Keeps a whitelist of keys, rewrites the profile image URL to the
        full-size image (drops Twitter's ``_normal`` suffix), and converts
        the ``created_at`` datetime into an ordinal plus a string (Neo4j
        properties cannot store datetime objects).
        """
        keep = ['contributors_enabled', 'created_at', 'default_profile',
                'default_profile_image', 'description', 'favourites_count',
                'followers_count', 'friends_count', 'geo_enabled', 'id',
                'id_str', 'is_translator', 'lang', 'listed_count', 'location',
                'name', 'profile_image_url_https', 'protected', 'screen_name',
                'statuses_count', 'time_zone', 'utc_offset', 'verified',
                'withheld_in_countries', 'withheld_scope']
        # only keep the above keys for inserting
        clean = {k: v for k, v in user_prop_dict.items() if k in keep}
        image, ext = os.path.splitext(clean['profile_image_url_https'])
        # BUG FIX: rstrip('_normal') strips any trailing run of those
        # characters (mangling e.g. 'avatar' -> 'avat'); remove the exact
        # '_normal' suffix instead.
        if image.endswith('_normal'):
            image = image[:-len('_normal')]
        clean['profile_image_url_https'] = image + ext
        # convert date time to ordinal + string
        clean['created_at_ord'] = clean['created_at'].toordinal()
        clean['created_at'] = clean['created_at'].strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # -- Functions to query database ------------------------------------------

    def get_nodes_missing_props(self, limit=100):
        """Return up to ``limit`` ids of User nodes without user properties."""
        selector = NodeSelector(self.graph)
        selected = (selector.select('User')
                    .where("_.screen_name IS NULL").limit(limit))
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        """Ids of BernieSanders followers still lacking user properties.

        NOTE: the Cypher LIMIT is hard-coded to 100; ``limit`` is unused.
        """
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.screen_name)
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        """Return ids missing the follower or following relationships.

        Valid inputs for ``rel`` are 'FOLLOWING' or 'FOLLOWERS'.

        Raises:
            ValueError: for any other ``rel`` value.
        """
        selector = NodeSelector(self.graph)
        if rel == 'FOLLOWING':
            selected = (selector.select('User')
                        .where("_.following_added IS NULL").limit(limit))
        elif rel == 'FOLLOWERS':
            selected = (selector.select('User')
                        .where("_.followers_added IS NULL").limit(limit))
        else:
            raise ValueError("rel must be 'FOLLOWING' or 'FOLLOWERS'")
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        """Popular (>=1000 followers) Bernie followers missing 'following'.

        NOTE: ``rel`` is currently unused; the query is fixed.
        """
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User)
                     WHERE n.followers_count >= 1000
                     AND NOT EXISTS(n.following_added)
                     AND m.screen_name = 'BernieSanders'
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        """Users Bernie follows that are missing 'following' relationships.

        NOTE: ``rel`` is currently unused; the query is fixed.
        """
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.following_added)
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        """Same as :meth:`get_nodes_missing_rels_bfriends`, but fetches 500
        ids and returns only the last 100 (a paging step)."""
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.following_added)
                     RETURN n.id LIMIT 500;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()[-100:]]
def neo_data_update_trigger(type_: str, data: dict):
    """Triggered alongside a relational-DB update to keep the source Neo4j
    graph in sync.

    Args:
        type_: node label to match.
        data: property dict for the node; must contain "s_name", which is
            used as the lookup key. (Original author's note: this data may
            still need a round of preprocessing.)

    Returns:
        True on success, or an error string when the match was not unique.

    Raises:
        Neo4jUpdateFailedException: wraps any error raised while talking
            to Neo4j.
    """
    try:
        # cypher = "MATCH a=({s_name:'{}'}) return a".format(data['s_name'])
        graph = Graph(f"bolt://{NEO_HOST}:7687", password="******")
        node_match = NodeMatcher(graph)
        node_iter = node_match.match(type_, s_name=data["s_name"])
        node = list(node_iter)
        if len(node) == 1:
            node = node[0]
            node.update(**data)
            graph.push(node)
            return True
        else:
            # NOTE(review): this branch is also reached when *zero* nodes
            # matched, although the message only mentions "many" — confirm.
            return "failed, many data were queried"
    except Exception as _e:
        raise Neo4jUpdateFailedException(_e)
def dump_symptoms():
    """Crawl symptom pages and persist them to both Elasticsearch and Neo4j.

    For every (name, describe, source_url) produced by CrawlerSymptoms the
    full record is saved to ES; a Symptom node (name + ES document id) is
    pushed to Neo4j only when no node with that name exists yet.
    """
    import os
    import sys
    # make the project root importable when run as a script
    sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../../")
    from med_base.storage.neo4j.models import Symptom
    from med_base.storage.es.models import EntitySymptom
    from elasticsearch_dsl.connections import connections
    from py2neo import Graph
    from conf.settings import NEO4J_URI, ES_HOST
    graph = Graph(NEO4J_URI)
    connections.create_connection(hosts=[ES_HOST])
    cp = CrawlerSymptoms()
    for name, describe, source_url in cp.process():
        logging.debug("name={}, describe={}".format(name, describe))
        # every crawled record goes to Elasticsearch
        entity_symptom = EntitySymptom(name=name, describe=describe,
                                       source_url=source_url)
        entity_symptom.save()
        # only create the Neo4j node when one with this name doesn't exist yet
        symptom_node = Symptom.match(graph).where(name=name).first()
        if symptom_node:
            pass
        else:
            symptom = Symptom()
            symptom.name = name
            # link the graph node to the ES document via its id
            symptom.id = str(entity_symptom._id)
            graph.push(symptom)
class NeoPipeline(object):
    """Scrapy pipeline that mirrors crawled articles into a Neo4j graph."""

    def __init__(self, neo_uri, neo_username, neo_password):
        self.neo_uri = neo_uri
        self.neo_username = neo_username
        self.neo_password = neo_password

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        BUG FIX: the password was read from the misspelled key
        'NeO_PASSWORD'; the settings key is NEO_PASSWORD, matching
        NEO_URI / NEO_USERNAME.
        """
        return cls(neo_uri=crawler.settings.get('NEO_URI'),
                   neo_username=crawler.settings.get('NEO_USERNAME'),
                   neo_password=crawler.settings.get('NEO_PASSWORD'))

    def open_spider(self, spider):
        """Connect to Neo4j once when the spider starts."""
        self.graph = Graph(self.neo_uri, username=self.neo_username,
                           password=self.neo_password)

    def process_item(self, item, spider):
        """Copy the item's fields onto an Article node and push it.

        Raises KeyError if the item is missing any expected field (same
        behavior as the original attribute-by-attribute copy).
        """
        fields = ('articleId', 'title', 'summary', 'author', 'tag', 'url',
                  'date', 'star', 'score', 'views', 'comments', 'source')
        data = dict(item)
        art = Article()
        for field in fields:
            setattr(art, field, data[field])
        self.graph.push(art)
        return item
def update(cls, experiment_id: str, graph: Graph, neighbours: List[Neighbour] = None, leaf: NearestLeaf = None) -> 'SampleNode':
    """Replace the stored neighbour / lineage links of an existing sample node.

    Args:
        experiment_id: key of the sample node to update.
        graph: py2neo Graph to read from and push to.
        neighbours: when given, the neighbour set is cleared and rebuilt;
            neighbours whose nodes don't exist in the graph are silently
            skipped.
        leaf: when given, the lineage is cleared and re-pointed at this leaf.

    Returns:
        The updated node (already pushed to the graph).

    Raises:
        NotFound: when ``leaf`` is given but its LeafNode doesn't exist.
            NOTE(review): the neighbours branch silently skips missing nodes
            while the leaf branch raises — confirm this asymmetry is intended.
    """
    node = cls.get(experiment_id, graph)
    if neighbours is not None:
        # rebuild the neighbour set from scratch
        node.neighbours.clear()
        for neighbour in neighbours:
            n = SampleNode()
            n.experiment_id = neighbour.experiment_id
            if n.exists(graph):
                node.neighbours.add(n, distance=neighbour.distance)
    if leaf is not None:
        # rebuild the lineage link from scratch
        node.lineage.clear()
        n = LeafNode()
        n.leaf_id = leaf.leaf_id
        if n.exists(graph):
            node.lineage.add(n, distance=leaf.distance)
        else:
            raise NotFound
    graph.push(node)
    return node
def store_in_neo4j(triple):
    """Store extracted entities and their triples in a remote Neo4j graph.

    Loads the entity list from entity.pkl, creates an Entity node per name,
    then walks ``triple`` ({entity: [(relation_or_attr, value), ...]})
    adding a relationship when the value is itself an entity, and a plain
    node property otherwise.
    """
    from py2neo import Graph, Node, Relationship, NodeSelector
    graph = Graph('http://52.83.213.55:7474', user='******', password='******')
    # graph = Graph('http://localhost:7474',user = '******',password='******')
    select = NodeSelector(graph)
    # load the pickled entity list
    with open('entity.pkl', 'rb') as f:
        entities = pickle.load(f)
    entities = list(flatten(entities))
    # add every entity as a node
    for en in entities:
        node = Node('Entity', name=en)
        graph.create(node)
    # walk the triples, adding node properties and inter-node relationships
    for en, kw in triple.items():
        node_1 = select.select('Entity').where(name=en).first()
        for item in kw:
            if item[1] in triple.keys():
                # the value is itself an entity -> create a relationship
                node_2 = select.select('Entity').where(name=item[1]).first()
                relate = Relationship(node_1, item[0], node_2)
                graph.create(relate)
            else:
                # otherwise treat (key, value) as a plain node property
                node_1[item[0]] = item[1]
                graph.push(node_1)
    print('数据存储完毕')
def dump_parts():
    """Crawl body parts and departments and push them into Neo4j.

    Each (parent, child) pair from the crawler becomes two nodes linked by
    a ``partof`` relation; pushing the child persists the link.
    """
    import os
    import sys
    # make the project root importable when run as a script
    sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../../")
    from med_base.storage.neo4j.models import Bodypart, Department
    from conf.settings import NEO4J_URI
    from py2neo import Graph
    graph = Graph(NEO4J_URI)
    cp = CrawlerPart()
    for dt_name, dt_url, dd_name, dd_url in cp.crawler_bodyparts():
        print(dt_name, dt_url, dd_name, dd_url)
        body_dt = Bodypart()
        body_dt.name = dt_name
        body_dt.id = to_uuid(dt_url)  # ids are derived from the source URL
        body_dd = Bodypart()
        body_dd.name = dd_name
        body_dd.id = to_uuid(dd_url)
        body_dd.partof.add(body_dt)
        graph.push(body_dd)
    for dt_name, dt_url, dd_name, dd_url in cp.crawler_departments():
        print(dt_name, dt_url, dd_name, dd_url)
        depart_dt = Department()
        depart_dt.name = dt_name
        depart_dt.id = to_uuid(dt_url)
        depart_dd = Department()
        depart_dd.name = dd_name
        depart_dd.id = to_uuid(dd_url)
        depart_dd.partof.add(depart_dt)
        graph.push(depart_dd)
def insertDiseaseAndDrug(disease_name, drug_name):
    """Ensure a Disease and a Drug node exist and record one CALL between them.

    A new CALL relationship starts with count=1; an existing one has its
    count incremented and pushed back.  (Note: label is stored as a node
    *property* here, matching the queries used.)
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")

    # look up (or create) the disease node
    disease = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *",
                        x=disease_name, y="Disease").evaluate()
    if disease is None:
        disease = Node(label="Disease", name=disease_name)
        graph.create(disease)

    # look up (or create) the drug node
    drug = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *",
                     x=drug_name, y="Drug").evaluate()
    if drug is None:
        drug = Node(label="Drug", name=drug_name)
        graph.create(drug)

    # create the CALL edge on first sight, otherwise bump its counter
    link = graph.match_one(start_node=disease, end_node=drug)
    if link is None:
        link = Relationship(disease, "CALL", drug)
        link['count'] = 1
        graph.create(link)
    else:
        link['count'] += 1
        graph.push(link)
def insertSympAndDisease(symptom_name, disease_name):
    """Ensure Symptom and Disease nodes exist and record one CALL edge.

    The CALL relationship starts at count=1 and is incremented on every
    subsequent co-occurrence.  (Note: label is stored as a node *property*
    here, matching the queries used.)
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")

    # look up (or create) the symptom node
    symptom = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *",
                        x=symptom_name, y="Symptom").evaluate()
    if symptom is None:
        symptom = Node(label="Symptom", name=symptom_name)
        graph.create(symptom)

    # look up (or create) the disease node
    disease = graph.run("MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *",
                        x=disease_name, y="Disease").evaluate()
    if disease is None:
        disease = Node(label="Disease", name=disease_name)
        graph.create(disease)

    # create the CALL edge on first sight, otherwise bump its counter
    link = graph.match_one(start_node=symptom, end_node=disease)
    if link is None:
        link = Relationship(symptom, "CALL", disease)
        link['count'] = 1
        graph.create(link)
    else:
        link['count'] += 1
        graph.push(link)
def save_node(self, label, properties_dict, unique=True):
    '''
    Upsert a neo4j node with a label and properties, keyed by the
    'name' property.  unique=True updates an existing node in place or
    inserts a new one; unique=False is currently rejected outright.
    '''
    if unique == True:
        length, lst = self.exists_node(label, properties_dict['name'])
        if length > 0:
            # exists -> update: merge the node then overwrite its properties
            g = Graph(password=self.password)
            b = lst[0]
            g.merge(b)
            for k, v in properties_dict.items():
                b[k] = v
            # b['age'] = properties_dict['age']
            # b['x'] = 8
            g.push(b)
        else:
            # does not exist -> insert a new node in a transaction
            g = Graph(password=self.password)
            tx = g.begin()
            a = Node(label, **properties_dict)
            tx.create(a)
            tx.commit()
    else:
        # NOTE(review): the original comment said "allow new duplicate
        # nodes" but the code rejects them — confirm which is intended
        raise Exception("do not allow duplicate named nodes")
def write_symptom_info(symptom_info):
    """Write one crawled symptom record into the local Neo4j graph.

    Creates/updates a `symptom` node keyed by name, copies the optional text
    sections onto it, and links it to related diseases and symptoms.
    Returns True on success; on any error the symptom name and exception are
    appended to error.txt and False is returned.
    """
    graph = Graph("bolt://localhost:7687", username="******", password='******')
    node_matcher = NodeMatcher(graph)
    try:
        # symptom name ('症状')
        if symptom_info.get('症状') is not None:
            # NOTE(review): interpolating the name into the WHERE clause
            # breaks on names containing quotes (and is injectable) —
            # confirm inputs are sanitized upstream
            symptom = node_matcher.match("symptom").where(
                f"_.name = '{symptom_info['症状']}'").first()
            # node not created yet
            if symptom is None:
                symptom = Node('symptom', name=symptom_info['症状'])
                graph.create(symptom)
            # overview ('概述')
            if symptom_info.get('概述') is not None:
                print(symptom_info['概述'])
                symptom['brief'] = symptom_info['概述']
                graph.push(symptom)
            # cause ('病因')
            if symptom_info.get('病因') is not None:
                symptom['cause'] = symptom_info['病因']
                graph.push(symptom)
            # examination ('检查')
            if symptom_info.get('检查') is not None:
                symptom['check'] = symptom_info['检查']
                graph.push(symptom)
            # diagnosis ('诊断')
            if symptom_info.get('诊断') is not None:
                symptom['diagnose'] = symptom_info['诊断']
                graph.push(symptom)
            # prevention ('预防')
            if symptom_info.get('预防') is not None:
                symptom['prevent'] = symptom_info['预防']
                graph.push(symptom)
            # possible diseases ('可能患有的疾病')
            if symptom_info.get('可能患有的疾病') is not None:
                for jib in symptom_info['可能患有的疾病']:
                    disease = node_matcher.match('disease').where(
                        f"_.name = '{jib}'").first()
                    if disease is None:
                        disease = Node('disease', name=jib)
                    symptom_disease = Relationship(symptom, 'symptom_disease',
                                                   disease)
                    graph.create(symptom_disease)
            # common co-occurring symptoms ('常见症状')
            if symptom_info.get('常见症状') is not None:
                for zz in symptom_info['常见症状']:
                    r_symptom = node_matcher.match('symptom').where(
                        f"_.name = '{zz}'").first()
                    if r_symptom is None:
                        r_symptom = Node('symptom', name=zz)
                    symptom_r_symptom = Relationship(symptom, 'r_symptom',
                                                     r_symptom)
                    graph.create(symptom_r_symptom)
        return True
    except Exception as e:
        # log the failure so the batch can continue with the next record
        with open('error.txt', 'a') as f:
            f.write(f"write_symptom_info:{symptom_info['症状']}\n{e}\n")
        return False
    return True  # unreachable; kept from the original
def lambda_handler(event, context):
    """AWS Lambda entry point: upsert a User node with the event's datas.

    The node is merged on its ``id`` then every key/value pair from
    ``event['datas']`` is written onto it and pushed.
    """
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    user = Node("User", id=event['id'])
    graph.merge(user)
    for prop_name, prop_value in event['datas'].items():
        user[prop_name] = prop_value
    graph.push(user)
def post(self, request):
    """Create a Disease node from the request payload and return it as JSON.

    Expects 'name' and 'mesh_code' in the request data; a fresh uuid is
    generated for the node.
    """
    payload = request.data
    graph = Graph("bolt://localhost:7687", auth=("neo4j", "ElectricWizard113"))
    disease = Disease(
        name=payload['name'],
        mesh_code=payload['mesh_code'],
        uuid=str(uuid.uuid4()),
    )
    graph.push(disease)
    return HttpResponse(json.dumps(disease.to_dict()),
                        content_type='application/json')
def create(cls, model: Model, graph: Graph) -> 'BaseGraphObject':
    """Insert a new graph node keyed by the model's primary-key value.

    Raises:
        AlreadyExisted: when a node with that key is already present.
    """
    key_name = cls.__primarykey__
    node = cls()
    setattr(node, key_name, getattr(model, key_name))
    if node.exists(graph):
        raise AlreadyExisted
    graph.push(node)
    return node
def CreateChemicalEntityAndRelationship(file='data/CTD_chemicals.csv'):
    """Load CTD chemicals from CSV into Neo4j as Chemical nodes plus
    'father' relationships to their parents.

    Existing nodes (matched by id) are updated in place; unseen parents are
    created as stub nodes holding only an id.  Progress is printed every
    100 source rows.
    """
    nodecount = 0
    head = [
        "name", "id", "CasRN", "Definition", "ParentIDs", "TreeNumbers",
        "ParentTreeNumbers", "Synonyms", "DrugBankIDs"
    ]
    type = "Chemical"  # NOTE(review): shadows the builtin `type` in this scope
    Cgraph = Graph("http://localhost:7474")
    with open(file, mode='r', encoding='utf-8') as fr:
        while True:
            line = fr.readline()
            if not line:
                break
            if not line.startswith("#"):
                # naive CSV split: fields containing quoted commas would be
                # mangled — assumes the dump has none (TODO confirm)
                line_list = line.strip('\n').split(',')
                line_dict = dict(zip(head, line_list))
                Parentslist = line_dict["ParentIDs"].strip().split('|')
                line_dict["ParentIDs"] = Parentslist
                # create node (update in place when the id already exists)
                exitNodelist = list(
                    Cgraph.nodes.match(type, id=line_dict['id']))
                if len(exitNodelist) >= 1:
                    exitNode = exitNodelist[0]
                    for ele in line_dict:
                        exitNode[ele] = line_dict[ele]
                    Cgraph.push(exitNode)
                    currentNode = exitNode
                else:
                    NewNode = Node(type, **line_dict)
                    Cgraph.create(NewNode)
                    currentNode = NewNode
                # create a 'father' relationship to each parent
                # (a stub node is created for parents not yet imported)
                if line_dict["ParentIDs"]:
                    for ParentEle in line_dict["ParentIDs"]:
                        ParentExitNode = Cgraph.nodes.match(
                            type, id=ParentEle).first()
                        if ParentExitNode:
                            childrelation = Relationship(
                                currentNode, 'father', ParentExitNode)
                        else:
                            NewNodeEnd = Node(type, id=ParentEle)
                            childrelation = Relationship(
                                currentNode, 'father', NewNodeEnd)
                        Cgraph.create(childrelation)
                nodecount += 1
                if nodecount % 100 == 0:
                    print(nodecount)
def create(cls, sample: Sample, graph: Graph) -> 'SampleNode':
    """Create a sample node and wire up its leaf / neighbour links.

    Links are only added for leaf / neighbour nodes that already exist in
    the graph; missing ones are silently skipped.
    """
    node = super().create(sample, graph)
    leaf = sample.nearest_leaf_node
    if leaf:
        leaf_node = LeafNode()
        leaf_node.leaf_id = leaf.leaf_id
        if leaf_node.exists(graph):
            node.lineage.add(leaf_node, distance=leaf.distance)
    if sample.nearest_neighbours:
        for neighbour in sample.nearest_neighbours:
            neighbour_node = cls()
            neighbour_node.experiment_id = neighbour.experiment_id
            if neighbour_node.exists(graph):
                node.neighbours.add(neighbour_node, distance=neighbour.distance)
    graph.push(node)
    return node
def get(self, params):
    """Smoke-test endpoint: creates a throwaway Diagnosis linked to a known
    Disease, pushes it, then dumps every Diagnosis node to stdout.

    NOTE(review): hard-coded name/mesh_code and the prints suggest this is
    debug scaffolding rather than production code.
    """
    print('connecting')
    graph = Graph("bolt://localhost:7687", auth=("neo4j", "ElectricWizard113"))
    print('done')
    # Create new diagnosis object
    diag = Diagnosis()
    diag.name = 'dfgdfgs'
    diag.evaluation = True
    disease = Disease.match(graph).where(mesh_code='D003424').first()
    diag.diag_disease.add(disease)
    print(disease.to_dict())
    # pp.name='testddd'
    graph.push(diag)
    aa = [a for a in Diagnosis.match(graph)]
    for a in aa:
        print(a.to_dict())
    return HttpResponse(json.dumps('kk'), content_type='application/json')
print(key, ":",page['infobox'][key]) except: print("no infobox") indiv_cat = [] for i in range(1,len(page['parsetree'].split("[[Catégorie:"))): cat = page['parsetree'].split("[[Catégorie:")[i].split("]]")[0].split("|*")[0] indiv_cat.append(cat) if cat not in all_cat: all_cat.append(cat) wiki_n["categories"] = indiv_cat print(indiv_cat) wiki_n['wikibase'] = page['wikibase'] print(page['wikibase']) try: graph.push(wiki_n) except: print("cannot push node :", wiki_n) print("**************************************************************************") len(all_keys) len(all_cat) with open(wikipath + "all_infobox_keys.txt" , "w", encoding='utf-8') as file: file.write(str(all_keys)) with open(wikipath + "all_categories.txt" , "w", encoding='utf-8') as file: file.write(str(all_cat)) # %% import wikipedia Summary: results = graph.nodes.match("Wikipedia")
class NetworkGraph:
    """Thin wrapper around a local Neo4j graph of people ('Me', contacts,
    misc entities) and their relationships."""

    def __init__(self):
        # credentials live in neo4j_creds.json under the project root
        path = os.path.realpath(ROOT_DIR + '/neo4j_creds.json')
        with open(path) as f:
            data = json.load(f)
        username = data['username']
        password = data['password']
        self.graph = Graph(host="localhost", username=username,
                           password=password)

    def add_node_by_name(self, name, age=None, gender=None, node_type="PERSON"):
        """Create and return a node labelled node_type ('user' for 'USER')."""
        if name == 'USER':
            node_type = 'user'
        node = Node(node_type, name=name, age=age, gender=gender)
        self.graph.create(node)
        return node

    def get_node_by_name(self, name):
        """Return the first node with this name property, or None."""
        matcher = NodeMatcher(self.graph)
        node = matcher.match(name=name).first()
        return node

    def add_relationship(self, node1, node2, rel_type='KNOWS'):
        """Link two nodes (looked up by name) with rel_type, creating any
        missing node first."""
        first_node = self.get_node_by_name(node1)
        second_node = self.get_node_by_name(node2)
        if not first_node:
            first_node = self.add_node_by_name(node1)
        if not second_node:
            second_node = self.add_node_by_name(node2)
        self.graph.create(Relationship(first_node, rel_type, second_node))

    def add_rel_tuple(self, ent1, ent2):
        """Create two Misc entities and a symmetric related_ent link
        between them.

        :param ent1: name of the first entity
        :param ent2: name of the second entity
        """
        # define nodes
        node1 = Misc()
        node1.name = ent1
        node2 = Misc()
        node2.name = ent2
        # add relationship to nodes
        node1.related_ent.add(node2)
        node2.related_ent.add(node1)
        # save to neo4j
        self.graph.create(node1)
        self.graph.create(node2)

    def search_node_by_name(self, node_name):
        """Return the name of the node matching node_name (whitespace
        stripped), or None."""
        # replace white spaces
        _node_name = node_name.replace(" ", "")
        # BUG FIX: the original query returned the whole node (RETURN n) but
        # the code read the 'n.name' column, which never existed in the
        # result — return the name column explicitly.
        query = 'MATCH (n) WHERE n.name={node_name} RETURN n.name;'
        result = self.graph.run(
            query,
            node_name=_node_name,
        ).data()
        if result:
            node = result[0]['n.name']
        else:
            node = None
        return node

    def add_me_w_firstname(self, username, age="", gender=""):
        """Push a new central user 'Me' identified by first name.

        :param username: string username
        :return: me object (see ogm pkg)
        """
        # OGM
        me = Me()
        me.firstname = username.title()
        me.lastname = ""
        me.age = age
        me.gender = gender
        self.graph.push(me)
        return me

    def add_me_w_lastname(self, username, age="", gender=""):
        """Push a new central user 'Me' identified by last name.

        :param username: string username
        :return: me object (see ogm pkg)
        """
        # OGM
        me = Me()
        me.firstname = ""
        me.lastname = username.title()
        me.age = age
        me.gender = gender
        self.graph.push(me)
        return me

    def get_me_by_firstname(self, me_name):
        """Return a Me object populated with the stored firstname, or None."""
        result = self.graph.run('MATCH (n:Me) WHERE n.firstname="' +
                                me_name.title() +
                                '" RETURN n.firstname').data()
        me = Me()
        if result:
            me.firstname = result[0]['n.firstname']
            return me
        else:
            return None

    def get_me_by_lastname(self, me_name):
        """Return a Me object populated with the stored lastname, or None."""
        result = self.graph.run('MATCH (n:Me) WHERE n.lastname="' +
                                me_name.title() +
                                '" RETURN n.lastname').data()
        me = Me()
        if result:
            # BUG FIX: the original assigned the lastname to me.firstname
            me.lastname = result[0]['n.lastname']
            return me
        else:
            return None

    def add_contact(self, me_name, contactname, relationship):
        """Attach a new contact to the central user 'Me' in the graph.

        :param me_name: first name of the central user
        :param contactname: contact's first name
        :param relationship: key into the `relationships` mapping
        """
        # select central user 'Me'
        me = self.get_me_by_firstname(me_name)
        contact = Contact()
        contact.firstname = contactname
        relationship = relationships[relationship]
        if relationship == 'freund':
            me.friend.add(contact)
            contact.friend.add(me)
        elif relationship == 'bruder':
            me.brother.add(contact)
            contact.brother.add(me)
        elif relationship == 'schwester':
            me.sister.add(contact)
            contact.sister.add(me)
        elif relationship == 'mutter':
            me.mother.add(contact)
        elif relationship == 'vater':
            me.father.add(contact)
        elif relationship == 'sohn':
            me.son.add(contact)
        elif relationship == 'tocher':
            me.daughter.add(contact)
        # TODO other relationships
        self.graph.push(me)

    def search_relationship_by_contactname(self, me_name, contact_name):
        """Return the relationship type between Me and a contact, or None."""
        mename = me_name.replace(" ", "")
        contactname = contact_name.replace(" ", "")
        query = ('MATCH (n:Me)-[r]->(c:Contact) WHERE n.firstname={me_name} '
                 'AND c.firstname={contactname} RETURN type(r);')
        result = self.graph.run(query, me_name=mename,
                                contactname=contactname).data()
        if result:
            relationship = result[0]['type(r)']
        else:
            relationship = None
        return relationship

    def search_contactname_by_relationship(self, me_name, relationship):
        """Return the first contact name reached from Me via relationship."""
        relationship = relationships[relationship]
        if relationship:
            # relationship types cannot be parameterized in Cypher, hence the
            # concatenation; values come from the trusted `relationships` map
            # (the original also passed an unused rel= kwarg — removed)
            result = self.graph.run('MATCH (u:Me)-[:' + relationship +
                                    ']->(c:Contact) RETURN c.firstname;').data()
        else:
            return None
        if result:
            contactname = result[0]['c.firstname']
        else:
            contactname = None
        return contactname
class Neo4jOperate(object):
    """Helper around py2neo for node / relationship CRUD.

    Node arguments throughout may be either a py2neo Node or a search dict
    of the form {"label": "Teacher", "search": {"id": 123}}.
    """

    def __init__(self, host, user, password):
        self.graph = Graph(host, auth=(user, password))
        self.relationship_matcher = RelationshipMatcher(self.graph)

    def create_one_relationship(self, start_node={}, target_node={}, relationship=None, **prop):
        """Create (or refresh) a relationship between two nodes.

        :param start_node: Node or search dict
        :param target_node: Node or search dict
        :param relationship: relationship type name
        :param prop: relationship properties
        :return: True on success, False otherwise
        """
        s_node = self.search_node(start_node)
        t_node = self.search_node(target_node)
        if s_node is None or t_node is None:
            return False
        relation = self.search_relationship(s_node, t_node, relationship)
        if relation:
            # already linked -> just refresh the properties
            return self.update_one_relationship(s_node, t_node, relationship,
                                                **prop)
        return self.create_relation(start_node=s_node,
                                    relationship=relationship,
                                    target_node=t_node, **prop)

    def update_one_relationship(self, start_node={}, target_node={}, relation=None, cover=True, **prop):
        """Update properties on an existing relationship.

        :param relation: relationship type name or a Relationship instance
        :param cover: True to overwrite existing values, False to accumulate
            (+=) into them
        :param prop: relationship properties
        :return: True on success, False otherwise
        """
        if not isinstance(relation, Relationship):
            relation = self.search_relationship(start_node, target_node,
                                                relation)
        if relation is None:
            return False
        for key, value in dict(prop).items():
            if key not in relation or cover is True:
                relation[key] = value
            else:
                relation[key] += value
        try:
            self.graph.push(relation)
            return True
        except Exception as e:
            print(e)
            return False

    def delete_one_relationship(self, start_node={}, target_node={}, relationship=None, properety=None):
        """Delete one relationship between two nodes.

        :param relationship: relationship type name
        :param properety: optional set of property names; when given, the
            values of those properties are collected from the relationship
            before it is removed and returned
        :return: dict of requested properties, True when there was nothing
            to delete or no properties were requested, False on error

        BUG FIX: the original returned the collected properties *before*
        calling graph.separate(), so the relationship was never deleted
        when properties were requested.
        """
        relation = self.search_relationship(start_node, target_node,
                                            relationship)
        if relation is None:
            return True
        try:
            back = {}
            if properety is not None and len(properety) > 0:
                for key in set(properety):
                    if key in relation:
                        back[key] = relation[key]
            self.graph.separate(relation)
            return back if back else True
        except Exception as e:
            return False

    def migrate_relationship(self, source_node={}, self_node={}, target_node={}, r_type=None, **property):
        """Move a relationship A-B to C-B, carrying selected properties.

        :param source_node: node the relationship is detached from (A)
        :param self_node: the shared node (B)
        :param target_node: node the relationship is re-attached to (C)
        :param r_type: relationship type name
        :param property: names of properties to carry over
        :return: True or False

        BUG FIX: the original spread **property into delete_one_relationship
        (whose parameter is the set `properety`, so any kwarg raised
        TypeError) and called create_one_relationship without the
        relationship type on a value that could be True/False.
        """
        carried = self.delete_one_relationship(source_node, self_node, r_type,
                                               properety=set(property))
        if carried is False:
            return False
        props = carried if isinstance(carried, dict) else {}
        return self.create_one_relationship(target_node, self_node, r_type,
                                            **props)

    def search_relationship(self, start_node={}, target_node={}, relationship=None):
        """Find the relationship between two nodes.

        :return: Relationship or None
        """
        # resolve both endpoints first
        start_node = self.search_node(start_node)
        if start_node is None:
            return None
        target_node = self.search_node(target_node)
        if target_node is None:
            return None
        return self.relationship_matcher.match(
            nodes=[start_node, target_node], r_type=relationship).first()

    def search_node(self, search_dict):
        """Resolve a Node from a Node or a {"label", "search"} dict.

        :return: Node, or None when not found / malformed input
        """
        if type(search_dict) is Node:
            return search_dict
        elif type(search_dict) is dict:
            if "label" in search_dict and "search" in search_dict:
                node_search = self.graph.nodes.match(
                    search_dict["label"]).where(
                    **search_dict["search"]).first()
                return node_search
        return None

    def update_node(self, search_dict, **prop):
        """Update properties on an existing node (never creates one).

        :return: True or False
        """
        node = self.get_node(search_dict=search_dict,
                             create_if_not_exist=False)
        if node is None:
            return False
        for key, value in prop.items():
            node[key] = value
        try:
            self.graph.push(node)
            return True
        except Exception as e:
            return False

    def get_node(self, search_dict, create_if_not_exist=True, **prop):
        """Fetch a node, optionally creating it when absent.

        :param create_if_not_exist: create the node when the search misses
        :return: Node, or None when creation failed
        """
        node = self.search_node(search_dict)
        if node is None and create_if_not_exist:
            return self.create_node(search_dict=search_dict, **prop)
        return node

    def create_node(self, search_dict, **prop):
        """Create a node with the dict's label and the given properties,
        then return it via a fresh search (None on failure)."""
        node = Node(search_dict["label"], **prop)
        try:
            self.graph.create(node)
            return self.search_node(search_dict)
        except Exception as e:
            return None

    def create_relation(self, start_node, relationship, target_node, **prop):
        """Low-level relationship creation; returns True/False."""
        relation = Relationship(start_node, relationship, target_node)
        for key, value in dict(prop).items():
            relation[key] = value
        try:
            self.graph.create(relation)
            return True
        except Exception as e:
            return False

    def run(self, cql):
        """Run a Cypher statement and return its rows as dicts."""
        return self.graph.run(cql).data()

    def run_without_data(self, cql):
        """Run a Cypher statement and return the raw cursor."""
        return self.graph.run(cql)
node_2_call_node_1 = Relationship(test_node_2, 'CALL', test_node_1)
node_2_call_node_1['count'] = 2
test_graph.create(node_1_call_node_2)
test_graph.create(node_2_call_node_1)
# The code above creates two CALL relationships, test_node_1 -> test_node_2
# and test_node_2 -> test_node_1, each carrying a `count` property.
# Note: if the start or end node of a relationship does not exist yet, it
# is created together with the relationship.

# 4. Assigning and updating node/relationship property values
# Initial property assignment was covered when the nodes and relationships
# were created; here we update an existing property value.
# Take node_1_call_node_2 from above, increment its count, and write it back.
node_1_call_node_2['count'] += 1
test_graph.push(node_1_call_node_2)
# Calling push() is all that is needed to persist an updated property value.

# 5. Finding nodes and relationships by property value (find, find_one)
# find and find_one locate nodes/relationships by label plus a property
# key/value pair.
find_code_1 = test_graph.find_one(
    label="Person",
    property_key="name",
    property_value="test_node_1"
)
print(find_code_1['name'])
# Difference between find and find_one:
# find_one returns a single concrete node/relationship whose properties can
# be read directly, or None when there is no match.
# find returns a cursor that can be iterated to reach every match.
if to_node: from_node.related_exam.add(to_node) elif rel_type == "related_drug": to_node = get_node_by_id("drug", to_id) if to_node: from_node.related_drug.add(to_node) elif rel_type == "related_operation": to_node = get_node_by_id("operation", to_id) if to_node: from_node.related_operation.add(to_node) elif from_type == "exam": from_node = get_node_by_id("exam", from_id) if rel_type == "related_bodypart": to_node = get_node_by_id("bodypart", to_id) if to_node: from_node.related_bodypart.add(to_node) elif from_type == "operation": from_node = get_node_by_id("operation", from_id) if rel_type == "related_depart": to_node = get_node_by_id("depart", to_id) if to_node: from_node.related_depart.add(to_node) elif rel_type == "related_bodypart": to_node = get_node_by_id("bodypart", to_id) if to_node: from_node.related_bodypart.add(to_node) graph.push(from_node)
class Graph(object):
    """Data-access facade over a Neo4j graph for users, rankings and maps.

    `NeoGraph` is the py2neo Graph class; `Nodes` and `Links` are project
    helper classes (defined elsewhere) — presumably thin find/create
    wrappers keyed by label and node_id; confirm in their module.
    """

    def __init__(self, neo4j_uri):
        # One py2neo connection shared by the node/link helpers.
        self.graph = NeoGraph(neo4j_uri)
        self.nodes = Nodes(self.graph)
        self.links = Links(self.graph)

    def create_user(self, args):
        """Create a User node unless one with this username already exists.

        :param args: dict with "username", "password", "name", "city".
        :return: (node, created) — created is True only when a new node
            was written; an existing user is returned with created=False.
        """
        node = self.nodes.find("User", args["username"])
        if not node:
            # Store only a hash of the credentials, never the raw password.
            passhash = Authenticate.hashgen(args["username"], args["password"])
            properties = dict(
                node_id=args["username"],
                name=args["name"],
                city=args["city"],
                passhash=passhash
            )
            node = Node("User", **properties)
            self.graph.create(node)
            return node, True
        return node, False

    def user_rank(self, args, node_type):
        """Record (or update) a user's rank of a node.

        An existing RANKS link is updated in place only when it belongs to
        the same issue (or when no issue_id is supplied); otherwise a new
        RANKS relationship is created.

        :return: (success, error_message) — error_message is "" on success.
        """
        # success = False
        # errors = []
        user = self.nodes.find("User", args["user_id"])
        if not user:
            return False, "invalid user_id"
        node = self.nodes.find(node_type, args["node_id"])
        if not node:
            return False, "invalid node_id"
        link = self.links.find(user, node, "RANKS")
        if link and ("issue_id" not in args or link.properties["issue_id"] == args["issue_id"]):
            link.properties["rank"] = args["rank"]
            link.push()
        else:
            properties = {"rank": args["rank"]}
            if "issue_id" in args:
                properties["issue_id"] = args["issue_id"]
            self.graph.create(Relationship(user, "RANKS", node, **properties))
        return True, ""

    def user_map(self, args, src_node, dst_node):
        """Link a user to a (src, dst) "Map" node with a strength value.

        Requires that the user has already RANKed both endpoints; the map
        node (node_id "<src_id>-<dst_id>") is created on first use.

        :return: (success, comma-joined error string).
        """
        # TODO refactor this into smaller units
        # success = False
        errors = []
        # retrieve nodes and existing links; collect every validation
        # failure instead of bailing on the first one
        user = self.nodes.find("User", args["user_id"])
        if not user:
            errors.append("invalid user_id")
        src = self.nodes.find(src_node, args["src_id"])
        if not src:
            errors.append("invalid src_id")
        dst = self.nodes.find(dst_node, args["dst_id"])
        if not dst:
            errors.append("invalid dst_id")
        src_link = self.links.find(user, src, "RANKS")
        if not src_link:
            errors.append("user has not ranked src_node")
        dst_link = self.links.find(user, dst, "RANKS")
        if not dst_link:
            errors.append("user has not ranked dst_node")
        if errors:
            return False, ", ".join(errors)
        src_rank = src_link.properties["rank"]
        dst_rank = dst_link.properties["rank"]
        # fetch map node or create if it doesn't exist
        map_id = "{0}-{1}".format(args["src_id"], args["dst_id"])
        map_node = self.nodes.find("Map", map_id)
        if not map_node:
            properties = dict(node_id=map_id)
            map_node = Node("Map", **properties)
            self.graph.create(map_node)
            self.graph.create(Relationship(src, "MAPS", map_node, **{}))
            self.graph.create(Relationship(map_node, "MAPS", dst, **{}))
        user_map_link = self.links.find(user, map_node, "MAPS")
        if user_map_link:
            # link already exists, update strength
            user_map_link.properties["strength"] = args["strength"]
            user_map_link.properties["src_rank"] = src_rank
            user_map_link.properties["dst_rank"] = dst_rank
            # NOTE(review): push() is called with no entities here — it
            # presumably should push user_map_link; verify against the
            # py2neo version in use.
            self.graph.push()
        else:
            # create new link from user to map node
            properties = dict(
                strength=args["strength"],
                src_rank=src_rank,
                dst_rank=dst_rank
            )
            self.graph.create(Relationship(user, "MAPS", map_node, **properties))
        return True, ""

    def get_summary(self, issue_id, node_type):
        """Histogram of RANKS values per node of *node_type*.

        :return: (success, {node_id: [c(-2), c(-1), c(0), c(1), c(2)]},
            list of rank values outside [-2, 2]).
        """
        issue = self.nodes.find("Issue", issue_id)
        if not issue:
            return False, "issue <{0}> does not exist".format(issue_id), []
        # TODO only grab nodes that are connected to issue node
        cypher = self.graph.cypher
        query = """
            MATCH (u:User)-[r:RANKS]-(v:`{0}`)
            RETURN r.rank AS rank, v.node_id AS node_id,
            count(u.node_id) AS count
            ORDER BY node_id, rank
            """.format(node_type)
        results = cypher.execute(query)
        nodes = {}
        invalid = []
        for row in results:
            if row.node_id not in nodes:
                nodes[row.node_id] = [0, 0, 0, 0, 0]
            # rank -2..2 maps onto bucket index 0..4
            if row.rank in range(-2, 3):
                nodes[row.node_id][row.rank + 2] = row.count
            else:
                invalid.append(row.rank)
        return True, nodes, invalid
# encoding=utf8
# One-off migration: normalize the parameter-separator format of hadoop
# command nodes (replace "| " delimiters with "||| ").
from py2neo import Graph, Node, Relationship

graph = Graph("http://localhost:7474", auth=("neo4j", "12345"))

# All nodes labelled "命令" (command) for the hadoop platform.
com_node = graph.nodes.match("命令", platform='hadoop')
for ele in com_node:
    parameter = ele['parameter']
    # Fix: the original replace was not idempotent — re-running the script
    # would expand already-migrated "||| " separators again.  Skip values
    # that already carry the new separator.
    if parameter and '||| ' not in str(parameter):
        parameter2 = str(parameter).replace('| ', '||| ')
        print(parameter2)
        ele.update({'parameter': parameter2})
        graph.push(ele)
class Command(BaseCommand):
    """Django management command: copy QQ group/member data from SQL Server
    into Neo4j (Group / Person OGM nodes with membership relations)."""

    help = 'port group data from sql server to neo4j.'

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        # One shared connection to the source SQL Server instance.
        self._sql_server_conn = pymssql.connect(server='SX-DEV')
        self._init_graph()

    def handle(self, *args, **options):
        # Django entry point.
        self._start_import()

    def _init_graph(self):
        # (Re)connect to Neo4j; also called to recover after an exception
        # during import (see _create_group/_create_person).
        self._graph = Graph(host=settings.NEO4J['HOST'],
                            http_port=settings.NEO4J['PORT'],
                            user=settings.NEO4J['USER'],
                            password=settings.NEO4J['PWD'])

    def _start_import(self):
        """Import all group tables first, then all person tables (so that
        person rows can link to the already-created group nodes)."""
        self.stdout.write('Start to migrate data from sql server to neo4j')
        group_db, person_db = self._get_databases()
        # create all the group nodes
        for db in group_db:
            for table in self._get_db_tables(db):
                self._create_group(db, table)
        # create all group users nodes and build relations
        for db in person_db:
            for table in self._get_db_tables(db):
                self._create_person(db, table)
        self._close_mssql_conn()
        self.stdout.write(
            self.style.SUCCESS(
                'Successfully imported all data to neo4j server'))

    def _close_mssql_conn(self):
        self._sql_server_conn.close()

    def _get_databases(self):
        """Partition server databases by naming convention.

        :return: (group_db, person_db) lists of database names —
            'QunInfo*' databases hold groups, 'GroupData*' hold members.
        """
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT name FROM sys.databases;')
        dbs = cursor.fetchall()
        group_db = []
        person_db = []
        for db in dbs:
            db_name = db[0]
            if 'GroupData' in db_name:
                person_db.append(db_name)
            elif 'QunInfo' in db_name:
                group_db.append(db_name)
        return group_db, person_db

    def _get_db_tables(self, db_name):
        """Return the data tables ('QunList*' / 'Group*') of *db_name*.

        NOTE(review): db/table names are interpolated with %-formatting;
        they come from sys.databases (trusted), but parameterization would
        still be safer.
        """
        cursor = self._sql_server_conn.cursor()
        cursor.execute(
            "SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';" % db_name)
        return [
            tb[0] for tb in cursor.fetchall()
            if 'QunList' in tb[0] or 'Group' in tb[0]
        ]

    def _create_group(self, db_name, table_name, start_id=0):
        """Merge one Group node per row of *table_name* with id > start_id.

        On any exception the Neo4j connection is re-created and the import
        resumes recursively from just before the failed row.
        """
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))
        total = cursor.fetchall()[0][0]
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))
        pbar = tqdm(desc='Creating Group Nodes from [%s.%s]' %
                    (db_name, table_name), total=total)
        try:
            g = cursor.fetchone()
            while g:
                curr_id = g[0]
                # Row layout: id, number, master qq, date, title, class, intro
                group = Group()
                group.number = g[1]
                group.mastqq = g[2]
                group.date = g[3]
                group.title = g[4]
                group.groupclass = g[5]
                group.intro = g[6]
                self._graph.merge(group)
                pbar.update(1)
                g = cursor.fetchone()
        except:  # NOTE(review): bare except hides the real error; narrow it.
            print('Catch an Exception, resume group creating from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            # Resume from one id earlier so the failed row is retried.
            self._create_group(db_name, table_name, curr_id - 1)
        pbar.close()

    def _create_person(self, db_name, table_name, start_id=0):
        """Merge one Person node per member row and link it to its Group
        (bidirectional OGM relation). Same resume-on-exception scheme as
        _create_group."""
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))
        total = cursor.fetchall()[0][0]
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))
        pbar = tqdm(desc='Creating Person Nodes and Relations from [%s.%s]' %
                    (db_name, table_name), total=total)
        try:
            p = cursor.fetchone()
            while p:
                curr_id = p[0]
                # Row layout: id, qq, nick, age, gender, auth, group number
                person = Person()
                person.qq = p[1]
                person.nick = p[2]
                person.age = p[3]
                person.gender = p[4]
                person.auth = p[5]
                group_number = p[6]
                # get group node
                group = Group.select(self._graph, group_number).first()
                if group:
                    # build relations
                    person.groups.add(group)
                    group.members.add(person)
                    # update group node
                    self._graph.push(group)
                self._graph.merge(person)
                pbar.update(1)
                p = cursor.fetchone()
        except:  # NOTE(review): bare except hides the real error; narrow it.
            print('Catch an Exception, resume person creating from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            self._create_person(db_name, table_name, curr_id - 1)
        pbar.close()
entity_out.comment = e_out.comment a = pyproms.ProvActivity('NEXIS processing', startedAtTime=startedAtTime, endedAtTime=endedAtTime, wasAssociatedWith=rs, used_entities=[e1, e2], generated_entities=[e_out]) actity_a = Activity() actity_a.uri = a.uri actity_a.startedAtTime = a.startedAtTime actity_a.endedAtTime = a.endedAtTime actity_a.wasAssociatedWith = a.wasAssociatedWith actity_a.wasInformedBy = a.wasInformedBy gh.push(actity_a) gh.push(entity_out) gh.push(entity_2) gh.push(entity_) gh.push(agent_) # Report generation r = pyproms.PromsExternalReport( 'NEXIS Report', wasReportedBy=pyproms.PromsReportingSystem('Fake RS'), nativeId='NEXIS run #34', reportActivity=a, generatedAtTime=endedAtTime) #time.ctime()) with open('report.1908.ttl', 'wb') as f: f.write(r.get_graph().serialize(format='turtle'))
# Load album rows from a DataFrame into Neo4j: one Album node per row,
# linked to Genre nodes ('Performs') and descriptive Word nodes
# ('Describes').  Python 2 script (print statement, legacy py2neo API).
graph.schema.create_uniqueness_constraint("Album", "slug")
# graph.schema.create_uniqueness_constraint("Word", "slug")
# graph.schema.create_uniqueness_constraint("Genre", "slug")
for i, r in df.iterrows():
    s_id = r['spotify_id']
    cover = r['albumcover']
    artist = r['artist_column']
    album_name = r['album_column']
    slug = r['slug_column']
    print [album_name]
    # Fix: previously the row value and the Node shared the name `album`,
    # shadowing the raw title; also dropped the argumentless graph.push()
    # that followed create() — it pushed nothing (and raises a TypeError on
    # py2neo v3+), and a freshly created node has no local changes to push.
    album = Node('Album', name=album_name, artist=artist, albumcover=cover,
                 s_id=s_id, slug=slugify(slug))
    graph.create(album)
    if pd.Series(r['genre_column']).any():
        for each in r['genre_column']:
            # each is (genre_name, distance); reuse an existing Genre node
            # when present, otherwise the relationship create will also
            # create the new node.
            genre = graph.find_one('Genre', 'slug', slugify(each[0]))
            if not genre:
                genre = Node('Genre', name=each[0], slug=slugify(each[0]))
            rel = Relationship(album, 'Performs', genre, dist=each[1])
            graph.create(rel)
    # Columns from index 5 on hold per-word weights; 1 means "not relevant".
    for value in zip(r.index[5:], r[5:]):
        if value[1] != 1:
            word = graph.find_one('Word', 'slug', slugify(value[0]))
            if not word:
                word = Node('Word', name=value[0], slug=slugify(value[0]))
            rel = Relationship(word, 'Describes', album, dist=value[1])
            graph.create(rel)
# 创建节点(均可直接赋值变量) a = Node('User', name='yaim') #第1个参数为节点类型,第2个参数为节点属性和值 b = Node('User', name='fyl') # 创建关系 r = Relationship(a, 'SAME', b) # 将节点和关系加入到数据库 s = a | b | r graph.create(s) #查询节点 print(graph.nodes[0]) #根据节点id,返回节点 print(graph.nodes.get(0)) #同上 print(list(graph.nodes.match('User'))) #根据条件,返回节点列表 print(graph.nodes.match('User', name='yaim').first()) #根据条件,返回第1个节点 # 查询关系 rel_matcher = RelationshipMatcher(graph) rel_all = list(rel_matcher.match()) #获取所有关系,返回列表 rel_this = list(rel_matcher.match(r_type='SAME')) #获取某一关系,返回列表 # 删除节点 node_del = graph.nodes.match('User', name='temp').first() #先查询到某一个节点 graph.delete(node_del) #再删除 # ***删除关系 rel_del = rel_matcher.match(r_type='SAME').first() #先查询到某一个关系 graph.delete(rel_del) #再删除,*连带删除节点?* # 更新节点 node_update = graph.nodes.match('User', name='yaim').first() #先查询到某一个节点 node_update['name'] = 'yaim_new' #更新该节点某一属性值 graph.push(node_update) #提交更新 # ***更新关系
__author__ = "Andrei"

# Minimal py2neo (v2-era API) example: create two Person nodes, a KNOWS
# relationship between them, then update a property on each.
from py2neo import Graph
from py2neo import Node, Relationship

# Credentials are redacted in the source ("*****"); fill in before running.
graph = Graph("http://*****:*****@localhost:7474/db/data")

alice = Node("Person", name="Alice")
bob = Node("Person", name="Bob")

# Fix: the end node must be the `bob` Node object — the original passed the
# string "bob", which creates a relationship to a bogus node instead of Bob.
alice_knows_bob = Relationship(alice, "KNOWS", bob)
graph.create(alice_knows_bob)

# .properties is the legacy py2neo v2 accessor (v3+ uses node["age"] = 33).
alice.properties["age"] = 33
bob.properties["age"] = 44
graph.push(alice, bob)
class Graph(object):
    """Data-access facade over Neo4j for users, issues, rankings and maps.

    `NeoGraph` is the py2neo Graph class; `Nodes` and `Links` are project
    helper classes (defined elsewhere) — presumably find/create wrappers
    keyed by label and node_id; confirm in their module.
    """

    def __init__(self, neo4j_uri):
        # One py2neo connection shared by the node/link helpers.
        self.graph = NeoGraph(neo4j_uri)
        self.nodes = Nodes(self.graph)
        self.links = Links(self.graph)

    def execute_raw(self, cqlfile):
        """Run the Cypher query stored in file *cqlfile* and return its
        result records.  (Removed the unreachable `return []` that followed
        the `with` block in the original.)"""
        cypher = self.graph.cypher
        with open(cqlfile, 'r') as query:
            return cypher.execute(query.read())

    def create_user(self, args):
        """Create a User node unless one with this username already exists.

        :param args: dict with "username", "password", "name", "city".
        :return: (node, created) — created is True only for a new node.
        """
        node = self.nodes.find("User", args["username"])
        if not node:
            # Store only a hash of the credentials, never the raw password.
            passhash = Authenticate.hashgen(args["username"], args["password"])
            properties = dict(
                node_id=args["username"],
                name=args["name"],
                city=args["city"],
                passhash=passhash
            )
            node = Node("User", **properties)
            self.graph.create(node)
            return node, True
        return node, False

    def create_issue_nodes(
            self, parent, names, node_type, link_type="HAS", link_prop=None):
        """Create one *node_type* node per name and link each to *parent*.

        Support function for create_issue: every node gets a random uuid4
        node_id and a `link_type` relationship from *parent* carrying the
        `link_prop` properties.

        Fix: `link_prop` used to default to a shared mutable `{}`
        (classic mutable-default pitfall); it now defaults to None and a
        fresh dict is materialized per call — callers are unaffected.
        """
        if link_prop is None:
            link_prop = {}
        nodes = []
        for name in names:
            properties = dict(
                node_id=str(uuid.uuid4()),
                name=name
            )
            node = Node(node_type, **properties)
            self.graph.create(node)
            self.graph.create(Relationship(parent, link_type, node, **link_prop))
            nodes.append(node)
        return nodes

    def create_issue(self, args):
        """Create an Issue node plus its Value/Objective/Policy children.

        :param args: dict with "issue_name", "desc", and name lists under
            "values", "objectives", "policies".
        :return: the new issue's node_id (uuid4 string).
        """
        issue_properties = dict(
            node_id=str(uuid.uuid4()),
            name=args["issue_name"],
            desc=args["desc"]
        )
        issue_node = Node("Issue", **issue_properties)
        self.graph.create(issue_node)
        # create new nodes and links for values/objectives/policies
        # associated with the new issue
        self.create_issue_nodes(issue_node, args["values"], "Value")
        self.create_issue_nodes(issue_node, args["objectives"], "Objective")
        self.create_issue_nodes(issue_node, args["policies"], "Policy")
        return issue_properties["node_id"]

    def user_rank(self, args, node_type):
        """Record (or update) a user's rank of a node.

        An existing RANKS link is updated in place; otherwise a new RANKS
        relationship (optionally tagged with issue_id) is created.

        :return: (success, error_message) — error_message is "" on success.
        """
        user = self.nodes.find("User", args["user_id"])
        if not user:
            return False, "invalid user_id"
        node = self.nodes.find(node_type, args["node_id"])
        if not node:
            return False, "invalid node_id"
        link = self.links.find(user, node, "RANKS")
        if link:
            link.properties["rank"] = args["rank"]
            link.push()
        else:
            properties = {"rank": args["rank"]}
            if "issue_id" in args:
                properties["issue_id"] = args["issue_id"]
            self.graph.create(Relationship(user, "RANKS", node, **properties))
        return True, ""

    def user_map(self, args, src_node, dst_node):
        """Link a user to a (src, dst) "Map" node with a strength value.

        Requires that the user has already RANKed both endpoints; the map
        node (node_id "<src_id>-<dst_id>") is created on first use.

        :return: (success, comma-joined error string).
        """
        # TODO refactor this into smaller units
        errors = []
        # retrieve nodes and existing links; collect every validation
        # failure instead of bailing on the first one
        user = self.nodes.find("User", args["user_id"])
        if not user:
            errors.append("invalid user_id")
        src = self.nodes.find(src_node, args["src_id"])
        if not src:
            errors.append("invalid src_id")
        dst = self.nodes.find(dst_node, args["dst_id"])
        if not dst:
            errors.append("invalid dst_id")
        src_link = self.links.find(user, src, "RANKS")
        if not src_link:
            errors.append("user has not ranked src_node")
        dst_link = self.links.find(user, dst, "RANKS")
        if not dst_link:
            errors.append("user has not ranked dst_node")
        if errors:
            return False, ", ".join(errors)
        src_rank = src_link.properties["rank"]
        dst_rank = dst_link.properties["rank"]
        # fetch map node or create if it doesn't exist
        map_id = "{0}-{1}".format(args["src_id"], args["dst_id"])
        map_node = self.nodes.find("Map", map_id)
        if not map_node:
            properties = dict(node_id=map_id)
            map_node = Node("Map", **properties)
            self.graph.create(map_node)
            self.graph.create(Relationship(src, "MAPS", map_node, **{}))
            self.graph.create(Relationship(map_node, "MAPS", dst, **{}))
        user_map_link = self.links.find(user, map_node, "MAPS")
        if user_map_link:
            # link already exists, update strength
            user_map_link.properties["strength"] = args["strength"]
            user_map_link.properties["src_rank"] = src_rank
            user_map_link.properties["dst_rank"] = dst_rank
            # NOTE(review): push() is called with no entities — it
            # presumably should push user_map_link; verify against the
            # py2neo version in use.
            self.graph.push()
        else:
            # create new link from user to map node
            properties = dict(
                strength=args["strength"],
                src_rank=src_rank,
                dst_rank=dst_rank
            )
            self.graph.create(Relationship(user, "MAPS", map_node, **properties))
        return True, ""

    def get_summary(self, issue_id, node_type):
        """Histogram of RANKS values per node of *node_type*.

        :return: (success,
            {node_id: {"name": ..., "data": [c(-2)..c(2)]}},
            list of rank values outside [-2, 2]).
        """
        issue = self.nodes.find("Issue", issue_id)
        if not issue:
            return False, "issue <{0}> does not exist".format(issue_id), []
        # TODO only grab nodes that are connected to issue node
        cypher = self.graph.cypher
        query = """
            MATCH (u:User)-[r:RANKS]-(v:`{0}`)
            RETURN r.rank AS rank, v.node_id AS node_id, v.name AS name,
            count(u.node_id) AS count
            ORDER BY node_id, rank
            """.format(node_type)
        results = cypher.execute(query)
        nodes = {}
        invalid = []
        for row in results:
            if row.node_id not in nodes:
                nodes[row.node_id] = dict(name=row.name, data=[0, 0, 0, 0, 0])
            # rank -2..2 maps onto bucket index 0..4
            if row.rank in range(-2, 3):
                nodes[row.node_id]["data"][row.rank + 2] = row.count
            else:
                invalid.append(row.rank)
        return True, nodes, invalid
n['p'][ 'int_ratio2'] = int_ratio2 # ratio of numeric matches in attribute 2 n['p'][ 'str_ratio2'] = str_ratio2 # ratio of string matches in attribute 2 n['p'][ 'date_ratio2'] = date_ratio2 # ratio of date matches in attribute 2 n['p'][ 'no_unique_values1'] = no_unique_values1 # number of unique values in attribute 1 n['p'][ 'no_unique_values2'] = no_unique_values2 # number of unique values in attribute 2 n['p'][ 'top_value1'] = top_value1 # most frequently occuring value in attribute 1 n['p'][ 'top_value2'] = top_value2 # most frequently occuring value in attribute 2 n['p']['values_update_timestamp'] = get_timestamp() graph.push(n['p']) newly_computed_count += 1 print() print() print('NEWLY CALCULATED') print('--------------------------------------------') print('Attribute 1: ' + facet1) print('Attribute 2:' + facet2) print('--------------------------------------------') print('Exact Score:', exact_score) print('Type Match:', type_match) print('Magnitude Difference:', magnitude_difference) print('Jaro Score:', jaro_score) print() print('No. of missing pairs so far: ', missing_count)
len(graph.nodes.match("Website")) len(graph.relationships.match()) ******************************************************************************** # This imports in db data from 202005Websites01_D0.csv WebD0_df=pd.read_csv("C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\HypheExport20200520\\202005Websites01_D0.csv") for index, row in WebD0_df.iterrows(): node = graph.nodes.match(D0_id = str(row['ID'])).first() try: node['D0_home_page'] = row['HOME PAGE'] node['D0_start_pages'] = row['START PAGES'] graph.push(node) except: print(row['ID']) # This imports crawl data : Crawl_D0_df = pd.read_csv("C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\HypheExport20200520\\202005Websites01_D0_crawls.csv") for index, row in Crawl_D0_df.iterrows(): node = graph.nodes.match(D0_id = str(row['webentity_id'])).first() try: node['D0_max_depth']=row['max_depth'] node['D0_nb_pages']=row['nb_pages'] node['D0_nb_crawled_pages']=row['nb_crawled_pages'] node['D0_nb_pages_indexed']=row['nb_pages_indexed'] node['D0_nb_unindexed_pages']=row['nb_unindexed_pages'] node['D0_nb_links']=row['nb_links']
customers = rds.smembers(key) c_s_relatiions[key.split(':')[2]] = customers ''' sid is the key of c_s_relatiions, for example S0000003 ''' selector = NodeSelector(graph) for sid in c_s_relatiions: c_set = c_s_relatiions[sid] # if sid != 'S0000003': # continue _snode = selector.select('Servicer', servicer_id=sid).first() _node = Node('Servicer', servicer_id=sid) _servicer = rds.hgetall('qxy:servicer:' + sid) if _snode: _snode.update(_servicer) graph.push(_snode) else: _node.update(_servicer) graph.create(_node) _snode = _node for c in c_set: # if c != '13551472168': # continue _cnode = selector.select('Customer', pid=c).first() if not _cnode: _node = Node('Customer', pid=c) graph.create(_node) _cnode = _node _msg_keys = [] # msg:sid:pid:*
class Database(): """Manage Database. 管理数据库。 It support python command line parameter processing of relational database and graph database. You can view all the features by 'python xxx.py -h'. 支持关系数据库和图形数据库的python命令行参数处理。 可以通过'python xxx.py -h'查看所有功能。 Public attributes: - rdb: Relational database. 关系数据库。 - graph: Graph database. 图数据库。 """ def __init__(self, password="******", userid="userid", is_admin=True): self.is_admin = is_admin self.rdb = None self.graph = Graph("http://localhost:7474/db/data", password=password) self.selector = NodeSelector(self.graph) # DeprecationWarning: Graph.find_one is deprecated, use NodeSelector instead. 2017-5-18 # self.gconfig = self.graph.find_one("User", "userid", userid) # 用法1:subgraph = selector.select("Label", property=value) # 用法2:subgraph = selector.select("Person").where("_.name =~ 'J.*'", "1960 <= _.born < 1970") self.gconfig = self.selector.select("User", userid=userid).first() self.usage = "usage: python %prog [options] arg" self.version = "%prog 1.0" self.parser = OptionParser(usage=self.usage, version=self.version) self.parser.add_option("-v", "--verbose", action="store_true", dest="verbose") self.parser.add_option("-q", "--quiet", action="store_false", dest="verbose") self.parser.add_option("-b", "--batch", dest="batch", action="store_true", \ help="batch processing of graph database") self.parser.add_option("-f", "--file", dest="filename", \ help="read data from filename") self.parser.add_option("-p", "--path", dest="filepath", \ help="read data from filepath") self.parser.add_option("-a", "--add", dest="add", \ help="add subgraph to graph database") self.parser.add_option("-d", "--delete", dest="delete", \ help="delete subgraph of graph database") self.parser.add_option("-e", "--edit", dest="edit", \ help="edit subgraph of graph database") self.parser.add_option("-s", "--search", dest="search", \ help="search subgraph of graph database") (self.options, self.args) = self.parser.parse_args() # if len(self.args) == 0: # 
self.parser.error("incorrect number of arguments") if self.options.verbose: print("reading %s..." % self.options.filename) if self.options.delete: for label in self.args: self.delete(pattern=self.options.delete, label=label) def delete(self, pattern="n", label=None): """Batch delete data or subgraph in database. 在数据库中批量删除数据或者子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ if pattern == "all": self.graph.delete_all() elif pattern == "n": self.graph.run("MATCH(n:" + label + ") DETACH DELETE n") elif pattern == "r": self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r") elif pattern == "nr": self.graph.run("MATCH (n)<-[r:" + label + "]-(m) DETACH DELETE r DELETE n") elif pattern == "rm": self.graph.run("MATCH (n)-[r:" + label + "]->(m) DETACH DELETE r DELETE m") elif pattern == "nrm": self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r DELETE n DELETE m") def reset(self, pattern="n", label=None, filename=None): """Reset data of label in database. 重置数据库子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ assert filename is not None, "filename can not be None." self.delete(pattern="n", label="NluCell") print("Delete successfully!") if os.path.exists(filename): self.handle_excel(filename) else: print("You can set 'filename=<filepath>' when you call 'Database.reset.'") print("Reset successfully!") def reset_ts(self, pattern="n", label="TestStandard", filename=None): """Reset data of label in database. 重置数据库子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ assert filename is not None, "filename can not be None." 
self.delete(pattern="n", label=label) print("Delete test standard successfully!") if os.path.exists(filename): self.handle_ts(filename) else: print("You can set 'filename=<filepath>' when you call 'Database.reset.'") print("Reset test standard successfully!") def add_qa(self, label="NluCell", name=None, content=None, topic="", \ behavior="", parameter="", url="", tag="", keywords="", api="", txt="", \ img="", chart="", delimiter=None): """ Add qa node in graph. """ assert name is not None, "name must be string." assert content is not None, "content must be string." questions = name.split(delimiter) for question in questions: if question: # 问题不能为空,避免因知识库表格填写格式不对而导致存入空问答对 tag = get_tag(question, self.gconfig) node = Node(label, name=question, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, hot="0") self.graph.create(node) def add_ts(self, label="TestStandard", question=None, content=None, context="", \ behavior="", parameter="", url=""): """ Add test standard node in graph. """ assert question is not None, "question must be string." assert content is not None, "content must be string." for item in question.split(): if item: # 问题不能为空,避免因知识库表格填写格式不对而导致存入空问答对 node = Node(label, question=item, content=content, context=context, \ behavior=behavior, parameter=parameter, url=url) self.graph.create(node) def handle_ts(self, filename=None, custom_sheets=None): """Processing data of test standard. """ assert filename is not None, "filename can not be None." 
data = read_excel(filename) data_sheets = data.sheet_names() if custom_sheets: sheet_names = list(set(data_sheets).intersection(set(custom_sheets))) else: sheet_names = data_sheets for sheet_name in sheet_names: # 可自定义要导入的子表格 table = data.sheet_by_name(sheet_name) # 1.Select specified table # table = data.sheet_by_index(0) if data: # 2.Select specified column col_format = ['A', 'B', 'C', 'D', 'E', 'F'] try: nrows = table.nrows # ncols = table.ncols str_upcase = [i for i in string.ascii_uppercase] i_upcase = range(len(str_upcase)) ncols_dir = dict(zip(str_upcase, i_upcase)) col_index = [ncols_dir.get(i) for i in col_format] # 前两行为表头 for i in range(2, nrows): question = table.cell(i, col_index[0]).value content = table.cell(i, col_index[1]).value context = table.cell(i, col_index[2]).value behavior = table.cell(i, col_index[3]).value parameter = table.cell(i, col_index[4]).value url = table.cell(i, col_index[5]).value self.add_ts(question=question, content=content, context=context, \ behavior=behavior, parameter=parameter, url=url) except Exception as error: print('Error: %s' %error) return None else: print('Error! Data of %s is empty!' %sheet_name) return None def handle_excel(self, filename=None, custom_sheets=None): """Processing data of excel. 
""" assert filename is not None, "filename can not be None" data = read_excel(filename) data_sheets = data.sheet_names() if custom_sheets: sheet_names = list(set(data_sheets).intersection(set(custom_sheets))) else: sheet_names = data_sheets for sheet_name in sheet_names: # 可自定义要导入的子表格 table = data.sheet_by_name(sheet_name) topics = [] # 1.Select specified table # table = data.sheet_by_index(0) if data: # 2.Select specified column col_format = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'] try: nrows = table.nrows # ncols = table.ncols str_upcase = [i for i in string.ascii_uppercase] i_upcase = range(len(str_upcase)) ncols_dir = dict(zip(str_upcase, i_upcase)) col_index = [ncols_dir.get(i) for i in col_format] # 前两行为表头 for i in range(2, nrows): name = table.cell(i, col_index[0]).value content = table.cell(i, col_index[1]).value # TODO 确定用户可以自定义哪些内容 topic = table.cell(i, col_index[2]).value if self.is_admin else "user_chat" behavior = table.cell(i, col_index[3]).value parameter = table.cell(i, col_index[4]).value url = table.cell(i, col_index[5]).value tag = table.cell(i, col_index[6]).value keywords = table.cell(i, col_index[7]).value api = table.cell(i, col_index[8]).value txt = table.cell(i, col_index[9]).value img = table.cell(i, col_index[10]).value chart = table.cell(i, col_index[11]).value # hot = 0 table.cell(i, col_index[12]).value # 3.Your processing function of excel data here self.add_qa(name=name, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, \ delimiter="|") # 添加到场景标签列表 topics.append(topic) except Exception as error: print('Error: %s' %error) return None else: print('Error! Data of %s is empty!' %sheet_name) return None # Modify in 2017.4.28 # 若子表格名字不存在,新建配置子图,否则只修改topic属性 # DeprecationWarning: Graph.find_one is deprecated, use NodeSelector instead. 
2017-5-18 # config_node = self.graph.find_one("Config", "name", sheet_name) config_node = self.selector.select("Config", name=sheet_name).first() if not config_node: self.graph.run('MATCH (user:User {userid: "' + self.gconfig["userid"] + \ '"})\nCREATE (config:Config {name: "' + sheet_name + '", topic: "' + \ ",".join(set(topics)) + '"})\nCREATE (user)-[:has {bselected: 1, available: 1}]->(config)') else: alltopics = config_node["topic"].split(",") alltopics.extend(topics) config_node["topic"] = ",".join(set(alltopics)) self.graph.push(config_node) def handle_txt(self, filename=None): """ Processing text file to generate subgraph. """ assert filename is not None, "filename can not be None!" with open(filename, encoding="UTF-8") as file: question = file.readline().rstrip() while question: answer = file.readline().rstrip() print("question: " + question) print("answer: " + answer) self.add_qa(name=question, content=answer, delimiter="|") question = file.readline().rstrip() def register_subgraph(self, *, label="Config", name=None, topic=None): """注册子知识库 """ assert name is not None, "Subgraph name can not be None!" assert topic is not None, "Subgraph topic can not be None!" 
subgraph = self.selector.select(label, name=name).first() if subgraph: topics = subgraph["topic"].split(",") topics.extend(topic.split(",")) subgraph["topic"] = ",".join(set(topics)) self.graph.push(subgraph) else: node = Node(label, name=name, topic=topic) self.graph.create(node) def register_user(self, *, label="User", profile=None): """注册用户 """ userid = input("\n欢迎注册!请输入userid: ") while not userid: userid = input("userid不能为空!请输入userid: ") while self.graph.run("MATCH (user:User {userid: '" + userid + "'}) RETURN user").data(): userid = input("用户已存在!请输入新的userid: ") username = input("username: "******"robotname: ") robotage = input("robotage: ") robotgender = input("robotgender: ") mother = input("mother: ") father = input("father: ") companyname = input("companyname: ") companytype = input("companytype: ") servicename = input("servicename: ") director = input("director: ") address = input("address: ") province = input("province: ") city = input("city: ") node = Node(label, userid=userid, username=username, robotname=robotname, \ robotage=robotage, robotgender=robotgender, mother=mother, father=father, \ companyname=companyname, companytype=companytype, servicename=servicename, \ director=director, address=address, province=province, city=city) self.graph.create(node) print("注册成功!") # 设置知识库权限 subgraph_names = [item["name"] for item in self.selector.select("Config")] print("可配置知识库列表:", subgraph_names) for name in subgraph_names: self.manage_user(userid=userid, name=name) def manage_user(self, *, userid=None, name=None): """管理用户 """ assert userid is not None, "Userid can not be None!" assert name is not None, "Subgraph name can not be None!" 
user = self.selector.select("User", userid=userid).first() if not user: print("用户不存在,建议您先注册!") return subgraph = self.selector.select("Config", name=name).first() if not subgraph: print("知识库不存在,建议您先注册!") return print("\n待配置知识库:", name) bselected = input("是否选择 [1/0]: ") if not bselected: bselected = "1" available = input("是否可用 [1/0]: ") if not available: available = "1" set_string = "MATCH (user:User {userid: '" + userid + "'}), (subgraph:Config {name: '" \ + name + "'}) CREATE UNIQUE (user)-[r:has]->(subgraph) SET r.bselected=" \ + bselected + ", r.available=" + available self.graph.run(set_string)
for row in f.readlines(): row = row.strip() rowDict = row.split(':') bingming.append(rowDict[1]) dingyi = [] with open("简介.txt", encoding="utf-8") as f: for row in f.readlines(): row = row.strip() dingyi.append(row) bingyibingji = [] with open("病因病机.txt", encoding="utf-8") as f: for row in f.readlines(): row = row.strip() bingyibingji.append(row) bingzhengtezheng = [] with open("临床表现.txt", encoding="utf-8") as f: for row in f.readlines(): row = row.strip() bingzhengtezheng.append(row) matcher = NodeMatcher(graph) for i in range(0, 44): n = matcher.match("皮肤病", 皮肤病名称=bingming[i]) for node in n: #print(dict(node)['皮肤病名称']) node['皮肤病定义'] = dingyi[i] node['皮肤病病因病理'] = bingyibingji[i] node['皮肤病症状特点'] = bingzhengtezheng[i] graph.push(node)
class DataBase:
    """Data-access layer for the news/classification Neo4j graph.

    Uses the py2neo v3 API (``py2neo.authenticate`` + ``Graph.run`` for raw
    Cypher, and the OGM ``.select`` interface for the Site/News/Tipo model
    classes defined elsewhere in this project).
    """

    def __init__(self):
        # NOTE(review): hard-coded localhost credentials — move to
        # configuration / environment variables before deploying.
        py2neo.authenticate("localhost:7474", "neo4j", "st1215")
        self.graph = Graph("http://localhost:7474/db/data/")

    def get_all_news_from(self, site):
        """Return (title, cleaned content, type description) tuples for every
        classified news item published by *site*.

        Fix: the query is parameterised (was string concatenation, which is
        Cypher-injection prone and breaks on names containing quotes).
        """
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News)-[:E]-(t:Tipo) '
            'WHERE s.name={site} RETURN n,t', site=site).data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']),
                 n['t']['description']))
        return dataSet

    def get_all_news_from_no_class(self, site):
        """Like :meth:`get_all_news_from`, but ignores the Tipo link; the
        class slot of each tuple is the empty string."""
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) '
            'WHERE s.name={site} RETURN n', site=site).data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']),
                 ''))
        return dataSet

    def get_news_by_title(self, title):
        """Return a News object populated from the last node matching *title*
        (an empty News object when there is no match)."""
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) '
            'WHERE n.title={title} RETURN n', title=title).data()
        news = News()
        for n in all_news:
            news.title = n['n']['title']
            # BUG FIX: was `news.url = news.title = n['n']['url']`, which
            # overwrote the title with the URL one line after setting it.
            news.url = n['n']['url']
        return news

    def get_all_data_set(self, sites):
        """Concatenate the classified data sets of every site in *sites*."""
        dataSet = list()
        for s in sites:
            dataSet.extend(self.get_all_news_from(s))
        return dataSet

    def get_site(self, name):
        """Return the first Site OGM object named *name*, or None."""
        sites = Site.select(self.graph).where(name=name)
        for site in sites:
            return site

    def get_clazz(self, name):
        """Return the first Tipo whose description equals *name*, or None."""
        tipos = Tipo.select(self.graph).where(description=name)
        for tipo in tipos:
            return tipo

    def save_site(self, site_name, url):
        """Create or update a Site node with the given name and url."""
        site = Site()
        site.name = site_name
        site.url = url
        self.graph.push(site)

    def save_news(self, site, url, title, sub_title, content, tipo):
        """Create a News node linked to its Site (by name) and Tipo (by
        description), then merge it into the graph."""
        s = self.get_site(site)
        t = self.get_clazz(tipo)
        news = News()
        news.site.add(s)
        news.tipo.add(t)
        news.title = title
        news.sub_title = sub_title
        news.content = content
        news.url = url
        self.graph.merge(news)

    def create_rel(self, node1, node2):
        """Create a (node1)-[:PUBLICOU]->(node2) relationship.

        BUG FIX: the original passed a raw Cypher pattern string to
        ``graph.create()`` — py2neo's create() takes graph entities, not
        Cypher text — and ignored both arguments entirely.
        """
        self.graph.create(py2neo.Relationship(node1, "PUBLICOU", node2))

    def install(self):
        """Wipe the whole database for a fresh install."""
        # The original issued the same DETACH DELETE twice; once suffices.
        self.graph.run("MATCH (n) DETACH DELETE n")

    def delete(self):
        """Wipe the database, then re-seed the two Tipo classes.

        NOTE(review): despite its name this method also recreates the
        'False'/'True' Tipo nodes — behaviour preserved from the original.
        """
        self.graph.delete_all()
        tipo = Tipo()
        tipo.description = 'False'
        self.graph.merge(tipo)
        tipo = Tipo()
        tipo.description = 'True'
        self.graph.merge(tipo)
class TestNeoDBHandler(unittest.TestCase):
    """Integration tests for NeoDBHandler against a scratch Neo4j graph.

    setUp seeds five 'TEST'-labelled Twirp nodes (three parties) plus five
    DIRECT/INDIRECT relationships; tearDown detach-deletes every TEST node.
    Written for Python 2 and the py2neo v2 API, as shown by ``xrange``,
    ``node.properties`` and ``graph.cypher.execute`` below.
    """

    def setUp(self):
        """Seed the scratch graph with 5 Twirp nodes and 5 tweet edges."""
        self.graph = Graph(TEST_GRAPH_DB)
        self.node_list = [Node("TEST", test_id=i) for i in xrange(5)]

        # Nodes
        # -----
        # Counts scale with the node index i, so each node is distinguishable
        # in the assertions below (e.g. node 3 has 30 tweets, 300 followers).
        for i, node in enumerate(self.node_list):
            node.labels.add("Twirp")
            node.properties.update({
                "user_id": i*100000,
                "username":"",
                "name":"",
                "handle":"",
                "followers_count":i*100,
                "friends_count":i*50,
                "tweet_count":i*10,
                "retweet_count":i*5,
                "been_retweeted_count":i*3,
                "favourite_hashtag":"",
                "hashtag_count":i*2,
                "archipelago_id":i*1,
                "subscribed": True,
                "constituency":"CB"+str(i),
                "offices":["office"+str(i), "sedge steward"],
                })

        # NOTE(review): the "username" values below appear redacted ("******")
        # in this copy of the source; no test asserts on username here, so the
        # redaction is benign — confirm against upstream history.
        self.node_list[0].properties.update({"username":"******",
                                             "name":"Michael Blue Eyes",
                                             "handle":"MBEyes",
                                             "favourite_hashtag":"#roth",
                                             "party":"DC" })
        self.node_list[1].properties.update({"username":"******",
                                             "name":"Little Richard",
                                             "handle":"LRichy",
                                             "favourite_hashtag":"#rawls",
                                             "party":"DC" })
        self.node_list[2].properties.update({"username":"******",
                                             "name":"The Boy Wonder",
                                             "handle":"tBW",
                                             "favourite_hashtag":"#richyfeynman",
                                             "party":"Marvel" })
        self.node_list[3].properties.update({"username":"******",
                                             "name":"Kendog Lamar",
                                             "handle":"Kdog",
                                             "favourite_hashtag":"#kanye",
                                             "party":"Marvel"})
        self.node_list[4].properties.update({"username":"******",
                                             "name":"Tiny Hands",
                                             "handle":"tinyhands",
                                             "favourite_hashtag":"#ihavetinyhands",
                                             "party":"Beano" })

        # Relationships
        # --------------
        # mbe   -[MENTION]>           lrich
        # mbe   -[REPLIES]>           ken
        # lrich -[REPLIES]>           mbe
        # tbw   -[RETWEETS]>          lrich
        # tbw   -[MENTIONS_BY_PROXY]> mbe
        # ken   -!->
        # th    -!->
        defaults = {
            "mentions":0, "mention_last":"", "mention_date":"",
            "replies":0, "reply_last":"", "reply_date":"",
            "retweets":0, "retweet_last":"", "retweet_date":""
        }
        mbe1 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[1], **defaults)
        mbe2 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[3], **defaults)
        lrich = Relationship(self.node_list[1], "DIRECT", self.node_list[0], **defaults)
        tbw = Relationship(self.node_list[2], "DIRECT", self.node_list[1], **defaults)
        tbw2 = Relationship(self.node_list[2], "INDIRECT", self.node_list[0], **defaults)

        mbe1.properties.update({ "mentions":5, "mention_last":"1000000", "mention_date":"today" })
        mbe2.properties.update({ "replies":10, "reply_last":"2000000", "reply_date":"tommorow" })
        lrich.properties.update({ "replies":15, "reply_last":"3000000", "reply_date":"yesterday" })
        tbw.properties.update({ "retweets":20, "retweet_last":"4000000", "retweet_date":"thismorning" })
        tbw2.properties.update({ "mentions":1, "mention_last":"3000000", "mention_date":"yesterday" })

        for node in self.node_list:
            self.graph.create(node)
        self.graph.create(mbe1)
        self.graph.create(mbe2)
        self.graph.create(lrich)
        self.graph.create(tbw)
        self.graph.create(tbw2)
        # NOTE(review): push() is called with no entities — py2neo's
        # Graph.push takes the entities to push, so this looks like a no-op;
        # confirm it is intentional.
        self.graph.push()

    def tearDown(self):
        """Detach-delete every TEST node and verify the graph is clean."""
        # remove test items
        self.graph.cypher.execute("MATCH (n:TEST) DETACH DELETE n")
        empty_list = [ _ for _ in self.graph.find('TEST') ]
        self.assertEqual( empty_list, [])

    ########################################################################
    #                            CYPHER QUERIES                            #
    ########################################################################

    def test_get_party_nodes(self):
        """get_party_nodes('Marvel', 0): both Marvel Twirps, all tweet links."""
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                # Kendog has no outgoing edges, so every per-tweet list is [].
                "name":"Kendog Lamar",
                "handle":"Kdog",
                "party":"Marvel",
                "constituency":"CB3",
                "offices":["office3", "sedge steward"],
                "tweets": 30,
                "friends": 150,
                "followers": 300,
                "archipelago_id": 3,
                "tweeted":[],
                "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                # tBW has one INDIRECT edge to MBEyes and one DIRECT to LRichy;
                # the per-tweet lists are aligned pairwise with "tweeted".
                "name":"The Boy Wonder",
                "handle":"tBW",
                "party":"Marvel",
                "constituency":"CB2",
                "offices":["office2", "sedge steward"],
                "tweets": 20,
                "friends": 100,
                "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0],
                "mention_last":['3000000', ""],
                "mention_date":['yesterday', ""],
                "replies":[0,0],
                "reply_last":["",""],
                "reply_date":["",""],
                "retweets":[0, 20],
                "retweet_last":["",'4000000'],
                "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 0) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_party_nodes_min_tweet(self):
        """get_party_nodes('Marvel', 5): only edges with >= 5 tweets survive,
        dropping tBW's 1-mention INDIRECT link to MBEyes."""
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"Kendog Lamar",
                "handle":"Kdog",
                "party":"Marvel",
                "constituency":"CB3",
                "offices":["office3", "sedge steward"],
                "tweets": 30,
                "friends": 150,
                "followers": 300,
                "archipelago_id": 3,
                "tweeted":[],
                "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                "name":"The Boy Wonder",
                "handle":"tBW",
                "party":"Marvel",
                "constituency":"CB2",
                "offices":["office2", "sedge steward"],
                "tweets": 20,
                "friends": 100,
                "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0],
                "mention_last":[""],
                "mention_date":[""],
                "replies":[0],
                "reply_last":[""],
                "reply_date":[""],
                "retweets":[20],
                "retweet_last":['4000000'],
                "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 5) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_default(self):
        """get_cross_party_nodes('Marvel', 'DC', 0): only tBW links across
        the party boundary (both of his edges point at DC members)."""
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder",
                "handle":"tBW",
                "party":"Marvel",
                "constituency":"CB2",
                "offices":["office2", "sedge steward"],
                "tweets": 20,
                "friends": 100,
                "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0],
                "mention_last":['3000000', ""],
                "mention_date":['yesterday', ""],
                "replies":[0,0],
                "reply_last":["",""],
                "reply_date":["",""],
                "retweets":[0, 20],
                "retweet_last":["",'4000000'],
                "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 0 ) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_min_tweets(self):
        """get_cross_party_nodes('Marvel', 'DC', 5): the min-tweet threshold
        again drops tBW's single-mention INDIRECT edge."""
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder",
                "handle":"tBW",
                "party":"Marvel",
                "constituency":"CB2",
                "offices":["office2", "sedge steward"],
                "tweets": 20,
                "friends": 100,
                "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0],
                "mention_last":[""],
                "mention_date":[""],
                "replies":[0],
                "reply_last":[""],
                "reply_date":[""],
                "retweets":[20],
                "retweet_last":['4000000'],
                "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 5) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    ########################################################################
    #              ADDING TO DB (TWIRPS CLASSES)->(PY2NEO OBJS)            #
    ########################################################################

    def test_add_Twirp_to_database(self):
        """add_Twirp_to_database stores every Twirp field on a new node."""
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Test Data
        new_twirp = Twirp(None, 'test')
        new_twirp.id = 314150000000
        # NOTE(review): this value appears redacted ("******") in this copy
        # of the source, yet the assertion below expects 'BilboBagginsMP' —
        # restore the original literal from upstream history.
        new_twirp.username = '******'
        new_twirp.name = 'Bilbo Baggins'
        new_twirp.handle = 'bilbo'
        new_twirp.followers_count = 20
        new_twirp.friends_count = 30
        new_twirp.tweet_count = 40
        new_twirp.retweet_count = 50
        new_twirp.been_retweet_count = 60
        new_twirp.favourite_hashtag = '#onering'
        new_twirp.hashtag_count = 70
        new_twirp.archipelago_id = 80
        new_twirp.twirps_type = -1
        new_twirp.subscribed = False
        new_twirp.geo = False

        # Add to database (with 'TEST' label)
        neo_db_handler.add_Twirp_to_database(new_twirp, is_test_mode=True)

        # Check results
        results = [ _ for _ in self.graph.cypher.execute(
            "MATCH (n {handle:'bilbo'}) RETURN n")]
        self.assertEqual(len(results), 1)
        node = results[0][0]

        # Interrogate Node
        self.assertEqual(node.get_labels(), [u'TEST', u'Twirp', u'Other'])
        self.assertEqual(node["user_id"],314150000000)
        self.assertEqual(node["username"],'BilboBagginsMP')
        self.assertEqual(node["name"],'Bilbo Baggins')
        self.assertEqual(node["handle"],'bilbo')
        self.assertEqual(node["followers_count"],20)
        self.assertEqual(node["friends_count"],30)
        self.assertEqual(node["tweet_count"],40)
        self.assertEqual(node["retweet_count"],50)
        self.assertEqual(node["been_retweeted_count"],60 )
        self.assertEqual(node["favourite_hashtag"],'#onering')
        self.assertEqual(node["hashtag_count"],70)
        self.assertEqual(node["archipelago_id"],80 )
        self.assertEqual(node["subscribed"],False)

    def test_add_Tweet_to_database__mention(self):
        """A plain mention creates one DIRECT edge with mention counters set."""
        # TEST: (LRich)->(tinyhands) - mention: ("Hey @tinyhands")
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands')]
        new_tweet.content = 'Generic tweet @tinyhands'  # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3    # not stored here
        new_tweet.favourite_count = 4  # not stored here
        new_tweet.hashtags = ['clothes']  # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com']  # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'})
            RETURN r""")]
        self.assertEqual(len(results), 1)
        relationship = results[0][0]

        # In depth check
        self.assertEqual(relationship.type, u'DIRECT')
        self.assertEqual(relationship["mentions"], 1)
        self.assertEqual(relationship["mention_last"], '1')
        self.assertEqual(relationship["mention_date"], 'a date string')
        self.assertEqual(relationship["replies"], 0)
        self.assertEqual(relationship["reply_last"], '')
        self.assertEqual(relationship["reply_date"], '')
        self.assertEqual(relationship["retweets"], 0)
        self.assertEqual(relationship["retweet_last"], '')
        self.assertEqual(relationship["retweet_date"], '')

    def test_add_Tweet_to_database__reply(self):
        """A reply increments reply counters on the replied-to edge while a
        plain mention in the same tweet gets its own mention edge."""
        # TEST: (LRich) ->(tBW) - reply & mention;
        # (LRich) ->(tinyhands) mention EG: (reply->tBW):"Hey @tBW, @tinyhands"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands'), (200000, 'tBW')]
        new_tweet.content = 'Generic tweet @tinyhands @tBW'  # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = True
        new_tweet.in_reply_to_user = (200000, 'tBW')
        new_tweet.in_reply_to_status_id = 2
        new_tweet.retweet_count = 3    # not stored here
        new_tweet.favourite_count = 4  # not stored here
        new_tweet.hashtags = ['clothes']  # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/']  # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check; LRichy's pre-existing edge to MBEyes is excluded.
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b)
            WHERE b.handle<>'MBEyes'
            RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'DIRECT')
        self.assertEqual(results[0][1], 'The Boy Wonder')
        self.assertEqual(results[0][0]["mentions"], 0)
        self.assertEqual(results[0][0]["mention_last"], '')
        self.assertEqual(results[0][0]["mention_date"], '')
        self.assertEqual(results[0][0]["replies"], 1)
        self.assertEqual(results[0][0]["reply_last"], '1')
        self.assertEqual(results[0][0]["reply_date"], 'a date string')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Tiny Hands')
        self.assertEqual(results[1][0]["mentions"], 1)
        self.assertEqual(results[1][0]["mention_last"], '1')
        self.assertEqual(results[1][0]["mention_date"], 'a date string')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 0)
        self.assertEqual(results[1][0]["retweet_last"], '')
        self.assertEqual(results[1][0]["retweet_date"], '')

    def test_add_Tweet_to_database__retweet(self):
        """A retweet increments retweet counters on a DIRECT edge to the
        retweeted user; mentions inside it become INDIRECT edges."""
        # TEST: (tiny) ->(MBEyes) - reply & mention;
        # (tiny) ->(Kdog) mention_by_proxy EG: (ret->MBE):"Hey @MBE, @Kdog"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 400000
        new_tweet.handle = 'tinyhands'
        new_tweet.mentions = [(300000, 'Kdog')]
        new_tweet.content = 'Generic tweet @Kdog'  # not stored here
        new_tweet.is_retweet = True
        new_tweet.retweeted_user = (0, 'MBEyes')
        new_tweet.retweet_status_id = 2
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3    # not stored here
        new_tweet.favourite_count = 4  # not stored here
        new_tweet.hashtags = []        # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/']  # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'tinyhands'})-[r]->(b)
            RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'INDIRECT')
        self.assertEqual(results[0][1], 'Kendog Lamar')
        self.assertEqual(results[0][0]["mentions"], 1)
        self.assertEqual(results[0][0]["mention_last"], '1')
        self.assertEqual(results[0][0]["mention_date"], 'a date string')
        self.assertEqual(results[0][0]["replies"], 0)
        self.assertEqual(results[0][0]["reply_last"], '')
        self.assertEqual(results[0][0]["reply_date"], '')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Michael Blue Eyes')
        self.assertEqual(results[1][0]["mentions"], 0)
        self.assertEqual(results[1][0]["mention_last"], '')
        self.assertEqual(results[1][0]["mention_date"], '')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 1)
        self.assertEqual(results[1][0]["retweet_last"], '1')
        self.assertEqual(results[1][0]["retweet_date"], 'a date string')