def sync_meetup_data(group):
    graph = Graph(host=config['neo4j']['host'],
                  user=config['neo4j']['user'],
                  password=config['neo4j']['password'])
    location = get_group_location(group)
    tx = graph.begin()
    location_node = Node('Location', city=location['city'],
                         state=location['state'], country=location['country'])
    tx.create(location_node)
    tx.commit()
    meetup_groups = get_groups_in_location(location, category=34)
    logger.info('Finding upcoming meetup events at {} meetup groups'.format(
        len(meetup_groups)))
    for group in meetup_groups:
        time.sleep(2)
        group, events = get_group_events(group)
        tx = graph.begin()
        group_node = Node("Group", name=group)
        tx.create(group_node)
        location_relation = Relationship(location_node, 'HAS MEETUP', group_node)
        tx.create(location_relation)
        for event in events:
            event_node = Node('Event', name=event['name'], time=event['time'])
            tx.create(event_node)
            rel = Relationship(group_node, "HAS EVENT", event_node)
            tx.create(rel)
        tx.commit()
        logger.info('Transaction ({}) status: {}'.format(group, str(tx.finished())))
def handle(self, *args, **options):  # pylint: disable=unused-argument
    """
    Iterates through each course, serializes them into graphs, and saves
    those graphs to neo4j.
    """
    # first, make sure that there's a valid neo4j configuration
    if settings.NEO4J_CONFIG is None:
        raise CommandError(
            "No neo4j configuration (NEO4J_CONFIG) defined in lms.auth.json."
        )

    auth_params = ["{host}:{https_port}", "{user}", "{password}"]
    authenticate(*[param.format(**settings.NEO4J_CONFIG) for param in auth_params])

    graph = Graph(**settings.NEO4J_CONFIG)
    mss = ModuleStoreSerializer()

    total_number_of_courses = len(mss.all_courses)

    for index, course in enumerate(mss.all_courses):
        # first, clear the request cache to prevent memory leaks
        RequestCache.clear_request_cache()

        log.info(
            "Now exporting %s to neo4j: course %d of %d total courses",
            course.id,
            index + 1,
            total_number_of_courses
        )

        nodes, relationships = mss.serialize_course(course.id)
        log.info(
            "%d nodes and %d relationships in %s",
            len(nodes),
            len(relationships),
            course.id
        )

        transaction = graph.begin()
        try:
            # first, delete the existing course
            transaction.run(
                "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
                    six.text_type(course.id)
                )
            )

            # now, re-add it
            self.add_to_transaction(nodes, transaction)
            self.add_to_transaction(relationships, transaction)
            transaction.commit()

        except Exception:  # pylint: disable=broad-except
            log.exception(
                "Error trying to dump course %s to neo4j, rolling back",
                six.text_type(course.id)
            )
            transaction.rollback()
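# A hedged sketch (not part of the edx code above): the same DETACH DELETE
# with a bound parameter instead of string interpolation, which sidesteps
# quoting and Cypher-injection issues. The `$`-style placeholder needs
# Neo4j 3+; older servers use the `{course_key}` form instead.
transaction.run(
    "MATCH (n:item) WHERE n.course_key = $course_key DETACH DELETE n",
    course_key=six.text_type(course.id),
)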
def expot_data(cid, data):
    """
    Import the data into neo4j, adding the label ``cid`` to every imported entity.
    :param cid:
    :param data:
    :return:
    """
    title = data[0]
    host, http_port, bolt_port, user, password = '******', 7474, 7687, 'neo4j', 'gswewf'
    graph = Graph(host=host, http_port=http_port, bolt_port=bolt_port,
                  user=user, password=password)
    # title = ["_id", "_labels", "tagline", "title", "released", "name", "born", "_start", "_end", "_type", "roles"]
    _start_index = title.index('_start')
    node_property = title[2:_start_index]
    relation_property = title[_start_index + 3:]
    nodes = {}
    relationships = []
    tx = graph.begin()
    for line in data[1:]:
        _id, _labels = line[:2]
        node_property_value = line[2:_start_index]
        _start, _end, _type = line[_start_index:_start_index + 3]
        relation_property_value = line[_start_index + 3:]
        _labels = [label for label in _labels.strip().split(':') if label]
        _labels.append(cid.capitalize())
        # print(line)
        # nodes = {"a": Node("person", name="weiyudang", age=13), "b": Node("person", name="wangjiaqi")}
        if _id and not _start and not _end:
            property_dict = {k: v for k, v in zip(node_property, node_property_value) if v}
            _cid = "{}_{}".format(cid.lower(), _id)
            updatetime = int(time.time() * 1000)  # consistent with neo4j's timestamp()
            node = Node(*_labels, _cid=_cid, updatetime=updatetime, **property_dict)
            # graph.merge(node)
            nodes.setdefault(_cid, node)
            tx.create(node)
        elif not _id and _start and _end:
            property_dict = {k: v for k, v in zip(relation_property, relation_property_value) if v}
            start_cid = "{}_{}".format(cid.lower(), _start)
            end_cid = "{}_{}".format(cid.lower(), _end)
            # a = Node(_cid=start_cid)
            # b = Node(_cid=end_cid)
            a = nodes.get(start_cid)
            b = nodes.get(end_cid)
            a_knows_b = Relationship(a, _type, b, **property_dict)
            # graph.merge(a_knows_b)
            relationships.append(a_knows_b)
            tx.create(a_knows_b)
        else:
            raise ValueError("Malformed row: {}".format(line))
    print(len(nodes), len(relationships))
    # sub_graph = Subgraph(nodes=nodes, relationships=relationships)
    # graph.create(sub_graph)
    tx.commit()
class BaseUploader(object):
    __metaclass__ = abc.ABCMeta

    def __init__(self, graph_url, file_to_process):
        # watch("httpstream")
        self.graph = Graph(graph_url)
        self.setup(self.graph)
        base_dir = os.path.dirname(os.path.dirname(__file__))
        self.input_file = os.path.join(base_dir, file_to_process)
        self.idx = 0
        print('connected to graph db at: ' + str(self.graph))

    @abc.abstractmethod
    def setup(self, graph):
        """Prepare the graph before processing.

        :rtype : None
        """

    @abc.abstractmethod
    def add_query(self, record, tx):
        """Add the queries for a single record to the transaction."""
        return

    def process(self):
        """Process the file."""
        print('start processing')
        with open(self.input_file, 'rt', encoding='utf-8') as infile:
            reader = csv.DictReader(infile, quoting=csv.QUOTE_NONE)
            tx = self.graph.begin()
            for row in reader:
                if self.idx % 1000 == 0 and self.idx != 0:
                    tx.commit()
                    tx = self.graph.begin()
                    print('committed 1000 rows up to row: ' + str(self.idx))
                self.add_query(row, tx)
                self.idx += 1
            tx.commit()
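# The commit-every-1000-rows idea above is common enough to factor out. A
# minimal, hedged sketch of the same pattern as a standalone helper; the
# names here are illustrative, not part of the original class.
def commit_every(graph, rows, add_query, batch_size=1000):
    """Run add_query(row, tx) for each row, committing in batches."""
    tx = graph.begin()
    for idx, row in enumerate(rows, start=1):
        add_query(row, tx)
        if idx % batch_size == 0:
            tx.commit()           # flush the completed batch
            tx = graph.begin()    # open a fresh transaction
    tx.commit()                   # commit any remainder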
def neo4j(user, password, hostname, data):
    try:
        # Authenticate against the server and connect to it
        authenticate(hostname, user, password)
        graph = Graph()
    # If the server cannot be reached:
    except Exception:
        print("Unable to reach server.")
        sys.exit()

    graph.data("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r")

    # start graph operations
    start = graph.begin()

    # Create a node for every movie
    for movie in data.movies:
        movie_node = Node("Movies", mov_id=movie.ID, title=movie.title,
                          released_year=movie.year, rating=movie.rating,
                          genre=movie.genre)
        start.merge(movie_node)

    # Create a node for every director in data.directors
    for director in data.directors:
        director_node = Node("Directors", userid=director.ID, fullname=director.name)
        start.merge(director_node)

    # Create a node for every actor in data.actors
    for actor in data.actors:
        actor_node = Node("Actors", userid=actor.ID, fullname=actor.name)
        start.merge(actor_node)

    # Create a node for every collector in data.collectors
    for collector in data.collectors:
        collector_node = Node("Collectors", userid=collector.ID,
                              fullname=collector.name, email=collector.email)
        start.merge(collector_node)

    start.commit()
    relation(data, graph)
    queries(data, graph)
class Neo4j():

    def __init__(self, host='localhost:7474', username='******', password='******'):
        if not has_py2neo:
            raise Exception('py2neo is required, please install: pip install py2neo')
        authenticate(host, username, password)
        self.graph = Graph("http://{}/db/data/".format(host))

    def load_events_directory(self, directory):
        self.events = []
        for path in glob.glob(os.path.join(directory, '*.json')):
            e = MISPEvent()
            e.load(path)
            self.import_event(e)

    def del_all(self):
        self.graph.delete_all()

    def import_event(self, event):
        tx = self.graph.begin()
        event_node = Node('Event', uuid=event.uuid, name=event.info)
        # event_node['distribution'] = event.distribution
        # event_node['threat_level_id'] = event.threat_level_id
        # event_node['analysis'] = event.analysis
        # event_node['published'] = event.published
        # event_node['date'] = event.date.isoformat()
        tx.create(event_node)
        for a in event.attributes:
            attr_node = Node('Attribute', a.type, uuid=a.uuid)
            attr_node['category'] = a.category
            attr_node['name'] = a.value
            # attr_node['to_ids'] = a.to_ids
            # attr_node['comment'] = a.comment
            # attr_node['distribution'] = a.distribution
            tx.create(attr_node)
            member_rel = Relationship(event_node, "is member", attr_node)
            tx.create(member_rel)
            val = Node('Value', name=a.value)
            ev = Relationship(event_node, "has", val)
            av = Relationship(attr_node, "is", val)
            s = val | ev | av
            tx.merge(s)
            # tx.graph.push(s)
        tx.commit()
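# Why `val | ev | av` above works: py2neo's union operator combines nodes and
# relationships into a single Subgraph, so one merge call covers all three
# entities in one round trip. A hedged fragment, reusing the names from the
# snippet above:
s = val | ev | av   # Subgraph containing the Value node plus both relationships
tx.merge(s)         # merged together rather than with three separate calls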
def update_neo4j_db(_neo_data, _print_status_freq):
    # neo4j access stuff
    neo_uri = r"bolt://localhost:7687"
    auth = (r"neo4j", r"abc")
    stmt1 = r'MERGE (:Image {name: $_in_img_name, dataset: $_in_img_dataset})'
    stmt2 = r'MERGE (:Object {name: $_in_obj_det})'
    stmt3 = r'MATCH (i1:Image{name: $_in_img_name, dataset: $_in_img_dataset}) ' \
            r'MATCH (o1:Object{name: $_in_obj_det}) ' \
            r'CREATE (i1)-[:HAS{score: $_in_det_score}]->(o1)'
    total_images_info_count = len(_neo_data)
    try:
        graph = Graph(uri="bolt://localhost:7687", auth=("neo4j", "abc"))
        for count, each_img_info in enumerate(_neo_data):
            if count % _print_status_freq == 0:
                print(f"\t\tProcessing image {count + 1} of {total_images_info_count}")
            tx = graph.begin()
            # create Image node if not already existing
            tx.run(stmt1, parameters={
                "_in_img_name": each_img_info["img"],
                "_in_img_dataset": each_img_info["datasource"]
            })
            for each_detection in each_img_info["det"]:
                # create Object node if not already existing
                tx.run(stmt2, parameters={"_in_obj_det": each_detection[0]})
                # create HAS relation between the above nodes. Note: by now the
                # image and object nodes must exist
                tx.run(stmt3, parameters={
                    "_in_img_name": each_img_info["img"],
                    "_in_img_dataset": each_img_info["datasource"],
                    "_in_obj_det": each_detection[0],
                    "_in_det_score": each_detection[1]
                })
            tx.commit()
            while not tx.finished():
                pass  # tx.finished() returns True once the commit is complete
    except Exception as error_msg_neo_write:
        print(f"\n\nUnexpected ERROR attempting entry to neo4j.")
        print(f"\nMessage:\n{error_msg_neo_write}")
        print(f"\nFunction call return with RC=1000.\n\n")
        return 1000
    # return with RC = 0 as successful processing
    return 0
def leave_group(self, group_name, user_name):
    graph = Graph('bolt://localhost:7687', username='******', password='******')
    g = graph.begin()
    g.run("MATCH (a:Group) WHERE a.groupName=$gname DETACH DELETE a",
          gname=group_name)
    g.commit()
    myGroups = self.my_groups(user_name)
    # rebuild the user's newline-separated group list, dropping the group
    # that was just left
    groups = ""
    for group in myGroups:
        if group != group_name:
            groups += group + "\n"
    graphdb = GraphDatabase.driver("bolt://localhost:7687",
                                   auth=("neo4j", "facebook"),
                                   encrypted=False)
    graphdb.session().run(
        "MATCH (a:User {userName:$uname}) SET a.groups=$group",
        uname=user_name, group=groups)
def add_to_graph(yearcaseno1, id1, title1, yearcaseno2, id2):
    graph = Graph("http://localhost:7474/db/data/", password=NEO4J_PASSWORD)
    if id1 is not None:
        node_1 = Node("CASE", yearcaseno=yearcaseno1, id=id1, title=title1)
    else:
        node_1 = Node("CASE", yearcaseno=yearcaseno1)
    if id2 is not None:
        node_2 = Node("CASE", yearcaseno=yearcaseno2, id=id2)
    else:
        node_2 = Node("CASE", yearcaseno=yearcaseno2)
    rel = Relationship(node_1, "REFER", node_2)
    tx = graph.begin()
    tx.merge(node_1, primary_label='CASE', primary_key='yearcaseno')
    tx.merge(node_2, primary_label='CASE', primary_key='yearcaseno')
    tx.merge(rel)
    tx.commit()
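# In py2neo v4+, merge matches on the (primary_label, primary_key) pair, so
# calling the function above twice with the same yearcaseno reuses the
# existing CASE node instead of duplicating it. A hedged usage sketch; the
# URI, credentials, and case numbers are placeholders:
from py2neo import Graph, Node, Relationship

graph = Graph("bolt://localhost:7687", auth=("neo4j", "secret"))  # placeholder credentials
tx = graph.begin()
a = Node("CASE", yearcaseno="2018-0042")
b = Node("CASE", yearcaseno="2019-0007")
tx.merge(a, primary_label="CASE", primary_key="yearcaseno")  # match-or-create
tx.merge(b, primary_label="CASE", primary_key="yearcaseno")
tx.merge(Relationship(a, "REFER", b))  # endpoints already resolved, so only the edge is merged
tx.commit()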
def create_node_and_rel(node):
    """Given a ``node`` describing an entire subgraph, materialise that
    subgraph in Neo4j.

    Args:
        node(Nodes): a `trie.Nodes` object

    Returns:

    """
    graph = Graph(neo4j_url, auth=auth)
    tx = graph.begin()
    root_node = create_node(tx, node.value, node.label, 0)

    def func(p_node: Node, nodes: Nodes, i: int):
        """A recursively called helper.

        Args:
            p_node: a `py2neo.Node` object
            nodes: a `trie.Nodes` object
            i: the current depth

        Returns:

        """
        if isinstance(nodes, list):
            return
        data = nodes.children
        if not data:
            tx.create(p_node)
            return
        for j in data:
            # j is also a `trie.Nodes` object
            node_ = create_node(tx, j.value, j.label, i)
            rel = j.rel
            tx.create(Relationship(p_node, rel, node_))
            if not j.children:
                continue
            else:
                # node_ has children, so recurse
                k = i + 1
                func(node_, j, k)

    func(root_node, node, 1)
    tx.commit()
def calcPR():
    # print('start')
    # fo = open("graph.txt", "w")
    graph = Graph('bolt://47.113.103.137:10087', auth=('neo4j', 'pedia_search'))
    G = nx.DiGraph()
    node_matcher = NodeMatcher(graph)
    nodes = node_matcher.match('Entry').all()
    for node in nodes:
        G.add_node(node['page_id'])
        print("node page_id: ", node['page_id'])
    print("number of nodes: ", G.number_of_nodes())
    relationships = graph.match(nodes=None, r_type='linkTo', limit=None).all()
    i = 0
    print('start loop')
    for relationship in relationships:
        i = i + 1
        print(i)
        print(relationship)
        start = relationship.start_node['page_id']
        end = relationship.end_node['page_id']
        print('edge: ', start, ' -> ', end)
        G.add_edge(*(start, end))
    print('start pagerank')
    result = nx.pagerank(G, alpha=0.85, personalization=None, max_iter=20000,
                         tol=1e-06, weight='weight', dangling=None)
    print(result)
    print(sorted(result.items(), key=lambda kv: (kv[1], kv[0])))
    i = 0
    transaction = graph.begin()
    for cur_page_id in result:
        i = i + 1
        weight = result[cur_page_id]
        print("node:", i, cur_page_id, weight)
        matcher = NodeMatcher(graph)
        node = matcher.match(page_id=cur_page_id).first()  # look the node up
        node['weight'] = weight  # update its weight
        transaction.push(node)
    transaction.commit()
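# A hedged alternative for the write-back loop above: push all PageRank
# scores in one parameterized query instead of one push per node. The
# identifiers (`graph`, `result`, label `Entry`) come from the snippet above.
rows = [{"page_id": pid, "weight": w} for pid, w in result.items()]
graph.run(
    "UNWIND $rows AS row "
    "MATCH (n:Entry {page_id: row.page_id}) "
    "SET n.weight = row.weight",
    rows=rows,
)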
class Neo4jPipline(object):

    def __init__(self):
        self.graph = Graph("http://xxx.xx.xx.xxx:7474",
                           username="******", password="******")

    def process_item(self, item, spider):
        tx = self.graph.begin()
        worker_list = [{"name": item["username"]}, {"name": item["following"]}]
        for worker in worker_list:
            node = Node("Person", **worker)
            tx.merge(node)
        node_1 = Node(name=item["username"])
        node_2 = Node(name=item["following"])
        rel = Relationship(node_1, "following", node_2)
        try:
            tx.merge(rel)
            print("successful")
            tx.commit()
        except Exception as e:
            print(e)
            print("Failed")
def main():
    parser = VkParser(DRIVER_PATH)
    parser.login(LOGIN, PASSWORD)
    friends_ids = list(parser.get_friends_list(START_ID))
    edges = {START_ID: friends_ids}
    for id in friends_ids:
        edges[id] = []
        for friend_id in parser.get_friends_list(id):
            if friend_id in friends_ids:
                edges[id] += [friend_id]
    nodes = {}
    g = Graph()
    tx = g.begin()
    for id in friends_ids:
        node = Node("Person", id=id)
        nodes[id] = node
    for id in friends_ids:
        for friend in edges[id]:  # iterate the adjacency list, not the Node itself
            edge = Relationship(nodes[id], "FRIENDSHIP", nodes[friend])
            tx.create(edge)
    tx.commit()
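# Note on why no tx.create(node) calls are needed above: py2neo's create
# walks the whole subgraph, so creating a relationship also creates any
# still-unbound endpoint nodes. A minimal hedged illustration, reusing the
# `g` Graph from the snippet above; the Person ids are placeholders:
alice = Node("Person", id=1)
bob = Node("Person", id=2)
tx = g.begin()
tx.create(Relationship(alice, "FRIENDSHIP", bob))  # creates alice and bob as a side effect
tx.commit()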
def create_gics_node(graph: Graph):
    event = StockEventBase(GLOBAL_HEADER)
    df = event.mysql.select_values('gics', 'code,name,level')
    df.columns = ['code', 'name', 'level']
    sector = df[df['level'] == 0]
    industry_group = df[df['level'] == 1]
    industry = df[df['level'] == 2]
    sub_industry = df[df['level'] == 3]
    t = graph.begin()
    label0 = ('gics', 'Sector')
    for index, node in sector.iterrows():
        t.create(Node(*label0, code=node['code'], name=node['name']))
    label1 = ('gics', 'Industry_Group')
    for index, node in industry_group.iterrows():
        t.create(Node(*label1, code=node['code'], name=node['name']))
    label2 = ('gics', 'Industry')
    for index, node in industry.iterrows():
        t.create(Node(*label2, code=node['code'], name=node['name']))
    label3 = ('gics', 'Sub_Industry')
    for index, node in sub_industry.iterrows():
        t.create(Node(*label3, code=node['code'], name=node['name']))
    t.commit()
def add_friend_to_person_node(self, user_name, friend):
    graph = Graph('bolt://localhost:7687', username='******', password='******')
    g = graph.begin()
    friends = g.run("MATCH (a:User {userName:$uname}) RETURN a.friends",
                    uname=user_name).evaluate()
    if friends == " ":
        friends = friend
    else:
        friends = friends + "\n" + friend
    graphdb = GraphDatabase.driver("bolt://localhost:7687",
                                   auth=("neo4j", "facebook"),
                                   encrypted=False)
    graphdb.session().run(
        "MATCH (a:User {userName:$uname}) SET a.friends=$friend",
        uname=user_name, friend=friends)
    graphdb.session().run(
        "MATCH (a:User),(b:User) WHERE a.userName = $uname AND b.firstName = $friendname "
        "CREATE (a)-[r:is_friends_with]->(b)",
        uname=user_name, friendname=friend)
def Create_Nodes(df):
    # Connect to my personal Neo4j database
    graph = Graph(password='******')
    for i in range(df.shape[0]):
        tx = graph.begin()
        author = df.iloc[i]["author"]
        tweet = df.iloc[i]["text"]
        # id_tweet = df.iloc[i].index()
        create_date = df.iloc[i]["create_date"]
        author_location = df.iloc[i]["author_location"]
        full_name = df.iloc[i]["full_name"]
        a = Node("Twittos",
                 name=author,
                 tweet=tweet,
                 create_date=str(create_date),
                 full_name=full_name,
                 author_location=author_location)
        tx.create(a)
        tx.commit()
def create_nodes():
    file_names = glob.glob('../data_users_moves/*.csv')
    my_set = set()
    for file in file_names:
        filename = os.fsdecode(file)
        if not filename.endswith('.csv'):
            continue
        print('Currently using file - ', filename)
        df = pd.read_csv(filename, header=None)
        df.columns = ['Timestamp', 'id1', 'id2']
        y = set(list(df['id1']))
        z = set(list(df['id2']))
        ids_of_one_csv = y.union(z)
        my_set.update(ids_of_one_csv)
    graph = Graph('127.0.0.1', password='******')
    tx = graph.begin()
    for value in my_set:
        user = Node('User', id=value)
        tx.create(user)
    tx.commit()
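# tx.create never deduplicates, so re-running the loader above would insert a
# second User node per id. A hedged sketch of one guard, using py2neo's
# schema API (method names as in py2neo v3/v4; `graph` is the connected
# Graph from above):
if "id" not in graph.schema.get_uniqueness_constraints("User"):
    graph.schema.create_uniqueness_constraint("User", "id")
# With the constraint in place, a duplicate tx.create(...) fails fast instead
# of silently inserting a second node; merge-style loading is the alternative
# if re-runs should be no-ops.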
def f_relation_insert(ent1, ent2, rel, tab_name, label1=None, label2=None, sql=None):
    print(label1)
    g = Graph("http://115.159.65.147:7474", username="******", password="******")
    t1 = time.time()
    conn = cx.connect('nsyy', 'uat_NSYY', '192.168.0.110:1521/orcl')
    cur = conn.cursor()
    if sql is None:
        f = cur.execute('select {0},{1},{2} from {3} where rownum<=100'.format(
            ent1, rel, ent2, tab_name))
    else:
        f = cur.execute(sql)
    tx = g.begin()
    t1 = time.time()
    cnt = 0
    for i in f.fetchall():
        ent1 = i[0]
        ent2 = i[2]
        rel = i[1]
        print(ent1, ent2, rel)
        left_rel = g.find_one(label=label1, property_key="name", property_value=ent1)
        print(left_rel)
        right_rel = g.find_one(label=label2, property_key="name", property_value=ent2)
        relation_insert = Relationship(left_rel, rel, right_rel)
        tx.create(relation_insert)
        cnt = cnt + 1
    tx.commit()
    time_second = time.time() - t1
    print('Relationship import finished: {0} relationships imported in {1} seconds'.format(cnt, time_second))
class DataToNeo4j:
    """Store data from Excel in neo4j."""

    def __init__(self):
        """Set up the connection."""
        self.graph = Graph('http://localhost:7474', username='******', password='******')
        # define the labels
        self.invoice_name = '发票名称'
        self.invoice_value = '发票值'
        self.graph.delete_all()

    def create_node(self, node_list_key, node_list_value):
        """Create the nodes."""
        for k in node_list_key:
            # the label goes in positionally; label= would create a property
            name_node = Node(self.invoice_name, name=k)
            self.graph.create(name_node)
        for v in node_list_value:
            value_node = Node(self.invoice_value, name=v)
            self.graph.create(value_node)

    def create_relation(self, df_data):
        """Create the relationships."""
        tx = self.graph.begin()
        for _, row in df_data.iterrows():
            tx.evaluate('''
                MERGE (a:发票名称 {property:$name})
                MERGE (b:发票值 {property:$name2})
                MERGE (a)-[r:R_TYPE{property:$p}]->(b)
            ''', parameters={
                'name': row['name'],
                'name2': row['name2'],
                'p': row['relation']
            })
        tx.commit()
def build_neo_graph(delta, call_graph1, call_graph2):
    nodes = {}
    graph = Graph(password='******')
    tx = graph.begin()
    for key in delta.keys():
        nodes[key] = Node("function", title=key, content=delta[key])
        tx.merge(nodes[key])
    for entry in call_graph1:
        calling_fun = entry['referringFunction'] + "@" + entry['referringFile']
        called_fun = entry['referredFunction'] + "@" + entry['referredFile']
        relation = Relationship(nodes[calling_fun], "Original Call To", nodes[called_fun])
        tx.merge(relation)
    for entry in call_graph2:
        calling_fun = entry['referringFunction'] + "@" + entry['referringFile']
        called_fun = entry['referredFunction'] + "@" + entry['referredFile']
        relation = Relationship(nodes[calling_fun], "Modified Call To", nodes[called_fun])
        tx.merge(relation)
    tx.commit()
def addAffirmationToGraph(affirmation):
    authenticate(settings.SECRET_NEO4J_DB_HOSTPORT,
                 settings.SECRET_NEO4J_DB_USER,
                 settings.SECRET_NEO4J_DB_PASSWORD)
    graph = Graph()
    tx = graph.begin()
    claim = affirmation.claim
    claimNode = Node("Claim", claim_id=claim.id, name=claim.name, content=claim.content)
    tx.merge(claimNode, 'claim_id')
    user = affirmation.user
    userNode = Node("User", user_id=user.id, name=user.username)
    tx.merge(userNode, 'user_id')
    affirmsRelationship = Relationship(userNode, "Affirms", claimNode,
                                       affirmation_id=affirmation.id)
    tx.merge(affirmsRelationship, 'affirmation_id')
    tx.commit()
def Create_Nodes(api, df):
    # Connect to my personal Neo4j database
    graph = Graph(password='******')
    for i in range(df.shape[0]):
        Check_Limit_API(api)  # check whether any API requests are still available
        # print time.ctime()
        tx = graph.begin()
        author = df.iloc[i]["author"]
        tweet = df.iloc[i]["text"]
        create_date = df.iloc[i]["create_date"]
        author_location = df.iloc[i]["author_location"]
        full_name = df.iloc[i]["full_name"]
        lang = df.iloc[i]["lang"]
        iso_lang = df.iloc[i]["iso_lang"]
        a = Node("Twittos",
                 name=author,
                 tweet=tweet,
                 create_date=str(create_date),
                 full_name=full_name,
                 author_location=author_location,
                 lang=lang,
                 iso_lang=iso_lang)
        tx.create(a)
        tx.commit()
        if RETWEET_ONLY:
            tweets_id = df.index[i]
            df = Create_Retweet_Link(api, tweets_id, a, author, df)
        else:
            followers = getFollowers(api, author, ENTRY_FOLLOWER)
            Create_Relationship(api, followers, a, author)
    return df
def init_graph(our_graph: OurGraph, user: str, password: str,
               url: str = 'bolt://localhost:7687'):
    graph = Graph(f"{url}", user=user, password=password)
    graph.delete_all()
    tx = graph.begin()
    nodes_dict = create_nodes_dict(our_graph)
    count = 0
    progress = 0.0
    total = len(nodes_dict.values())
    for v in nodes_dict.values():
        tx.create(v)
        if count > (total / 100):
            count = 0
            progress += 1
            print('Vertex Progress', progress, '%')
        count += 1
    count = 0
    progress = 0.0
    total = len(our_graph.E)
    for e in our_graph.E:
        source = e.from_vertex.id
        target = e.to_vertex.id
        new_edge = Relationship(nodes_dict[source], e.type, nodes_dict[target])
        tx.create(new_edge)
        if count > (total / 100):
            count = 0
            progress += 1
            print('Edge Progress', progress, '%')
        count += 1
    tx.commit()
def create_relationship_in_gics_node(graph: Graph):
    t = graph.begin()
    n0 = graph.nodes.match("gics", "Sector")
    n0_list = list(n0)
    n1 = graph.nodes.match("gics", "Industry_Group")
    n1_list = list(n1)
    n2 = graph.nodes.match("gics", "Industry")
    n2_list = list(n2)
    n3 = graph.nodes.match("gics", "Sub_Industry")
    n3_list = list(n3)
    for nx in n0_list:
        for ny in n1_list:
            if re.match(nx['code'], ny['code']):
                t.create(Relationship(nx, 'sub_class', ny))
    for nx in n1_list:
        for ny in n2_list:
            if re.match(nx['code'], ny['code']):
                t.create(Relationship(nx, 'sub_class', ny))
    for nx in n2_list:
        for ny in n3_list:
            if re.match(nx['code'], ny['code']):
                t.create(Relationship(nx, 'sub_class', ny))
    t.commit()
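# re.match(nx['code'], ny['code']) above only works because GICS codes are
# plain digits and a parent's code is a prefix of its children's codes, so
# the pattern behaves as an anchored prefix test. A hedged, regex-free
# equivalent of the inner check, reusing the loop variables from above:
if ny['code'].startswith(nx['code']):
    t.create(Relationship(nx, 'sub_class', ny))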
def save_simple_graph(self, number_list):
    client = Graph(password=os.getenv('NEO4J_PASSWORD', 'Neo4j'))
    graph = client.begin()
    prev_number = 0
    for item in number_list:
        number_node = self.node_exist(int(item))
        if prev_number != 0 and number_node:
            prev_number_node = Node("number", number=int(prev_number))
            prev_number = int(item)
            num_relationship = Relationship(prev_number_node, "NEXT", number_node)
            graph.merge(num_relationship)
            break
        number = Node("number", number=int(item))
        graph.merge(number)
        if prev_number == 0:
            prev_number = int(item)
            continue
        prev_number_node = Node("number", number=int(prev_number))
        prev_number = int(item)
        num_relationship = Relationship(prev_number_node, "NEXT", number)
        graph.merge(prev_number_node)
        graph.merge(num_relationship)
    graph.commit()
# In[16]:

df.head(3)

# In[37]:

# neo4j configuration/credentials
uri = config["neo4j"]["uri"]
user = config["neo4j"]["user"]
password = config["neo4j"]["password"]
graph = Graph(uri=uri, user=user, password=password)

# In[39]:

gb = graph.begin()
topic_entity_name = []
for index, row in tqdm(df.iterrows()):
    # get the topic number of the doc and create a node for it
    topic = row["topic_number"]
    topic_node = graph.nodes.match("TopicNumber", name="Topic_" + str(topic)).first()
    if topic_node is None:
        topic_node = Node("TopicNumber", name="Topic_" + str(topic))
        graph.create(topic_node)
    topic_keywords = row["topic_keywords"]
    temp = []
    # creating nodes and relationships between the entities present in a topic
    # and the topic number to which the corresponding doc belongs
def generateInvoice(order_id):
    seller = 'Team Team'
    connection = mysql.connector.connect(
        host='mysql98.unoeuro.com',
        database='zakeovich_dk_db_cphbusiness')
    cursor = connection.cursor()

    # Update order status
    cursor.execute(
        f"UPDATE zakeovich_dk_db_cphbusiness.order o SET o.status = 'completed' WHERE o.id = {order_id};"
    )

    # Get order row
    cursor.execute(
        f"SELECT o.total, o.order_no, o.status FROM zakeovich_dk_db_cphbusiness.order o WHERE o.id = {order_id};"
    )
    order_total = cursor.fetchall()
    order_total = order_total[0][0]

    # Generate an invoice for the given order and store it in SQL
    cursor.execute(
        f"INSERT INTO zakeovich_dk_db_cphbusiness.invoice (fk_order_id, total, due_date, issue_date, seller) "
        f"VALUES ({order_id}, {order_total}, date_add(current_date(), INTERVAL 14 DAY), current_date(), '{seller}');"
    )

    # Get the order number of the order
    cursor.execute(
        f"SELECT o.order_no, o.status FROM zakeovich_dk_db_cphbusiness.order o WHERE o.id = {order_id};"
    )
    order = cursor.fetchall()
    order_no = order[0][0]
    order_status = order[0][1]

    # Commit the SQL update
    connection.commit()
    invoice_id = cursor.lastrowid
    cursor.execute(
        f"SELECT i.total, i.due_date, i.issue_date, i.seller FROM zakeovich_dk_db_cphbusiness.invoice i WHERE i.id = {invoice_id};"
    )
    invoice = cursor.fetchall()
    cursor.close()
    connection.close()

    graph = Graph("bolt://35-202-37-187.gcp-neo4j-sandbox.com:7687",
                  auth=("neo4j", "cy3yxxzcXDN6UKnw"), secure=True)
    # graph = Graph(scheme='bolt', host='hobby-ppgaodfmmciegbkemkpmdcel.dbs.graphenedb.com',
    #               port=24787, user='******', password='******', secure=True)

    # Create a node matcher
    matcher = NodeMatcher(graph)

    # Open the connection
    g_conn = graph.begin()
    order_node = matcher.match("Order", order_no=order_no).first()

    # Update the order in Neo4j
    graph.run(
        f"MATCH (o {{ order_no: {order_no}}}) SET o.status = '{order_status}'")

    # Serve the invoice to Neo4j
    invoice_node = Node("Invoice",
                        total=invoice[0][0],
                        due_date=str(invoice[0][1]),
                        seller=str(invoice[0][3]))
    g_conn.create(invoice_node)
    order_invoice = Relationship(order_node, "GENERATED", invoice_node,
                                 issue_date=str(invoice[0][2]))
    g_conn.create(order_invoice)

    # Commit the changes
    g_conn.commit()
def __init__(self):
    self.__redis = redis.StrictRedis(host='redis', port=6379, db=0)
    graph = Graph("http://*****:*****@neo4j:7474/db/data/")
    self.__tx = graph.begin()
from py2neo import Database
from py2neo import Graph, Node
import numpy as np

"""
Connection to the Neo4j Database
"""
dvdrental = "bolt://localhost:7687"
db = Database(dvdrental)
graph = Graph(password='******')
db = graph.begin(autocommit=False)

ret = graph.run('''match (f:Film)-[fr:FILM_RENTAL]->(r:Rental)
match (c:Customer)-[cr:CUSTOMER_RENTAL]->(r)
where fr.ID=cr.ID
with count(*) as rented_times
match (f:Film)-[in_cat:IN_CATEGORY]->(cat:Category)
with f.ID as film_ID, f.title as film_title, cat.name as category_name, rented_times
return film_ID, film_title, category_name, rented_times''').to_table()
print(ret)
db.commit()
class GotGraph(object):
    """
    This object provides a set of helper methods for creating and retrieving
    nodes and relationships from a Neo4j database holding information about
    players, teams, fans, comments and their relationships.
    """

    # Note:
    # I tend to avoid object mapping frameworks. Object mapping frameworks are fun in the beginning
    # but tend to be annoying after a while. So, I did not create types Player, Team, etc.
    #
    # Connects to the DB and sets a Graph instance variable.
    # Also creates a NodeMatcher and RelationshipMatcher, which are py2neo framework classes.
    def __init__(
            self,
            auth=('neo4j', 'dbuserdbuser'),
            host='localhost',
            port=7687,
            secure=False,
    ):
        self._graph = Graph(secure=secure,
                            bolt=True,
                            auth=auth,
                            host=host,
                            port=port)
        self._node_matcher = NodeMatcher(self._graph)
        self._relationship_matcher = RelationshipMatcher(self._graph)

    def run_q(self, qs, args):
        """
        :param qs: Query string that may have {} slots for parameters.
        :param args: Dictionary of parameters to insert into query string.
        :return: Result of the query, which executes as a single, standalone transaction.
        """
        try:
            tx = self._graph.begin(autocommit=False)
            result = self._graph.run(qs, args)
            return result
        except Exception as e:
            print("Run exception = ", e)

    def run_match(self, labels=None, properties=None):
        """
        Uses a NodeMatcher to find a node matching a "template."
        :param labels: A list of labels that the node must have.
        :param properties: A dictionary of {property_name: property_value} defining the template
            that the node must match.
        :return: An array of Node objects matching the pattern.
        """
        # ut.debug_message("Labels = ", labels)
        # ut.debug_message("Properties = ", json.dumps(properties))

        if labels is not None and properties is not None:
            result = self._node_matcher.match(labels, **properties)
        elif labels is not None and properties is None:
            result = self._node_matcher.match(labels)
        elif labels is None and properties is not None:
            result = self._node_matcher.match(**properties)
        else:
            raise ValueError(
                "Invalid request. Labels and properties cannot both be None.")

        # Convert NodeMatch data into a simple list of Nodes.
        full_result = []
        for r in result:
            full_result.append(r)

        return full_result

    def find_nodes_by_template(self, tmp):
        """
        :param tmp: A template defining the label and properties for Nodes to return.
            An example is {"label": "Fan",
                           "template": {"last_name": "Ferguson", "first_name": "Donald"}}
        :return: A list of Nodes matching the template.
        """
        labels = tmp.get('label', None)
        props = tmp.get("template", None)
        result = self.run_match(labels=labels, properties=props)
        return result

    def create_node(self, label, **kwargs):
        n = Node(label, **kwargs)
        tx = self._graph.begin(autocommit=True)
        tx.create(n)
        return n

    # Given a UNI, return the node for the Fan.
    def get_character(self, character_id):
        n = self.find_nodes_by_template({
            "label": "Character",
            "template": {
                "character_id": character_id
            }
        })
        if n is not None and len(n) > 0:
            # I should throw an exception here if there is more than 1.
            n = n[0]
        else:
            n = None
        return n

    def get_related_characters(self, ch_id, r_kind):
        t = self.get_character(ch_id.upper())
        nodes = set()
        nodes.add(t)
        n = self._relationship_matcher.match(nodes, r_type=r_kind.upper())
        return n

    def create_character_relationship(self, source_id, target_id, label):
        """
        Create a SUPPORTS relationship from a Fan to a Team.
        :param uni: The UNI for a fan.
        :param team_id: An ID for a team.
        :return: The created SUPPORTS relationship from the Fan to the Team
        """
        f = self.get_character(source_id)
        t = self.get_character(target_id)
        l = label.upper()
        r = Relationship(f, l, t)
        tx = self._graph.begin(autocommit=True)
        tx.create(r)
        return r

    def get_appearance(self, player_id, team_id, year_id):
        """
        Get the information about appearances for a player and team.
        :param player_id: player_id
        :param team_id: team_id
        :param year_id: The year for getting appearances.
        :return:
        """
        try:
            # Get the Nodes at the ends of the relationship representing appearances.
            p = self.get_player(player_id)
            t = self.get_team(team_id)

            # Run a match looking for relationships of a specific type linking the nodes.
            rm = self._graph.match(nodes=[p, t], r_type="APPEARED")
            result = []

            # If there is a list of relationships.
            if rm is not None:
                for r in rm:
                    # The type will be a class APPEARED() because of the OO mapping.
                    node_type = type(r).__name__
                    year = r['year']

                    # If the type and year are correct, add to the result.
                    if node_type == "APPEARED" and (year == year_id or year_id is None):
                        result.append(r)
                return result
            else:
                return None

        except Exception as e:
            print("get_appearance: Exception e = ", e)
            raise e

    # Create an APPEARED relationship from a Player to a Team.
    def create_appearance_all(self, player_id, team_id, year, games):
        """
        :param player_id:
        :param team_id:
        :param year:
        :param games:
        :return:
        """
        try:
            tx = self._graph.begin(autocommit=False)
            q = "match (n:Player {player_id: '" + player_id + "'}), " + \
                "(t:Team {team_id: '" + team_id + "'}) " + \
                "create (n)-[r:APPEARED { games: " + str(games) + ", year : " + str(year) + \
                "}]->(t)"
            result = self._graph.run(q)
            tx.commit()
        except Exception as e:
            print("create_appearances: exception = ", e)

    # Create a FOLLOWS relationship from a Fan to another Fan.
    def create_follows(self, follower, followed):
        f = self.get_fan(follower)
        t = self.get_fan(followed)
        r = Relationship(f, "FOLLOWS", t)
        tx = self._graph.begin(autocommit=True)
        tx.create(r)

    def get_comment(self, comment_id):
        """
        :param comment_id: Comment ID
        :return: Comment
        """
        n = self.find_nodes_by_template({
            "label": "Comment",
            "template": {
                "comment_id": comment_id
            }
        })
        if n is not None and len(n) > 0:
            n = n[0]
        else:
            n = None
        return n

    def create_comment(self, uni, comment, team_id=None, player_id=None):
        """
        Creates a comment.
        :param uni: The UNI for the Fan making the comment.
        :param comment: A simple string.
        :param team_id: A valid team ID or None. team_id and player_id cannot BOTH be None.
        :param player_id: A valid player ID or None.
        :return: The Node representing the comment.
        """
        if uni is None or comment is None or (player_id is None and team_id is None):
            raise ValueError("create_comment: invalid input.")

        comment_id = str(uuid.uuid4())  # Generate a UUID. Will explain in class.
        fan = None
        team = None
        player = None
        tx = None

        try:
            tx = self._graph.begin()  # We are going to make a transactional update.

            fan = self.get_fan(uni)  # Get the node for the Fan.
            if fan is None:
                raise ValueError("create_comment: Fan node not found.")

            if team_id is not None:
                team = self.get_team(team_id)
                if team is None:
                    raise ValueError("create_comment: Team node not found.")

            if player_id is not None:
                player = self.get_player(player_id)
                if player is None:
                    raise ValueError("create_comment: Player node not found.")

            c = Node("Comment", comment_id=comment_id, comment=comment)
            tx.create(c)

            pc = Relationship(fan, "COMMENT_BY", c)
            tx.create(pc)

            if player is not None:
                pr = Relationship(c, "COMMENT_ON", player)
                tx.create(pr)

            if team is not None:
                p2 = Relationship(c, "COMMENT_ON", team)
                tx.create(p2)

            tx.commit()
            return c

        except Exception as e:
            if tx:
                tx.rollback()
            raise RuntimeError(
                "create_comment: Something evil happened., e = " + str(e))

    def create_sub_comment(self, uni, origin_comment_id, comment):
        """
        Create a sub-comment (response to a comment or response) and link it with its parent in the thread.
        :param uni: ID of the Fan making the comment.
        :param origin_comment_id: ID of the comment to which this is a response.
        :param comment: Comment string.
        :return: Created comment.
        """
        if uni is None or origin_comment_id is None or comment is None:
            raise ValueError("create_sub_comment: invalid input.")

        comment_id = str(uuid.uuid4())  # Generate a UUID. Will explain in class.
        fan = None
        origin_comment = None
        tx = None

        try:
            tx = self._graph.begin()  # We are going to make a transactional update.

            fan = self.get_fan(uni)  # Get the node for the Fan.
            if fan is None:
                raise ValueError("create_sub_comment: Fan node not found.")

            origin_comment = self.get_comment(origin_comment_id)
            if origin_comment is None:
                raise ValueError("create_sub_comment: Original comment node not found.")

            c = Node("Comment", comment_id=comment_id, comment=comment)
            tx.create(c)

            sc = Relationship(c, "RESPONSE_TO", origin_comment)
            tx.create(sc)

            pc = Relationship(fan, "RESPONSE_BY", c)
            tx.create(pc)

            tx.commit()

        except Exception as e:
            if tx:
                tx.rollback()
            raise RuntimeError(
                "create_sub_comment: Something evil happened., e = " + str(e))

    def get_sub_comments(self, comment_id):
        """
        :param comment_id: The unique ID of the comment.
        :return: The sub-comments.
        """
        q = "match (c:Comment {comment_id: {cid}})<-[response:RESPONSE_TO]-(sc:Comment) return sc,response,c"
        c = self._graph.run(q, cid=comment_id)
        return c

    def get_roster(self, team_id, year_id):
        q = "match (t:Team {team_id: {tid}})<-[a:APPEARED {year: {yid}}]-(p:Player) " + \
            " return t.team_id, t.team_name, a.year, a.games, p.player_id, p.last_name, p.first_name"
        c = self._graph.run(q, tid=team_id, yid=year_id)
        return c

    def get_player_comments(self, player_id):
        """
        Gets all of the comments associated with a player, all of the comments on the comment
        and comments on the comments, etc. Also returns the Nodes for the people making the comments.
        :param player_id: ID of the player.
        :return: Graph containing comment, comment streams and commenters.
        """
        q = 'match (fan)-[by:COMMENT_BY]->(comment)-[on:COMMENT_ON]->(player:Player {player_id: {pid}}) ' + \
            'return fan.uni, fan.last_name, fan.first_name, comment.comment_id, comment.comment, ' + \
            " player.player_id, player.last_name, player.first_name "
        result = self._graph.run(q, pid=player_id)
        return result

    def get_player_comments_g(self, player_id):
        """
        Gets all of the comments associated with a player, all of the comments on the comment
        and comments on the comments, etc. Also returns the Nodes for the people making the comments.
        :param player_id: ID of the player.
        :return: Graph containing comment, comment streams and commenters.
        """
        q = 'match (fan)-[by:COMMENT_BY]->(comment)-[on:COMMENT_ON]->(player:Player {player_id: {pid}}) ' + \
            'return fan, by, comment, on, player '
        result = self._graph.run(q, pid=player_id)
        return result

    def get_team_comments(self, team_id):
        """
        Gets all of the comments associated with a team, all of the comments on the comment
        and comments on the comments, etc. Also returns the Nodes for the people making the comments.
        :param team_id: ID of the team.
        :return: Graph containing comment, comment streams and commenters.
        """
        t = self.get_team(team_id)
        nodes = set()
        nodes.add(t)
        n = self._relationship_matcher.match(nodes, r_type="COMMENT_ON")
        return n

    def get_players_by_team(self, team_id):
        """
        :param team_id: Team ID
        :return: List of Nodes representing players who played for the team.
        """
        q = "match (p:Player)-[r:APPEARED]->(t:Team) where t.team_id={tid} return p,r,t"
        result = self._graph.run(q, tid=team_id)
        result = result.data()
        ans = []
        for r in result:
            current = dict(r['p'].items())
            current['year'] = r['r']['year']
            ans.append(current)
        return ans
rel4 = Relationship(author_ankush, "Wrote", ankush_paper_4)
rel5 = Relationship(author_ankush, "Wrote", ankush_paper_5)

paper_1_conf1 = Relationship(ankush_paper_1, "Presented_at", conf1)
paper_2_conf1 = Relationship(ankush_paper_2, "Presented_at", conf1)
paper_3_conf1 = Relationship(ankush_paper_3, "Presented_at", conf1)
paper_4_conf1 = Relationship(ankush_paper_4, "Presented_at", conf1)
paper_5_conf1 = Relationship(ankush_paper_5, "Presented_at", conf1)

edition1_conf1_rel1 = Relationship(edition1_conf1, "PartOf", conf1)
edition2_conf1_rel2 = Relationship(edition2_conf1, "PartOf", conf1)
edition3_conf1_rel3 = Relationship(edition3_conf1, "PartOf", conf1)
edition4_conf1_rel4 = Relationship(edition4_conf1, "PartOf", conf1)
edition5_conf1_rel5 = Relationship(edition5_conf1, "PartOf", conf1)

tx = graph.begin()
tx.create(author_edition1)
tx.create(author_edition2)
tx.create(author_edition3)
tx.create(author_ankush)
tx.create(ankush_paper_1)
tx.create(ankush_paper_2)
tx.create(ankush_paper_3)
tx.create(ankush_paper_4)
tx.create(ankush_paper_5)
tx.create(rel1)
tx.create(rel2)
tx.create(rel3)
def main(to_handle):
    # Don't forget to start the MDG up before using this script!
    MDG = Graph()
    deps = []
    matcher = NodeMatcher(MDG)
    errors.write("In " + to_handle + ":\n")
    exceptions.write("In " + to_handle + ":\n")
    # print("Starting")
    with open(data_dir + to_handle, 'r', newline='') as f:
        reader = csv.reader(f)
        prev_gid, prev_art, prev_node, prev_version = None, None, None, None
        for row in reader:
            if len(row) < 7:
                continue
            tx = MDG.begin()
            # Get metadata
            repo, gid, aid, version, packaging, sha = (row[0], row[3], row[4],
                                                       row[5], row[6],
                                                       get_hash(row[2]))
            # Missing: release date, packaging
            # Create & add node
            repo_node = Node("Artifact",
                             stars=row[1],
                             url=row[2],
                             groupID=gid,
                             artifact=aid,
                             version=version,
                             packaging=packaging,
                             coordinates=gid + ":" + aid + ":" + version,
                             commit_hash=sha,
                             from_github="True")
            repo_deps = []
            for d in row[7:]:
                if len(d) > 2:
                    dep_list = convert_dep_to_list(d)
                    if dep_list is not None:
                        repo_deps.append(dep_list)
            # This is to see if the node was in the MDG before we added it
            try:
                e_node = existing_node(matcher, repo_node)
            except Exception as err:
                errors.write("Error while checking if the node " + gid + ":" +
                             aid + ":" + version + ":" + sha + " exists in " +
                             to_handle + ": " + repr(err) + "\n")
                continue
            if e_node is not None:
                print(e_node["coordinates"])
            if e_node is not None:
                repo_node = e_node
            else:
                repo_node["coordinates"] += ":" + sha
            if version != prev_version or (aid != prev_art and gid != prev_gid):
                try:
                    tx.create(repo_node)
                except Exception as err:
                    errors.write("Error while creating node " +
                                 repo_node["coordinates"] + " in " +
                                 to_handle + ": " + repr(err) + "\n")
            if aid == prev_art and gid == prev_gid:
                r_next = Relationship(repo_node, "NEXT", prev_node)
                try:
                    tx.merge(r_next, "Artifact", "coordinates")
                except Exception as err:
                    errors.write("Error while merging NEXT between " +
                                 repo_node["coordinates"] + " and " +
                                 prev_node["coordinates"] + " in " +
                                 to_handle + ": " + repr(err) + "\n")
            prev_gid, prev_art, prev_node, prev_version = (
                repo_node["groupID"], repo_node["artifact"], repo_node,
                repo_node["version"])
            deps.append((repo_node, repo_deps))
            tx.commit()
    # print("Done adding nodes and NEXT")
    for (node, dep_list) in deps:
        tx = MDG.begin()
        node_deps = purge_deps(dep_list)
        for dep in node_deps:
            dep_node, reason = find_dep_node(MDG, matcher, dep)
            if dep_node is None:
                exceptions.write(node["coordinates"] + ": could not" +
                                 " create dependency with " + dep[0] + ":" +
                                 dep[1] + ":" + dep[2][0] + " because " +
                                 reason + "\n")
                continue
            r_dep = Relationship(node, "DEPENDS_ON", dep_node)
            try:
                tx.merge(r_dep, "Artifact", "coordinates")
            except Exception as err:
                errors.write("Could not create dependency between " +
                             r_dep.start_node["coordinates"] + " and " +
                             r_dep.end_node["coordinates"] + " because " +
                             repr(err) + "\n")
        tx.commit()
class Neo4jQuery(object):

    def __init__(self, k, config):
        self.num_topics = k  # number of topics
        self.config = config  # config dict for the database
        self.graph = Graph(auth=(config["user"], config["password"]))  # get the Neo4j graph

    def construct_topic_vector(self, topic_indices):
        """
        Construct the topic indicator vector
        Args:
            topic_indices: List of topic indexes returned from the LDA model

        Returns: indicator vector
        """
        value_str = ["0"] * self.num_topics
        for k in topic_indices:
            value_str[k - 1] = "1"
        return value_str

    def insert_journal(self):
        """
        Insert Journal into Graph
        Returns: Query string, Query keys
        """
        return "CREATE (j:Journal {id:{id}, name:{name}, field:{field}, ranking:{ranking})", [
            "id", "name", "field", "ranking"
        ]

    def insert_paper(self):
        """
        Insert Paper into Graph
        Returns: Query string, Query keys
        """
        return "CREATE (p:Paper {id:{id}, authors:{authors}, journal_id:{journal_id}, title:{title}, abstract:{abstract}})", [
            "id", "authors", "journal_id", "title", "abstract"
        ]

    def update_paper(self, col_names):
        """
        Update Paper in Graph
        Returns: Query string, Query keys
        """
        alter_str = ','.join(["p." + x + "={" + x + "}" for x in col_names])
        col_names.append("id")
        return "MATCH (p:Paper) WHERE p.id={id} SET " + alter_str + ";", col_names

    def insert_topic(self, paper_id, topic_indices):
        """
        Insert Paper-Topic Relationship into Graph
        Args:
            paper_id: the id of the paper inserted
            topic_indices: indicator topic vector

        Returns: Query string, Query keys
        """
        paper_topic_rel_str = "MATCH (p:Paper), (t:Topic) WHERE p.id={0} AND t.no IN [{1}] CREATE (p)-[:TopicOf]->(t)"
        value_str = self.construct_topic_vector(topic_indices)
        topic_nodes = []
        for i in range(len(value_str)):
            if value_str[i] == "1":
                topic_nodes.append(str(i))
        query_str = paper_topic_rel_str.format(paper_id, ','.join(topic_nodes))
        return query_str, []

    def delete_paper(self):
        # Deletes the node and all of its edges
        return "MATCH (p:Paper) WHERE p.id={id} DETACH DELETE p", ["id"]

    def delete_topic(self):
        # Empty because delete_paper handles it
        return "", []

    def search_journal(self):
        return "MATCH (j:Journal)<-[:PUBLISHED]-(p:Paper) WHERE p.journal_id={id} RETURN p.id, p.authors, p.journal_id, p.title, j.ranking", [
            "id"
        ]

    def search_paper(self):
        return "MATCH (p:Paper) WHERE p.id={id} RETURN p.id, p.authors, p.journal_id, p.title", [
            "id"
        ]

    def search_authors(self):
        return "MATCH (p:Paper) WHERE p.authors=~ '.*{authors}.*' RETURN p.id, p.authors, p.journal_id, p.title", [
            "authors"
        ]

    def get_recommended_papers(self):
        """
        Recommendation query: calculate cosine similarity, rank, and return the results
        Returns: Query string, Query keys
        """
        # TODO: Remove p1.id < 3000
        q1 = "MATCH (j:Journal)<-[pub1:PUBLISHED]-(p1:Paper)-[r1:TopicOf]->(Topic) WHERE p1.id < 3000 AND j.ranking <> -1 "
        q2 = "WITH p1 AS p1, j AS j, algo.similarity.cosine({topic_vec}, collect(r1.score)) AS similarity "
        q3 = "RETURN p1.id, round(similarity * 100) / 100, p1.abstract, p1.authors, p1.journal_id, p1.title "
        q4 = "ORDER BY similarity DESC, j.ranking LIMIT 10;"
        return q1 + q2 + q3 + q4, ["topic_vec"]

    def execute_query(self, query_str, args=[], commit=True):
        """
        Execute a query on the Neo4j graph
        Args:
            query_str: the query-string structure returned by the methods above
            args: argument values to use in the query
            commit: commit the query or not (NOT USED IN THIS CASE)

        Returns: (False, Error) or (True, Cursor)
        """
        tx = self.graph.begin()
        query_str, keys = query_str
        if len(query_str) == 0:
            return True, None  # No query to execute
        if len(keys) == 0:
            assign_dict = {}
        else:
            assign_dict = dict(zip(keys, args))
        try:
            cursor = tx.run(query_str, assign_dict)
            tx.commit()
        except Exception as e:
            print("Error :" + str(e))
            return False, e
        return True, cursor

    def get_results(self, cursor_results):
        """
        Parse the results returned by the database cursor
        Args:
            cursor_results: Results returned by the cursor of this database

        Returns: list of values from the cursor
        """
        parsed_results = []
        data = cursor_results.data()
        for r in data:
            row = r.values()
            parsed_results.append([str(x) for x in row])
        return parsed_results

    def close_db(self):
        # No need to close the database: py2neo uses a stateless REST API
        return
def parse_evtx(evtx_list): event_set = pd.DataFrame(index=[], columns=[ "eventid", "ipaddress", "username", "logintype", "status", "authname" ]) count_set = pd.DataFrame(index=[], columns=["dates", "eventid", "username"]) ml_frame = pd.DataFrame(index=[], columns=["date", "user", "host", "id"]) username_set = [] domain_set = [] admins = [] domains = [] ntmlauth = [] deletelog = [] policylist = [] addusers = {} delusers = {} addgroups = {} removegroups = {} sids = {} hosts = {} dcsync_count = {} dcsync = {} dcshadow_check = [] dcshadow = {} count = 0 record_sum = 0 starttime = None endtime = None if args.timezone: try: datetime.timezone(datetime.timedelta(hours=args.timezone)) tzone = args.timezone print("[*] Time zone is %s." % args.timezone) except: sys.exit("[!] Can't load time zone '%s'." % args.timezone) else: tzone = 0 if args.fromdate: try: fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S") print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S")) except: sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.") if args.todate: try: tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S") print("[*] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S")) except: sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.") for evtx_file in evtx_list: if args.evtx: with open(evtx_file, "rb") as fb: fb_data = fb.read()[0:8] if fb_data != EVTX_HEADER: sys.exit("[!] This file is not EVTX format {0}.".format( evtx_file)) chunk = -2 with Evtx(evtx_file) as evtx: fh = evtx.get_file_header() try: while True: last_chunk = list(evtx.chunks())[chunk] last_record = last_chunk.file_last_record_number() chunk -= 1 if last_record > 0: record_sum = record_sum + last_record break except: record_sum = record_sum + fh.next_record_number() if args.xmls: with open(evtx_file, "r") as fb: fb_data = fb.read() if "<?xml" not in fb_data[0:6]: sys.exit("[!] This file is not XML format {0}.".format( evtx_file)) record_sum += fb_data.count("<System>") del fb_data print("[*] Last record number is %i." % record_sum) # Parse Event log print("[*] Start parsing the EVTX file.") for evtx_file in evtx_list: print("[*] Parse the EVTX file %s." % evtx_file) for node, err in xml_records(evtx_file): if err is not None: continue count += 1 eventid = int(node.xpath("/Event/System/EventID")[0].text) if not count % 100: sys.stdout.write("\r[*] Now loading %i records." 
% count) sys.stdout.flush() if eventid in EVENT_ID: logtime = node.xpath("/Event/System/TimeCreated")[0].get( "SystemTime") try: etime = datetime.datetime.strptime( logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone) except: etime = datetime.datetime.strptime( logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone) stime = datetime.datetime(*etime.timetuple()[:4]) if args.fromdate or args.todate: if args.fromdate and fdatetime > etime: continue if args.todate and tdatetime < etime: endtime = stime break if starttime is None: starttime = stime elif starttime > etime: starttime = stime if endtime is None: endtime = stime elif endtime < etime: endtime = stime event_data = node.xpath("/Event/EventData/Data") logintype = "-" username = "******" domain = "-" ipaddress = "-" hostname = "-" status = "-" sid = "-" authname = "-" if eventid == 4672: for data in event_data: if data.get( "Name" ) in "SubjectUserName" and data.text != None: username = data.text.split("@")[0] if username[-1:] not in "$": username = username.lower() + "@" else: username = "******" if username not in admins and username != "-": admins.append(username) elif eventid in [4720, 4726]: for data in event_data: if data.get( "Name" ) in "TargetUserName" and data.text != None: username = data.text.split("@")[0] if username[-1:] not in "$": username = username.lower() + "@" else: username = "******" if eventid == 4720: addusers[username] = etime.strftime( "%Y-%m-%d %H:%M:%S") else: delusers[username] = etime.strftime( "%Y-%m-%d %H:%M:%S") elif eventid == 4719: for data in event_data: if data.get( "Name" ) in "SubjectUserName" and data.text != None: username = data.text.split("@")[0] if username[-1:] not in "$": username = username.lower() + "@" else: username = "******" if data.get( "Name") in "CategoryId" and data.text != None: category = data.text if data.get( "Name" ) in "SubcategoryGuid" and data.text != None: guid = data.text policylist.append([ etime.strftime("%Y-%m-%d %H:%M:%S"), username, category, guid.lower() ]) elif eventid in [4728, 4732, 4756]: for data in event_data: if data.get( "Name" ) in "TargetUserName" and data.text != None: groupname = data.text elif data.get( "Name" ) in "MemberSid" and data.text not in "-" and data.text != None: usid = data.text addgroups[ usid] = "AddGroup: " + groupname + "(" + etime.strftime( "%Y-%m-%d %H:%M:%S") + ") " elif eventid in [4729, 4733, 4757]: for data in event_data: if data.get( "Name" ) in "TargetUserName" and data.text != None: groupname = data.text elif data.get( "Name" ) in "MemberSid" and data.text not in "-" and data.text != None: usid = data.text removegroups[ usid] = "RemoveGroup: " + groupname + "(" + etime.strftime( "%Y-%m-%d %H:%M:%S") + ") " elif eventid == 4662: for data in event_data: if data.get( "Name" ) in "SubjectUserName" and data.text != None: username = data.text.split("@")[0] if username[-1:] not in "$": username = username.lower() + "@" else: username = "******" dcsync_count[username] = dcsync_count.get(username, 0) + 1 if dcsync_count[username] == 3: dcsync[username] = etime.strftime( "%Y-%m-%d %H:%M:%S") dcsync_count[username] = 0 elif eventid in [5137, 5141]: for data in event_data: if data.get( "Name" ) in "SubjectUserName" and data.text != None: username = data.text.split("@")[0] if username[-1:] not in "$": username = username.lower() + "@" else: username = "******" if etime.strftime( "%Y-%m-%d %H:%M:%S") in dcshadow_check: dcshadow[username] = etime.strftime( "%Y-%m-%d %H:%M:%S") else: 
                dcshadow_check.append(etime.strftime("%Y-%m-%d %H:%M:%S"))
            else:
                for data in event_data:
                    if data.get("Name") in ["IpAddress", "Workstation"] and data.text is not None:
                        ipaddress = data.text.split("@")[0]
                        ipaddress = ipaddress.lower().replace("::ffff:", "")
                        ipaddress = ipaddress.replace("\\", "")
                    if data.get("Name") == "WorkstationName" and data.text is not None:
                        hostname = data.text.split("@")[0]
                        hostname = hostname.lower().replace("::ffff:", "")
                        hostname = hostname.replace("\\", "")
                    if data.get("Name") == "TargetUserName" and data.text is not None:
                        username = data.text.split("@")[0]
                        if username[-1:] != "$":
                            username = username.lower() + "@"
                        else:
                            username = "******"
                    if data.get("Name") == "TargetDomainName" and data.text is not None:
                        domain = data.text
                    if data.get("Name") in ["TargetUserSid", "TargetSid"] and data.text is not None and data.text.startswith("S-1"):
                        sid = data.text
                    if data.get("Name") == "LogonType":
                        logintype = int(data.text)
                    if data.get("Name") == "Status":
                        status = data.text
                    if data.get("Name") == "AuthenticationPackageName":
                        authname = data.text

                # Record the event unless it is a local/loopback logon with no usable source
                if username != "-" and ipaddress != "::1" and ipaddress != "127.0.0.1" and (ipaddress != "-" or hostname != "-"):
                    if ipaddress != "-":
                        event_series = pd.Series([eventid, ipaddress, username, logintype, status, authname],
                                                 index=event_set.columns)
                        ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, ipaddress, eventid],
                                              index=ml_frame.columns)
                    else:
                        event_series = pd.Series([eventid, hostname, username, logintype, status, authname],
                                                 index=event_set.columns)
                        ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, hostname, eventid],
                                              index=ml_frame.columns)
                    event_set = event_set.append(event_series, ignore_index=True)
                    ml_frame = ml_frame.append(ml_series, ignore_index=True)
                    # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                    count_series = pd.Series([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username],
                                             index=count_set.columns)
                    count_set = count_set.append(count_series, ignore_index=True)
                    # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))

                    if domain != "-":
                        domain_set.append([username, domain])
                    if username not in username_set:
                        username_set.append(username)
                    if domain not in domains and domain != "-":
                        domains.append(domain)
                    if sid != "-":
                        sids[username] = sid
                    if hostname != "-" and ipaddress != "-":
                        hosts[hostname] = ipaddress
                    if authname == "NTLM" and username not in ntlmauth:
                        ntlmauth.append(username)

            # Event 1102: the security log was cleared
            if eventid == 1102:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0],
                                                       "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except ValueError:
                    etime = datetime.datetime.strptime(logtime.split(".")[0],
                                                       "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                deletelog.append(etime.strftime("%Y-%m-%d %H:%M:%S"))

                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectUserName",
                                       namespaces={"ns": namespace})
                domain_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName",
                                         namespaces={"ns": namespace})

                if user_data[0].text is not None:
                    username = user_data[0].text.split("@")[0]
                    if username[-1:] != "$":
                        deletelog.append(username.lower())
                    else:
                        deletelog.append("-")
                else:
                    deletelog.append("-")

                if domain_data[0].text is not None:
                    deletelog.append(domain_data[0].text)
                else:
                    deletelog.append("-")

    print("\n[*] Load finished.")
    print("[*] Total number of event logs: %i." % count)

    if not username_set:
        sys.exit("[!] This event log did not include logs to be visualized. Please check the details of the event log.")

    tohours = int((endtime - starttime).total_seconds() / 3600)

    if hosts:
        event_set = event_set.replace(hosts)
    event_set["count"] = event_set.groupby(
        ["eventid", "ipaddress", "username", "logintype", "status", "authname"])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Learn event logs using a hidden Markov model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        learnhmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate ChangeFinder
    print("[*] Calculating ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set, starttime, tohours)

    # Calculate the hidden Markov model
    print("[*] Calculating Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate PageRank
    print("[*] Calculating PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntlmauth)

    # Create nodes
    print("[*] Creating graph data.")
    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except Exception:
        sys.exit("[!] Can't connect to the Neo4j database.")

    tx = GRAPH.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress], "hostname": hostname})

    i = 0
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"
        tx.append(statement_user, {
            "user": username[:-1],  # strip the trailing "@" marker
            "rank": ranks[username],
            "rights": rights,
            "sid": sid,
            "status": ustatus,
            "counts": ",".join(map(str, timelines[i * 6])),
            "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
            "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
            "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
            "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
            "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
            "detect": ",".join(map(str, detects[i]))
        })
        i += 1

    for domain in domains:
        tx.append(statement_domain, {"domain": domain})

    for _, events in event_set.iterrows():
        tx.append(statement_r, {
            "user": events["username"][:-1],
            "IP": events["ipaddress"],
            "id": events["eventid"],
            "logintype": events["logintype"],
            "status": events["status"],
            "count": events["count"],
            "authname": events["authname"]
        })

    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})

    tx.append(statement_date, {
        "Daterange": "Daterange",
        "start": datetime.datetime(*starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
        "end": datetime.datetime(*endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")
    })

    if deletelog:
        tx.append(statement_del, {"deletetime": deletelog[0], "user": deletelog[1], "domain": deletelog[2]})

    if policylist:
        policy_id = 0
        for policy in policylist:
            if policy[2] in CATEGORY_IDs:
                category = CATEGORY_IDs[policy[2]]
            else:
                category = policy[2]
            if policy[3] in AUDITING_CONSTANTS:
                sub = AUDITING_CONSTANTS[policy[3]]
            else:
                sub = policy[3]
            username = policy[1]
            tx.append(statement_pl, {"id": policy_id, "changetime": policy[0], "category": category, "sub": sub})
            tx.append(statement_pr, {"user": username[:-1], "id": policy_id})
            policy_id += 1

    tx.process()
    tx.commit()
    print("[*] Creation of graph data finished.")
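The statement_* Cypher templates consumed by tx.append() above are defined earlier in the script and are not shown here. A minimal sketch of two of them, assuming py2neo 2.x-style {param} placeholders; only the parameter names are taken from the calls above, while the labels and property names are assumptions:

# Hypothetical sketch of the templates fed to tx.append(); not from the source.
statement_ip = """
MERGE (ip:IPAddress {IP: {IP}})
SET ip.rank = {rank}, ip.hostname = {hostname}
"""

statement_r = """
MATCH (user:Username {user: {user}})
MATCH (ip:IPAddress {IP: {IP}})
CREATE (user)-[:Event {id: {id}, logintype: {logintype}, status: {status},
                       count: {count}, authname: {authname}}]->(ip)
"""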
# html_rel = Relationship(new_node, "HTML text", html_text)
# gp.create(html_rel)
gp.commit()


def get_the_available_crawlers():
    crawlers = ["CRAWLER-2", "CRAWLER-3", "CRAWLER-4"]
    return crawlers


graph_database_location = "http://" + database + ":7474/db/data/"
# Connect to the graph database
graph = Graph(graph_database_location, user='******', password='******')
if delete_graph_history == "yes":
    graph.delete_all()  # Delete all previously created nodes and relationships
    print("DATABASE DELETED !")
gp = graph.begin()

coordinates = []  # list of click coordinates
coordinates = generate_coordinates(width, height, coordinates)  # generate coordinates from the resolution
coordinates = generate_random_coordinates(coordinates)  # shuffle the generated coordinates randomly

chrome_options = Options()
chrome_options.add_extension(r".\process_monitor.crx")  # add the monitoring extension to Chrome
# chrome_options.add_extension("C:\\Users\\crawler\\Desktop\\Crawler\\process_monitor.crx")
chromium_path = r".\chrome-win32\chrome.exe"  # use the portable Chromium browser
# If Chromium is not required, removing the path above falls back to the default browser,
# i.e. dev-channel Google Chrome. Only dev-channel Chrome and Chromium can run the
# extension used here, because it relies on the "chrome.processes" API, which is
# available only in those builds.
chrome_options.binary_location = chromium_path
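The snippet configures chrome_options but stops before a browser is started. A minimal sketch of the launch step, assuming Selenium 3.x (where chrome_options= was the accepted keyword) and a chromedriver binary on PATH; the URL is a placeholder:

from selenium import webdriver

# Launch the portable Chromium configured above with the monitoring extension loaded
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.get("http://example.com")  # placeholder target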
class Neo4j:
    labels = None
    relationship_types = None
    property_keys = None
    constraints = None
    indexes = None
    parameters = {}

    def __init__(self, host, port, username=None, password=None, ssl=False, timeout=None, bolt=None):
        if timeout is not None:
            http.socket_timeout = timeout
        host_port = "{host}:{port}".format(host=host, port=port)
        uri = "{scheme}://{host_port}/db/data/".format(scheme="https" if ssl else "http", host_port=host_port)
        self.graph = Graph(uri, user=username, password=password, bolt=bolt, secure=ssl)
        try:
            self.neo4j_version = self.graph.dbms.kernel_version
        except Unauthorized:
            raise AuthError(uri)
        except SocketError:
            raise ConnectionError(uri)

    def cypher(self, statement):
        """Run a statement in its own transaction and return headers, rows,
        duration, and any error."""
        error = False
        headers = []
        rows = []
        start = datetime.now()
        tx = self.graph.begin()
        try:
            result = tx.run(statement, self.parameters)
            headers = list(result.keys())
            rows = [[x[header] for header in headers] for x in result]
            tx.commit()
        except KeyboardInterrupt:
            tx.rollback()
            error = ""
        except Exception as e:
            error = e
        end = datetime.now()
        return {
            "headers": headers,
            "rows": rows,
            "duration": duration_in_ms(start, end),
            "error": error,
        }

    def get_labels(self):
        if not self.labels:
            self.labels = sorted(self.graph.node_labels)
        return self.labels

    def get_relationship_types(self):
        if not self.relationship_types:
            self.relationship_types = sorted(self.graph.relationship_types)
        return self.relationship_types

    def get_property_keys(self):
        if not self.property_keys:
            self.property_keys = sorted(remote(self.graph).resolve("propertykeys").get().content)
        return self.property_keys

    def get_constraints(self):
        if not self.constraints:
            data = remote(self.graph).resolve("schema/constraint").get().content
            self.constraints = sort_dict_by_key(data, "label")
        return self.constraints

    def get_indexes(self):
        if not self.indexes:
            data = remote(self.graph).resolve("schema/index").get().content
            self.indexes = sort_dict_by_key(data, "label")
        return self.indexes

    def update_parameters(self, key, value):
        self.parameters[key] = value

    def refresh(self):
        # Invalidate the cached schema metadata, then repopulate it
        self.labels = None
        self.relationship_types = None
        self.property_keys = None
        self.indexes = None
        self.constraints = None
        self.get_labels()
        self.get_relationship_types()
        self.get_property_keys()
        self.get_indexes()
        self.get_constraints()

    def print_labels(self):
        headers = ["Labels"]
        rows = [[x] for x in self.get_labels()]
        print(pretty_table(headers, rows))

    def print_relationship_types(self):
        headers = ["Relationship Types"]
        rows = [[x] for x in self.get_relationship_types()]
        print(pretty_table(headers, rows))

    def print_constraints(self):
        headers = ["Constraints"]
        constraints = self.get_constraints()
        rows = [[x] for x in self.format_constraints_indexes(constraints)]
        print(pretty_table(headers, rows))

    def print_indexes(self):
        headers = ["Indexes"]
        indexes = self.get_indexes()
        rows = [[x] for x in self.format_constraints_indexes(indexes)]
        print(pretty_table(headers, rows))

    def format_constraints_indexes(self, values):
        return [":{}({})".format(value["label"], ",".join(value["property_keys"])) for value in values]

    def print_schema(self):
        headers = ["Labels", "Relationship Types", "Constraints", "Indexes"]
        columns = [self.get_labels()[:]]
        columns.append(self.get_relationship_types()[:])
        columns.append(self.format_constraints_indexes(self.get_constraints()[:]))
        columns.append(self.format_constraints_indexes(self.get_indexes()[:]))
        # Pad every column to the longest column's length so the rows line up
        max_length = len(max(columns, key=len))
        for x in columns:
            x.extend([""] * (max_length - len(x)))
        rows = [[x[i] for x in columns] for i in range(max_length)]
        print(pretty_table(headers, rows))

    def print_profile(self, profile):
        planner = profile.arguments["planner"]
        version = profile.arguments["version"]
        runtime = profile.arguments["runtime"]
        print("")
        print("Planner: {}".format(planner))
        print("Version: {}".format(version))
        print("Runtime: {}".format(runtime))
        print("")
        headers = ["Operator", "Estimated Rows", "Rows", "DB Hits", "Variables"]
        rows = []
        for n in reversed(walk(profile)):
            operator = n.operator_type
            estimated_rows = int(n.arguments["EstimatedRows"])
            rows_ = n.arguments["Rows"]
            db_hits = n.arguments["DbHits"]
            variables = n.identifiers
            rows.append([operator, estimated_rows, rows_, db_hits, variables])
        print(pretty_table(headers, rows))
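A minimal usage sketch for the wrapper above; the host, credentials, and query are placeholders rather than values from the original source, and the helpers (pretty_table etc.) are assumed to be importable from the same module:

client = Neo4j(host="localhost", port=7474, username="neo4j", password="secret")
result = client.cypher("MATCH (n) RETURN count(n) AS total")
if result["error"]:
    print(result["error"])
else:
    print(pretty_table(result["headers"], result["rows"]))  # plus result["duration"] in ms
client.print_schema()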
from py2neo import Graph, Node, Relationship

g = Graph()
tx = g.begin()
a = Node("Person", name="Alice")
tx.create(a)
b = Node("Person", name="Bob")
ab = Relationship(a, "KNOWS", b)
tx.create(ab)
tx.commit()
g.exists(ab)

"""
Sample Query
>>> from py2neo import Graph
>>> g = Graph()
>>> g.run("MATCH (a) WHERE a.name={x} RETURN a.name", x="Bob").evaluate()
u'Bob'
>>>
"""
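For comparison, the same subgraph can be created without an explicit transaction: py2neo's Graph.create() accepts a subgraph and commits it in one auto-committed transaction. A sketch, assuming py2neo v3:

from py2neo import Graph, Node, Relationship

g = Graph()
a = Node("Person", name="Alice")
b = Node("Person", name="Bob")
g.create(Relationship(a, "KNOWS", b))  # creates a, b, and the KNOWS edge together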
def handle(self, *args, **options):  # pylint: disable=unused-argument
    """
    Iterates through each course, serializes them into graphs, and saves
    those graphs to neo4j.
    """
    host = options['host']
    port = options['port']
    neo4j_user = options['user']
    neo4j_password = options['password']

    authenticate(
        "{host}:{port}".format(host=host, port=port),
        neo4j_user,
        neo4j_password,
    )

    graph = Graph(
        bolt=True,
        password=neo4j_password,
        user=neo4j_user,
        https_port=port,
        host=host,
        secure=True
    )
    mss = ModuleStoreSerializer()

    total_number_of_courses = len(mss.all_courses)

    for index, course in enumerate(mss.all_courses):
        # first, clear the request cache to prevent memory leaks
        RequestCache.clear_request_cache()

        log.info(
            "Now exporting %s to neo4j: course %d of %d total courses",
            course.id,
            index + 1,
            total_number_of_courses,
        )

        nodes, relationships = mss.serialize_course(course.id)
        log.info(
            "%d nodes and %d relationships in %s",
            len(nodes),
            len(relationships),
            course.id,
        )

        transaction = graph.begin()
        try:
            # first, delete the existing course
            transaction.run(
                "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
                    six.text_type(course.id)
                )
            )

            # now, re-add it
            self.add_to_transaction(nodes, transaction)
            self.add_to_transaction(relationships, transaction)
            transaction.commit()

        except Exception:  # pylint: disable=broad-except
            log.exception(
                "Error trying to dump course %s to neo4j, rolling back",
                six.text_type(course.id),
            )
            transaction.rollback()
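add_to_transaction() is referenced above but not defined in this snippet. A plausible sketch of the helper on the same command class, assuming it simply feeds each serialized py2neo entity into the open transaction (the name comes from the snippet; the body is an assumption):

@staticmethod
def add_to_transaction(neo4j_entities, transaction):
    # neo4j_entities is expected to be an iterable of py2neo Node or
    # Relationship objects produced by ModuleStoreSerializer.
    for entity in neo4j_entities:
        transaction.create(entity)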
UNWIND {categories} AS category
MERGE (c:Category {name: category})
MERGE (b)-[:IS_IN]->(c)
'''

merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''

print("Beginning business batch")
with open('data/yelp_academic_dataset_business.json', 'r') as f:
    tx = db.begin()
    count = 0
    # Commit in batches of 10,000 businesses to keep transactions small
    for business in (json.loads(line) for line in f):
        tx.run(create_business_query, business)
        count += 1
        if count >= 10000:
            tx.commit()
            tx = db.begin()
            print("Committing transaction")
            count = 0
    # Commit whatever is left over from the final partial batch
    if count > 0:
        tx.commit()
        print("Committing transaction")

## Create spatial layer:
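The opening of create_business_query is cut off above; only its UNWIND tail survives. A plausible reconstruction of the head, assuming the usual Yelp business JSON fields (every property name in the head is an assumption, only the tail is from the original):

create_business_query = '''
MERGE (b:Business {id: {business_id}})
SET b.name = {name}, b.city = {city}, b.state = {state}
UNWIND {categories} AS category
MERGE (c:Category {name: category})
MERGE (b)-[:IS_IN]->(c)
'''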
import requests
from py2neo import Graph, Node
import os
import pandas as pd
from string_converter import remove_non_alphaNumerics as remove_marks
import numpy as np

if __name__ == "__main__":
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    tx = g.begin()

    # ===================== RETURN GenericDrug objects: list of dicts, keys: rxcui, id =====================#
    q1 = '''
    MATCH (gd:GenericDrug)
    RETURN id(gd), gd.rxcui
    '''
    gd_obj = g.run(q1)
    gd_lst = []
    for record in gd_obj:
        gd_dic = {}
        gd_dic['id'] = record['id(gd)']
        gd_dic['rxcui'] = record['gd.rxcui']
        gd_lst.append(gd_dic)

    # ===================== Create relations, iterating over GenericDrug (faster, about 15276 iterations) =====================#
    q3 = '''
    MATCH (pc:Prescription) WHERE pc.rxcui = {gd_rxcui}
    MATCH (gd:GenericDrug) WHERE id(gd) = {id_gd}
    CREATE (pc)-[:PRESCRIBE]->(gd)
    '''
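    # The original snippet ends before q3 is executed. A minimal sketch of how
    # it might be run over the collected records (parameter names match q3;
    # batching and error handling are omitted):
    for gd in gd_lst:
        tx.run(q3, gd_rxcui=gd['rxcui'], id_gd=gd['id'])
    tx.commit()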
from py2neo import Graph, Node, Relationship
import sys
from time import sleep

graph_database_location = "http://192.168.100.53:7474/db/data/"
# Connect to the graph database
graph = Graph(graph_database_location, user='******', password='******')
tx = graph.begin()
statement = 'MATCH (a:Main_Tab)-[c:Crawling_Complete]->(b:Completed) WHERE a.Crawler="CRAWLER-1" RETURN c'
count = []
flag_detected = 0

if sys.argv[1] == "CRAWLER-1":
    flag_detected = 1
else:
    # Poll the graph until another crawler marks CRAWLER-1's work as complete
    while True:
        print("Okay, I am going to sleep for 30 seconds and check again")
        sleep(30)
        cursor = tx.run(statement).data()
        print(cursor)
        if len(cursor) != 0:
            for each in cursor:
                x = list(each.values())
                count.append(x[0])
        if len(count) != 0:
            flag_detected = 1
            break

if flag_detected == 1:
    print("Detected Completion")
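For context, a sketch of the producer side this poller waits on: the finishing crawler would create the Crawling_Complete relationship matched by the statement above. The labels, relationship type, and Crawler property mirror the query; everything else is an assumption:

done_tx = graph.begin()
main_tab = Node("Main_Tab", Crawler="CRAWLER-1")
completed = Node("Completed")
done_tx.create(Relationship(main_tab, "Crawling_Complete", completed))
done_tx.commit()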