def test_labels_constraints(self):
    label_1 = next(self.unique_string)
    a = Node(label_1, name="Alice")
    b = Node(label_1, name="Alice")
    self.graph.create(a | b)
    with self.assertRaises(GraphError):
        self.graph.schema.create_uniqueness_constraint(label_1, "name")
    b.remove_label(label_1)
    self.graph.push(b)
    self.schema.create_uniqueness_constraint(label_1, "name")
    a.remove_label(label_1)
    self.graph.push(a)
    b.add_label(label_1)
    self.graph.push(b)
    try:
        self.schema.drop_index(label_1, "name")
    except GraphError as error:
        # this is probably a server bug
        assert error.__cause__.status_code // 100 == 5
    else:
        assert False
    b.remove_label(label_1)
    self.graph.push(b)
    self.schema.drop_uniqueness_constraint(label_1, "name")
    with self.assertRaises(GraphError):
        self.schema.drop_uniqueness_constraint(label_1, "name")
    self.graph.delete(a | b)
def add_node():
    node = Node()
    print("Enter a blank string to finish a section\n")
    print(">>> Labels")
    while True:
        label = input().strip()
        if label == '':
            break
        node.add_label(label)
    print(">>> Properties")
    while True:
        keyvalue = input()
        if keyvalue == '':
            break
        # expect "key,value"; split only on the first comma so values may contain commas
        k, v = (part.strip() for part in keyvalue.split(',', 1))
        node[k] = v
    graph.create(node)
    return node
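# Usage sketch for add_node() above, assuming `graph` is a module-level
# py2neo Graph handle (the function references it as a global); the
# connection details here are hypothetical.
from py2neo import Graph, Node

graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

node = add_node()  # type labels, then "key,value" pairs; blank line finishes a section
print(node.labels, dict(node))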
def _adjust_label_name(self, node: Node) -> Node:
    label_name = list(node.labels)[0]
    node.clear_labels()
    if callable(self.config_func_label_name_generator_func):
        custom_name = self.config_func_label_name_generator_func(label_name)
        if custom_name is not None:
            # apply the caller-generated name directly
            node.add_label(custom_name)
            node.__primarylabel__ = custom_name
            return node
    label_name_adjusted = label_name
    if label_name in self.config_dict_json_attr_to_reltype_instead_of_label:
        label_name_adjusted = self.config_dict_json_attr_to_reltype_instead_of_label[label_name]
    if label_name in self.config_dict_label_override:
        label_name_override_config = self.config_dict_label_override[label_name]
        if isinstance(label_name_override_config, str):
            label_name_adjusted = label_name_override_config
        elif isinstance(label_name_override_config, dict):
            label_name_adjusted = list(label_name_override_config.keys())[0]
            # add extra props as configured by caller
            extra_props = list(label_name_override_config.values())[0]
            for extra_prop, extra_val in extra_props.items():
                node[extra_prop] = extra_val
    label_name_adjusted = (label_name_adjusted.capitalize()
                           if self.config_bool_capitalize_labels
                           else label_name_adjusted)
    node.add_label(label_name_adjusted)
    node.__primarylabel__ = label_name_adjusted
    return node
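# A sketch of the override-config shapes _adjust_label_name() expects,
# inferred from the branches above; the keys and values are illustrative,
# not taken from any real configuration.
config_dict_label_override = {
    # plain rename: a str value replaces the label outright
    "person": "Person",
    # rename plus extra properties stamped onto every matching node:
    # {new_label: {prop: value, ...}}
    "company": {"Organization": {"source": "import", "reviewed": False}},
}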
def createNode(self, projectId, node_obj):
    # If the node already exists in the database, update it
    if 'id' in node_obj and graph.exists(graph.node(node_obj['id'])):
        # look the node up by id via py2neo
        n = graph.node(node_obj['id'])
        for key in node_obj.keys():
            if key not in hideKeys:
                n[key] = node_obj[key]
        graph.push(n)  # write the changes back to the database
        print("Updated node: %s" % n)
        return ''
    # Otherwise create a new node
    else:
        print(node_obj)
        newNode = Node('Character', name=node_obj['name'])
        for key in node_obj.keys():
            if key not in hideKeys:
                newNode[key] = node_obj[key]
        # Tag every new node with the project id as an extra label,
        # so it can be matched back to the main project number (prj-id)
        newNode.add_label(str(projectId))
        h = str(hash(newNode))
        newNode['hash'] = h
        graph.merge(newNode)
        print("Created node: %s" % newNode)
        # Return the id of the newly created node
        query = '''
            MATCH (n:{})
            WHERE n.hash = {{x}} and n.name = {{y}}
            RETURN n, ID(n) as id
        '''.format(projectId)
        result = graph.run(query, x=h, y=node_obj['name']).data()
        print(result)
        return jsonify(result={"uid": result[0]['id']})
def set_custom_node(
        self,
        custom_node: CustomNodeType,
        parent_name: str,
        relationship_properties: CustomRelPropertiesType = None) -> Node:
    """Set a custom node. Requires a parent node to create a relationship with."""
    parent_node: Node = self.get_node(parent_name)
    if not parent_node:
        raise ValueError('Parent does not exist.')
    graph = self._validate_and_connect()
    tx = graph.begin()
    node = Node(custom_node['taxonRank'], **custom_node)
    node.add_label('custom')
    tx.create(node)
    if relationship_properties:
        relationship = PARENT(node, parent_node, **relationship_properties)
    else:
        relationship = PARENT(node, parent_node)
    tx.merge(relationship)
    tx.commit()
    return node
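# PARENT is not defined in the snippet above; under py2neo it is most
# likely a relationship class built with Relationship.type(). A sketch
# of that assumption:
from py2neo import Relationship

PARENT = Relationship.type("PARENT")  # hypothetical; matches PARENT(node, parent_node, **props)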
def test_should_push_one_label_onto_no_labels(graph):
    node = Node()
    graph.create(node)
    node_id = node.identity
    assert_has_labels(graph, node_id, set())
    node.add_label("A")
    graph.push(node)
    assert_has_labels(graph, node_id, {"A"})
def test_should_push_one_additional_label(graph):
    node = Node("A")
    graph.create(node)
    node_id = node.identity
    assert_has_labels(graph, node_id, {"A"})
    node.add_label("B")
    graph.push(node)
    assert_has_labels(graph, node_id, {"A", "B"})
def test_should_push_multiple_labels_with_overlap(graph):
    node = Node("A", "B")
    graph.create(node)
    node_id = node.identity
    assert_has_labels(graph, node_id, {"A", "B"})
    node.remove_label("A")
    node.add_label("C")
    graph.push(node)
    assert_has_labels(graph, node_id, {"B", "C"})
def test_should_push_one_label_onto_different_label(graph):
    node = Node("A")
    graph.create(node)
    node_id = node.identity
    assert_has_labels(graph, node_id, {"A"})
    node.clear_labels()
    node.add_label("B")
    graph.push(node)
    assert_has_labels(graph, node_id, {"B"})
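# The four tests above lean on an assert_has_labels() helper that is not
# shown; a minimal sketch of what it presumably does, reading the labels
# back from the database rather than from the local Node object:
def assert_has_labels(graph, node_id, expected):
    record = graph.run("MATCH (n) WHERE id(n) = $x RETURN labels(n) AS labels",
                       x=node_id).data()[0]
    assert set(record["labels"]) == set(expected)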
def add_tweet_node(tx, user, location, label, entry):
    tweet = Node("POST", tid=entry['tid'])
    tweet.add_label(label)
    tx.merge(tweet, primary_label="POST", primary_key="tid")
    tx.create(Relationship(user, "POSTED", tweet))
    tx.create(Relationship(tweet, "POSTED_FROM", location))
    return tweet
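# A sketch of driving add_tweet_node() inside a transaction; `graph`,
# `user`, and `location` are assumed to exist already, and the entry
# dict is illustrative.
tx = graph.begin()
tweet = add_tweet_node(tx, user, location, "RETWEET", {"tid": "12345"})
graph.commit(tx)  # tx.commit() on py2neo versions before 2021.1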
def output(node, p_node):
    for item in node['children']:
        c_node = Node(name=item['name'], id=item['id'], eventType=item['type'])
        c_node.add_label('Event')
        graph.create(c_node)
        # (child)-[r:Derivation]->(parent)
        graph.create(Relationship(c_node, 'Derivation', p_node))
        output(item, c_node)
def _create_collection_hub_node(self, member_label_name, hub_id):
    hub_node_label = self._get_hub_node_label_name(member_label_name)
    hub_node = Node(hub_node_label, id=hub_id)
    hub_node._is_collectionhub = True
    hub_node.__primarylabel__ = hub_node_label
    hub_node.__primarykey__ = "id"
    for lbl in self.config_list_collection_anchor_extra_labels:
        hub_node.add_label(lbl)
    if self.config_str_collection_anchor_attach_list_members_label:
        hub_node.add_label(member_label_name)
    return hub_node
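# Because the hub node carries __primarylabel__ and __primarykey__, it can
# later be merged without repeating the label/key pair; a sketch, with the
# instance name and arguments invented for illustration:
hub = builder._create_collection_hub_node("Person", hub_id="person-hub-1")
graph.merge(hub)  # matches on the hub label and the "id" property via the dunder hints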
def creat_map(record):
    subject_name = record[1]
    original_subject_label = record[2]
    relation_verb = record[3]
    object_name = record[4]
    original_object_label = record[5]
    try:
        query_from_name = graph.run(
            'Match (n) where n.name ="{}" return n'.format(subject_name)).data()
        query_to_name = graph.run(
            'Match (n) where n.name ="{}" return n'.format(object_name)).data()
    except Exception:
        return 0
    # Properties can be added either way
    if not query_from_name:
        from_name = Node(original_subject_label, name=subject_name)
        graph.create(from_name)
    else:
        from_name = query_from_name[0]['n']
        if not from_name.has_label(original_subject_label):
            from_name.add_label(original_subject_label)  # add the label
    if not query_to_name:
        to_name = Node(original_object_label, name=object_name)
        graph.create(to_name)
    else:
        to_name = query_to_name[0]['n']
        if not to_name.has_label(original_object_label):
            to_name.add_label(original_object_label)  # add the label
    try:
        cypher_sql = ('Match (n:%s)-[:%s]->(m:%s) '
                      'where n.name="%s" and m.name="%s" return m'
                      % (original_subject_label, relation_verb,
                         original_object_label, subject_name, object_name))
        relation_exist = graph.run(cypher_sql).data()
    except Exception:
        return 0
    if not relation_exist:
        map_relation = Relationship(from_name, relation_verb, to_name)
        map_relation['original_subject_label'] = original_subject_label
        map_relation['original_object_label'] = original_object_label
        map_relation['original_text'] = record[6]
        map_relation['original_text_table'] = record[7]
        map_relation['original_text_table_id'] = record[8]
        graph.create(map_relation)
        return 1
    return 2
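# The lookups in creat_map() splice names straight into the Cypher text,
# which breaks on embedded quotes and is open to injection; a hedged sketch
# of the parameterized equivalent (same global `graph` handle assumed):
query_from_name = graph.run(
    "MATCH (n) WHERE n.name = $name RETURN n", name=subject_name).data()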
def add_tweet_node(user, location, date, label, entry):
    tweet = graph.find_one("POST", "tid", entry['tid'])
    if tweet:
        tweet.add_label(label)
        graph.push(tweet)
    else:
        tweet = Node("POST", tid=entry["tid"])
        tweet.add_label(label)
        graph.merge(tweet)
    graph.merge(Relationship(user, "POSTED", tweet))
    graph.merge(Relationship(tweet, "POSTED_FROM", location))
    graph.merge(Relationship(tweet, "POSTED_ON", date))
    return tweet
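# graph.find_one() was removed in py2neo v4; on current py2neo the lookup
# above would be written with a NodeMatcher instead. A sketch of just the
# lookup, not a full rewrite of the function:
from py2neo.matching import NodeMatcher

tweet = NodeMatcher(graph).match("POST", tid=entry["tid"]).first()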
def insertIntoGraph(queue):
    """Insert queue content into graph"""
    authorFilter = list(graph.find("Author"))
    while True:
        pubAttrs = queue.get()
        if pubAttrs is None:
            break
        include = False
        if pubAttrs.get('author') is None:
            continue
        for author in pubAttrs.get('author'):
            author = author.strip()
            if [x for x in authorFilter
                    if compareNames(removeAccents(x['name']), removeAccents(author))]:
                include = True
        if not include:
            continue
        newpub = graph.find_one("Publication", "title", pubAttrs.get("title"))
        if newpub is None:
            logging.info("Creating new publication " + pubAttrs.get("title", ""))
            newpub = Node()
            for att in pubAttrs.keys():
                newpub[att] = pubAttrs.get(att, -1)
            newpub.add_label('Publication')
            graph.create(newpub)
        else:
            continue
        for aName in list(pubAttrs['author']):
            author = None
            for authorNode in graph.find("Author"):
                if compareNames(removeAccents(authorNode['name']), removeAccents(aName)):
                    author = authorNode
                    break
            if author is None:
                author = Node("Author", name=aName)
                graph.create(author)
                logging.info("New author created: %s" % aName)
            relAuthoring = Relationship(author, "AUTHORING", newpub)
            logging.info("!!! Creating relationship: " + newpub.get('title'))
            graph.create(relAuthoring)
    print("All insertions done")
def populate_nodes(self):
    for entity in self.entities:
        node = Node()
        node.add_label(entity)
        node['id'] = entity
        entity_type = self.entity_types[entity]
        node.add_label(entity_type)
        for prop in self.properties_dictionary[entity]:
            key = list(prop.keys())[0]
            value = prop[key]
            node[key] = value
        self.entity_nodes[entity] = node
        self.graph.create(node)
def test_labels_constraints(graph, make_unique_id):
    label_1 = make_unique_id()
    a = Node(label_1, name="Alice")
    b = Node(label_1, name="Alice")
    graph.create(a | b)
    with raises(Neo4jError) as e:
        graph.schema.create_uniqueness_constraint(label_1, "name")
    assert e.value.code == "Neo.DatabaseError.Schema.ConstraintCreationFailed"
    b.remove_label(label_1)
    graph.push(b)
    graph.schema.create_uniqueness_constraint(label_1, "name")
    a.remove_label(label_1)
    graph.push(a)
    b.add_label(label_1)
    graph.push(b)
    b.remove_label(label_1)
    graph.push(b)
    graph.schema.drop_uniqueness_constraint(label_1, "name")
    with raises(Neo4jError) as e:
        graph.schema.drop_uniqueness_constraint(label_1, "name")
    assert e.value.code == "Neo.DatabaseError.Schema.ConstraintDropFailed"
    graph.delete(a | b)
def dbAccess():
    from py2neo import Graph, Node, Relationship

    def output(node, p_node):
        for item in node['children']:
            c_node = Node(name=item['name'], id=item['id'], eventType=item['type'])
            c_node.add_label('Event')
            graph.create(c_node)
            # (child)-[r:Derivation]->(parent)
            graph.create(Relationship(c_node, 'Derivation', p_node))
            output(item, c_node)

    graph = Graph("http://luncert.cn:7474", username="******", password="******")
    for item in tree:
        p_node = Node(name=item['name'], id=item['id'], sceneType=item['type'])
        p_node.add_label('Scene')
        graph.create(p_node)
        output(item, p_node)
def add_user_properties(self, user):
    '''Given a unique user id, adds properties to the existing user Node'''
    try:
        user_id = user.id
        existing_user = Node('User', id=user_id)
        clean_prop_dict = self.__clean_user_dict(user.__dict__)
        self.graph.merge(existing_user)
        for k, v in clean_prop_dict.items():
            existing_user[k] = v
        # add additional label to verified accounts
        if clean_prop_dict['verified']:
            existing_user.add_label('Verified')
    except Exception:
        # bad user id
        user_id = user['user_id']
        error = user['error']
        existing_user = Node('User', id=user_id)
        self.graph.merge(existing_user)
        existing_user['screen_name'] = 'INVALID'
        existing_user['error'] = error
        print('Found invalid user id')
    self.graph.push(existing_user)
def bacth_node_label(self, label, entity_labes):
    tx = self.graph.begin()
    newnodelist = []
    oldnodelist = []
    matcher = NodeMatcher(self.graph)
    for data in entity_labes:
        node = matcher.match(name=data).first()
        if node is None:
            oneNode = Node()
            oneNode.add_label(label)
            oneNode["name"] = data
            newnodelist.append(oneNode)
        else:
            node.add_label(label)
            oldnodelist.append(node)
    if len(newnodelist) > 0:
        newsub = Subgraph(newnodelist)
        print("newnodelist----", newnodelist)
        tx.create(newsub)
    if len(oldnodelist) > 0:
        oldsub = Subgraph(oldnodelist)
        print("oldnodelist----", oldnodelist)
        tx.push(oldsub)
    tx.commit()
def create_all_nodes(self):
    """
    Extract triples from MongoDB and create the corresponding nodes.
    :return:
    """
    nodes = dict()
    self.debug('[Start create nodes] triple_docs=%s', len(self.triple_docs))
    for doc in self.triple_docs:
        triple_subject = doc.get("triple_subject", "")      # subject
        triple_predicate = doc.get("triple_predicate", "")  # predicate
        triple_object = doc.get("triple_object", "")        # object
        # check whether triple_predicate is a data (literal) property
        property_key = self.data_relationship.get(triple_predicate, {}).get('uri', "")
        if not triple_subject or not triple_object:
            continue
        if triple_subject not in nodes.keys():
            # subject not yet in nodes: create a new node
            nodes[triple_subject] = PropertyDict({"name": triple_subject})
            nodes[triple_subject]['label'] = [triple_subject, ]
            if property_key:
                # triple_predicate is a data property: record the value
                if triple_object:
                    nodes[triple_subject][property_key] = [triple_object, ]
                else:
                    self.warn(
                        "@@@@@@@@@@@@@@@@@@@@@@@ unexpected triple_object, "
                        "[subject=%s, predicate=%s, object=%s]",
                        triple_subject, triple_predicate, triple_object)
            else:
                # triple_predicate is a relationship property: create a node
                # for triple_object if it is not in nodes yet
                if triple_object not in nodes.keys():
                    nodes[triple_object] = PropertyDict({"name": triple_object})
                    nodes[triple_object]['label'] = [triple_object, ]
        else:
            # subject already in nodes: update the node
            if property_key:
                # data property: append the value
                if triple_object:
                    is_exist = nodes.get(triple_subject, {}).get(property_key)
                    if is_exist:
                        nodes[triple_subject][property_key].append(triple_object)
                    else:
                        nodes[triple_subject][property_key] = [triple_object, ]
                else:
                    self.warn(
                        "@@@@@@@@@@@@@@@@@@@@@@@ unexpected triple_object, "
                        "[subject=%s, predicate=%s, object=%s]",
                        triple_subject, triple_predicate, triple_object)
            else:
                # relationship property: create a node for triple_object
                # if it is not in nodes yet
                if triple_object not in nodes.keys():
                    nodes[triple_object] = PropertyDict({"name": triple_object})
    tx = self.bio_graph.begin()
    self.debug("got nodes=%s", len(nodes))
    for item in nodes.values():
        # walk all collected nodes and create them;
        # the default label is "biological concept"
        labels = item.get('label', [])
        node = Node(self.default_label, name=item['name'])
        if labels:
            # attach any additional labels
            for label in labels:
                node.add_label(label)
            del item['label']
        del item['name']
        if item:
            # set any remaining properties
            for key in item.keys():
                node[key] = item[key]
        tx.create(node)
    tx.commit()
# print(type(buff))
for record in data:
    query_from_name = graph.run(
        "Match (n: pig) where n.name ='{}' return n".format(record[1])).data()
    query_to_name = graph.run(
        "Match (n: pig) where n.name ='{}' return n".format(record[3])).data()
    # print(query_to_name)
    # Properties can be added either way
    if not query_from_name:
        from_name = Node("pig", name=record[1], age=0)
        graph.create(from_name)
    else:
        from_name = query_from_name[0]['n']
        print(from_name.has_label('zw'))  # check whether the label is present
        from_name.add_label('zw')         # add the label
        print(from_name.has_label('zw'))  # check again after adding
    if not query_to_name:
        to_name = Node("pig", name=record[3], age=0)
        graph.create(to_name)
    else:
        to_name = query_to_name[0]['n']
    pig_relation = Relationship(from_name, record[2], to_name, name=record[4])
    pig_relation['count'] = 1
    graph.create(pig_relation)
db.close()
print('end')
# Create relationships:
# two relationships were created, test_node_1 -> test_node_2 and
# test_node_2 -> test_node_1, typed "husband"/"wife"; both carry a
# `count` property with value 1.
def import_timeline_data(graph):
    timeline_rows_processed = process_timeline_data()
    current_dt = None
    prev_tf_node = None
    # counters
    row_processed_c = 0
    tf_nodes_c = 0
    u_nodes_c = 0
    p_nodes_c = 0
    o_nodes_c = 0
    batch_c = 0
    # graph.delete_all()
    batch = graph.begin()
    for row in timeline_rows_processed:
        # dt = row[0].replace(second=0)
        dt = row[0]
        row_processed_c += 1
        batch_c += 1
        if current_dt != dt:
            tf_nodes_c += 1
            labels = []
            if current_dt is None:
                labels += ['FIRST_TIME_FRAME']
            elif row_processed_c == len(timeline_rows_processed):
                labels += ['LAST_TIME_FRAME']
            current_dt = dt
            # props
            tf_node = Node(
                'TIME_FRAME',
                # define secs
                timestamp=current_dt.strftime('%Y-%m-%d %H:%M:%S'),
                year=int(row[1]),
                month=int(row[2]),
                day_in_week=str(row[3]),
                hour=int(row[4]),
                part_of_day=str(row[5]))
            # labels
            for frame_label in labels:
                tf_node.add_label(frame_label)
            # rels
            batch.create(tf_node)
            if prev_tf_node is not None:
                next_frame_rel = Relationship(prev_tf_node, 'NEXT_FRAME', tf_node)
                batch.create(next_frame_rel)
            prev_tf_node = tf_node
        if row[6] == 'user':
            u_nodes_c += 1
            # props
            if is_number(row[13]):
                order_count = int(row[13])
            else:
                order_count = 0
            if is_number(row[15]):
                money_spent = float(row[15])
            else:
                money_spent = 0
            city = row[11].decode('latin-1').encode('utf-8')
            country = row[9].decode('latin-1').encode('utf-8')
            state = row[10].decode('latin-1').encode('utf-8')
            u_node = Node('USER',
                          oid=int(row[7]),
                          country=country,
                          state=state,
                          city=city,
                          order_count=order_count,
                          money_spent=money_spent)
            # labels
            if order_count > 0:
                u_node.add_label('HAS_PURCHASED')
            # print u_node
            # rels
            batch.create(u_node)
            user_created_rel = Relationship(u_node, 'CREATED_AT', tf_node)
            batch.create(user_created_rel)
        elif row[6] == 'product':
            p_nodes_c += 1
            if is_number(row[11]):
                price = float(row[11])
            else:
                price = 0
            if is_number(row[12]):
                min_var_price = float(row[12])
            else:
                min_var_price = 0
            if is_number(row[13]):
                max_var_price = float(row[13])
            else:
                max_var_price = 0
            p_node = Node('PRODUCT',
                          oid=int(row[7]),
                          price=price,
                          min_var_price=min_var_price,
                          max_var_price=max_var_price)
            # print p_node
            # rels
            batch.create(p_node)
            product_created_rel = Relationship(p_node, 'CREATED_AT', tf_node)
            batch.create(product_created_rel)
        elif row[6] == 'user-order':
            # Data structure: user_id,order_id,order_date
            o_nodes_c += 1
            o_node = Node('ORDER', oid=int(row[8]))
            # print o_node
            # rels
            batch.create(o_node)
            order_created_rel = Relationship(o_node, 'CREATED_AT', tf_node)
            batch.create(order_created_rel)
        if batch_c == BATCH_SIZE:
            batch.commit()
            # time.sleep(14)
            print 'BATCH COMMITTED TF: ' + str(tf_nodes_c) + ', U: ' + str(
                u_nodes_c) + ', P: ' + str(p_nodes_c) + ', O: ' + str(o_nodes_c)
            batch_c = 0
            batch = graph.begin()
    batch.commit()
    print 'BATCH COMMITTED TF: ' + str(tf_nodes_c) + ', U: ' + str(
        u_nodes_c) + ', P: ' + str(p_nodes_c) + ', O: ' + str(o_nodes_c)
    print 'PROCESSED rows: ' + str(row_processed_c) + ', TF: ' + str(
        tf_nodes_c) + ', U: ' + str(u_nodes_c) + ', P: ' + str(
        p_nodes_c) + ', O: ' + str(o_nodes_c)
def NeoHandle(self, op, filedict):
    op.pop(0)
    if op[0] == 'Create':
        tempnode = self.matchper('path', op[1])
        if tempnode == []:
            nodes = Node(name=op[2], path=op[1], ext=op[3], uid=op[4],
                         atime=op[5], mtime=op[6], ctime=op[7])
            filename = op[2].split('.')
            filename = filename[0]
            print('filename: ', filename)
            labels = gp.getproperty(filename)
            self.graph.create(nodes)
            if labels[0] == 'OK':
                labels.pop(0)
                for label in labels:
                    print('label: ', label)
                    nodes.add_label(label)
                    self.graph.push(nodes)
                    linknodes = self.matchlabel(label)
                    for node in linknodes:
                        if node['path'] == op[1]:
                            continue
                        r = Relationship(nodes, label, node)
                        r['weight'] = 1
                        self.graph.create(r)
        else:
            tempnode = tempnode[0]
            tempnode['ext'] = op[3]
            tempnode['atime'] = op[5]
            tempnode['mtime'] = op[6]
            tempnode['ctime'] = op[7]
            self.graph.push(tempnode)
    elif op[0] == 'Open':
        print('ops: ', op)
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        prefetch = op[3:]
        for prefile in prefetch:
            if os.path.exists(prefile):
                size = os.path.getsize(prefile)
                fd = os.open(prefile, os.O_RDONLY)
                if size < 4 * 1024 * 1024:
                    os.read(fd, size)
                else:
                    os.read(fd, 4 * 1024 * 1024)
                os.close(fd)
        opentime = time.mktime(time.strptime(op[2], '%a %b %d %H:%M:%S %Y'))
        print('filedict: ', filedict)
        if abs(filedict['time'] - opentime) < 100:
            if filedict['file1'] == op[1] or filedict['file2'] == op[1] or filedict['file3'] == op[1]:
                lastfile = filedict['initfile']
                node1 = self.matchper('path', lastfile)
                node1 = node1[0]
                node2 = nodes[0]
                rels = self.relmatcher(node1, node2)
                for rel in rels:
                    if rel['weight'] < 10:
                        rel['weight'] += 1
                        self.graph.push(rel)
                for i in range(3):
                    if filedict['file' + str(i + 1)] == op[1] or filedict['file' + str(i + 1)] == '':
                        continue
                    else:
                        node3 = self.matchper('path', filedict['file' + str(i + 1)])
                        if node3 == []:
                            print('Wrong input')
                            continue
                        node3 = node3[0]
                        rels = self.relmatcher(node1, node3)
                        print(rels)
                        for rel in rels:
                            if rel['weight'] >= 6:
                                rel['weight'] -= 1
                                self.graph.push(rel)
        node = nodes[0]
        node['atime'] = op[2]
        filedict['initfile'] = op[1]
        filedict['time'] = opentime
        for j in range(3):
            if j + 3 < len(op):
                filedict['file' + str(j + 1)] = op[j + 3]
        self.graph.push(node)
    elif op[0] == 'Read':
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        node = nodes[0]
        node['atime'] = op[2]
        self.graph.push(node)
    elif op[0] == 'Rename':
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        node = nodes[0]
        node['path'] = op[2]
        node['name'] = op[3]
        node['ext'] = op[4]
        node['ctime'] = op[5]
        self.graph.push(node)
    elif op[0] == 'Unlink':
        if filedict['initfile'] == op[1]:
            filedict['initfile'] = ''
            for j in range(3):
                filedict['file' + str(j + 1)] = ''
            filedict['time'] = 0.0
        for j in range(3):
            if filedict['file' + str(j + 1)] == op[1]:
                filedict['file' + str(j + 1)] = ''
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        node = nodes[0]
        print(node)
        self.graph.delete(node)
    elif op[0] == 'Chown':
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        node = nodes[0]
        node['uid'] = op[2]
        node['ctime'] = op[3]
        self.graph.push(node)
    elif op[0] == 'Change':
        nodes = self.matchper('path', op[1])
        if nodes == []:
            print('Wrong op')
            return
        node = nodes[0]
        node['atime'] = op[2]
        node['mtime'] = op[3]
        node['ctime'] = op[4]
        self.graph.push(node)
    else:
        return
class DataModel(object):
    def __init__(self):
        self.entity_name_cache = set()
        self.property_name_cache = set()
        self.node_queue = list()
        self.node = None
        self.line_count = 0
        self.g = None
        self.transaction = None

    def connect_data_base(self, scheme='http', host='0.0.0.0', port=7474,
                          username='******', password='******'):
        self.g = Graph(scheme=scheme, host=host, port=port,
                       username=username, password=password)
        self.transaction = self.g.begin()

    @get_time_or_not_for_method(get=True)
    def batch_load(self, source_file, mapper_file, n, want_progress=True):

        def get_pinyin_n(property_name):
            for line in open(mapper_file):
                hanzis, pinyins = make_a_list_of_strings(line, sep=' ')
                if property_name == hanzis:
                    return pinyins

        def set_property_of_node():
            if property_name not in self.property_name_cache:
                self.property_name_cache = set()
                self.property_name_cache.add(property_name)
            if self.node[property_name] is None:
                self.node[property_name] = [property_value]
            else:
                self.node[property_name].append(property_value)

        def set_entity_name_of_node():
            self.node_queue.append(self.node)
            self.entity_name_cache = set()
            self.entity_name_cache.add(entity_name)
            self.node['entity_name'] = entity_name

        def commit_head_node():
            head_node = self.node_queue.pop(0)
            self.connect_data_base()
            self.transaction.create(head_node)
            self.transaction.commit()

        def commit_no_more_than_n_nodes_per_trans(n):
            self.connect_data_base()
            for _ in range(n):
                if len(self.node_queue) >= 1:
                    head_node = self.node_queue.pop(0)
                    self.transaction.create(head_node)
                else:
                    break
            self.transaction.commit()

        def get_number_of_lines_of_txt_files(file_name):
            d = {
                total_txt_9_lines: 10,
                total_txt_49_lines: 39,
                total_txt_1_to_10000: 6499,
                total_txt_1_to_1000: 65000,
                total_txt_1_to_10: 6500129,
                total_txt: 65001290
            }
            return d[file_name]

        def show_progress():
            self.line_count += 1
            percent = self.line_count / total_number_of_lines
            percent100 = percent * 100
            end_time = time()
            elapsed_seconds = end_time - start_time
            hours, minutes, seconds = get_hours_minutes_seconds(elapsed_seconds)
            need_total_seconds = elapsed_seconds * (1 - percent) / percent
            need_hours, need_minutes, need_seconds = get_hours_minutes_seconds(need_total_seconds)
            sys.stdout.write(
                "\rProcessed about %.3f%% of the data; "
                "%d h %d min %d s elapsed, roughly %d h %d min %d s remaining..."
                % (percent100, hours, minutes, seconds,
                   need_hours, need_minutes, need_seconds))

        total_number_of_lines = get_number_of_lines_of_txt_files(source_file)
        print('\n')
        start_time = time()
        for line in open(source_file):
            if line == '\n':
                break
            entity_name, property_name, property_value = make_a_list_of_strings(line)
            if entity_name not in self.entity_name_cache:
                if len(self.node_queue) == n:
                    commit_no_more_than_n_nodes_per_trans(n)
                self.node = Node()
                set_entity_name_of_node()
            if property_name == 'BaiduTAG':
                self.node.add_label(property_value)
            else:
                # property_name = get_pinyin_n(property_name)
                set_property_of_node()
            if want_progress:
                show_progress()
        sys.stdout.write('\n\nAlmost done...')
        commit_no_more_than_n_nodes_per_trans(n)
        sys.stdout.write('\n\nAll data has been imported into the database!\n\n')
def insertAiroData(self, data):
    print("Inserting node data!")
    bssidNodes, stationNodes = data[0][0], data[0][1]
    for b in bssidNodes:
        try:
            bNode = Node(b['type'], name=b['name'], bssid=b['bssid'],
                         oui=b['oui'], encryption=b["encryption"],
                         speed=b['speed'], channel=b['channel'],
                         auth=b['auth'], cipher=b['cipher'], lan=b['lan'])
            bNode.add_label("Device")
            self.graph.create(bNode)
        except ClientError:
            pass
    for essids, s in stationNodes:
        sNode = self.graph.nodes.match("Device", bssid=s['bssid']).first()
        if sNode is None:
            sNode = Node(s["type"], name=s['name'], bssid=s['bssid'],
                         FirstTimeSeen=s['fts'], LastTimeSeen=s['lts'],
                         Power=s['pwr'], NumPackets=s['pkts'],
                         Association=s['assoc'], oui=s['oui'])
            sNode.add_label("Device")
            # a transient node cannot be pushed, so create it first
            self.graph.create(sNode)
        else:
            sNode['FirstTimeSeen'] = s['fts']
            sNode['LastTimeSeen'] = s['lts']
            sNode['Power'] = s['pwr']
            sNode['NumPackets'] = s['pkts']
            sNode['Association'] = s['assoc']
            self.graph.push(sNode)
        sNode = self.graph.nodes.match("Device", bssid=s['bssid']).first()
        for essid in essids:
            nExisting = self.graph.nodes.match("Device", name=essid).first()
            if len(essid) > 0:
                newProbe = Node("AP", name=essid)
                newProbe.add_label("Device")
                self.graph.create(
                    Relationship(sNode, "Probes", nExisting or newProbe))
        if s['assoc'] is not None:
            aExisting = self.graph.nodes.match("Device", bssid=s['assoc']).first()
            newAssoc = Node("AP", bssid=s['assoc'])
            newAssoc.add_label("Device")
            self.graph.create(
                Relationship(sNode, "AssociatedTo", aExisting or newAssoc))
    print("Database updated!")