def startup(self):
    """Connect to the graph database and prepare one selection per node type.

    Stores a default-configured ``py2neo.Graph`` on ``self.graph_db``, fills
    ``self.selectors`` with a label selection for every member of
    ``NodeTypes`` (keyed by the member, selecting on the member's ``name``),
    and finally sets ``self._has_started_up`` to ``True``.
    """
    database = py2neo.Graph()
    self.graph_db = database
    self.selectors = {}

    for kind in NodeTypes:
        # A fresh NodeSelector is built for each node type.
        selector = py2neo.NodeSelector(self.graph_db)
        label_selection = selector.select(kind.name)
        self.selectors[kind] = label_selection

    self._has_started_up = True
def drop_db_link_props(tx):
    """Delete every ``link_props`` node whose two endpoints are test switches.

    A node is removed through ``tx.delete`` only when ``is_test_dpid``
    accepts both its ``src_switch`` and its ``dst_switch`` property; all
    other ``link_props`` nodes are left untouched.

    Args:
        tx: object handed to ``py2neo.NodeSelector`` for the lookup and
            used to issue the deletes.
    """
    link_props_nodes = py2neo.NodeSelector(tx).select('link_props')
    for link_props in link_props_nodes:
        # Both endpoints are always checked (no short-circuit).
        endpoint_flags = []
        for endpoint in ('src_switch', 'dst_switch'):
            endpoint_flags.append(is_test_dpid(link_props[endpoint]))
        if all(endpoint_flags):
            tx.delete(link_props)
def fromNeo():
    """Run two hard-coded example queries against the local Neo4j instance.

    Authenticates against ``localhost:7474`` with the ``neo4j``/``neo4j``
    credentials, then dumps the result table of each query to stdout:

    1. the distinct ``sample`` values of FUNCTION nodes that point to a
       STRING node whose ``string`` contains ``'OpenSSL'``;
    2. everything reachable from the SAMPLE node with one fixed sha1.

    Returns:
        None. The results are only written to the console.
    """
    py2neo.authenticate("localhost:7474", "neo4j", "neo4j")
    neoGraph = py2neo.Graph("http://localhost:7474/")

    query = neoGraph.run("MATCH (f:FUNCTION)-->(s:STRING) WHERE s.string CONTAINS 'OpenSSL' RETURN DISTINCT f.sample")
    # Cursor.dump() writes the table itself and returns None, so it must not
    # be wrapped in print() (that only appended a stray "None" line).
    query.dump()

    query = neoGraph.run("MATCH (s:SAMPLE {sha1: '04301b59c6eb71db2f701086b617a98c6e026872'})-[rels*]->(c) RETURN *")
    query.dump()
def drop_db_switches(tx):
    """Delete every ``switch`` node whose ``name`` is a test datapath id.

    Args:
        tx: object handed to ``py2neo.NodeSelector`` for the lookup and
            used to issue the deletes.
    """
    switches = py2neo.NodeSelector(tx).select('switch')
    for switch in switches:
        # Only nodes accepted by is_test_dpid are removed.
        if is_test_dpid(switch['name']):
            tx.delete(switch)
def toNeo(graphity, allAtts):
    """Push a sample's NetworkX call graph and its metadata into Neo4j.

    NetworkX graph structure read by this function:
      * FUNCTION as node, attributes: function address, size, calltype,
        list of calls, list of strings, count of calls;
        functiontype [Standard, Callback, Export], alias (e.g. export name)
      * FUNCTION REFERENCE as edge (function address -> target address),
        attributes: ref offset (at)
      * CALLBACK REFERENCE as edge (currently for threads and Windows hooks)
      * API CALLS (list attribute of function node): address, API name
      * STRINGS (list attribute of function node): address, string

    Resulting Neo4j layout: one SAMPLE master node, one FUNCTION node per
    graph node, shared (merged) STRING and API nodes, and the relationships
    ``virtual_relationship`` (SAMPLE -> FUNCTION with in-degree 0),
    ``references_string``, ``calls_api`` and ``calls_sub``.

    WARNING: the whole database is wiped (``delete_all``) before the import.

    Args:
        graphity: NetworkX directed graph. Every node's data dict must carry
            'calltype', 'size', 'strings' and 'calls'; 'functiontype' and
            'alias' are optional.
        allAtts: dict of sample attributes. Keys read: 'sha1', 'filesize',
            'filetype', 'imphash', 'compilationts', 'addressep',
            'sectionep', 'sectioncount', 'originalfilename'.

    Returns:
        None.
    """
    py2neo.authenticate("localhost:7474", "neo4j", "neo4j")
    neoGraph = py2neo.Graph("http://localhost:7474/")
    neoSelector = py2neo.NodeSelector(neoGraph)

    # flush of the DB, for test purposes
    # NOTE(review): this removes every node and relationship in the
    # instance, which also makes the "already exists" check below
    # unreachable in practice - confirm this is still wanted.
    neoGraph.delete_all()

    mySha1 = allAtts['sha1']

    if neoSelector.select("SAMPLE", sha1=mySha1).first():
        print("Graph for sample %s already exists in Neo4j instance!" % mySha1)
        return

    # create master node for binary information
    sampleNode = py2neo.Node(
        "SAMPLE",
        sha1=mySha1,
        fileSize=allAtts['filesize'],
        binType=allAtts['filetype'],
        imphash=allAtts['imphash'],
        compilation=allAtts['compilationts'],
        addressEp=allAtts['addressep'],
        sectionEp=allAtts['sectionep'],
        sectionCount=allAtts['sectioncount'],
        originalFilename=allAtts['originalfilename'],
    )
    neoGraph.create(sampleNode)

    # get nodes with 0 indegree, prepare relations from master node
    # dict() accepts both the plain dict returned by NetworkX 1.x and the
    # (node, degree) view returned by NetworkX 2.x+.
    rootAddresses = {
        address
        for address, indegree in dict(graphity.in_degree()).items()
        if indegree == 0
    }

    # Created FUNCTION nodes by address, so the edge pass below does not
    # have to query the database again for nodes it has just written.
    functionNodes = {}

    # parsing of the NetworkX graph - functions, APIs and strings are all Neo4j nodes
    for funcAddress, funcData in graphity.nodes(data=True):
        funcCalltype = funcData['calltype']
        funcSize = funcData['size']
        # Missing or falsy optional attributes are stored as empty strings.
        funcType = funcData.get('functiontype') or ''
        funcAlias = funcData.get('alias') or ''

        # sha1 serves as link to master node, but also as node identifier in combination with the function address
        # TODO for saving memory, explore possibility of replacing sha1 with an index, as sha info is held in master node anyway
        functionNode = py2neo.Node(
            "FUNCTION",
            sample=mySha1,
            address=funcAddress,
            callType=funcCalltype,
            funcSize=funcSize,
            funcType=funcType,
            alias=funcAlias,
        )
        neoGraph.create(functionNode)
        functionNodes[funcAddress] = functionNode

        if funcAddress in rootAddresses:
            rootrel = py2neo.Relationship(sampleNode, "virtual_relationship", functionNode)
            neoGraph.create(rootrel)

        for stringData in funcData['strings']:
            strRefAddress = stringData[0]
            theString = stringData[1]

            # TODO think about string attributes to store, e.g. entropy, len
            try:
                # create string node or merge if string already exists, add relationship
                stringNode = py2neo.Node("STRING", string=theString)
                # TODO try this using Subgraph class, less interaction with DB server
                neoGraph.merge(stringNode)
                stringRel = py2neo.Relationship(functionNode, "references_string", stringNode, address=strRefAddress)
                neoGraph.create(stringRel)
            except Exception:
                # Best effort: a string that cannot be stored is reported and
                # skipped. KeyboardInterrupt/SystemExit are no longer swallowed.
                print("ERROR with this string %s" % theString)

        for callData in funcData['calls']:
            callRefAddress = callData[0]
            callApiName = callData[1]

            # create API node or merge if API already exists, add relationship
            apiNode = py2neo.Node("API", apiname=callApiName)
            neoGraph.merge(apiNode)
            apiRel = py2neo.Relationship(functionNode, "calls_api", apiNode, address=callRefAddress)
            neoGraph.create(apiRel)

    # function -> function call edges; edge attributes are not stored
    for from_node, to_node in graphity.edges():
        funcCallsFunc = py2neo.Relationship(functionNodes[from_node], "calls_sub", functionNodes[to_node])
        neoGraph.create(funcCallsFunc)
# -*- coding: utf-8 -*-
import py2neo as gp
from py2neo.ogm import *
from pandas import DataFrame
import sys
import os

# Neo4j credentials come from the command line: <ID> <password>.
ID = sys.argv[1]
password = sys.argv[2]
gp.authenticate("localhost:7474", ID, password)
graph = gp.Graph()
selector = gp.NodeSelector(graph)


def readPpiNetwork(filename):
    """Parse a tab-separated interaction network file.

    The first line of the file is returned as-is (right-stripped). Every
    following non-blank line must hold at least three tab-separated fields:
    two identifiers and a value convertible to ``float``.

    Args:
        filename: path of the network file to read.

    Returns:
        A ``(dis, dico)`` tuple where ``dis`` is the stripped first line and
        ``dico`` is a nested dict ``{first_field: {second_field: float}}``.

    Raises:
        IndexError: if a non-blank data line has fewer than three fields.
        ValueError: if the third field is not a valid float.
    """
    dico = {}
    # The context manager closes the handle even if parsing fails.
    with open(filename) as fic:
        dis = fic.readline().rstrip()
        for line in fic:
            content = line.rstrip()
            if not content:
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue
            rel = content.split('\t')
            dico.setdefault(rel[0], {})[rel[1]] = float(rel[2])
    return dis, dico


for fichier in os.listdir("./prototype/datasets/interaction_networks/"):
    dis, relationdict = readPpiNetwork("./prototype/datasets/interaction_networks/" + fichier)
if (i + 1) % self.iter == 0: self.commit() logger.debug("%s nodes added" % (i + 1)) # a new transaction self.begin() # outside cicle if (i + 1) % self.iter != 0: self.commit() logger.debug("%s nodes added" % (i + 1)) # closing file handle.close() # get a selector selector = py2neo.NodeSelector(self.graph) # now add relations. Count them count = 0 # debug logger.info("Adding iterations...") # get a new transaction object self.begin() for tax_id, parent_tax_id in self.all_relations.iteritems(): neo_nodes = selector.select(TaxNode.label, tax_id=tax_id) # Reading from a list (tax_id are unique, so there are 1 results) for neo_node in list(neo_nodes):