class PythonJoernTests(unittest.TestCase):
    """Base test case that connects a JoernSteps client before every test."""

    def setUp(self):
        """Create a JoernSteps client, keep it on ``self.j`` and connect it."""
        client = JoernSteps()
        self.j = client
        client.connectToDatabase()

    def tearDown(self):
        """No per-test cleanup is performed."""
        return None
def __init__(self, port):
    """Connect ``self.j`` to the graph database served on a local port.

    port -- value convertible with ``int()``; it is substituted into
            ``http://localhost:<port>/db/data/``.
    """
    self.j = JoernSteps()
    db_url = 'http://localhost:%d/db/data/' % (int(port))
    self.j.setGraphDbURL(db_url)
    self.j.connectToDatabase()
def func_pdg_comp_view(request):
    """Render the PDG comparison page (GET) or start a comparison (POST).

    GET renders ``pdg_comp.html`` with the selectable functions and all
    stored reports.  POST reads ``funcs_sel``; when a report for that
    vulnerability can already be looked up a notice is returned, otherwise
    the feature database is checked, connected to, and
    ``func_pdg_similarity_proc`` is started in a background thread.

    Always returns an HTTP response describing the outcome.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {"funcs": funcs, "infos": infos}))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # Any failed lookup is treated as "no report yet", as before, but
        # SystemExit/KeyboardInterrupt are no longer swallowed.
        already_computed = False
    else:
        already_computed = True

    if already_computed:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs in a worker thread so the request returns at once.
    th = Thread(target=func_pdg_similarity_proc, args=(vuln_id, neo4jdb))
    th.start()
    return HttpResponse(u"已经启动线程进行计算")
def runQueryChunk():
    """Annotate source files with per-line AST node types, chunk by chunk.

    Function ids are fetched once and processed in chunks of 51.  For every
    chunk two queries are run: one for the (type, location, functionId) of
    each statement and one for the file each function belongs to.  The
    node types are grouped per function and per source line, cleaned with
    ``getCleanText`` and handed to ``addInfoToSourceFile`` together with
    the file's base name.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    res = j.runGremlinQuery("""getNodesWithType('Function').id""")

    flag = 1
    CHUNK_SIZE = 51
    for chunk in j.chunks(res, CHUNK_SIZE):
        # Turn the id chunk into "(1 2 3)" for the Lucene index query.
        functionIdStr = str(tuple(chunk)).replace(',', '').replace('\'', '')

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % functionIdStr
        stms = j.runGremlinQuery(query)

        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stmsFiles = j.runGremlinQuery(query)

        # function id -> base name of the file containing it
        files = dict()
        for stmsFile in stmsFiles:
            files[int(stmsFile[0])] = str(stmsFile[1]).split('/')[-1]

        # function id -> list of [location, node type]
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for function_id, entries in codes.items():
            # line number -> space separated node types found on that line
            lineDict = dict()
            for entry in entries:
                location = str(entry[0])
                node_type = entry[1]
                if location != u'None':
                    line_no = location.split(':')[0]
                    if line_no in lineDict:
                        lineDict[line_no] = lineDict[line_no] + ' ' + node_type
                    else:
                        lineDict[line_no] = node_type
            text = getCleanText(lineDict, False)
            fileName = files.get(function_id)
            addInfoToSourceFile(text, fileName)

        # Progress output: one number per processed chunk.
        flag += 1
        print(flag)
def __init__(self):
    """Connect to the graph database at ``NEO4J_URL`` and load functions.

    Sets ``self.JS`` (connected JoernSteps client), the ``FUNCTION_LIST``
    and ``BASIC_BLOCK_LIST`` dictionaries, and calls
    ``get_function_list(ChunkStartTool)``.
    """
    self.JS = JoernSteps()
    self.JS.setGraphDbURL(NEO4J_URL)
    self.JS.connectToDatabase()
    # NOTE(review): the containers are now created *before*
    # get_function_list() runs.  Presumably that method stores its result in
    # FUNCTION_LIST; creating the dicts afterwards would discard it. Confirm.
    self.FUNCTION_LIST = {}
    self.BASIC_BLOCK_LIST = {}
    self.get_function_list(ChunkStartTool)
def query_node_type():
    """Collect, for every function in the database, the node types per line.

    For each ``Function`` node the statements are queried for their type and
    location; types sharing a line number are joined with spaces and passed
    to ``cc.AST_type_clean``.  A second query resolves the base name of the
    file the function belongs to.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # All functions in the database.
    res = step.runGremlinQuery("""getNodesWithType('Function')""")

    for function in res:
        # NOTE(review): ref[5:] presumably strips a "node/" prefix - confirm.
        function_node_id = int(function.ref[5:])

        # (type, location) of every statement of this function.
        query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % function_node_id
        function_nodes = step.runGremlinQuery(query)

        # line number -> space separated node types found on that line
        line_dict = dict()
        for node in function_nodes:
            node_type = str(node[0])
            location = str(node[1])
            if location != 'None':
                line_no = location.split(':')[0]
                if line_no in line_dict:
                    line_dict[line_no] = line_dict[line_no] + ' ' + node_type
                else:
                    line_dict[line_no] = node_type

        # NOTE(review): clean_type and file_name are not used by the code
        # visible here; both calls are kept unchanged.
        clean_type = cc.AST_type_clean(line_dict, True)

        # Which file does this function belong to?
        query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % function_node_id
        file_path = step.runGremlinQuery(query)
        file_name = str(file_path[0]).split('/')[-1]
def __getConnection(self):
    """Instantiate a JoernSteps client and store it on ``self.connection``.

    Returns True when the client could be created.  Any exception is
    reported on stdout together with its ``args`` and False is returned.
    """
    print("[+] Creating connection.")
    try:
        self.connection = JoernSteps()
    except Exception as e:
        message = "[Error] Cannot instantiate Python-Joern database interface, DBInterface says: {}".format(
            e.args)
        print(message)
        return False
    else:
        return True
class DBContentsProvider:
    """Small facade around a JoernSteps connection."""

    def __init__(self):
        """Create the JoernSteps client and connect it right away."""
        self.j = JoernSteps()
        self.init_database_connection()

    def init_database_connection(self):
        """Connect ``self.j`` and register the ``steps/`` directory."""
        client = self.j
        client.connectToDatabase()
        client.addStepsDir('steps/')

    def run_gremlin_query(self, query_script):
        """Run ``query_script`` through the client and return its result."""
        return self.j.runGremlinQuery(query_script)
def query_node_type_chunk():
    """Chunked variant of the per-line node type collection.

    Function ids are processed in chunks of 51.  Per chunk, one query maps
    function ids to file base names and another returns the
    (type, location, functionId) of every statement.  Node types are grouped
    per function and per line and passed to ``cc.AST_type_clean``.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # Ids of all functions.
    res = step.runGremlinQuery("""getNodesWithType('Function').id""")

    flag = 1
    CHUNK_SIZE = 51
    for chunk in step.chunks(res, CHUNK_SIZE):
        # "(1 2 3)" form expected by the index query below.
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # Which file does each function belong to?
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stms_files = step.runGremlinQuery(query)
        files = dict()
        for stms_file in stms_files:
            files[int(stms_file[0])] = str(stms_file[1]).split('/')[-1]

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        stms = step.runGremlinQuery(query)

        # function id -> list of [location, node type]
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for function_id, entries in codes.items():
            # line number -> space separated node types found on that line
            line_dict = dict()
            for entry in entries:
                location = str(entry[0])
                node_type = entry[1]
                if location != u'None':
                    line_no = location.split(':')[0]
                    if line_no in line_dict:
                        line_dict[line_no] = line_dict[line_no] + ' ' + node_type
                    else:
                        line_dict[line_no] = node_type
            # NOTE(review): clean_type and fileName are not used by the code
            # visible here; both calls are kept unchanged.
            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(function_id)
def __init__(self):
    """Connect to the database and prepare data directories and caches.

    The four ``*_dataDir`` attributes point below ``FILE_PATH``.  After the
    containers are created, ``get_function_list(ChunkStartTool)`` and
    ``get_all_return_type()`` are called, in that order.
    """
    self.JS = JoernSteps()
    self.JS.setGraphDbURL(NEO4J_URL)
    self.JS.connectToDatabase()

    # attribute name -> folder below FILE_PATH
    data_dirs = (
        ('return_type_dataDir', 'return_type_data'),
        ('parameter_dataDir', 'parameter_data'),
        ('edge_dataDir', 'edge_data'),
        ('node_type_dataDir', 'node_type_data'),
    )
    for attribute, folder in data_dirs:
        setattr(self, attribute, os.path.join(FILE_PATH, folder))

    self.all_return_type = []
    self.all_node_type = []
    self.FUNCTION_LIST = {}

    self.get_function_list(ChunkStartTool)
    self.get_all_return_type()
class DBInterface(object):
    """Provides database connection"""

    DATABASE_URL = "http://localhost:7474/db/data/"

    def __init__(self):
        """Start without a connection; ``connectToDB`` creates it."""
        self.connection = None

    def __getConnection(self):
        """Create the JoernSteps client; return True on success, else False."""
        print("[+] Creating connection.")
        try:
            self.connection = JoernSteps()
        except Exception as e:
            message = "[Error] Cannot instantiate Python-Joern database interface, DBInterface says: {}".format(
                e.args)
            print(message)
            return False
        else:
            return True

    def connectToDB(self):
        """Create the client and connect it to ``DATABASE_URL``.

        Returns True when both steps succeed and False otherwise; failures
        are reported on stdout.
        """
        created = self.__getConnection()
        if not created:
            return False

        print("[+] Connecting to the database.")
        self.connection.setGraphDbURL(DBInterface.DATABASE_URL)
        try:
            self.connection.connectToDatabase()
        except Exception as e:
            message = "[Error] Cannot connect to the database, DBInterface says: {}".format(
                e.args)
            print(message)
            return False
        else:
            return True

    def runQuery(self, code):
        """Run the Gremlin ``code``; return its result, or None on any error."""
        try:
            return self.connection.runGremlinQuery(code)
        except Exception as e:
            message = "[Error] Error occured during query execution, DBInterface says: {}".format(
                e.args)
            print(message)
            return None
def __init__(self, port):
    """Connect ``self.j`` to the local graph database on ``port``.

    The project's ``custom_gremlin_steps`` directory (below the configured
    base directory) is registered before connecting.
    """
    self.j = JoernSteps()
    db_url = 'http://localhost:%d/db/data/' % (int(port))
    self.j.setGraphDbURL(db_url)

    # The python-joern "phpjoernsteps" directory used to be registered here
    # as well; that call is disabled:
    # self.j.addStepsDir(
    #     Configurator.getPath(Configurator.KEY_PYTHON_JOERN) +
    #     "/joern/phpjoernsteps"
    # )
    custom_steps = Configurator.getPath(
        Configurator.KEY_BASE_DIR) + "/custom_gremlin_steps"
    self.j.addStepsDir(custom_steps)

    self.j.connectToDatabase()
class jutils:
    """Static helpers sharing one class-level JoernSteps client."""

    # Created once, when the class body is executed.
    joern = JoernSteps()

    @staticmethod
    def connectToDatabase():
        """Connect the shared client and register ``CHUCKY_STEPS_DIR``."""
        client = jutils.joern
        client.connectToDatabase()
        client.addStepsDir(CHUCKY_STEPS_DIR)

    @staticmethod
    def lookup(lucene_query, traversal=None, projection=None):
        """Like ``raw_lookup`` but starting from a Lucene index query."""
        return jutils.raw_lookup(
            "queryNodeIndex('{}')".format(lucene_query),
            traversal,
            projection)

    @staticmethod
    def raw_lookup(node_selection, traversal=None, projection=None):
        """Run ``node_selection[.traversal].transform{...}`` and return it.

        projection -- attribute names to project; when empty or None the
                      node id and the node itself are returned.
        """
        if projection:
            attributes = ['it.{}'.format(name) for name in projection]
        else:
            attributes = ['it.id', 'it']
        transform = "transform{{ [ {} ] }}".format(', '.join(attributes))

        if traversal:
            steps = [node_selection, traversal, transform]
        else:
            steps = [node_selection, transform]
        return jutils.joern.runGremlinQuery('.'.join(steps))

    @staticmethod
    def runGremlinCommands(commands):
        """Join ``commands`` with '; ' and run them as a single query."""
        return jutils.joern.runGremlinQuery('; '.join(commands))
def getFunctionSimilarity():
    """Compute pairwise function-name similarity and store it as CSV.

    All function names are fetched from the database, namespaces are
    removed, empty names, ``operator ...`` overloads and duplicates are
    dropped.  Every ordered pair of distinct entries is scored with
    ``computeSim``; pairs scoring above 0.5 are written to
    ``my_constant.FUNC_SIMILAIRTY_FILE_NAME`` and collected.

    Returns a dict mapping ``(func_a, func_b)`` to the similarity.
    """
    # The context manager closes the output file even if a query or the
    # similarity computation raises.
    with open(my_constant.FUNC_SIMILAIRTY_FILE_NAME, 'wb') as analysis:
        analyze_writer = csv.writer(analysis)
        analyze_writer.writerow(['func_a', 'func_b', 'similarity'])

        joern_instance = JoernSteps()
        joern_instance.addStepsDir("/data/joern-code/query/")
        joern_instance.setGraphDbURL("http://localhost:7474/db/data/")
        joern_instance.connectToDatabase()

        functions_temp = joern_instance.runGremlinQuery(
            '_().getFunctions()')[0]

        # Each kept name is wrapped in a one-element list, the shape
        # computeSim() is called with; `seen` makes the dedupe O(1) per name.
        functions = []
        seen = set()
        for function in functions_temp:
            function = my_util.removeNamespace(function)
            if function == '':
                continue
            if function.startswith("operator ") or function in seen:
                continue
            seen.add(function)
            functions.append([function])

        func_similarity_dic = {}
        word_list_dict = {}
        for i, func_a in enumerate(functions):
            for j, func_b in enumerate(functions):
                if i == j:
                    continue
                similarity, word_list_dict = computeSim(
                    func_a, func_b, word_list_dict)
                if similarity > 0.5:
                    analyze_writer.writerow(
                        [func_a[0], func_b[0], similarity])
                    func_similarity_dic[(func_a[0], func_b[0])] = similarity

    return func_similarity_dic
class DBContentsProvider:
    """Runs the taint-learning Gremlin steps through a JoernSteps client."""

    def __init__(self):
        """Open the database connection."""
        self._initDatabaseConnection()

    def _initDatabaseConnection(self):
        """Create ``self.j``, connect it and register ``steps/``."""
        self.j = JoernSteps()
        self.j.connectToDatabase()
        self.j.addStepsDir('steps/')

    def RunGremlinQuery(self, query):
        """Run ``query`` and return its result."""
        return self.j.runGremlinQuery(query)

    def GetCalleesInfo(self):
        """Return the result of the ``getCalleeListInfo()`` step."""
        return self.j.runGremlinQuery("getCalleeListInfo()")

    def generate(self, selector):
        """
        Generate contents for a given selector, overwriting the contents
        currently held in cndToQueries memory by the server.
        """
        query = """generateTaintLearnStructures(%s.id.toList()) _()""" % (selector)
        # Iterate the result only to consume it; the items are not used.
        for _ in self.j.runGremlinQuery(query):
            continue
def cal_funcs_similarity(request):
    """Show stored similarity reports (GET) or start a new comparison.

    GET renders ``ast_function_level.html`` with ``cal_reports`` applied to
    every stored report.  Any other method checks that the feature database
    exists and is running, connects to it and starts
    ``vuln_patch_compare_all`` in a background thread.

    Always returns an HTTP response describing the outcome.
    """
    if request.method == "GET":
        reports = [cal_reports(r) for r in func_similarity_reports.objects.all()]
        return render_to_response(
            "ast_function_level.html",
            RequestContext(request, {'reports': reports}))

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse("特征数据库不存在")
    if not is_db_on():
        return HttpResponse("特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        # Still a catch-all for connection problems, but no longer hides
        # SystemExit/KeyboardInterrupt like the bare "except:" did.
        return HttpResponse("连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs in a worker thread so the request returns at once.
    th = Thread(target=vuln_patch_compare_all, args=(neo4jdb, ))
    th.start()
    return HttpResponse("启动线程计算中,请稍后查看!")
class DBContentsProvider:
    """JoernSteps facade plus the user-defined database queries."""

    def __init__(self):
        """Create the JoernSteps client and connect it right away."""
        self.j = JoernSteps()
        self.init_database_connection()

    def init_database_connection(self):
        """Connect ``self.j`` and register the ``steps/`` directory."""
        client = self.j
        client.connectToDatabase()
        client.addStepsDir('steps/')

    def run_gremlin_query(self, query_script):
        """Run ``query_script`` through the client and return its result."""
        return self.j.runGremlinQuery(query_script)

    # User-defined database queries follow.

    def query_allCallee_name(self):
        """Return the names of all called functions stored in the database."""
        query = """ g.V.has('type','Callee').as('x').code.dedup().back('x').code.toList() """
        return self.run_gremlin_query(query)
class ChuckyJoern(CmdLineTool):
    """ Joern interface used by chucky tools. """

    def __init__(self, description):
        """Initialise the command line tool; no connection is opened yet."""
        super(ChuckyJoern, self).__init__(description)
        self._joern = None
        self.__is_initialized = False

    def _init_joern_interface(self, step_dir=None):
        """Create and connect the JoernSteps client.

        step_dir -- optional steps directory registered before connecting.
        """
        client = JoernSteps()
        self._joern = client
        if step_dir:
            client.addStepsDir(step_dir)
        client.connectToDatabase()
        self.__is_initialized = True

    def run_query(self, query):
        """Run ``query`` through the (lazily connected) client."""
        return self.joern.runGremlinQuery(query)

    @property
    def joern(self):
        """The JoernSteps client, connected on first access."""
        if self.__is_initialized:
            return self._joern
        self._init_joern_interface()
        return self._joern
class DBInterface:
    """Thin wrapper that forwards to a JoernSteps client stored on ``self.j``."""

    def connectToDatabase(self):
        """Create the client, register ``JOERN_TOOLS_STEPDIR`` and connect."""
        client = JoernSteps()
        self.j = client
        client.addStepsDir(JOERN_TOOLS_STEPDIR)
        client.connectToDatabase()

    def runGremlinQuery(self, query):
        """Forward ``query`` to the client and return its result."""
        result = self.j.runGremlinQuery(query)
        return result

    def chunks(self, ids, chunkSize):
        """Forward to the client's ``chunks(ids, chunkSize)``."""
        result = self.j.chunks(ids, chunkSize)
        return result
def runQuery():
    """Annotate source files with per-line AST node types, one function at a time.

    For every ``Function`` node the statement types and locations are
    queried, types sharing a line number are joined with spaces, the result
    is cleaned with ``getCleanText`` and written with
    ``addInfoToSourceFile`` using the base name of the function's file.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    res = j.runGremlinQuery("""getNodesWithType('Function')""")

    flag = 1
    for function in res:
        # NOTE(review): ref[5:] presumably strips a "node/" prefix - confirm.
        functionnodeid = int(function.ref[5:])

        query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % functionnodeid
        allNodesOfFunction = j.runGremlinQuery(query)

        # line number -> space separated node types found on that line
        lineDict = dict()
        for node in allNodesOfFunction:
            node_type = str(node[0])
            location = str(node[1])
            if location != 'None':
                line_no = location.split(':')[0]
                if line_no in lineDict:
                    lineDict[line_no] = lineDict[line_no] + ' ' + node_type
                else:
                    lineDict[line_no] = node_type
        text = getCleanText(lineDict, False)

        query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % functionnodeid
        filepath = j.runGremlinQuery(query)
        fileName = str(filepath[0]).split('/')[-1]
        addInfoToSourceFile(text, fileName)

        # Progress output: one number per processed function.
        flag += 1
        print(flag)
def createdb(coverage_db, json_dbname, joern_url='http://localhost:7474/db/data/'):
    """ combine coverage information with joern queries and create json db with results

    coverage_db -- path of the sqlite coverage database; the connection is
                   stored in the module-level ``conn``.
    json_dbname -- path of the JSON file to write.
    joern_url   -- URL of the graph database; the connected client is
                   stored in the module-level ``j``.

    Every ``IfStatement`` and ``SwitchStatement`` id is passed to
    ``get_conditional_info``; non-empty results are grouped by file name,
    ordered by file name and line, and dumped as one JSON list.
    """
    global j, conn
    from joern.all import JoernSteps

    j = JoernSteps()
    j.setGraphDbURL(joern_url)
    j.connectToDatabase()

    if_ids = j.runGremlinQuery('queryNodeIndex("type:IfStatement").id')
    print("Total number of IfStatements:%d" % len(if_ids))
    switch_ids = j.runGremlinQuery('queryNodeIndex("type:SwitchStatement").id')
    print("Total number of SwitchStatement:%d" % len(switch_ids))
    if_ids += switch_ids

    conn = sqlite3.connect(coverage_db)

    conditionals = {}  # file name -> list of conditionals
    idx = 0
    for node_id in if_ids:
        # Gather branch info for each conditional; empty results are skipped.
        conditional = get_conditional_info(node_id, idx)
        if conditional == {}:
            continue
        idx += 1
        sys.stdout.write("Processing conditional %d out of %d total.\r" % (idx, len(if_ids)))
        sys.stdout.flush()
        conditionals.setdefault(conditional["filename"], []).append(conditional)

    # Order by file name, then by line.  Iterating the dict directly gave an
    # arbitrary file order despite the stated intent.
    sorted_conditionals = []
    for filename in sorted(conditionals):
        conditionals[filename].sort(key=lambda c: c["line"])
        sorted_conditionals += conditionals[filename]

    # The context manager guarantees the JSON file is flushed and closed.
    with open(json_dbname, "wb") as json_file:
        json.dump(sorted_conditionals, json_file)
    print("\nDone!")
def produce_file_function_location_triads(file):
    """Return ``(file base name, function name, start index)`` triads.

    One triad is produced per ``Function`` node in the database.  The start
    index is the line number taken from the node's ``location`` property
    minus one, kept as a string.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()
    root_nodes = j.runGremlinQuery('queryNodeIndex("type:Function")')

    function_names = []
    start_indices = []
    for root_node in root_nodes:
        # location looks like "<line>:..."; only the line part is used.
        line_number = root_node.properties['location'].split(":")[0]
        start_indices.append(str(int(line_number) - 1))
        function_names.append(root_node.properties['name'])

    return [
        (file.split("/")[-1], name, start)
        for name, start in zip(function_names, start_indices)
    ]
def func_pdg_comp_view(request):
    """Render the PDG comparison page (GET) or start a comparison (POST).

    GET renders ``pdg_comp.html`` with the selectable functions and all
    stored reports.  POST reads ``funcs_sel``; when a report for that
    vulnerability can already be looked up a notice is returned, otherwise
    the feature database is checked, connected to, and
    ``func_pdg_similarity_proc`` is started in a background thread.

    Always returns an HTTP response describing the outcome.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {
                "funcs": funcs,
                "infos": infos
            }))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # Any failed lookup is treated as "no report yet", as before, but
        # SystemExit/KeyboardInterrupt are no longer swallowed.
        already_computed = False
    else:
        already_computed = True

    if already_computed:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs in a worker thread so the request returns at once.
    th = Thread(target=func_pdg_similarity_proc, args=(vuln_id, neo4jdb))
    th.start()
    return HttpResponse(u"已经启动线程进行计算")
if not tm_params: return #process it's actual param list if len(ast_children) == 2: actual_params = j.runGremlinQuery("g.v(%d)" "outE('IS_AST_PARENT')" ".inV()"%(node2id(ast_children[1]))) #calculate the affected variables for param in tm_params: if param == 0: retval = find_retval(call_id) if retval: affected_vars.add(retval) else: vars = get_param_vars(node2id(actual_params[param-1])) for v in vars: affected_vars.add(v) print get_ifs(code_to_symbols(affected_vars)); j = JoernSteps() j.setGraphDbURL('http://localhost:7474/db/data/') j.connectToDatabase() #find out all call expressions and process them one by one call_ids = j.runGremlinQuery("g.V().filter{it.type == 'CallExpression'}.id()") for call_id in call_ids: process_call(call_id)
# AST node type -> placeholder token; built once instead of on every call.
_NODE_TYPE_TOKENS = {
    "PrimaryExpression": "$COS",
    "CallExpression": "$CAL",
    "Condition": "$CON",
    "Identifier": "$VAR",
    "CastExpression": "$CAT",
    "OrExpression": "$OP",
    "IncDecOp": "$OP",
    "UnaryOp": "$UOP",
    "AdditiveExpression": "$ADD",
    "ArrayIndexing": "$IDX",
}


def Tran(x):
    """Return the placeholder token for node type ``x``, or 'null' if unknown."""
    return _NODE_TYPE_TOKENS.get(x, 'null')


j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
# j.addStepsDir('Use this to inject utility traversals')
j.connectToDatabase()

# Read the whole list up front; the context manager closes the file, which
# the original never did.
with open('/home/hongfa/workspace/thttpd_workspace/ptrList', 'r') as ptrlist:
    ptrs = ptrlist.readlines()

for ptr in ptrs:
    # Everything after the "functionId:" marker on the line.
    functionID = ptr.split("functionId:")[1]
def connect_db(url='http://localhost:7474/db/data/'):
    """Return a JoernSteps client connected to the graph database at ``url``.

    url -- REST endpoint of the database; defaults to the local instance
           that was previously hard-coded, so existing calls are unchanged.
    """
    j = JoernSteps()
    j.setGraphDbURL(url)
    j.connectToDatabase()
    return j
def setUp(self):
    """Create a JoernSteps client, keep it on ``self.j`` and connect it."""
    client = JoernSteps()
    self.j = client
    client.connectToDatabase()
from joern.all import JoernSteps

# Script: list the locations of strcpy() calls whose source argument is
# neither checked in a condition nor passed through something named "min".
j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
j.connectToDatabase()

# The closure handed to unsanitized() is now followed by the closing ")";
# it was missing, leaving the call's parenthesis unbalanced.
query = (
    " getArguments('strcpy', '1') "
    ".sideEffect{ argument = it.code;} "
    ".unsanitized( "
    "{it._().or( "
    "_().isCheck('.*' + argument + '.*'), "
    "_().codeContains('.*min.*'))}) "
    ".locations() "
)

print("[+] Running query! ")
results = j.runGremlinQuery(query)
print("[+] Number of results: " + str(len(results)))
for r in results:
    print(r)
call_g = addDataEdge(call_g, startNode, endNode, var) #var is callee node id else: #print 'y' for node in list_callee_id: startNode = str(node[0]) endNode = str(func_node._id) var = str(node[1]) call_g = addDataEdge(call_g, startNode, endNode, var) #var is callee node id return call_g if __name__ == '__main__': j = JoernSteps() j.connectToDatabase() pdg_db_path = "pdg_db" list_testID = os.listdir(pdg_db_path) print list_testID for testID in list_testID: #if testID != '69055': # continue if os.path.exists( os.path.join("dict_call2cfgNodeID_funcID", str(testID))): continue call_g = getCallGraph(j, testID) if call_g == False:
def _initDatabaseConnection(self):
    """Create ``self.j``, connect it and register the ``steps/`` directory."""
    client = JoernSteps()
    self.j = client
    client.connectToDatabase()
    client.addStepsDir('steps/')
#!/usr/bin/env python
#############################################################
# A template for feature extraction for functions with joern.
# Author: Fabian Yamaguchi
#############################################################
from joern.all import JoernSteps

j = JoernSteps()
j.connectToDatabase()
j.addStepsDir('steps/')

# Ids of all Function nodes.
statementIds = j.runGremlinQuery("queryNodeIndex('type:Function').id")

# Fetch id, name and feature vector in batches of 256 ids per query.
for chunk in j.chunks(statementIds, 256):
    query = """ idListToNodes(%s).transform{ [it.id, it.name, it.functionToFeatureVec() ] } """ % (chunk)
    X = j.runGremlinQuery(query)

    # Each record is [id, name, feature list].
    for x in X:
        print('===')
        print('FunctionId: %d' % (x[0]))
        print('FunctionName: %s' % (x[1]))
        print('Features (list): %s' % (x[2]))
        print('===')
from joern.all import JoernSteps

# Scratch script: connect to the local graph database and pick one query.
j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
j.connectToDatabase()

# Alternative queries that were tried before; all of them are disabled.
#query = """
#getArguments('strcpy', '1').sideEffect{ argument = it.code;}.unsanitized({it._().or(_().isCheck('.*' + argument + '.*'), _().codeContains('.*min.*'))}).locations()
#"""
#query = """
#getCallsTo('memcpy').ithArguments('0').astNodes()
#"""
#query = """
#getArguments('memcpy', '2').uses()
#.filter{it.code == 'len'}
#.filter{
# it.in('USES')
# .filter{it.type == 'Condition'}.toList() == []
#}
#"""
#query = """
#getArguments('memcpy', '1')
#"""
#.sideEffect{ paramName = '.*len.*' }
#.filter{ it.code.matches(paramName) }
#.unsanitized{ it.isCheck( paramName ) }"""
#.params( paramName )

# The query currently selected.
query = "getFilesByName('*')"
import os.path import hashlib from yaml import load from joern.all import JoernSteps try: from yaml import CLoader as Loader except ImportError: from yaml import Loader if len(sys.argv) < 2: print "Usage: verify.py <file 1> <file 2> ..." exit(1) j = JoernSteps() j.setGraphDbURL("http://localhost:7474/db/data") j.connectToDatabase() sys.argv.pop(0) print "Running tests:" # tests hashes are encoded in the intermediate path names, this extracts them def extract_paths(paths): paths = map(lambda p: str.split(str(p), "/")[-1], paths) return map(lambda p: str.split(str(p), ".c")[0], paths) all_tests = extract_paths(j.runGremlinQuery("getNodesWithType('File').filepath")) for arg in sys.argv:
def bug_finder(request):
    """Vulnerability search view.

    GET renders ``bug_finder.html`` with the software selection form.

    POST with ``sel_vuln`` lists the vulnerabilities (flagged
    ``is_in_db``) of every version of the chosen software.

    POST with ``find`` checks that the feature database and the software's
    graph database are running, connects to both and starts the CFG or PDG
    similarity computation in a background thread.  An unrecognised
    ``algorithm`` value is reported instead of claiming a thread was
    started.

    A POST carrying neither key falls through and returns None, as before.
    """
    if request.method == "GET":
        software_sel = software_sel_form()
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"software_sel": software_sel}))

    if "sel_vuln" in request.POST:
        soft_id = int(request.POST.get("software"))
        soft_name = softwares.objects.get(software_id=soft_id).software_name
        # All entries of this software, regardless of version.
        softs = softwares.objects.filter(software_name=soft_name)
        # First collect every CVE they are involved in ...
        cves = []
        for soft in softs:
            cves.extend(soft.cve_infos_set.all())
        # ... then every vulnerability belonging to those CVEs.
        sel_vuln = vulnerability_info.objects.filter(cve_info__in=cves, is_in_db=True)
        software_sel = software_sel_form(request.POST)
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"sel_vuln": sel_vuln, "software_sel": software_sel}))

    if "find" in request.POST:
        if not is_db_on():
            return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

        soft = softwares.objects.get(software_id=int(request.POST.get("software")))
        # Only the lookup itself can mean "graph database not generated".
        try:
            db = graph_dbs.objects.get(soft=soft)
        except graph_dbs.DoesNotExist:
            return HttpResponse("软件图形数据库未生成")

        # Is the software's graph database running?
        if not is_db_on(db.port):
            return HttpResponse("软件图形数据库未启动")

        # Connect to the software database.
        soft_db = JoernSteps()
        try:
            soft_db.setGraphDbURL("http://localhost:%d/db/data/" % db.port)
            soft_db.connectToDatabase()
        except Exception:
            return HttpResponse("连接软件数据库失败! port:%d" % db.port)

        # Connect to the feature database.
        character_db = JoernSteps()
        try:
            character_db.setGraphDbURL("http://localhost:7474/db/data/")
            character_db.connectToDatabase()
        except Exception:
            return HttpResponse("连接特征数据库失败!")

        # Pick the worker for the selected algorithm.
        workers = {
            "CFG": func_similarity_cfgLevel_proc,
            "PDG": func_similarity_pdgLevel_proc,
        }
        worker = workers.get(request.POST.get("algorithm"))
        if worker is None:
            # Previously this case still answered "thread started".
            return HttpResponse("未知的算法")

        th = Thread(target=worker,
                    args=(soft, soft_db, character_db,
                          request.POST.getlist("vuln_infos")))
        th.start()
        return HttpResponse("已启动线程进行计算,请等候!")
class Analysis(object):
    """Builds and runs the static-analysis Gremlin queries per attack type."""

    SQL_QUERY_FUNCS = """sql_query_funcs = [ "mysql_query", "mysqli_query", "pg_query", "sqlite_query" ]\n"""
    XSS_FUNCS = """xss_funcs = [ "print", "echo" ]\n"""
    OS_COMMAND_FUNCS = """os_command_funcs = [ "backticks", "exec" , "expect_popen","passthru","pcntl_exec", "popen","proc_open","shell_exec","system", "mail" ]\n"""

    # attack type -> sink query appended to the common prefix built in
    # prepareQueryStatic().  The texts are kept as they were, except that the
    # os-command query now separates its first statement with ";" like every
    # other query does.
    _SINK_QUERIES = {
        "sql": (
            " queryMapList =[]; "
            "g.V().filter{sql_query_funcs.contains(it.code) && isCallExpression(it.nameToCall().next()) }.callexpressions() "
            ".sideEffect{m = start(it, [], 0, 'sql', false, queryMapList)} "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'sql', '1')} "
            ".sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} "
            ".ifThenElse{m.isEmpty()} "
            "{it.transform{reportmessage}} "
            "{it.transform{findSinkLocation(m, warnmessage, 'sql', queryMapList, it)}}"
        ),
        "xss": (
            " queryMapList = []; "
            "g.V().filter{it.type == TYPE_ECHO || it.type == TYPE_PRINT} "
            ".sideEffect{m = start(it, [], 0, 'xss', false, queryMapList)} "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'xss', '1')} "
            ".sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} "
            ".ifThenElse{m.isEmpty()} "
            "{it.transform{reportmessage}} "
            "{it.transform{findSinkLocation(m, warnmessage, 'xss', queryMapList, it)}}"
        ),
        "code": (
            "queryMapList =[]; "
            "g.V().filter{it.type == TYPE_INCLUDE_OR_EVAL && it.flags.contains(FLAG_EXEC_EVAL)} "
            ".sideEffect{m = start(it, [], 0, 'code', false, queryMapList )} "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'code', '1')} "
            ".sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} "
            ".ifThenElse{m.isEmpty()} "
            "{it.transform{reportmessage}} "
            "{it.transform{findSinkLocation(m, warnmessage, 'code', queryMapList, it)}}"
        ),
        # command execution : sinks considered are :
        # [backticks, exec,expect_popen,passthru,pcntl_exec,popen,proc_open,shell_exec,system,mail]
        "os-command": (
            "queryMapList =[]; "
            "g.V().filter{os_command_funcs.contains(it.code) && isCallExpression(it.nameToCall().next()) }.callexpressions() "
            ".filter{os_command_funcs.contains(it.ithChildren(0).out.code.next())} "
            ".sideEffect{m = start(it, [], 0, 'os-command', false, queryMapList )} "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'os-command', '1')} "
            ".sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} "
            ".ifThenElse{m.isEmpty()} "
            "{it.transform{reportmessage}} "
            "{it.transform{findSinkLocation(m, warnmessage, 'os-command', queryMapList, it)}}"
        ),
        "file-inc": (
            "queryMapList =[]; "
            "g.V().filter{it.type == TYPE_INCLUDE_OR_EVAL && !(it.flags.contains(FLAG_EXEC_EVAL))} "
            ".sideEffect{m = start(it, [], 0, 'file-inc', false, queryMapList)} "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'file-inc', '1')} "
            ".sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} "
            ".ifThenElse{m.isEmpty()} "
            "{it.transform{reportmessage}} "
            "{it.transform{findSinkLocation(m, warnmessage, 'file-inc', queryMapList, it)}}"
        ),
        "ear": (
            ' g.V().filter{ "header" == it.code && isCallExpression(it.nameToCall().next()) }.callexpressions() '
            ".ithChildren(1).astNodes() "
            '.filter{it.code != null && it.code.startsWith("Location")} '
            ".callexpressions() "
            ".as('call') "
            ".out('FLOWS_TO') "
            '.filter{it.type != "AST_EXIT" && it.type != "NULL" } '
            ".or( "
            '_().filter{it.type == "AST_CALL"} '
            ".sideEffect{n = jumpToCallingFunction(it)} "
            '.filter{n.type != "AST_EXIT" && n.type != "NULL" && n.type != "AST_RETURN"} '
            ", "
            '_().filter{it.type == "AST_CALL"} '
            ".sideEffect{n = jumpToCallingFunction(it)} "
            '.filter{n.type == "AST_RETURN"} '
            ".out('FLOWS_TO') "
            '.filter{n.type != "AST_EXIT" && n.type != "NULL" } '
            ", "
            '_().filter{it.type != "AST_CALL"} '
            ", "
            "_().as('b') "
            '.filter{it.type == "AST_CALL"} '
            ".astNodes() "
            '.filter{it.code != null && it.code != "/home/user/log/codeCoverage.txt"} '
            ".back('b') "
            ") "
            ".back('call') "
            ".sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'ear', '1')} "
            ".transform{warnmessage}"
        ),
    }

    # attack type -> name of the exploit seeds file
    _SEED_FILES = {
        "sql": 'include_map_resolution_results.txt',
        "xss": 'include_map_resolution_results_xss.txt',
        "code": 'include_map_resolution_results_code.txt',
        "os-command": 'include_map_resolution_results_os-command.txt',
        "file-inc": 'include_map_resolution_results_file-inc.txt',
        "ear": 'include_map_resolution_results_ear.txt',
    }

    def __init__(self, port):
        """Connect ``self.j`` to the local graph database on ``port``."""
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
        self.j.connectToDatabase()

    def prepareQueryStatic(self, attackType):
        """Return the Gremlin query text for ``attackType``.

        The text always starts with the three sink-function lists and
        `` m =[]; ``.  For an unknown attack type only that prefix is
        returned.
        """
        query = self.XSS_FUNCS + self.SQL_QUERY_FUNCS + self.OS_COMMAND_FUNCS
        query += " m =[]; "
        return query + self._SINK_QUERIES.get(attackType, "")

    def prepareFinalQuery(self, seed):
        """Build, print and return the navigation-sequence query for ``seed``.

        Only entries of ``seed.get`` / ``seed.params`` containing '=' are
        used, reduced to the part before the first '='.  Each GET name is
        listed twice, once as ``?name=`` and once as ``&name=``.
        """
        get = []
        for entry in seed.get:
            if '=' in entry:
                name = entry.split('=', 1)[0]
                get.append('?' + name + '=')
                get.append('&' + name + '=')

        params = [p.split('=', 1)[0] + '=' for p in seed.params if '=' in p]

        query = """g.V('url', '%s') .findNavigationSeq(%s, %s, %s).dedup().path""" % (
            seed.src, seed.dst, get, params)
        print(query)
        return query

    def runQuery(self, query):
        """Return ``query`` unchanged."""
        return query

    def runTimedQuery(self, query):
        """Run ``query`` and return ``(result, timing message)``.

        An empty query is not run.  If running raises, the exception is
        printed and the result is None.
        """
        start = time.time()
        res = None
        try:
            if query:
                res = self.j.runGremlinQuery(query)
        except Exception as err:
            print("Caught exception: %s %s" % (type(err), err))
        elapsed = time.time() - start
        timestr = "Query done in %f seconds." % (elapsed)
        return (res, timestr)

    def readExploitSeedsFile(self, attackType, resultsDir='/home/user/navex/results'):
        """Return the stripped lines of the exploit seeds file for ``attackType``.

        attackType -- one of sql, xss, code, os-command, file-inc, ear.
        resultsDir -- directory containing the seeds files; defaults to the
                      location that was previously hard-coded.

        Raises ValueError for an unknown attack type (this used to surface
        as an UnboundLocalError).
        """
        try:
            file_name = self._SEED_FILES[attackType]
        except KeyError:
            raise ValueError("Unknown attack type: %r" % (attackType,))
        path = resultsDir + '/' + file_name
        # Log the file that is actually opened; the "sql" branch used to
        # print the xss path here.
        print('Reading Exploit Seeds File in ' + path)
        with open(path, 'r') as f:
            return [line.strip() for line in f]
from joern.all import JoernSteps

# Minimal example: connect to the local database and print every result of
# a lookup of functions named "main".
j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
# j.addStepsDir('Use this to inject utility traversals')
j.connectToDatabase()

res = j.runGremlinQuery('getFunctionsByName("main")')
# res = j.runCypherQuery('...')

for r in res:
    print(r)
class ManualCCSearch(object):
    '''
    Builds Gremlin queries for code-clone search and runs them, timed,
    against a graph database reached through JoernSteps.
    '''

    # Groovy preambles that are prepended to queries loaded from file.
    UNTRUSTED_DATA = """attacker_sources = [ "_GET", "_POST", "_COOKIE", "_REQUEST", "_ENV", "HTTP_ENV_VARS" ]\n"""
    SQL_QUERY_FUNCS = """sql_query_funcs = [ "mysql_query", "pg_query", "sqlite_query" ]\n"""

    # Gremlin operations
    ORDER_LN = ".order{it.a.lineno <=> it.b.lineno}"  # Order by linenumber

    def __init__(self, port):
        '''
        Connect to the graph database on localhost at the given port.

        port -- anything accepted by int(); used to build the database URL.
        '''
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
        # self.j.addStepsDir(
        #     Configurator.getPath(Configurator.KEY_PYTHON_JOERN) +
        #     "/joern/phpjoernsteps"
        # )
        self.j.addStepsDir(
            Configurator.getPath(Configurator.KEY_BASE_DIR) +
            "/custom_gremlin_steps"
        )
        self.j.connectToDatabase()
        # NOTE(review): QUERIES_DIR is never assigned (the assignment below is
        # disabled), yet sqlNewIndirect() reads it.
        # self.QUERIES_DIR = Configurator.getPath(Configurator.BASE_DIR) + \
        #     "/gremlin_queries"

    def searchCCOne(self):
        """
        Search for the first vulnerable tutorial
        (SQL injection from stackoverflow):

            $user_alcohol_permitted_selection = $_POST['alcohol_check']; //Value sent using jquery .load()
            $user_social_club_name_input = $_POST['name']; //Value sent using jquery .load()
            $query="SELECT * FROM social_clubs WHERE name = $user_social_club_name_input";
            if ($user_alcohol_permitted_selection != "???") {
                $query.= "AND WHERE alcohol_permitted = $user_alcohol_permitted_selection";
            }

        Currently a stub: only the start of the traversal ("g.V") is
        returned; the plan below is not implemented yet.
        """
        # construct gremlin query step by step:
        # 1. Find variable name X of "variable = $_POST[..]"
        # 2. Go to next statement list.
        # (3. Find variable name Y of "variable = $_POST[..]"
        # (4. Go to next statement list.
        # 5. Find variable name Z and string str1 of "variable = string"
        # 6. Check if str1 contains regexp "WHERE any_word=$Y".
        # (7. Go to next statement list.)
        # (8. Check for if-statement with variable $X.)
        # 9. Check if variable $Z is extended using string with regexp
        #    "and where any_word=$X"
        # (10. Check for mysql_query($Z))

        # all nodes
        # query = "g.V(NODE_TYPE, TYPE_STMT_LIST).out"
        #
        # # AST_ASSIGN nodes' right side
        # query += ".rval"
        query = "g.V"
        return query

    def sqlNewIndirect(self):
        """
        Return the two Groovy preambles followed by the contents of the
        "sql_new_indirect.query" file.
        """
        query = self.UNTRUSTED_DATA + self.SQL_QUERY_FUNCS
        # NOTE(review): QUERIES_DIR is joined without a path separator, so it
        # must end with one -- confirm once the attribute is actually set.
        with open(self.QUERIES_DIR + "sql_new_indirect.query", 'r') as f:
            query += f.read()
        return query

    def runQuery(self, query):
        """Return ``query`` unchanged (identity query builder)."""
        return query

    def runTimedQuery(self, myFunction, query=None):
        """
        Build a query with ``myFunction``, run it and time the execution.

        myFunction -- callable returning the Gremlin query text; it is called
                      with ``query`` when that is truthy, otherwise without
                      arguments.
        query      -- optional argument forwarded to ``myFunction``.

        Exceptions raised while building or running the query are printed
        and swallowed. Every returned node is printed and wrapped in a
        CodeCloneData object carrying the elapsed time.

        Returns a ``(result, elapsed)`` tuple: the list of CodeCloneData
        objects and the elapsed time in seconds.
        """
        start = time.time()
        res = None
        try:
            if query:
                res = self.j.runGremlinQuery(myFunction(query))
            else:
                res = self.j.runGremlinQuery(myFunction())
        except Exception as err:
            # Best-effort: report the failure and continue with res=None.
            print("Caught exception: %s %s" % (type(err), err))

        elapsed = time.time() - start
        # print "Query done in %f seconds." % (elapsed)

        result = []
        try:
            for node in res:
                print(node)
                data = CodeCloneData()
                data.stripDataFromOutput(node)
                data.setQueryTime(elapsed)
                result.append(data)
        except TypeError:
            # res is not iterable, because it is one/no node.
            if res:
                data = CodeCloneData()
                # Use res itself: the loop variable is unbound here when res
                # could not be iterated at all.
                data.stripDataFromOutput(res)
                data.setQueryTime(elapsed)
                result.append(data)
            print(res)

        return (result, elapsed)
#coding=utf-8
'''
Created on Jan 4, 2016

@author: root
'''
from algorithm.util import vuln_patch_compare
from astLevel_algorithm.models import vulnerability_info
from joern.all import JoernSteps

if __name__ == "__main__":
    # Run vuln_patch_compare for every stored vulnerability record, sharing
    # one database connection across all of them.
    vuln_records = vulnerability_info.objects.all()

    graph_db = JoernSteps()
    graph_db.setGraphDbURL('http://localhost:7474/db/data/')
    graph_db.connectToDatabase()

    for record in vuln_records:
        vuln_patch_compare(record.vuln_id, graph_db)
def produce_nodes_string():
    """Serialise the AST nodes under all FunctionDef nodes and count their types.

    Connects to the graph database at http://localhost:7474/db/data/,
    fetches every FunctionDef node and all of their AST nodes, and builds:

    * a string with one record per non-root AST node, of the form
      ``id,type,code,functionId,childNum,parentId`` terminated by ``¬``,
      where both ids are hashes of the node's (or its first parent's) type,
      comma-stripped code, functionId and childNum;
    * a list of 57 counters, where slot ``i`` counts the AST nodes whose
      type equals ``global_node_types[i]``.

    Returns:
        tuple: ``(all_nodes_string, ast_features)``.

    NOTE(review): the ids rely on the builtin ``hash()``; on interpreters
    with string-hash randomisation they are only stable within one process.
    """
    global global_node_types

    def queryParent(j, nodeId):
        # The connection opened below is reused; reconnecting for every
        # single node (as was done before) is not needed to run a query.
        return j.runGremlinQuery('g.v(' + str(nodeId) + ').parents()')

    def describe(node):
        # Text that identifies a node; commas and the record terminator are
        # removed from the code so they cannot clash with the separators.
        code = str(node.properties['code']).replace(',', '')
        code = code.replace('¬', '')
        return node.properties['type'] + "," + code + "," + str(
            node.properties['functionId']) + "," + str(
                node.properties['childNum'])

    def getStringForNode(node, parent):
        # `parent` is the result of the parents() traversal; only its first
        # entry is used.
        parent_identifier = hash(tuple(describe(parent[0])))
        node_identifier = hash(tuple(describe(node)))
        fields = [
            str(node_identifier),
            str(node.properties['type']),
            str(node.properties['code']),
            str(node.properties['functionId']),
            str(node.properties['childNum']),
            str(parent_identifier),
        ]
        return ",".join(fields) + "¬"

    ast_features = [0] * 57

    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()
    root_nodes = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef")')
    all_ast_nodes = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef").astNodes()')

    # Root nodes are not serialised, so their parents are never needed and
    # are not queried.
    node_strings = []
    for node in all_ast_nodes:
        if node in root_nodes:
            continue
        node_strings.append(getStringForNode(node, queryParent(j, node._id)))
    all_nodes_string = "".join(node_strings)

    for ast_node in all_ast_nodes:
        node_type = ast_node.properties['type']
        for index, known_type in enumerate(global_node_types):
            if known_type == node_type:
                ast_features[index] += 1

    return all_nodes_string, ast_features
class DBContentsProvider:
    """Thin query layer over a JoernSteps connection.

    Each public method formats one Gremlin query and runs it through
    ``self.j``. The ``get*`` methods return the query results as a list;
    the ``generate*`` methods only consume the results and return None.
    """

    # Wrapper shared by all "get" queries: evaluate one step call and
    # scatter its result.
    _SCATTER_TEMPLATE = """_().transform{ %s }.scatter() """

    def __init__(self):
        self._initDatabaseConnection()

    def _initDatabaseConnection(self):
        """Create the JoernSteps object, connect, and register 'steps/'."""
        self.j = JoernSteps()
        self.j.connectToDatabase()
        # NOTE(review): the steps directory is registered after connecting;
        # confirm JoernSteps still picks it up in this order.
        self.j.addStepsDir('steps/')

    def _runScatter(self, call):
        """Run ``call`` inside the scatter wrapper and return a result list."""
        query = self._SCATTER_TEMPLATE % call
        return list(self.j.runGremlinQuery(query))

    def _runAndDiscard(self, query):
        """Run ``query`` and iterate over all results without keeping them."""
        for unused in self.j.runGremlinQuery(query):
            pass

    def generate(self, selector):
        """
        Generate contents for a given selector, overwriting
        the contents currently held in cndToQueries memory by the server.
        """
        query = """generateTaintLearnStructures(%s.id.toList()) _()""" % (selector)
        self._runAndDiscard(query)

    def generateChecksForInvocations(self, invocs):
        """Run generateChecksForInvocations for ``invocs``; returns None."""
        query = """generateChecksForInvocations(%s.toList()) _()""" % (invocs)
        self._runAndDiscard(query)

    # Source Analysis

    def getSourceAPISymbols(self):
        """Return the results of getSourceAPISymbols() as a list."""
        return self._runScatter("getSourceAPISymbols()")

    def getAllDefStmtsPerArg(self):
        """Return the results of getAllDefStmtsPerArg() as a list."""
        return self._runScatter("getAllDefStmtsPerArg()")

    # Condition Analysis

    def getAllChecksPerArg(self):
        """Return the results of getAllChecksPerArg() as a list."""
        return self._runScatter("getAllChecksPerArg()")

    def getAllConditions(self):
        """Return the results of getAllConditions() as a list."""
        return self._runScatter("getAllConditions()")

    def getAllConditionsCode(self):
        """Return the results of getAllConditionsCode() as a list."""
        return self._runScatter("getAllConditionsCode()")

    def getInvocationCallSiteIds(self):
        """Return the results of getInvocationCallSites() as a list."""
        return self._runScatter("getInvocationCallSites()")

    def getSubConditions(self, nodeId):
        """Return the results of subConditions(nodeId) as a list."""
        return self._runScatter("subConditions(%s)" % (nodeId))

    def getAllCndFeatureVectors(self, invocs=None, argNum=None):
        """Return condition feature vectors as a list.

        invocs -- optional invocations; when empty or omitted, the query
                  covers all invocations (getAllCndFeatureVectors), otherwise
                  getCndFeatureVectorsForInvocs is used.
        argNum -- optional integer argument number appended to the call.
        """
        if not invocs:
            if argNum is not None:
                call = "getAllCndFeatureVectors(%d)" % (argNum)
            else:
                call = "getAllCndFeatureVectors()"
        elif argNum is not None:
            call = "getCndFeatureVectorsForInvocs(%s, %d)" % (invocs, argNum)
        else:
            call = "getCndFeatureVectorsForInvocs(%s)" % (invocs)
        return self._runScatter(call)

    def getAllASTNodeLabels(self):
        """Return the results of getAllASTNodeLabels() as a list."""
        return self._runScatter("getAllASTNodeLabels()")

    # Choosing sinks

    def getControlledSinks(self, nodeId):
        """Return the results of getControlledSinks(nodeId) as a list."""
        return self._runScatter("getControlledSinks(%s)" % (nodeId))