Пример #1
0
def get_function_call_values_of_function_definitions(tx, function_def_node):
	"""
	Description:
	-------------
	navigates the call graph to find the bindings between 'function-call arguments' & 'function definition params'

	@param {pointer} tx: neo4j transaction pointer
	@param {node} function_def_node: a 'FunctionExpression' or 'FunctionDeclaration' node of esprima AST
	@return {dictionary} one entry per call site, keyed by '<call node Id>__Loc=<call node Location>':
		{ key: { param: {'Value': ..., 'Type': ..., 'ResolveIdentifiers': ...}, ... }, ... }
		'ResolveIdentifiers' is None for Literal / Identifier arguments, otherwise the third
		element returned by QU.get_code_expression for the argument.
	"""

	query = """
	MATCH (param)<-[:AST_parentOf {RelationType: 'params'}]-(functionDef { Id: '%s' })<-[:CG_parentOf]-(caller {Type: 'CallExpression'})-[:AST_parentOf {RelationType: 'arguments'}]-> (arg) RETURN collect(distinct param) as params, caller, collect(distinct arg) AS args
	"""%(function_def_node['Id'])

	bindings = {}
	for record in tx.run(query):
		caller = record['caller']
		call_args = record['args']
		def_params = record['params']
		if len(call_args) < len(def_params):
			# original rationale: the param list must be reversed to line up with the
			# arguments when the call passes fewer arguments than the definition declares
			def_params = def_params[::-1]

		site_key = caller['Id'] + '__Loc=' + caller['Location']
		site_values = bindings[site_key] = {}

		# zip() stops at the shorter list, so params without a matching argument are skipped
		for param_node, arg_node in zip(def_params, call_args):
			param_name, _ = get_value_of_identifer_or_literal(param_node)
			node_type = arg_node['Type']

			if node_type in ('Literal', 'Identifier'):
				value, value_type = get_value_of_identifer_or_literal(arg_node)
				to_resolve = None
			else:
				# MemberExpression, ObjectExpression and every other expression type:
				# render the argument sub-tree and keep its identifiers for later resolution
				expression = QU.get_code_expression(QU.getChildsOf(tx, arg_node))
				value = expression[0]
				value_type = node_type
				to_resolve = expression[2]

			site_values[param_name] = {'Value': value, 'Type': value_type, 'ResolveIdentifiers': to_resolve}

	return bindings
Пример #2
0
    def _get_function_signature(function_dictionary):
        """
        Builds the signature information of a function definition.

        `function_dictionary` must provide 'type' and, for the two supported
        types, 'id'. Returns [signature, parameter, index], where `signature`
        is the first element of get_code_expression() for the definition,
        `parameter` comes from __find_url_parameter() and `index` from
        __get_parameter_index_position(). Any other definition type is
        printed and answered with ['', '', ''].
        """
        kind = function_dictionary['type']
        if kind != 'FunctionExpression' and kind != 'FunctionDeclaration':
            print(function_dictionary)
            return ['', '', '']

        if kind == 'FunctionExpression':
            # function expressions: only the 'params' children are rendered
            tree = neo4jQueryUtilityModule.getChildsOf(
                tx,
                {'Id': function_dictionary['id'], 'Type': kind},
                relation_type='params')
            signature = neo4jQueryUtilityModule.get_code_expression(tree)[0]
        else:
            # function declarations: the whole node is rendered in short form
            tree = neo4jQueryUtilityModule.getChildsOf(
                tx,
                {'Id': function_dictionary['id'], 'Type': kind})
            signature = neo4jQueryUtilityModule.get_code_expression(
                tree, short_form=True)[0]

        parameter = __find_url_parameter(function_dictionary)
        index = __get_parameter_index_position(signature, parameter)
        return [signature, parameter, index]
Пример #3
0
def get_function_name(tx, function_node):
    """
    Looks up the name under which a function definition is known.

    For a 'FunctionDeclaration' the node reached through the 'id' relation is
    used; for every other type, any Identifier / MemberExpression sibling
    under the same AST parent is used. Only the first row returned by the
    query is considered.

    @param tx: neo4j transaction pointer
    @param function_node: node providing 'Type', 'Location' and 'Id'
    @return [name, location, type]; name is 'Anonymous' when the query
        returns no rows
    """
    node_type = function_node['Type']
    location = function_node['Location']
    node_id = function_node['Id']

    if node_type == 'FunctionDeclaration':
        query = """
		MATCH (n {Id: '%s'})-[:AST_parentOf {RelationType: 'id'}]->(name)
		WHERE name.Type = 'Identifier'
		OR name.Type = 'MemberExpression'
		RETURN name
		""" % (node_id)
    else:
        # handle all cases in one go: object expr, assignment expr, var declarator
        query = """
		MATCH (n {Id: '%s'})<-[:AST_parentOf]-(parent)-[:AST_parentOf]->(name)
		WHERE name.Type = 'Identifier'
		OR name.Type = 'MemberExpression'
		RETURN name
		""" % (node_id)

    first_row = next(iter(tx.run(query)), None)
    if first_row is None:
        return ['Anonymous', location, node_type]

    name_node = first_row['name']
    if name_node['Type'] == 'Identifier':
        resolved = name_node['Code']
    else:
        # member expressions are rendered back to source text
        subtree = neo4jQueryUtilityModule.getChildsOf(tx, name_node)
        resolved = neo4jQueryUtilityModule.get_code_expression(subtree)[0]
    return [resolved, location, node_type]
Пример #4
0
def get_value_of(tx, varname, context_node, calls=None):
    """
    Collects the program slices that `varname` depends on at `context_node`.

    Follows the incoming PDG_parentOf edges of `context_node` whose
    `Arguments` property equals `varname`, renders each source statement and
    then recurses into the other identifiers used by that statement.

    @param tx: neo4j transaction pointer
    @param varname: name of the variable to track
    @param context_node: node (must provide 'Id') where the tracking starts
    @param calls: list of '<varname>__<node id>' keys that were already
        visited; it is extended in place. Leave it unset for a top-level call
        so that every call starts with its own, empty history.
    @return list of [code_expression, literals, identifiers] entries
    """
    # A fresh list per top-level call: a shared default list would keep growing
    # across unrelated calls until the size guard below silences all of them.
    if calls is None:
        calls = []

    out_values = []
    node_id = context_node['Id']
    arg = str(varname) + '__' + str(node_id)

    # stop on an already visited (varname, node) pair, and cap the total work
    if arg in calls or len(calls) > 100:
        return out_values

    # Mark the pair as visited on entry. Marking it at the call site instead
    # makes the callee find its own key and return before doing any work.
    calls.append(arg)

    if DEBUG:
        print("[+] get_value_of(%s, %s)" % (varname, node_id))

    query = """
	MATCH (n_s { Id: '%s' })<-[:PDG_parentOf { Arguments: '%s' }]-(n_t) RETURN collect(distinct n_t) AS resultset
	""" % (node_id, varname)
    results = tx.run(query)
    for item in results:
        current_nodes = item['resultset']
        for iterator_node in current_nodes:

            tree = neo4jQueryUtilityModule.getChildsOf(tx, iterator_node)
            contextNode = tree['node']
            if contextNode['Id'] == constantsModule.PROGRAM_NODE_INDEX:
                continue
            [code_expr, literals, idents] = neo4jQueryUtilityModule.get_code_expression(tree)
            out_values.append([code_expr, literals, idents])
            new_varnames = utilityModule.get_unique_list(list(idents))

            # main recursion flow
            for new_varname in new_varnames:
                if new_varname == varname or new_varname in constantsModule.JS_DEFINED_VARS:
                    continue
                out_values.extend(get_value_of(tx, new_varname, contextNode, calls))

    return out_values
Пример #5
0
def _get_varname_value_from_context(tx, varname, context_node, PDG_on_variable_declarations_only=False, context_scope=''):
	"""
	Description:
	-------------
	function for the data flow analysis
	
	@param tx {pointer} neo4j transaction pointer
	@param {string} varname
	@param {dict} context_node: node specifying the CFG-level statement where varname is defined
	@param {bool} PDG_on_variable_declarations_only: internal val to keep state in recursions
	@param {string} context_scope: internal val to keep context scope in recursions
	@return {list}: a 2d list where each entry is of the following format
		[program_slice, literals, dict of identifer mapped to identifer node is, location dict]
	"""


	## ------------------------------------------------------------------------------- ## 
	## Globals and utility functions
	## ------------------------------------------------------------------------------- ## 

	# output
	out_values = [] 
	# stores a map: funcDef id -->> get_function_call_values_of_function_definitions(funcDef)
	knowledge_database = {} 
	# context node identifer
	node_id = context_node['Id']

	def _get_all_call_values_of(varname, func_def_node):
		
		key = func_def_node['Id']
		if key in knowledge_database:
			knowledge = knowledge_database[key]
		else:
			knowledge = get_function_call_values_of_function_definitions(tx, func_def_node)	
			knowledge_database[key] = knowledge

		ret = {}
		for nid, values in knowledge.items():
			if varname in values:
				ret[nid] = values[varname]

		return ret



	## ------------------------------------------------------------------------------- ## 
	## Main logic 
	## ------------------------------------------------------------------------------- ## 

	if PDG_on_variable_declarations_only:
		# for VariableDeclaration PDG relations
		query = """
		MATCH (n_s { Id: '%s' })<-[:PDG_parentOf { Arguments: '%s' }]-(n_t {Type: 'VariableDeclaration'}) RETURN collect(distinct n_t) AS resultset
		"""%(node_id, varname)
	else:
		# for all PDG relations
		query = """
		MATCH (n_s { Id: '%s' })<-[:PDG_parentOf { Arguments: '%s' }]-(n_t) RETURN collect(distinct n_t) AS resultset
		"""%(node_id, varname)

	results = tx.run(query)
	for item in results: 
		currentNodes = item['resultset'] 
		for iteratorNode in currentNodes:
			if iteratorNode['Type'] == 'BlockStatement': 
				# the parameter 'varname' is a function argument

				func_def_node = get_function_def_of_block_stmt(tx, iteratorNode) # check if func def has a varname parameter 
				if func_def_node['Type'] == 'FunctionExpression' or func_def_node['Type'] == 'FunctionDeclaration':

					match_signature = check_if_function_has_param(tx, varname, func_def_node)
					if match_signature:
						if context_scope == '':
							out = ['%s = %s'%(varname, constantsModule.LOCAL_ARGUMENT_TAG_FOR_FUNC),
								  [],
								  [varname],
								  iteratorNode['Location']]
						else:
							out = ['%s %s = %s'%(context_scope, varname, constantsModule.LOCAL_ARGUMENT_TAG_FOR_FUNC),
								  [],
								  [varname],
								  iteratorNode['Location']]					
						out_values.append(out)
						
						varname_values_within_call_expressions = _get_all_call_values_of(varname, func_def_node)
						for nid in varname_values_within_call_expressions:
							each_argument = varname_values_within_call_expressions[nid]

							location_line = _get_location_part(nid)

							if each_argument['Type'] == 'Literal':
								if context_scope == '':
									out = ['%s <--(invocation-value)-- \"%s\"'%(varname, each_argument['Value']),
										  [each_argument['Value']],
										  [varname],
										  location_line]
								else:
									out = ['%s %s <--(invocation-value)-- \"%s\"'%(context_scope, varname, each_argument['Value']),
										  [each_argument['Value']],
										  [varname],
										  location_line]

								out_values.append(out)

							elif each_argument['Type'] == 'Identifier':

								call_expr_id = _get_node_id_part(nid)
								# use this as an id to mark variables in this scope when doing def-use analsis
								context_id_of_call_scope = '[scope-id=%s]'%call_expr_id  

								if context_scope == '':
									out = ['%s <--(invocation-value)-- [def-scope-id=%s] %s'%(varname, call_expr_id, each_argument['Value']),
										  [],
										  [varname, each_argument['Value']],
										  location_line]
								else:
									out = ['%s %s <--(invocation-value)-- [def-scope-id=%s] %s'%(context_scope, varname, call_expr_id, each_argument['Value']),
											  [],
											  [varname, each_argument['Value']],
											  location_line]

								out_values.append(out)

								
								top_level_of_call_expr = get_non_anonymous_call_expr_top_node(tx, {'Id': call_expr_id})
								recurse= _get_varname_value_from_context(tx, each_argument['Value'], top_level_of_call_expr, context_scope=context_id_of_call_scope)
								out_values.extend(recurse)

							elif each_argument['Type'] == 'MemberExpression':

								call_expr_id = _get_node_id_part(nid)
								context_id_of_call_scope = '[scope-id=%s]'%call_expr_id  

								if context_scope == '':
									out = ['%s <--(invocation-value)-- [def-scope-id=%s] %s'%(varname, call_expr_id, each_argument['Value']),
										  [],
										  [varname, each_argument['Value']],
										  location_line]
								else:
									out = ['%s %s <--(invocation-value)-- [def-scope-id=%s] %s'%(context_scope, varname, call_expr_id, each_argument['Value']),
											  [],
											  [varname, each_argument['Value']],
											  location_line]						
								out_values.append(out)	

								# PDG on member expressions-> do PDG on the top most parent of it!
								top_most = each_argument['Value'].split('.')[0]
								call_expr_id = _get_node_id_part(nid)
								top_level_of_call_expr = get_non_anonymous_call_expr_top_node(tx, {'Id': call_expr_id})
								recurse= _get_varname_value_from_context(tx, top_most, top_level_of_call_expr, context_scope=context_id_of_call_scope)
								out_values.extend(recurse)

							elif each_argument['Type'] == 'ObjectExpression':
								
								call_expr_id = _get_node_id_part(nid)
								context_id_of_call_scope = '[scope-id=%s]'%call_expr_id  

								if context_scope == '':
									out = ['%s <--(invocation-value)-- [def-scope-id=%s] %s'%(varname, call_expr_id, each_argument['Value']),
										  [],
										  [varname, each_argument['Value']],
										  location_line]
								else:
									out = ['%s %s <--(invocation-value)-- [def-scope-id=%s] %s'%(context_scope, varname, call_expr_id, each_argument['Value']),
											  [],
											  [varname, each_argument['Value']],
											  location_line]

								out_values.append(out)	

								additional_identifiers = each_argument['ResolveIdentifiers']
								if additional_identifiers is not None:
									for each_additional_identifier in additional_identifiers:
										
										top_level_of_call_expr = get_non_anonymous_call_expr_top_node(tx, {'Id': call_expr_id})
										recurse= _get_varname_value_from_context(tx, each_additional_identifier, top_level_of_call_expr, context_scope=context_id_of_call_scope)
										out_values.extend(recurse)	


							else: 
								# expression statements, call expressions (window.location.replace(), etc)
								if context_scope == '':
									out = ['%s <--(invocation-value)-- %s'%(varname, each_argument['Value']),
										  [],
										  [varname, each_argument['Value']],
										  location_line]
									
								else:
									out = ['%s %s <--(invocation-value)-- %s'%(context_scope, varname, each_argument['Value']),
										  [],
										  [varname, each_argument['Value']],
										  location_line]

								out_values.append(out)				



							## ThisExpression Pointer Analysis
							## NOTE: this code block must be executed for ALL branches, so we have to place it outside of all conditional branches
							additional_identifiers = each_argument['ResolveIdentifiers']
							if additional_identifiers is not None:
								if 'ThisExpression' in additional_identifiers:
									this_expression_node_id = additional_identifiers['ThisExpression']
									pointer_resolutions = get_this_pointer_resolution(tx, {'Id': this_expression_node_id })
									for item in pointer_resolutions['methods']:
										owner_item = item['owner']
										owner_top = item['top']
										tree_owner = QU.getChildsOf(tx, owner_item)
										tree_owner_exp = QU.get_code_expression(tree_owner)[0]
										location_line = owner_item['Location']
										out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope,tree_owner_exp, this_expression_node_id)
										out = [out_line.lstrip(),
											  [],
											  [tree_owner_exp[0]],
											  location_line]
										out_values.append(out)

										# def-use analysis over resolved `this` pointer
										if owner_item != '' and owner_item is not None and owner_item!= constantsModule.WINDOW_GLOBAL_OBJECT and owner_item['Type'] == 'Identifier':
											recurse_values = _get_varname_value_from_context(tx, tree_owner_exp, owner_top, PDG_on_variable_declarations_only=True)
											out_values.extend(recurse_values)


									# handle `this` that resolves to DOM elements in events 
									for element in pointer_resolutions['events']:
										if 'relation' in element:
											# fetched via analysis
											item = element['relation']
											target_node_id = item['Arguments'].split('___')[1]
											if target_node_id == 'xx': 
												continue
											else:
												tree_owner = QU.getChildsOf({'Id': target_node_id})
												tree_owner_exp = QU.get_code_expression(tree_owner)
												location_line = tree_owner['Location']
												out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope, tree_owner_exp, this_expression_node_id)
												out = [out_line.lstrip(),
													  [],
													  [tree_owner_exp],
													  location_line]
												out_values.append(out) 
										else:
											# fetched from DB
											item = element['owner']
											target_node_id = item['Id']		
											tree_owner = QU.getChildsOf({'Id': target_node_id})
											tree_owner_exp = QU.get_code_expression(tree_owner)
											location_line = tree_owner['Location']
											out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope, tree_owner_exp, this_expression_node_id)
											out = [out_line.lstrip(),
												  [],
												  [tree_owner_exp],
												  location_line]
											out_values.append(out) 				


				continue


			tree = QU.getChildsOf(tx, iteratorNode)
			contextNode = tree['node']
			if contextNode['Id'] == constantsModule.PROGRAM_NODE_INDEX: 
				continue
			ex = QU.get_code_expression(tree)
			loc = iteratorNode['Location']
			[code_expr, literals, idents] = ex
			if context_scope != '':
				code_expr = context_scope + '  ' + code_expr 
			out_values.append([code_expr, literals, idents, loc])
			new_varnames = list(set((list(idents)))) # get unique vars

			# handle `this` expressions
			if 'ThisExpression' in new_varnames:
				this_expression_node_id = idents['ThisExpression']
				pointer_resolutions = get_this_pointer_resolution(tx, {'Id': this_expression_node_id })
				for item in pointer_resolutions['methods']:
					owner_item = item['owner']
					owner_top = item['top']
					tree_owner = QU.getChildsOf(tx, owner_item)
					tree_owner_exp = QU.get_code_expression(tree_owner)[0]
					location_line = owner_item['Location']
					out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope, tree_owner_exp, this_expression_node_id)
					out = [out_line.lstrip(),
						  [],
						  [tree_owner_exp[0]],
						  location_line]
					out_values.append(out)

					# def-use analysis over resolved `this` pointer
					if owner_item != '' and owner_item is not None and owner_item!= constantsModule.WINDOW_GLOBAL_OBJECT and owner_item['Type'] == 'Identifier':
						recurse_values = _get_varname_value_from_context(tx, tree_owner_exp, owner_top, PDG_on_variable_declarations_only=True)
						out_values.extend(recurse_values)


				# handle `this` that resolves to DOM elements in events 
				for element in pointer_resolutions['events']:
					if 'relation' in element:
						# fetched via analysis
						item = element['relation']
						target_node_id = item['Arguments'].split('___')[1]
						if target_node_id == 'xx': 
							continue
						else:
							tree_owner = QU.getChildsOf({'Id': target_node_id})
							tree_owner_exp = QU.get_code_expression(tree_owner)
							location_line = tree_owner['Location']
							out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope, tree_owner_exp, this_expression_node_id)
							out = [out_line.lstrip(),
								  [],
								  [tree_owner_exp],
								  location_line]
							out_values.append(out) 
					else:
						# fetched from DB
						item = element['owner']
						target_node_id = item['Id']		
						tree_owner = QU.getChildsOf({'Id': target_node_id})
						tree_owner_exp = QU.get_code_expression(tree_owner)
						location_line = tree_owner['Location']
						out_line = '%s this --(points-to)--> %s [this-nid: %s]'%(context_scope, tree_owner_exp, this_expression_node_id)
						out = [out_line.lstrip(),
							  [],
							  [tree_owner_exp],
							  location_line]
						out_values.append(out) 


			# main recursion flow
			for new_varname in new_varnames:
				if new_varname == varname or new_varname in constantsModule.JS_DEFINED_VARS: continue

				# check if new_varname is a function call
				# i.e., it has a `callee` relation to a parent of type `CallExpression`
				new_varname_id = idents[new_varname]
				check_function_call_query="""
				MATCH (n { Id: '%s' })<-[:AST_parentOf {RelationType: 'callee'}]-(fn_call {Type: 'CallExpression'})-[:CG_parentOf]->(call_definition)
				RETURN call_definition
				"""%(new_varname_id)
				call_definition_result = tx.run(check_function_call_query)
				is_func_call = False
				for definition in call_definition_result:
					item = definition['call_definition']
					if item is not None:
						is_func_call = True
						wrapper_node_function_definition = QU.getChildsOf(tx, item)
						ce_function_definition = QU.get_code_expression(wrapper_node_function_definition)
						location_function_definition = item['Location']
						body = ce_function_definition[0]
						body = jsbeautifier.beautify(body)
						out_line = """%s %s\n\t\t\t %s"""%(context_scope, constantsModule.FUNCTION_CALL_DEFINITION_BODY, body)
						out = [out_line.strip(),
							  [],
							  [],
							  location_function_definition]
						if out not in out_values:
							# avoid returning/printing twice
							out_values.append(out)

				if is_func_call:
					continue
				v = _get_varname_value_from_context(tx, new_varname, contextNode, context_scope = context_scope)
				out_values.extend(v)	



	return out_values
Пример #6
0
def inout_relationship(tx):
    """
    Relates the parameters of every function to its return values and to its
    control predicates.

    For each entry of find_function_expressions(tx), read as
    (function node, parameter names):
    - a parameter is a *dependency* when it occurs in the code of a slice
      that get_value_of() reports for an identifier of a return statement
    - a parameter is a *control* input when it equals (after stripping) an
      identifier of a control predicate
    Empty expressions and the constants "true" / "false" are ignored.

    @param tx: neo4j transaction pointer
    @return [out_dep, out_control, function_names] where the first two map
        function id -> list of parameters, and function_names maps the id of
        every function with at least one hit to
        get_function_name(...) + [parameter names]
    """
    dependencies = {}
    controls = {}
    named_functions = {}

    def _is_ignorable(expression):
        # nothing to analyse in an empty expression or a boolean constant
        return len(expression) == 0 or expression == '\"true\"' or expression == '\"false\"'

    def _remember_function(function_id, function_node, function_params):
        # the name is looked up lazily, only for functions that produce a hit
        if function_id not in named_functions:
            named_functions[function_id] = get_function_name(tx, function_node) + [function_params]

    for entry in find_function_expressions(tx):
        func_node = entry[0]
        func_params = entry[1]
        fn_id = func_node['Id']

        found_deps = dependencies[fn_id] = []
        found_controls = controls[fn_id] = []

        if DEBUG:
            print("-" * 10)
            print("[+] inout_relationship -> function_id: %s" % fn_id)
            print("[+] inout_relationship -> function_params: %s" % str(func_params))

        # ---- parameters flowing into return values ----
        for return_statement in get_return_statements(tx, fn_id):
            subtree = neo4jQueryUtilityModule.getChildsOf(tx, return_statement)
            code_expr, literals, idents = neo4jQueryUtilityModule.get_code_expression(subtree)
            code_expr = code_expr.strip()
            if _is_ignorable(code_expr):
                continue

            if DEBUG:
                print("[+] inout_relationship -> return: %s" % code_expr)

            # slices per identifier, shared by all parameters of this return statement
            slices_by_ident = {}
            for param in func_params:
                if param in found_deps:
                    continue
                for ident in idents:
                    if ident in constantsModule.JS_DEFINED_VARS or ident in ('this', 'ThisExpression'):
                        continue
                    if DEBUG:
                        print("[+] inout_relationship -> tracking: %s" % ident)
                    if ident in slices_by_ident:
                        slices = slices_by_ident[ident]
                    else:
                        slices = get_value_of(tx, ident.strip(), return_statement)
                        slices_by_ident[ident] = slices
                    if any(param in piece[0] for piece in slices):
                        _remember_function(fn_id, func_node, func_params)
                        found_deps.append(param)
                        break

        # ---- parameters used in control predicates ----
        for control_statement in get_control_predicates(tx, fn_id):
            subtree = neo4jQueryUtilityModule.getChildsOf(tx, control_statement)
            code_expr, literals, idents = neo4jQueryUtilityModule.get_code_expression(subtree)
            code_expr = code_expr.strip()
            if _is_ignorable(code_expr):
                continue

            if DEBUG:
                print("[+] inout_relationship -> control: %s" % code_expr)

            for param in func_params:
                if param in found_controls:
                    continue
                used_in_predicate = any(
                    ident not in constantsModule.JS_DEFINED_VARS
                    and param.strip() == ident.strip()
                    for ident in idents
                )
                if used_in_predicate:
                    _remember_function(fn_id, func_node, func_params)
                    found_controls.append(param)

    return [dependencies, controls, named_functions]