def push(stack, node, parent_idx):
    """Record the parent index and type of each non-terminal child of ``node``.

    For every child of ``node`` whose type is not in ``TERM_TYPE``, a
    ``(parent_idx, child_type)`` tuple is appended to ``stack``.  Properties
    are visited in reverse ``PROP_DICT`` order and list children in reverse
    list order, so a consumer popping from the end of ``stack`` receives the
    entries in forward order.

    Per the original author's note, the stack is what lines up
    ``frag_info_seq[:]`` with ``frag_seq[1:]`` so that every fragment knows
    its parent's id (and, at prediction time, its parent's type).

    :param stack: list that receives the ``(parent_idx, child_type)`` tuples
    :param node: AST node whose children are inspected
    :param parent_idx: value stored as the first element of every tuple
    :return: None; ``stack`` is modified in place
    """
    props = PROP_DICT[get_node_type(node)]
    for key in reversed(props):
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value) and get_node_type(value) not in TERM_TYPE:
            # Exactly one expandable child under this property.
            stack.append((parent_idx, get_node_type(value)))
        elif is_node_list(value):
            # Several children: keep only the non-empty, non-terminal ones.
            stack.extend(
                (parent_idx, get_node_type(item))
                for item in reversed(value)
                if item is not None and get_node_type(item) not in TERM_TYPE
            )
def infer_id_types(node, parent):
    """Return a list of candidate types for an identifier, based on ``parent``.

    * ``parent`` is not a ``MemberExpression``: ``[JSType.unknown]``.
    * ``parent`` is a ``MemberExpression`` whose ``property`` is an
      ``Identifier`` named in ``builtin.resolve_pattern``: the mapped value.
    * any other ``MemberExpression``: ``[JSType.js_object]``.

    :param node: not used by this function
    :param parent: AST node enclosing the identifier
    """
    if get_node_type(parent) != 'MemberExpression':
        return [JSType.unknown]

    prop = parent['property']
    if get_node_type(prop) == 'Identifier':
        prop_name = prop['name']
        known_patterns = builtin.resolve_pattern
        if prop_name in known_patterns:
            return known_patterns[prop_name]
    return [JSType.js_object]
def is_eval(self, node):
    """Tell whether ``node`` is a statement calling a name in ``EVAL_LIST``.

    The node must be an ``ExpressionStatement`` wrapping a ``CallExpression``
    whose callee is a plain ``Identifier``.

    :param node: AST node to test
    :return: True if the callee's name is in ``EVAL_LIST``, otherwise False
    """
    if get_node_type(node) != 'ExpressionStatement':
        return False
    call = node['expression']
    if get_node_type(call) != 'CallExpression':
        return False
    callee = call['callee']
    if get_node_type(callee) != 'Identifier':
        return False
    return callee['name'] in EVAL_LIST
def resolve_FuncCall(node, parent, symbols, is_global, is_check, cand, hlist):
    """Resolve the identifiers used by a call expression.

    The callee is handled according to its node type, the arguments are
    resolved with ``resolve_list``, and -- when the callee yields a function
    node (one with ``params`` and ``body``) -- that body is resolved in a
    fresh local scope seeded with its identifier parameters and ``arguments``.

    :param node: call node; ``node['callee']`` and ``node['arguments']`` are read
    :param parent: not used by this function
    :param symbols: ``(global_var, local_var)`` pair of symbol lists
    :param is_global: forwarded to ``resolve_id`` / ``resolve_list``
    :param is_check: forwarded to ``resolve_id`` / ``resolve_list``
    :param cand: forwarded to ``change_id`` / ``resolve_id`` / ``resolve_list``
    :param hlist: list of harness names; may be appended to
    :return: ``(global_var, local_var)`` as unpacked on entry, or ``symbols``
        itself for a ``NewExpression`` callee
    """
    global_var, local_var = symbols
    # Whether the callee's body should be resolved after the arguments.
    go_flag = True
    callee_type = get_node_type(node['callee'])
    if callee_type == 'Identifier':
        name = node['callee']['name']
        if (name in builtin.FUNCS or name in builtin.OBJS or
                name in builtin.ARRAYS):
            # Built-in name: no callee body is analysed.
            expr = None
        elif name in ID_HARNESS_MAP:
            # Harness-provided name: record one harness entry unless
            # is_duplicate() reports it is already covered by hlist.
            if not is_duplicate(hlist, name):
                fname = pick_one(ID_HARNESS_MAP[name])
                hlist.append(fname)
            expr = None
        else:
            # Any other name: look the symbol up, or let change_id() supply one.
            symbol = find_symbol(node['callee'], symbols)
            if symbol == None:
                symbol = change_id(node['callee'], [JSType.js_func], symbols,
                                   cand)
            expr = symbol.expr
            # Read the symbol's flag, then clear it.
            # NOTE(review): presumably this keeps the same function body from
            # being resolved again (e.g. on recursion) -- confirm.
            go_flag = symbol.get_flag()
            symbol.set_flag(False)
    elif callee_type in ['FunctionExpression', 'ArrowFunctionExpression']:
        # Immediately-invoked function: the callee itself is the body to analyse.
        expr = node['callee']
    elif callee_type in ['MemberExpression', 'CallExpression',
                         'SequenceExpression']:
        resolve_id(node['callee'], node, symbols, is_global, is_check, cand,
                   hlist)
        expr = None
    elif callee_type == 'NewExpression':
        # Overwrites the constructor name with 'Function' and returns early,
        # without resolving the arguments.
        node['callee']['callee']['name'] = 'Function'
        return symbols
    elif callee_type == 'BlockStatement':
        resolve_list(node['body'], node, symbols, is_global, is_check, cand,
                     hlist)
        expr = None
    else:
        error('resolve_id FunctionCall fail')
        expr = None
    resolve_list(node['arguments'], node, symbols, is_global, is_check, cand,
                 hlist)
    if (go_flag and expr != None and 'params' in expr and 'body' in expr):
        # Build the callee's local scope: identifier parameters + 'arguments'.
        l1 = []
        for arg in expr['params']:
            if get_node_type(arg) == 'Identifier':
                l1.append(Symbol(arg, arg))
        l1.append(Symbol('arguments', None, JSType.js_array))
        symbols = global_var, l1
        symbols = hoisting(expr['body'], symbols, False)
        resolve_id(expr['body'], node, symbols, False, False, cand, hlist)
    # The scope built for the callee body is not returned; the caller gets the
    # lists that were unpacked on entry.
    return global_var, local_var
def is_declared_id(node, parent, prop):
    """Tell whether ``node`` is an identifier in a declaring position.

    An ``Identifier`` counts as declared when it is the ``id`` of a
    ``VariableDeclarator`` / ``FunctionDeclaration`` or the ``left`` side of
    an ``AssignmentExpression``.

    :param node: AST node to test
    :param parent: AST node that holds ``node``
    :param prop: name of the property of ``parent`` under which ``node`` sits
    :return: True for a declaring position, otherwise False
    """
    if get_node_type(node) != 'Identifier':
        return False

    # Parent type -> the property slot in which an identifier is a declaration.
    declaring_slot = {
        'VariableDeclarator': 'id',    # var / const / let
        'FunctionDeclaration': 'id',   # function name
        'AssignmentExpression': 'left',
    }
    parent_type = get_node_type(parent)
    if parent_type not in declaring_slot:
        return False
    return prop == declaring_slot[parent_type]
def make_frags(node, frag_seq, frag_info_seq, node_types, stack):
    """Split the subtree rooted at ``node`` into fragments, in pre-order.

    A new fragment dict is appended to ``frag_seq`` before any child is
    visited, so its index is fixed first.  If ``stack`` is non-empty its top
    entry is moved to ``frag_info_seq``; then ``push`` is called with this
    node and its fragment index.  Each property of the node is copied into
    the fragment: non-terminal children are replaced by ``prune(child)`` and
    recursed into, everything else is stored as-is.

    :param node: AST node to fragment
    :param frag_seq: list receiving the fragments
    :param frag_info_seq: list receiving entries popped from ``stack``
    :param node_types: set receiving every visited node type
    :param stack: work list shared with ``push``
    :return: the fragment built for ``node``
    """
    frag = {}
    frag_idx = len(frag_seq)
    # Must be appended now (not after the children) so that the indices
    # handed to the children come after this one.
    frag_seq.append(frag)

    if stack:
        frag_info_seq.append(stack.pop())
    push(stack, node, frag_idx)

    node_type = get_node_type(node)
    node_types.add(node_type)

    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        value = node[key]

        if is_single_node(value) and get_node_type(value) not in TERM_TYPE:
            # Single non-terminal child.
            frag[key] = prune(value)
            make_frags(value, frag_seq, frag_info_seq, node_types, stack)
        elif is_node_list(value):
            # List of children: holes and terminals are kept verbatim.
            items = []
            frag[key] = items
            for item in value:
                if item is None or get_node_type(item) in TERM_TYPE:
                    items.append(item)
                else:
                    items.append(prune(item))
                    make_frags(item, frag_seq, frag_info_seq, node_types,
                               stack)
        else:
            # Plain attribute without structure.
            frag[key] = value

    # Redundant re-assignment kept from the original implementation.
    frag_seq[frag_idx] = frag
    return frag
def push(self, stack, node):
    """Append ``(parent_idx, child_type)`` for every pruned child of ``node``.

    ``parent_idx`` is ``self.frag2idx(node)``.  Properties are visited in
    reverse ``PROP_DICT`` order and list children in reverse list order.

    :param stack: list that receives the tuples
    :param node: fragment / AST node whose children are inspected
    :return: None; ``stack`` is modified in place
    """
    parent_idx = self.frag2idx(node)
    props = PROP_DICT[get_node_type(node)]

    for key in reversed(props):
        if key not in node:
            continue
        value = node[key]
        if type(value) == dict and is_pruned(value):
            stack.append((parent_idx, get_node_type(value)))
        elif type(value) == list:
            stack.extend(
                (parent_idx, get_node_type(item))
                for item in reversed(value)
                if item != None and is_pruned(item)
            )
def frag2idx(self, frag):
    """Map a fragment to its index in ``self._new_frag_dict``.

    The fragment is looked up by ``hash_frag(frag)``; when that key is
    absent, the entry stored under the fragment's node type is used instead.

    :param frag: fragment to look up
    :return: the value stored in ``self._new_frag_dict``
    :raises KeyError: if neither the hash nor the node type is a key
    """
    fallback_key = get_node_type(frag)
    digest = hash_frag(frag)
    table = self._new_frag_dict
    return table[digest] if digest in table else table[fallback_key]
def build_def_dict(node, def_dict):
    """Collect names defined in the subtree rooted at ``node``.

    Names come from function/class declarations, variable declarators and
    assignments whose target is a plain ``Identifier``.  Class and function
    expressions are not entered, and traversal stops at a named
    function/class declaration after recording its name.

    :param node: AST node to scan
    :param def_dict: collection with an ``add`` method that receives the names
    :return: None
    """
    node_type = get_node_type(node)
    if node_type in ('ClassExpression', 'FunctionExpression'):
        return

    if node_type in ('FunctionDeclaration', 'ClassDeclaration'):
        declared = node['id']
        if declared is not None and declared['type'] == 'Identifier':
            def_dict.add(declared['name'])
            return
    elif node_type == 'VariableDeclarator':
        target = node['id']
        if 'type' in target and target['type'] == 'Identifier':
            def_dict.add(target['name'])
    elif node_type == 'AssignmentExpression':
        target = node['left']
        if 'type' in target and target['type'] == 'Identifier':
            def_dict.add(target['name'])

    # Descend into the children.
    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value) and value['type'] not in TERM_TYPE:
            build_def_dict(value, def_dict)
        elif is_node_list(value):
            for item in value:
                if item is not None:
                    build_def_dict(item, def_dict)
def normalize_id(node, id_dict, parent=None, prop=None):
    """Rename identifiers in the subtree rooted at ``node`` using ``id_dict``.

    Children are processed before the node itself.  ``ObjectPattern``
    subtrees are left untouched.  An ``Identifier`` is not renamed when it is
    a non-computed property of a ``MemberExpression`` (any slot other than
    ``object``) or the ``key`` of a ``Property``.

    ``parent`` defaults to ``None``; an identifier passed without a parent is
    renamed instead of raising ``TypeError`` on ``parent['type']``.

    :param node: AST node to normalise (modified in place)
    :param id_dict: mapping from original identifier name to replacement
    :param parent: AST node holding ``node``, or None at the root
    :param prop: name of the property of ``parent`` under which ``node`` sits
    :return: None
    """
    node_type = get_node_type(node)
    if node_type == 'ObjectPattern':
        return

    # Traversal
    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        child = node[key]
        if is_single_node(child):
            normalize_id(child, id_dict, node, key)
        elif is_node_list(child):
            for _child in child:
                if _child is not None:
                    normalize_id(_child, id_dict, node, key)

    # Exit if the node is not an ID
    if node_type != 'Identifier':
        return

    # The exclusions below only make sense when there is a parent.
    if parent is not None:
        parent_type = parent['type']
        # Exit if the node is a property of an object.  The comparison with
        # False is kept as in the original so that only an explicit false
        # value (not a missing/None flag) counts as non-computed.
        if (parent_type == 'MemberExpression' and prop != 'object'
                and parent['computed'] == False):
            return
        # Do not normalize keys (ObjectExpression)
        if parent_type == 'Property' and prop == 'key':
            return

    # Replace the ID
    id_name = node['name']
    if id_name in id_dict:
        node['name'] = id_dict[id_name]
def get_type(expr, symbols):
    """Infer a ``JSType`` for the expression node ``expr``.

    Literals are classified by their value, array/object/class expressions by
    their node type, ``new`` expressions via ``get_type_newExpr`` and
    identifiers via the symbol table.  Anything else is ``JSType.unknown``.

    A literal carrying a ``regex`` entry is classified as a regex *before*
    its ``value`` is inspected: ESTree allows ``value`` to be null for a
    regular expression the parser could not represent, which would otherwise
    be reported as ``js_null``.

    :param expr: AST expression node, or None
    :param symbols: symbol tables passed to ``find_symbol``
    :return: a ``JSType`` member
    """
    if expr is None:
        return JSType.unknown

    expr_type = get_node_type(expr)
    if expr_type == 'Literal':
        if 'regex' in expr:
            return JSType.js_regex
        value = expr['value']
        if value is None:
            return JSType.js_null
        value_type = type(value)
        # Exact type checks: bool must not be mistaken for a number.
        if value_type == bool:
            return JSType.js_bool
        if value_type in (int, float):
            return JSType.js_number
        if value_type == str:
            return JSType.js_string
        return JSType.unknown
    if expr_type == 'ArrayExpression':
        return JSType.js_array
    if expr_type in ('ObjectExpression', 'ClassExpression'):
        return JSType.js_object
    if expr_type == 'NewExpression':
        return get_type_newExpr(expr)
    if expr_type == 'Identifier':
        sym = find_symbol(expr, symbols)
        if sym is not None:
            return sym.get_type()
    return JSType.unknown
def build_ast(self, node, stack, frag):
    """Plug ``frag`` into the first pruned slot found under ``node``.

    The tree is walked depth-first in ``PROP_DICT`` order.  At the first
    pruned child reached while ``frag`` is truthy, ``self.push(stack, frag)``
    is called, the slot is overwritten with ``frag`` and ``None`` is
    returned.  Once a recursive call has returned ``None``, later pruned
    slots are left alone.

    :param node: AST node to search (modified in place)
    :param stack: work list handed to ``self.push``
    :param frag: fragment to insert, or a falsy value if already inserted
    :return: ``None`` if the fragment was consumed, otherwise ``frag``
    """
    for key in PROP_DICT[get_node_type(node)]:
        if key not in node:
            continue
        value = node[key]

        if is_single_node(value):
            if not is_pruned(value):
                frag = self.build_ast(value, stack, frag)
            elif frag:
                # Expand the pruned slot with the fragment.
                self.push(stack, frag)
                node[key] = frag
                return None
        elif is_node_list(value):
            for pos, item in enumerate(value):
                if item is None:
                    continue
                if not is_pruned(item):
                    frag = self.build_ast(value[pos], stack, frag)
                elif frag:
                    # Expand the pruned slot with the fragment.
                    self.push(stack, frag)
                    value[pos] = frag
                    return None
    return frag
def restore_node(handle_id, node_name, node_type_name, node_meta_type,
                 fallback_user):
    """Fetch or create the ``NodeHandle`` with the given ``handle_id``.

    An existing handle is looked up in the SQL database first; only if none
    exists is a new one created with the old handle id.  An existing handle
    whose ``node_meta_type`` differs is updated and saved.  In both cases the
    node is then created via ``_create_node``.

    Because the handle_id is set explicitly, run
    ``django-admin.py sqlsequencereset noclook`` afterwards and paste the
    resulting SQL statements into the dbshell.

    :return: the ``NodeHandle`` instance
    """
    node_type = utils.get_node_type(node_type_name)
    node_handle, created = NodeHandle.objects.get_or_create(
        handle_id=handle_id,
        defaults={
            'node_name': node_name,
            'node_type': node_type,
            'node_meta_type': node_meta_type,
            'creator': fallback_user,
            'modifier': fallback_user,
        },
    )

    meta_type_differs = (not created
                         and node_handle.node_meta_type != node_meta_type)
    if meta_type_differs:
        node_handle.node_meta_type = node_meta_type
        node_handle.save()

    # Called directly rather than via .save(), which would do a db fetch of
    # node_type.  Makes sure the data is saved in neo4j as well.
    node_handle._create_node(node_type.get_label())
    return node_handle
def resolve_ClassDecl(node, parent, symbols, is_global, is_check, cand, hlist):
    """Register a named class declaration as a ``js_object`` symbol.

    When ``node['id']`` is an ``Identifier`` and ``is_check`` is false, a new
    ``Symbol`` is appended to ``symbols[0]``.  Otherwise nothing is changed.

    :param node: class declaration node
    :param symbols: symbol tables; ``symbols[0]`` may be appended to
    :param is_check: when true, no symbol is added
    :return: ``symbols``
    """
    class_id = node['id']
    if class_id is None or get_node_type(class_id) != 'Identifier':
        return symbols
    if not is_check:
        symbols[0].append(Symbol(class_id, None, JSType.js_object))
    return symbols
def rewrite(self, node, parser):
    """Replace eval-style statements under ``node`` by the code they evaluate.

    For a node accepted by ``self.is_eval`` the call arguments are passed to
    ``self.str2code``; if the result differs from a deep copy of the original
    arguments it is returned, otherwise ``None``.  For any other node the
    children are visited; a subtree returned for a list element is merged
    into that list with ``self.append`` at the element's current position.

    :param node: AST node to process (modified in place)
    :param parser: forwarded to ``self.str2code``
    :return: the replacement subtree for an eval statement, else ``None``
    """
    node_type = get_node_type(node)

    if self.is_eval(node):
        # Parse arguments and retrieve new subtrees.
        args = node['expression']['arguments']
        snapshot = deepcopy(args)
        rewritten = self.str2code(args, parser)
        return rewritten if snapshot != rewritten else None

    # Iterate over the known properties rather than the node's own keys,
    # since the node may carry invalid keys; not every property is present.
    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value):
            self.rewrite(value, parser)
        elif is_node_list(value):
            # Net number of extra slots introduced by earlier replacements.
            shift = 0
            for pos, item in enumerate(value):
                if item is None:
                    continue
                subtree = self.rewrite(item, parser)
                if subtree is not None:
                    node[key] = self.append(node[key], subtree, pos + shift)
                    shift += len(subtree) - 1
def is_string(self, node):
    """Tell whether ``node`` is a ``Literal`` whose raw text starts with a quote.

    :param node: AST node to test
    :return: True if ``node['raw']`` begins with ``"`` or ``'``, else False
    """
    if get_node_type(node) != 'Literal':
        return False
    return node['raw'].startswith(('"', '\''))
def make_edges(node, frag_seq, frag_info_seq, node_types, stack):
    """Split the subtree rooted at ``node`` into one fragment per property.

    For every property of ``node`` that is present, a fresh fragment dict is
    appended to ``frag_seq``, the top of ``stack`` (if any) is moved to
    ``frag_info_seq`` and ``push`` is called with the node and the fragment's
    index.  The fragment holds a single entry keyed by ``(node_type, key)``:
    non-terminal children are replaced by ``prune(child)`` and recursed into,
    everything else is stored as-is.

    :param node: AST node to process
    :param frag_seq: list receiving the fragments
    :param frag_info_seq: list receiving entries popped from ``stack``
    :param node_types: set receiving the node type
    :param stack: work list shared with ``push``
    :return: None
    """
    node_type = get_node_type(node)
    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        node_types.add(node_type)

        # Register the fragment before visiting the children.
        frag = {}
        frag_idx = len(frag_seq)
        frag_seq.append(frag)

        if stack:
            frag_info_seq.append(stack.pop())
        push(stack, node, frag_idx)

        edge = (node_type, key)
        value = node[key]
        if is_single_node(value) and get_node_type(value) not in TERM_TYPE:
            # Single non-terminal child.
            frag[edge] = prune(value)
            make_edges(value, frag_seq, frag_info_seq, node_types, stack)
        elif is_node_list(value):
            # List of children: holes and terminals are kept verbatim.
            items = []
            frag[edge] = items
            for item in value:
                if item is None or get_node_type(item) in TERM_TYPE:
                    items.append(item)
                else:
                    items.append(prune(item))
                    make_edges(item, frag_seq, frag_info_seq, node_types,
                               stack)
        else:
            # Terminal attribute.
            frag[edge] = value

        # Redundant re-assignment kept from the original implementation.
        frag_seq[frag_idx] = frag
def get_Object_prop(obj, prop):
    """Return the value node stored under key ``prop`` in an object literal.

    Entries of ``obj['properties']`` that carry no ``key`` (for example a
    ``SpreadElement`` such as ``{...rest}``) are skipped instead of raising
    ``KeyError``.

    :param obj: AST node, or None
    :param prop: key node to look for; compared to each entry's ``key`` with ``==``
    :return: the matching entry's ``value``; None if ``obj`` is None, is not
        an ``ObjectExpression``, or has no matching entry
    """
    if obj is None:
        return None
    if get_node_type(obj) != 'ObjectExpression':
        return None
    for entry in obj['properties']:
        if 'key' in entry and entry['key'] == prop:
            return entry['value']
    return None
def func_hoisting(node, sym_list):
    """Collect function declarations reachable from ``node`` into ``sym_list``.

    Every direct child that is a ``FunctionDeclaration`` is appended as a
    ``js_func`` symbol; ``BlockStatement`` children are searched recursively.
    Other children are ignored.

    :param node: AST node to scan, or None (in which case nothing happens)
    :param sym_list: list that receives the new ``Symbol`` objects
    :return: None
    """
    if node is None:
        return

    def hoist(candidate):
        # Handle one child: record a declaration or descend into a block.
        kind = get_node_type(candidate)
        if kind == 'FunctionDeclaration':
            sym_list.append(Symbol(candidate['id'], candidate, JSType.js_func))
        elif kind == 'BlockStatement':
            func_hoisting(candidate, sym_list)

    for key in PROP_DICT[get_node_type(node)]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value):
            hoist(value)
        elif is_node_list(value):
            for item in value:
                if item is not None:
                    hoist(item)
def push(stack, node, parent_idx):
    """Append ``(parent_idx, child_type)`` for each non-terminal child of ``node``.

    Properties are visited in reverse ``PROP_DICT`` order and list children
    in reverse list order.  Children whose type is in ``TERM_TYPE`` and empty
    (``None``) list slots are skipped.

    :param stack: list that receives the tuples
    :param node: AST node whose children are inspected
    :param parent_idx: value stored as the first element of every tuple
    :return: None; ``stack`` is modified in place
    """
    def entry_for(candidate):
        # Tuple stored on the stack for one expandable child.
        return (parent_idx, get_node_type(candidate))

    node_type = get_node_type(node)
    for key in reversed(PROP_DICT[node_type]):
        if key not in node:
            continue
        value = node[key]

        # Single child
        if is_single_node(value) and get_node_type(value) not in TERM_TYPE:
            stack.append(entry_for(value))
            continue

        # Multiple children
        if not is_node_list(value):
            continue
        for item in reversed(value):
            if item is None:
                continue
            if get_node_type(item) not in TERM_TYPE:
                stack.append(entry_for(item))
def traverse(self, node, frag_seq, stack):
    """Rebuild a tree by filling pruned slots from a sequence of fragment ids.

    Terminal nodes are ignored.  For every pruned child, the next index is
    removed from the front of ``frag_seq``: ``-1`` leaves the slot pruned
    (recording ``(parent_idx, child_type)`` on ``stack`` when one is given),
    any other index is turned into a fragment with ``self.idx2frag`` and
    stored in the slot.  Filled slots and children that were not pruned are
    then traversed recursively.

    :param node: AST node / fragment to expand (modified in place)
    :param frag_seq: list of fragment indices, consumed from the front
    :param stack: list receiving info on slots left open, or None
    :return: None
    """
    node_type = get_node_type(node)
    if node_type in TERM_TYPE:
        return
    parent_idx = self.frag2idx(node)

    def visit(container, slot):
        # Fill the slot if it is pruned, then descend into whatever it holds.
        current = container[slot]
        if is_pruned(current):
            frag_idx = frag_seq.pop(0)
            if frag_idx == -1:
                if stack is not None:
                    stack.append((parent_idx, get_node_type(current)))
                return
            container[slot] = self.idx2frag(frag_idx)
        self.traverse(container[slot], frag_seq, stack)

    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value):
            visit(node, key)
        elif is_node_list(value):
            for pos, item in enumerate(value):
                if item is not None:
                    visit(value, pos)
def var_hoisting(node, parent, sym_list):
    """Collect ``var``-declared names reachable from ``node`` into ``sym_list``.

    Function and class declarations/expressions are not entered.  A
    ``VariableDeclarator`` whose parent is a ``var`` declaration contributes
    the symbols returned by ``pattern_hoisting``; every other node is
    searched recursively.

    :param node: AST node to scan
    :param parent: AST node holding ``node``, or None
    :param sym_list: list extended in place with the hoisted symbols
    :return: None
    """
    node_type = get_node_type(node)
    if node_type in ('FunctionDeclaration', 'FunctionExpression',
                     'ClassDeclaration', 'ClassExpression'):
        return

    is_var_declarator = (
        parent is not None
        and get_node_type(parent) == 'VariableDeclaration'
        and parent['kind'] == 'var'
        and node_type == 'VariableDeclarator'
    )
    if is_var_declarator:
        sym_list.extend(pattern_hoisting(node['id'], node))
        return

    for key in PROP_DICT[node_type]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value) and get_node_type(value) not in TERM_TYPE:
            var_hoisting(value, node, sym_list)
        elif is_node_list(value):
            for item in value:
                if item is not None:
                    var_hoisting(item, node, sym_list)
def resolve_Try(node, parent, symbols, is_global, is_check, cand, hlist):
    """Resolve identifiers inside the three parts of a try statement.

    ``node['block']``, ``node['handler']`` and ``node['finalizer']`` are each
    resolved with ``resolve_id`` against their own shallow copies of the
    incoming global and local symbol lists.

    :param node: try-statement node
    :param parent: not used by this function
    :param symbols: ``(global_var, local_var)`` pair of symbol lists
    :param is_global: forwarded to ``resolve_id``
    :param is_check: forwarded to ``resolve_id``
    :param cand: forwarded to ``resolve_id``
    :param hlist: forwarded to ``resolve_id``
    :return: the ``symbols`` argument, as passed in
    """
    global_var, local_var = symbols
    # Number of local symbols visible before the statement; used below to cut
    # each part's local list back to that length.
    length = len(local_var)
    ret = ([], [])
    for x in [node['block'], node['handler'], node['finalizer']]:
        # Shallow copies: appends made for this part do not reach the
        # caller's lists.
        g1, l1 = global_var[::], local_var[::]
        func_hoisting(x, l1)
        if (x != None and x == node['handler'] and
                get_node_type(x['param']) == 'Identifier'):
            # The catch parameter is visible inside the handler only.
            l1.append(Symbol(x['param'], None, JSType.js_object))
        g1, l1 = resolve_id(x, node, (g1, l1), is_global, is_check, cand, hlist)
        ret = merge_symbols(ret, (g1, l1[:length]))
    # NOTE(review): ``ret`` is accumulated above but never used; the incoming
    # ``symbols`` is returned instead -- confirm this is intended.
    return symbols
def build_type_mask(self):
    """Build a one-hot type row for every entry of ``self._oov_frag_list``.

    Each row has ``len(self._type_list)`` zeros with a single ``1`` at
    ``self._type_dict[frag_type]``.  For a dict entry (a fragment)
    ``frag_type`` is its node type; any other entry (an out-of-vocabulary
    marker) is used as the type itself.

    :return: list of rows, one per entry of ``self._oov_frag_list``
    """
    width = len(self._type_list)
    rows = []
    for entry in self._oov_frag_list:
        row = [0] * width
        if type(entry) == dict:
            entry_type = get_node_type(entry)
        else:
            entry_type = entry
        row[self._type_dict[entry_type]] = 1
        rows.append(row)
    return rows
def append_frag(self, cand_list, valid_type, root, stack):
    """Pick a random candidate fragment of type ``valid_type`` and expand with it.

    Candidates are drawn with ``random.choice`` until one has the requested
    type; each rejected candidate is removed from ``cand_list``.  The
    accepted candidate is passed to ``self.expand_ast``.

    :param cand_list: list of candidate indices into ``self._new_frag_list``;
        modified in place
    :param valid_type: required type of the fragment
    :param root: forwarded to ``self.expand_ast``
    :param stack: forwarded to ``self.expand_ast``
    :return: ``(True, [cand_idx], parent_idx, frag_type)`` on success, or
        ``(False, None, None, None)`` when no candidate matches
    """
    while cand_list:
        cand_idx = random.choice(cand_list)
        cand_frag = self._new_frag_list[cand_idx]
        cand_type = (get_node_type(cand_frag)
                     if type(cand_frag) == dict else cand_frag)

        if cand_type != valid_type:
            # Wrong type: drop the candidate and draw again.
            cand_list.remove(cand_idx)
            continue

        parent_idx, frag_type = self.expand_ast(cand_frag, stack, root)
        return True, [cand_idx], parent_idx, frag_type

    return False, None, None, None
def update_frags(sorted_frags, frag_list, hash_frag_list, oov_types):
    """Build the new fragment list and its lookup table.

    Fragments for which ``is_oov`` is false are kept, in ``sorted_frags``
    order, and indexed by ``hash_frag(frag)``.  The entries of ``oov_types``
    are then appended and indexed by themselves.

    :param sorted_frags: iterable of indices into ``frag_list``
    :param frag_list: list of fragments
    :param hash_frag_list: forwarded to ``is_oov``
    :param oov_types: iterable of out-of-vocabulary entries
    :return: ``(new_frag_list, new_frag_dict)``
    """
    new_frag_list = []
    new_frag_dict = {}

    # Fragments that are in the vocabulary.
    for old_idx in sorted_frags:
        frag = frag_list[old_idx]
        # The original computes the node type without using it; the call is
        # retained so that behaviour is unchanged.
        get_node_type(frag)
        if is_oov(frag, hash_frag_list):
            continue
        new_idx = len(new_frag_list)
        new_frag_list.append(frag)
        new_frag_dict[hash_frag(frag)] = new_idx

    # Out-of-vocabulary entries.
    for oov_type in oov_types:
        new_idx = len(new_frag_list)
        new_frag_list.append(oov_type)
        new_frag_dict[oov_type] = new_idx

    return new_frag_list, new_frag_dict
def help_Assign(pattern, parent, init, symbols, is_global, is_VarDecl,
                is_check, cand, hlist):
    """Bind the target ``pattern`` of an assignment or declaration.

    * ``Identifier``: unless ``is_check`` is set, the type of ``init`` is
      computed; with ``is_VarDecl`` the existing symbol's type is updated,
      otherwise a new symbol is appended to ``symbols[0]``.
    * ``ArrayPattern`` / ``ObjectPattern``: each element / property value is
      bound recursively against the matching part of ``init``.
    * ``MemberExpression``: delegated to ``resolve_id``.
    * ``AssignmentPattern``: left unchanged (marked TODO by the author).

    :param pattern: target pattern node, or None
    :param parent: forwarded to ``resolve_id`` for member expressions
    :param init: initialiser expression node, or None
    :param symbols: symbol tables
    :return: the resulting symbol tables
    """
    if pattern is None:
        return symbols

    pattern_type = get_node_type(pattern)

    if pattern_type == 'Identifier':
        if is_check:
            return symbols
        ty = get_type(init, symbols)
        if is_VarDecl:
            sym = find_symbol(pattern, symbols)
            if sym is None:
                error('help_VarDecl fail')
            sym.update_type(ty)
        else:
            symbols[0].append(Symbol(pattern, None, ty))
        return symbols

    if pattern_type == 'ArrayPattern':
        for idx, item in enumerate(pattern['elements']):
            item_init = get_Array_item(init, idx)
            symbols = help_Assign(item, pattern, item_init, symbols,
                                  is_global, is_VarDecl, is_check, cand,
                                  hlist)
        return symbols

    if pattern_type == 'ObjectPattern':
        for prop in pattern['properties']:
            prop_init = get_Object_prop(init, prop['key'])
            symbols = help_Assign(prop['value'], pattern, prop_init, symbols,
                                  is_global, is_VarDecl, is_check, cand,
                                  hlist)
        return symbols

    if pattern_type == 'MemberExpression':
        return resolve_id(pattern, parent, symbols, is_global, is_check,
                          cand, hlist)

    if pattern_type == 'AssignmentPattern':
        # TODO: Check
        return symbols

    error('Unknown branch in help assign')
    return symbols
def restore_node(handle_id, node_name, node_type_name, node_meta_type,
                 fallback_user):
    """Restore a ``NodeHandle``, remapping its id when the old one is taken.

    If a handle with ``handle_id`` already exists it is not overwritten: a
    new handle is created instead and the old-to-new id mapping is stored in
    ``NODE_HANDLE_ID_MAPPING`` and logged.  Otherwise the handle is fetched
    or created with the old id.  The node is then created via
    ``_create_node`` and a ``NodeHandleContext`` for the network context is
    ensured.

    Because the handle_id is set explicitly, run
    ``django-admin.py sqlsequencereset noclook`` afterwards and paste the
    resulting SQL statements into the dbshell.

    :return: the ``NodeHandle`` instance
    """
    node_type = utils.get_node_type(node_type_name)
    defaults = {
        'node_name': node_name,
        'node_type': node_type,
        'node_meta_type': node_meta_type,
        'creator': fallback_user,
        'modifier': fallback_user,
    }

    id_taken = NodeHandle.objects.filter(handle_id=handle_id).exists()
    if id_taken:
        # NodeHandle id already taken: create a new NodeHandle and map the
        # old id to the new one.
        node_handle = NodeHandle.objects.create(**defaults)
        NODE_HANDLE_ID_MAPPING[handle_id] = node_handle.handle_id
        logger.info('Remapping handle_id {} to {}'.format(
            handle_id, node_handle.handle_id))
    else:
        node_handle, created = NodeHandle.objects.get_or_create(
            handle_id=handle_id, defaults=defaults)
        if not created and node_handle.node_meta_type != node_meta_type:
            node_handle.node_meta_type = node_meta_type
            node_handle.save()

    # Called directly rather than via .save(), which would do a db fetch of
    # node_type.  Makes sure the data is saved in neo4j as well.
    node_handle._create_node(node_type.get_label())

    # Create NodeHandleContext
    net_ctx = sriutils.get_network_context()
    NodeHandleContext.objects.get_or_create(nodehandle=node_handle,
                                            context=net_ctx)
    return node_handle
def collect_id(node, id_dict, id_cnt, parent=None, prop=None):
    """Register every declared identifier in the subtree rooted at ``node``.

    Children are visited first.  If ``is_declared_id`` accepts the node, its
    name is passed to ``add_id`` with kind ``'f'`` when the parent is a
    function declaration and ``'v'`` otherwise.

    :param node: AST node to scan
    :param id_dict: forwarded to ``add_id``
    :param id_cnt: forwarded to ``add_id``
    :param parent: AST node holding ``node``, or None at the root
    :param prop: name of the property of ``parent`` under which ``node`` sits
    :return: None
    """
    # Tree traversal
    for key in PROP_DICT[get_node_type(node)]:
        if key not in node:
            continue
        value = node[key]
        if is_single_node(value):
            collect_id(value, id_dict, id_cnt, node, key)
        elif is_node_list(value):
            for item in value:
                if item is not None:
                    collect_id(item, id_dict, id_cnt, node, key)

    in_func_decl = parent is not None and is_func_decl(parent)
    id_type = 'f' if in_func_decl else 'v'

    if is_declared_id(node, parent, prop):
        add_id(node['name'], id_dict, id_cnt, id_type)
def pattern_hoisting(pattern, node):
    """Return a symbol for every identifier bound by ``pattern``.

    Array and object patterns contribute the symbols of all their members;
    ``AssignmentPattern``, ``Property`` and ``RestElement`` are unwrapped to
    their ``left`` / ``value`` / ``argument``.  Any other pattern type is
    reported through ``error``.

    :param pattern: pattern node, or None
    :param node: node stored as the second argument of each created ``Symbol``
        for an identifier pattern
    :return: list of ``Symbol`` objects (empty for None or unknown patterns)
    """
    if pattern is None:
        return []

    pattern_type = get_node_type(pattern)
    if pattern_type == 'Identifier':
        return [Symbol(pattern, node, JSType.undefined)]

    # Patterns holding a list of sub-patterns.
    members_key = {
        'ArrayPattern': 'elements',
        'ObjectPattern': 'properties',
    }.get(pattern_type)
    if members_key is not None:
        collected = []
        for member in pattern[members_key]:
            collected += pattern_hoisting(member, pattern)
        return collected

    # Patterns wrapping a single sub-pattern.
    wrapped_key = {
        'AssignmentPattern': 'left',
        'Property': 'value',
        'RestElement': 'argument',
    }.get(pattern_type)
    if wrapped_key is not None:
        return pattern_hoisting(pattern[wrapped_key], pattern)

    error('pattern_hoisting: %s %s' % (pattern['type'], node['type']))
    return []