def frag2idx(self, frag):
    """Return the vocabulary index stored for ``frag``.

    The fragment's hash is tried first as a key of ``self._new_frag_dict``.
    When the hash is not present, the entry keyed by the fragment's node
    type is returned instead.

    Args:
        frag: Fragment accepted by ``get_node_type`` and ``hash_frag``.

    Returns:
        The value stored in ``self._new_frag_dict`` under the hash, or
        under the node type when the hash is missing.

    Raises:
        Whatever the mapping raises for a missing key (``KeyError`` for a
        dict) when neither the hash nor the node type is present.
    """
    kind = get_node_type(frag)
    digest = hash_frag(frag)
    vocab = self._new_frag_dict
    # Unknown hashes fall back to the per-node-type entry.
    key = digest if digest in vocab else kind
    return vocab[key]
def update_frag_seq(frag_seq, new_frag_dict, frag_list, hash_frag_list):
    """Translate a sequence of old fragment indices into new indices.

    Args:
        frag_seq: Iterable of indices into ``frag_list``.
        new_frag_dict: Mapping consulted for the new index; keyed by
            fragment hash for in-vocabulary fragments.
        frag_list: Sequence of fragments addressed by the old indices.
        hash_frag_list: Container of hashes that ``is_oov`` checks to
            decide whether a fragment is out-of-vocabulary.

    Returns:
        A new list holding one remapped index per entry of ``frag_seq``,
        in the same order.
    """
    def remap(old_idx):
        fragment = frag_list[old_idx]
        if is_oov(fragment, hash_frag_list):
            # OoV fragments get their index from get_oov_idx.
            return get_oov_idx(fragment, new_frag_dict)
        return new_frag_dict[hash_frag(fragment)]

    return [remap(old_idx) for old_idx in frag_seq]
def update_frag_info(frag_info_seq, new_frag_dict, frag_list, hash_frag_list):
    """Remap the parent index of every ``(parent_idx, frag_type)`` pair.

    Args:
        frag_info_seq: Iterable of ``(parent_idx, frag_type)`` pairs, where
            ``parent_idx`` indexes into ``frag_list``.
        new_frag_dict: Mapping consulted for the new index; keyed by
            fragment hash for in-vocabulary fragments.
        frag_list: Sequence of fragments addressed by the old indices.
        hash_frag_list: Container of hashes that ``is_oov`` checks to
            decide whether a fragment is out-of-vocabulary.

    Returns:
        A new list of ``(new_parent_idx, frag_type)`` tuples in the input
        order; ``frag_type`` is passed through unchanged.
    """
    remapped = []
    for old_parent_idx, frag_type in frag_info_seq:
        parent = frag_list[old_parent_idx]
        if not is_oov(parent, hash_frag_list):
            new_parent_idx = new_frag_dict[hash_frag(parent)]
        else:
            # OoV parents get their index from get_oov_idx.
            new_parent_idx = get_oov_idx(parent, new_frag_dict)
        remapped.append((new_parent_idx, frag_type))
    return remapped
def add_frag(self, node):
    """Register ``node`` as a fragment if needed and return its index.

    Three cases are distinguished by the fragment's hash:

    * hash not yet in ``self._hash_frag_list``: the fragment is new. Its
      hash is recorded, an index is obtained from ``self.get_frag_idx()``,
      stored in ``self._frag_dict`` and the node is appended to
      ``self._frag_list``.
    * hash seen but missing from ``self._frag_dict``: the index is found
      by searching ``self._frag_list`` for an equal node.
    * otherwise: the index stored in ``self._frag_dict`` is returned.

    Args:
        node: Fragment accepted by ``hash_frag``.

    Returns:
        The index associated with the fragment.
    """
    digest = hash_frag(node)

    if digest in self._hash_frag_list:
        # Existing fragment
        if digest in self._frag_dict:
            return self._frag_dict[digest]
        # Existing fragment, but different hash: fall back to a search
        # of the fragment list (list.index raises if it is not there).
        return self._frag_list.index(node)

    # New fragment
    self._hash_frag_list.add(digest)
    new_idx = self.get_frag_idx()
    self._frag_dict[digest] = new_idx
    self._frag_list += [node]
    return new_idx
def ast2code(self, ast):
    """Convert an AST to JavaScript through the external printer process.

    The AST is written to a temporary ``<hash>.json`` file inside
    ``self._js_dir``. The file's path is sent as a single line to the
    printer's stdin and one reply line is read back from its stdout. The
    temporary file is removed afterwards, even when the exchange with the
    printer raises.

    Args:
        ast: AST accepted by ``hash_frag`` and ``write_ast``.

    Returns:
        The printer's reply line, UTF-8 decoded and stripped, or ``None``
        when the reply contains the text ``'Error'``.
        NOTE(review): the reply is presumably the path of the generated
        JS file -- confirm against the printer script.
    """
    ast_name = hash_frag(ast) + '.json'
    ast_path = os.path.join(self._js_dir, ast_name)
    write_ast(ast_path, ast)

    try:
        request = (ast_path + '\n').encode()
        self._printer.stdin.write(request)
        # Make sure the request actually reaches the child before blocking
        # on its answer; a no-op when the pipe is unbuffered.
        self._printer.stdin.flush()
        reply = self._printer.stdout.readline()
        js_path = reply.decode('utf-8').strip()
    finally:
        # Remove exactly the path that was written, whatever happened above.
        os.remove(ast_path)

    if 'Error' in js_path:
        return None
    return js_path
def update_frags(sorted_frags, frag_list, hash_frag_list, oov_types):
    """Build the new fragment list and index mapping.

    In-vocabulary fragments come first, in the order given by
    ``sorted_frags``; one entry per OoV type is appended after them.

    Args:
        sorted_frags: Iterable of indices into ``frag_list``.
        frag_list: Sequence of fragments addressed by those indices.
        hash_frag_list: Container of hashes that ``is_oov`` checks to
            decide whether a fragment is out-of-vocabulary.
        oov_types: Iterable of OoV type keys. Indices are assigned in its
            iteration order, so an unordered container (e.g. a set) gives
            an order that is not guaranteed to be stable.

    Returns:
        A ``(new_frag_list, new_frag_dict)`` tuple. ``new_frag_list`` holds
        the kept fragments followed by the OoV types; ``new_frag_dict``
        maps each kept fragment's hash, and each OoV type, to its position
        in ``new_frag_list``.
    """
    new_frag_list = []
    new_frag_dict = {}

    # Append frags not in OoV
    for frag_idx in sorted_frags:
        frag = frag_list[frag_idx]
        if is_oov(frag, hash_frag_list):
            continue
        new_frag_dict[hash_frag(frag)] = len(new_frag_list)
        new_frag_list.append(frag)

    # Append OoVs
    for oov_type in oov_types:
        new_frag_dict[oov_type] = len(new_frag_list)
        new_frag_list.append(oov_type)

    return new_frag_list, new_frag_dict
def collect_oov(seed_dict, frag_list):
    """Collect out-of-vocabulary information for the rare fragments.

    ``sort_frags(seed_dict)`` supplies an ordered list of fragment indices
    together with a cut-off position; every index from that position
    onward is treated as a rare fragment.

    Args:
        seed_dict: Passed unchanged to ``sort_frags``.
        frag_list: Sequence of fragments addressed by the sorted indices.

    Returns:
        A ``(sorted_frags, oov_types, hash_frag_list, oov_pool)`` tuple:
        the ordering returned by ``sort_frags``, the set of node types
        seen among rare fragments, the set of rare fragment hashes, and a
        dict mapping each node type to the list of its rare fragments.
    """
    sorted_frags, oov_idx = sort_frags(seed_dict)

    oov_pool = {}
    oov_types = set()
    hash_frag_list = set()

    # For rare fragments
    for rare_idx in sorted_frags[oov_idx:]:
        rare_frag = frag_list[rare_idx]
        kind = get_node_type(rare_frag)

        # Append to OoV pool, creating the per-type bucket on first use
        oov_pool.setdefault(kind, []).append(rare_frag)

        # Add OoV type
        oov_types.add(kind)

        # Add to OoV fragment list
        hash_frag_list.add(hash_frag(rare_frag))

    return sorted_frags, oov_types, hash_frag_list, oov_pool
def is_oov(frag, hash_frag_list):
    """Tell whether ``frag`` is out-of-vocabulary.

    Args:
        frag: Fragment accepted by ``hash_frag``.
        hash_frag_list: Container of hashes of OoV fragments.

    Returns:
        ``True`` when the fragment's hash is a member of
        ``hash_frag_list``, ``False`` otherwise.
    """
    return hash_frag(frag) in hash_frag_list