def binary_ast_fn(filename, dest_filename, idx, start=0, end=-1, *args):
    """Convert one byte-range shard of a line-oriented AST file to binary ASTs.

    Seeks to ``start`` in ``filename``, then for every line read runs the
    ``util_ast`` conversion pipeline and writes exactly one serialized record
    to ``dest_filename + str(idx)``. A line whose parsed AST is falsy, or whose
    conversion raises, is written as the serialization of ``None`` so that
    every processed input line still yields one output line.

    Args:
        filename: Path of the input file; each line is parsed with
            ``json_io.json_loads``.
        dest_filename: Output path prefix; ``str(idx)`` is appended to it.
        idx: Shard index used to build the output path.
        start: Offset passed to ``reader.seek`` before reading.
        end: Upper bound on the reader position. Only enforced when ``> 0``;
            the default ``-1`` means "read until the input is exhausted".
        *args: Extra positional payload supplied by the caller (the original
            note says dict parameters cannot be fed through multi-processing).
            It is not used here, so it is intentionally not unpacked.
    """
    shard_path = dest_filename + str(idx)
    with file_io.open(filename, "r") as reader, file_io.open(shard_path, 'w') as writer:
        reader.seek(start)
        line = safe_readline(reader)
        while line:
            # The position is checked after the line has been read, so a line
            # that leaves the reader beyond `end` is not processed.
            if end > 0 and reader.tell() > end:
                break

            ast = json_io.json_loads(line)
            bin_ast = None
            if ast:
                try:
                    ast = util_ast.value2children(ast)
                    ast = util_ast.remove_root_with_uni_child(ast)
                    root_idx = util_ast.get_root_idx(ast)
                    ast = util_ast.delete_node_with_uni_child(ast, idx=root_idx)
                    root_idx = util_ast.get_root_idx(ast)
                    bin_ast = util_ast.binarize_tree(ast, idx=root_idx)  # to binary ast tree
                    # NOTE(review): the root index is recomputed from `ast`,
                    # not from `bin_ast` -- confirm this is intended.
                    root_idx = util_ast.get_root_idx(ast)
                    bin_ast = util_ast.reset_indices(bin_ast, root_idx)  # reset node indices
                    bin_ast = util_ast.pad_leaf_node(bin_ast, MAX_SUB_TOKEN_LEN)
                except RecursionError:
                    LOGGER.error('RecursionError, ignore this tree')
                    # Discard any partially built tree.
                    bin_ast = None
                except Exception as err:
                    # Best effort: log and skip this tree instead of aborting
                    # the whole shard.
                    LOGGER.error(err)
                    bin_ast = None

            print(json_io.json_dumps(bin_ast), file=writer)
            line = safe_readline(reader)
def sbtao_fn(filename, dest_filename, idx, start=0, end=-1, *args):
    """Convert one byte-range shard of a line-oriented AST file via ``build_sbtao_tree``.

    Seeks to ``start`` in ``filename``, then for every line read converts the
    parsed AST with ``util_ast.value2children``, ``util_ast.pad_leaf_node`` and
    ``util_ast.build_sbtao_tree`` and writes exactly one serialized record to
    ``dest_filename + str(idx)``. A line whose parsed AST is falsy, or whose
    conversion raises, is written as the serialization of ``None`` so that
    every processed input line still yields one output line.

    Args:
        filename: Path of the input file; each line is parsed with
            ``json_io.json_loads``.
        dest_filename: Output path prefix; ``str(idx)`` is appended to it.
        idx: Shard index used to build the output path.
        start: Offset passed to ``reader.seek`` before reading.
        end: Upper bound on the reader position. Only enforced when ``> 0``;
            the default ``-1`` means "read until the input is exhausted".
        *args: Extra positional payload supplied by the caller (the original
            note says dict parameters cannot be fed through multi-processing).
            It is not used here, so it is intentionally not unpacked.
    """
    shard_path = dest_filename + str(idx)
    with file_io.open(filename, "r") as reader, file_io.open(shard_path, 'w') as writer:
        reader.seek(start)
        line = safe_readline(reader)
        while line:
            # The position is checked after the line has been read, so a line
            # that leaves the reader beyond `end` is not processed.
            if end > 0 and reader.tell() > end:
                break

            ast = json_io.json_loads(line)
            sbt = None
            if ast:
                try:
                    ast = util_ast.value2children(ast)
                    padded_ast = util_ast.pad_leaf_node(ast, MAX_SUB_TOKEN_LEN)
                    root_idx = util_ast.get_root_idx(padded_ast)
                    sbt = util_ast.build_sbtao_tree(padded_ast, idx=root_idx)
                except RecursionError:
                    # Same policy as binary_ast_fn: skip the tree rather than
                    # abort the shard.
                    LOGGER.error('RecursionError, ignore this tree')
                    sbt = None
                except Exception as err:
                    # Best effort: log and emit None for this line so the
                    # remaining lines are still processed.
                    LOGGER.error(err)
                    sbt = None

            print(json_io.json_dumps(sbt), file=writer)
            line = safe_readline(reader)