def __setstate__(self, state):
    """Restore the instance from a pickled ``state`` mapping.

    Every key/value pair of ``state`` is copied into the instance
    ``__dict__``. When ``state`` carries a ``"recovery"`` entry, that payload
    is written out with ``save_temp_bin``, the ``recovery`` attribute is
    removed from the instance again, ``init_params()`` is called, and the
    value returned by ``save_temp_bin`` is handed to ``self.pc.populate``.
    """
    self.__dict__.update(state.items())

    if "recovery" not in state:
        return

    recovery_file = save_temp_bin(state["recovery"])
    # The raw payload is not kept as an attribute once it has been saved.
    del self.__dict__["recovery"]

    self.init_params()
    self.pc.populate(recovery_file)
def run_traslate(self, input_file, output_file, opt):
    """Run ``translate.py`` on ``input_file``, writing to ``output_file``.

    ``opt`` is updated in place with the ``model``, ``src`` and ``output``
    entries (plus ``gpu`` = 0 when the module-level ``is_cuda`` flag is
    truthy) and then passed to ``run_param``.
    """
    # TODO remove after EMNLP
    # Instances that lack ``model_bin_path`` (or have it set to None) get the
    # path created here from ``self.model_bin``.
    if getattr(self, "model_bin_path", None) is None:
        self.model_bin_path = save_temp_bin(self.model_bin)

    opt.update({
        "model": self.model_bin_path,
        "src": input_file,
        "output": output_file,
    })
    if is_cuda:
        opt["gpu"] = 0

    run_param('translate.py', opt)
def train(self, save_data, opt):
    """Run ``train.py`` on an archived dataset and return the model location.

    ``save_data`` is written out with ``save_temp_bin`` and unpacked as a
    ``gztar`` archive into a fresh ``temp_dir()``. ``opt`` is updated in
    place with the ``data`` and ``save_model`` entries (plus ``world_size``
    and ``gpu_ranks`` when the module-level ``is_cuda`` flag is truthy) and
    then passed to ``run_param``.

    Returns:
        The value of the second ``temp_dir()`` call, which is what was passed
        to the trainer as ``save_model``.
    """
    # Local import: the file's import block is outside the visible chunk.
    import os

    archive_path = save_temp_bin(save_data)
    data_dir = temp_dir()
    shutil.unpack_archive(filename=archive_path,
                          extract_dir=data_dir,
                          format="gztar")

    save_model = temp_dir()

    # os.path.join yields the same string as plain concatenation when
    # ``data_dir`` already ends with a separator, and still produces a valid
    # "<dir>/data" value when it does not.
    opt["data"] = os.path.join(data_dir, "data")
    opt["save_model"] = save_model

    if is_cuda:
        opt["world_size"] = 1
        opt["gpu_ranks"] = 0

    run_param('train.py', opt)

    return save_model
def translate(self, plans: List[str], opts=None):
    """Translate every plan in ``plans`` using the model.

    Each non-empty plan is split on ``"."``; every piece is stripped and
    passed through ``add_features``. The distinct resulting sentences are
    written out with ``save_temp`` and translated in one ``run_traslate``
    call. The translations are then mapped back so that each plan becomes
    the space-joined translations of its sentences.

    Args:
        plans: Plans to translate; an empty string produces an empty result
            for that plan.
        opts: Optional dict with ``beam_size`` and ``find_best``. Missing
            keys fall back to the same values used when ``opts`` is omitted
            (``BEAM_SIZE`` and ``True``).

    Returns:
        A list with one string per entry of ``plans``, in the same order.
    """
    if not opts:
        opts = {"beam_size": BEAM_SIZE, "find_best": True}
    # Tolerate partially specified options instead of raising KeyError.
    beam_size = opts.get("beam_size", BEAM_SIZE)
    find_best = opts.get("find_best", True)

    o_lines = [
        [add_features(part.strip()) for part in plan.split(".")]
        if plan != "" else []
        for plan in plans
    ]

    # Translate every distinct sentence only once.
    n_lines = list(set(chain.from_iterable(o_lines)))
    if not n_lines:
        # Nothing to translate: keep the result parallel to ``plans``, exactly
        # as the regular path does for empty plans (" ".join([]) == "").
        return ["" for _ in o_lines]

    source_path = save_temp(n_lines)
    target_path = temp_name()

    n_best = beam_size if find_best else 1

    # run_traslate takes (input_file, output_file, opt) and resolves the
    # model file itself from self.model_bin / self.model_bin_path, so no
    # model path is passed (or created) here.
    self.run_traslate(
        source_path, target_path, {
            "replace_unk": None,
            "beam_size": beam_size,
            "n_best": n_best,
            "batch_size": 64
        })

    with open(target_path, "r", encoding="utf-8") as out_file:
        # Presumably ``chunks`` groups the output into runs of ``n_best``
        # candidates, one run per input sentence -- confirm against the helper.
        out_lines = chunks(out_file.read().splitlines(), n_best)

    map_lines = {
        n: find_best_out(n, out)
        for n, out in zip(n_lines, out_lines)
    }

    return [" ".join([map_lines[s] for s in lines]) for lines in o_lines]
def __init__(self, model, features=True):
    """Store the model payload and write it out to a temporary file.

    Args:
        model: Model payload; kept as ``self.model_bin`` and also passed to
            ``save_temp_bin``, whose result is kept as
            ``self.model_bin_path``.
        features: Stored unchanged as ``self.features``.
    """
    self.sentences_cache = {}
    self.features = features
    self.model_bin = model
    # Done last so every plain attribute exists before the helper is called.
    self.model_bin_path = save_temp_bin(model)