def make_generator_inputs(self, data):
    # Linearize the meaning representation into source tokens and wrap
    # them in start/stop markers.
    source = preproc.mr2source_inputs(data)
    tokens = [self.source_vocab.start_token] + source \
        + [self.source_vocab.stop_token]

    # Map tokens to vocabulary indices; the transpose gives a
    # (sequence, batch) layout, i.e. a single-example batch of shape
    # (len(tokens), 1).
    inputs = Variable(
        torch.LongTensor([[self.source_vocab[t] for t in tokens]]).t(),
        lengths=torch.LongTensor([len(tokens)]),
        length_dim=0, batch_dim=1,
        pad_value=self.source_vocab.pad_index)

    # Move the inputs to the GPU selected in run(), if any.
    if self._gpu > -1:
        inputs = inputs.cuda(self._gpu)

    return {"source_inputs": inputs}
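# A hypothetical illustration of the result, assuming a vocab whose
# start/stop markers are "<s>"/"</s>" and an MR that linearizes to the
# single token "name[Aromi]":
#   tokens -> ["<s>", "name[Aromi]", "</s>"]
#   tensor -> shape (3, 1): length_dim=0 holds the 3 token positions,
#             batch_dim=1 holds the single example, matching the .t()
#             transpose above.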
def run(self, env, verbose=False):
    output_path = env["proj_dir"] / "output" / self.filename
    output_path.parent.mkdir(exist_ok=True, parents=True)

    # Resolve the checkpoint to decode from, falling back to the
    # project default when none was given explicitly.
    if self.checkpoint is None:
        ckpt = self._get_default_checkpoint(env)
    else:
        ckpt = self.checkpoint
    if ckpt is None:
        raise RuntimeError("No checkpoints found!")
    ckpt_path = env["checkpoints"][ckpt]["path"]

    if verbose:
        print("Loading model from {}".format(ckpt_path))
    model = plum.load(ckpt_path).eval()
    if env["gpu"] > -1:
        model.cuda(env["gpu"])
    self._gpu = env["gpu"]

    # Decode each sampled MR and write one JSON record per line.
    samples = self.make_samples()
    with open(output_path, "w") as out_fp:
        for i, mr in enumerate(samples, 1):
            # Progress counter: the carriage return keeps it on one
            # console line until the final item.
            print("{}/{}".format(i, len(samples)),
                  end="\r" if i < len(samples) else "\n",
                  flush=True)
            gen_input = self.make_generator_inputs(mr)
            tokens = self._get_outputs(model, gen_input)
            source = preproc.mr2source_inputs(mr)
            data = json.dumps({
                "source": source,
                "mr": mr,
                "text": " ".join(tokens),
            })
            print(data, file=out_fp, flush=True)
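# A minimal usage sketch (all names below are hypothetical): the `env`
# mapping is assumed to provide a pathlib project directory, a registry
# of checkpoints keyed by name, and a GPU index (-1 for CPU).
from pathlib import Path

env = {
    "proj_dir": Path("experiments/e2e"),                      # hypothetical
    "checkpoints": {
        "best": {"path": "experiments/e2e/ckpt/model.best.pt"},
    },
    "gpu": -1,                                                # run on CPU
}
task.checkpoint = "best"   # or None to fall back on _get_default_checkpoint
task.run(env, verbose=True)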