Example #1
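# Assumed context (a sketch, not shown in the original example): both methods
# belong to a generation task class, and the module would need roughly the
# imports below. json and torch are certain; the module paths for the
# project-local names (plum, preproc, Variable) are assumptions.
import json

import torch

import plum
import preproc
from plum import Variable  # length-aware tensor wrapper; assumed location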
    def make_generator_inputs(self, data):
        # Convert a meaning representation (MR) into the model's source input:
        # wrap the linearized MR in start/stop tokens, map tokens to vocab ids,
        # and pack them as a (length x batch=1) tensor.
        source = preproc.mr2source_inputs(data)
        tokens = ([self.source_vocab.start_token]
                  + source
                  + [self.source_vocab.stop_token])
        # Variable here is the project's length-aware tensor wrapper (it takes
        # lengths/length_dim/batch_dim/pad_value), not torch.autograd.Variable.
        inputs = Variable(
            torch.LongTensor([[self.source_vocab[t] for t in tokens]]).t(),
            lengths=torch.LongTensor([len(tokens)]),
            length_dim=0, batch_dim=1,
            pad_value=self.source_vocab.pad_index)
        # self._gpu is set in run(); move inputs onto that device if selected.
        if self._gpu > -1:
            inputs = inputs.cuda(self._gpu)
        return {"source_inputs": inputs}
    def run(self, env, verbose=False):
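        # Write one JSON line per sample to <proj_dir>/output/<filename>.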
        output_path = env["proj_dir"] / "output" / self.filename
        output_path.parent.mkdir(exist_ok=True, parents=True)

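        # Resolve which checkpoint to load, falling back to the task default.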
        if self.checkpoint is None:
            ckpt = self._get_default_checkpoint(env)
        else:
            ckpt = self.checkpoint
        if ckpt is None:
            raise RuntimeError("No checkpoints found!")

        ckpt_path = env["checkpoints"][ckpt]["path"]
        if verbose:
            print("Loading model from {}".format(ckpt_path))
        model = plum.load(ckpt_path).eval()
        if env["gpu"] > -1:
            model.cuda(env["gpu"])
        self._gpu = env["gpu"]

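        # make_samples() (not shown) is assumed to return a list of MRs,
        # since len(samples) is taken below.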
        samples = self.make_samples()

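        # Generate text for each MR and stream results to the output file.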
        with open(output_path, "w") as out_fp:
            for i, mr in enumerate(samples, 1):
                print("{}/{}".format(i, len(samples)),
                      end="\r" if i < len(samples) else "\n",
                      flush=True)

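                # Encode the MR, then decode an output token sequence with the
                # helper _get_outputs (not shown in this example).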
                gen_input = self.make_generator_inputs(mr)

                tokens = self._get_outputs(model, gen_input)
                source = preproc.mr2source_inputs(mr)
                data = json.dumps({
                    "source": source,
                    "mr": mr,
                    "text": " ".join(tokens),
                })
                print(data, file=out_fp, flush=True)
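
# A minimal usage sketch, assuming an env mapping shaped like the lookups
# above; every key, value, and the task object itself are assumptions made
# for illustration:
#
#   import pathlib
#   env = {"proj_dir": pathlib.Path("experiments/e2e"),
#          "gpu": -1,  # -1 keeps everything on CPU
#          "checkpoints": {"model.best": {"path": "checkpoints/model.best.pt"}}}
#   task.run(env, verbose=True)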