            args.beam, 1, 1).transpose(2, 1).transpose(1, 0).contiguous().view(
                args.beam**2, XMB.size(1), XMB.size(2))[top_beam_idxs]

        XMB, MMB = append_batch(XMB, beam_toks, MMB)

        # Stop decoding once every beam has produced the end token,
        # or when the maximum number of generation steps is reached.
        if (beam_toks == end_token).sum().item() == args.beam \
                or _ == context_size_e2 - 1:
            break

    # Detokenize the highest-scoring beam: map ids back to BPE strings
    # and strip the word-boundary marker.
    for tok in beam_seqs[0]:
        tokens.append(text_encoder.decoder[tok.item()].replace(
            '</w>', ' ').replace('\n', ''))

    # Detokenize every beam, dropping end tokens and collapsing whitespace.
    beams = []
    for beam in beam_seqs:
        beams.append(" ".join("".join(
            [text_encoder.decoder[tok.item()].replace(
                '</w>', ' ').replace('\n', '')
             for tok in beam if tok != end_token]).split()))

    sequence_all['beams'] = beams
    final_sequences.append(sequence_all)

import pickle

# Ensure the output directory exists, then dump all decoded sequences.
utils.mkpath("/".join(eval_file_name.split("/")[:-1]))

with open(eval_file_name, "wb") as f:
    pickle.dump(final_sequences, f)
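# --- Illustration (not part of the original script) ----------------------
# A minimal, self-contained sketch of the beam bookkeeping that the
# repeat/view(...)[top_beam_idxs] line above performs. All names and shapes
# here (hyp_scores, step_logprobs, parent) are hypothetical: each of `beam`
# live hypotheses proposes `beam` continuations, giving beam**2 candidates,
# and the `beam` best candidates by cumulative score survive to the next
# decoding step.
import torch

beam, vocab = 3, 10
hyp_scores = torch.randn(beam)            # cumulative log-prob per hypothesis
step_logprobs = torch.randn(beam, vocab)  # next-token log-probs per hypothesis

# Top `beam` continuations of each hypothesis: shapes (beam, beam)
top_lp, top_tok = step_logprobs.topk(beam, dim=-1)
cand_scores = (hyp_scores.unsqueeze(1) + top_lp).view(beam**2)

# Flat indices of the surviving candidates, analogous to top_beam_idxs above
_, top_beam_idxs = cand_scores.topk(beam)
parent = top_beam_idxs // beam                 # originating hypothesis
beam_toks = top_tok.view(-1)[top_beam_idxs]    # tokens appended this step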
]

special = [data.start_token, data.end_token]
special += ["<{}>".format(relation) for relation in relations]

# Load the pretrained 40k-merge BPE vocabulary, then grow it with the
# special tokens so each one maps to a fresh id at the end of the vocabulary.
encoder_path = "model/encoder_bpe_40000.json"
bpe_path = "model/vocab_40000.bpe"
text_encoder = TextEncoder(encoder_path, bpe_path)

for special_token in special:
    text_encoder.decoder[len(text_encoder.encoder)] = special_token
    text_encoder.encoder[special_token] = len(text_encoder.encoder)

# Build and tensorize the ConceptNet generation data.
data_loader = cdata.GenerationDataLoader(opt)
data_loader.load_data("data/conceptnet/")
data_loader.make_tensors(text_encoder, special, test=False)

opt.data.maxr = data_loader.max_r

save_path = "data/conceptnet/processed/generation"
save_name = os.path.join(
    save_path, "{}.pickle".format(utils.make_name_string(opt.data)))

utils.mkpath(save_path)

print("Data Loader will be saved to {}".format(save_name))

torch.save(data_loader, save_name)
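# --- Illustration (not part of the original script) ----------------------
# Hedged sketch of how a downstream script might reload the serialized
# loader instead of re-tokenizing ConceptNet; `save_name` is the path
# printed above. torch.load unpickles the full GenerationDataLoader object,
# so the class must be importable at load time (and recent PyTorch versions
# that default to weights_only=True need weights_only=False for non-tensor
# objects like this one).
reloaded = torch.load(save_name)
assert reloaded.max_r == data_loader.max_r  # tensorization metadata survives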