hidden_size=HIDDEN_SIZE, seq_len=SEQ_LEN, batch_size=BATCH_SIZE, vocab_size=VOCAB_SIZE, num_layers=NUM_LAYERS, dp_keep_prob=DP_KEEP_PROB) model.load_state_dict(torch.load(load_path, map_location='cpu')) hidden = model.init_hidden() model.eval() #--------------- GENERATE SAMPLES first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000) # samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN) samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN) #-------------- CONVERTING TO WORDS data_path = "/Users/mlizaire/Codes/IFT6135/HW2/assignment2/data/" filename = os.path.join(data_path, "ptb.train.txt") word_2_id, id_2_word = _build_vocab(filename) sequences = [] print("THIS IS RNN") for i in range(15): word_sequence = [] id_sequence = np.array(torch.t(samples)[i]) for index in id_sequence: word = id_2_word[int(index)] word_sequence.append(word)
# toy
# model.load_state_dict(torch.load("RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"))
model.eval()
# print(model.out_layer.weight.data)

# Seed word ids (also indices into the vocabulary), one per sequence.
inputs = torch.from_numpy(
    np.random.randint(1, high=1000, size=10).astype(np.int64))
hidden = model.init_hidden()
model.zero_grad()
hidden = repackage_hidden(hidden)  # detach hidden state from any old graph
samples = model.generate(inputs, hidden, 35)  # returns indices
samples = samples.transpose(0, 1)

filename = "data/ptb.train.txt"


def _read_words(filename):
    """Tokenize *filename* on whitespace, mapping each newline to '<eos>'."""
    with open(filename, "r") as f:
        text = f.read()
    return text.replace("\n", "<eos>").split()


def _build_vocab(filename):
    # Count token frequencies over the whole corpus.
    data = _read_words(filename)
    counter = collections.Counter(data)
#
# GENERATE DATA
#
###############################################################################
samples_id_with_inputs = []
samples_id = []
samples_words_with_inputs = []
samples_words = []

# 128 random seed word ids (one per batch element), moved to the target device.
inputs = torch.from_numpy(
    np.random.randint(0, 10000, 128).astype(np.int64)).contiguous().to(device)
hidden = model.init_hidden()[0]
hidden = hidden.to(device)
samples_all = model.generate(inputs, hidden, SEQ_LEN)
print(samples_all[:, :10])

# Keep the first 10 generated sequences, once with the seed word prepended
# and once without; `samples_all` is (SEQ_LEN, batch) so column i is sequence i.
for i in range(10):
    samples_id_with_inputs.append([inputs[i].tolist()] + samples_all[:, i].T.tolist())
    samples_id.append(samples_all[:, i].T.tolist())

print(len(samples_id_with_inputs))
print(len(samples_id_with_inputs[0]))

for s, sentence in enumerate(samples_id_with_inputs):
    samples_words_with_inputs.append([])