def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    print(converter.vocab_size)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
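# The snippets in this section all consume a batch_generator() helper defined
# elsewhere (typically in a read_utils-style module) and not reproduced here.
# A minimal sketch, assuming the usual char-RNN batching scheme in which the
# target sequence is the input shifted one step (wrapping at the end); this is
# an illustration, not the original implementation:
import numpy as np


def batch_generator(arr, n_seqs, n_steps):
    arr = np.asarray(arr)
    batch_size = n_seqs * n_steps
    n_batches = len(arr) // batch_size
    # Trim the tail so the array splits evenly into n_seqs rows.
    arr = arr[:batch_size * n_batches].reshape((n_seqs, -1))
    while True:
        np.random.shuffle(arr)  # shuffle the rows between passes
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n:n + n_steps]
            y = np.zeros_like(x)
            # Targets are the inputs shifted left by one character.
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
            yield x, y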
def main(_):
    model_path = os.path.join('model', FLAGS.file_type)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Read and load the corpus for training and validation.
    training_corpus, validating_corpus = read_corpus()

    # Build the text converter.
    print("---------------------------- Initializing Text Converter ----------------------------")
    start_time = time.time()
    converter = TextConverter(training_corpus, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    print('Text converter initialized in %.3f seconds.\n' % (time.time() - start_time))

    # Vectorize the corpus content.
    vectroize_corpus(converter)

    # Build the char-RNN model.
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)

    # Train the model.
    model.train(FLAGS.max_steps, model_path,
                FLAGS.validate_every_n_steps, FLAGS.log_every_n_steps)
def main(_):
    model_path = os.path.join('model', FLAGS.name)  # build the save-path string
    if not os.path.exists(model_path):  # create the directory if it is missing
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()  # read the whole file into one string
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)  # encode the text as an index array
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)  # defaults: 100, 100
    print(converter.vocab_size)
    # Build the model; num_classes is set to the vocabulary size because the
    # network predicts the next character.
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    # Train the model.
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    print("---------------------------- Reading Corpus ----------------------------")
    start_time = time.time()
    corpus = read_corpus()  # capture the corpus returned by read_corpus()
    print('Read corpus finished in %.3f seconds.' % (time.time() - start_time))
    converter = TextConverter(corpus, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(corpus)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    convert = TextConvert(text, FLAGS.max_vocab)
    convert.save_vocab(os.path.join(model_path, 'text_convert.pkl'))
    arr = convert.text2arr(text)
    g = batch_generate(arr, FLAGS.num_seqs, FLAGS.num_steps)
    model = CharRNN(convert.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_n,
                FLAGS.print_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)  # join path: model/<name>
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()  # read the text
    # Build the vocabulary from the text, keeping at most FLAGS.max_vocab tokens.
    converter = TextConverter(text, FLAGS.max_vocab)
    # Pickle the vocabulary to disk.
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    data = converter.text_to_data(text)  # map the text to inputs (word_to_int)
    g = batch_generator(data, FLAGS.n_seqs, FLAGS.n_steps)  # get the batch generator
    print(converter.vocab_size)
    # Initialize the model parameters.
    model = CharRNN(converter.vocab_size,
                    n_seqs=FLAGS.n_seqs,
                    n_steps=FLAGS.n_steps,
                    state_size=FLAGS.state_size,
                    n_layers=FLAGS.n_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.max_steps, model_path,
                FLAGS.save_every_n, FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)  # path where the model is saved
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # codecs.open lets us specify the file encoding; the content is decoded to
    # unicode automatically as it is read.
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()  # read the training text
    converter = TextConverter(text, FLAGS.max_vocab)  # build the text converter
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)  # encode the text as an index array
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)  # batch generator
    print(converter.vocab_size)
    model = CharRNN(converter.vocab_size,  # build the model
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,  # train
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    tokenizer = Tokenizer(text, FLAGS.num_words)
    tokenizer.save_to_file(os.path.join(model_path, 'tokenizer.pkl'))
    arr = tokenizer.texts_to_sequences(text)
    batch = batch_generator(arr, FLAGS.batch_size, FLAGS.num_steps)
    print(tokenizer.vocab_size)
    model = CharRNN(tokenizer.vocab_size,
                    batch_size=FLAGS.batch_size,
                    num_steps=FLAGS.num_steps,
                    n_neurons=FLAGS.n_neurons,
                    n_layers=FLAGS.n_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    embedding=FLAGS.embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(batch,
                FLAGS.n_iterations,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    script_path = os.path.abspath(os.path.dirname(__file__))
    model_path = os.path.join(script_path, 'model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    print("corpus size " + str(len(text)))
    # Optionally restrict the corpus to a whitelisted character set.
    if os.path.exists(FLAGS.whitelist_file):
        with codecs.open(FLAGS.whitelist_file, encoding='utf-8') as f:
            whitelist = f.read()
        text = remove_non_matching_chars(text, whitelist)
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
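# Nearly every snippet above and below constructs a TextConverter from a
# read_utils-style module that is not reproduced in this section. A minimal
# sketch of the interface these snippets assume (frequency-capped vocabulary,
# pickle persistence); this is an illustration, not the original class:
import pickle
from collections import Counter

import numpy as np


class TextConverter(object):
    def __init__(self, text, max_vocab=5000):
        # Keep the max_vocab most frequent characters.
        counts = Counter(text)
        self.vocab = [c for c, _ in counts.most_common(max_vocab)]
        self.word_to_int_table = {c: i for i, c in enumerate(self.vocab)}
        self.int_to_word_table = dict(enumerate(self.vocab))

    @property
    def vocab_size(self):
        # One extra slot for out-of-vocabulary characters.
        return len(self.vocab) + 1

    def word_to_int(self, word):
        return self.word_to_int_table.get(word, len(self.vocab))

    def int_to_word(self, index):
        return self.int_to_word_table.get(index, '<unk>')

    def text_to_arr(self, text):
        return np.array([self.word_to_int(c) for c in text], dtype=np.int32)

    def arr_to_text(self, arr):
        return ''.join(self.int_to_word(i) for i in arr)

    def save_to_file(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self.vocab, f)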
def main(_): model_path = os.path.join("model", FLAGS.name) if os.path.exists(model_path) is False: os.makedirs(model_path) with open(FLAGS.input_file, encoding='utf-8') as f: text = f.read() converter = TextConverter(text=text, filename=None, max_vocab=FLAGS.max_vocab) converter.save_to_file(os.path.join(model_path, 'converter.pkl')) arr = converter.text_to_arr(text=text) print(converter.vocab_size()) g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps) model = CharRNN(num_class=converter.vocab_size(), num_seqs=FLAGS.num_seqs, num_steps=FLAGS.num_steps, lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers, learn_rate=FLAGS.learning_rate, train_keep_prob=FLAGS.train_keep_prob, use_embedding=FLAGS.use_embedding, embedding_size=FLAGS.embedding_size) model.train( batch_generate=g, max_steps=FLAGS.max_steps, save_path=model_path, save_per_n=FLAGS.save_per_n, print_per_n=FLAGS.print_per_n, )
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    print(model_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
        path_exist = False
    else:
        path_exist = True
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    print(converter.vocab_size)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    # Resume from the latest checkpoint when the model directory already existed
    # (guard against an existing but empty directory).
    model_file_path = tf.train.latest_checkpoint(model_path)
    if path_exist and model_file_path is not None:
        model.load(model_file_path)
        # Recover the global step from the checkpoint index files,
        # whose names look like 'model-<step>.index'.
        indexes = []
        for dirpath, dirnames, filenames in os.walk(model_path):
            for name in filenames:
                filepath = os.path.join(dirpath, name)
                if filepath.endswith(".index"):
                    indexes.append(int(name[6:-6]))
        indexes.sort()
        model.step = indexes[-1]
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def train(model: CharRNN, optimizer: optim.Optimizer, criterion, inputs, targets):
    model.train()
    optimizer.zero_grad()
    hidden = None
    total_loss = 0
    # Feed the sequence one character at a time, threading the hidden state through.
    for i in range(inputs.shape[0]):
        output, hidden = model(inputs[i].unsqueeze(0).unsqueeze(0).float(), hidden)
        loss = criterion(output.squeeze(0), targets[i].unsqueeze(0).long())
        total_loss += loss
    # Backpropagate once through the loss accumulated over the whole sequence.
    total_loss.backward()
    optimizer.step()
    return output, total_loss.item() / inputs.shape[0]
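# A usage sketch for the per-character training step above. The tensor names
# and shapes are assumptions, not from the source: the loop expects 1-D
# tensors of character codes, with next-char targets shifted by one position.
#
#   model = CharRNN(...)                                  # defined elsewhere
#   optimizer = optim.Adam(model.parameters(), lr=1e-3)
#   criterion = nn.CrossEntropyLoss()
#   seq = torch.tensor([2, 7, 1, 8, 2, 8])                # encoded characters
#   inputs, targets = seq[:-1], seq[1:]
#   output, avg_loss = train(model, optimizer, criterion, inputs, targets)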
def run():
    model_path = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    dict_path = os.path.join(model_path, 'dictionary.txt')
    word2id_path = os.path.join(model_path, 'word2id.pkl')
    feature_path = os.path.join(model_path, 'feature.npy')
    label_path = os.path.join(model_path, 'label.npy')
    print("data_path " + args.data_path)
    print("model_path " + model_path)
    print("dict_path " + dict_path)
    print("word2id_path " + word2id_path)
    print("feature_path " + feature_path)
    print("label_path " + label_path)
    data_maker = MakeData(raw_path=args.data_path,
                          dict_path=dict_path,
                          word2id_path=word2id_path,
                          feature_path=feature_path,
                          label_path=label_path,
                          low_frequency=0)
    data_maker.pretreatment_data()
    data_maker.delete_low_frequency()
    with open(args.data_path, 'r', encoding='utf-8') as file:
        content = file.read()
    text_array = data_maker.text2array(content)
    data_maker.save_dictionary()
    data_maker.make_data(text_array, args.batch_size, args.steps)
    model = CharRNN(vocab_size=data_maker.vocab_size,
                    feature_path=feature_path,
                    label_path=label_path,
                    lstm_size=args.lstm_size,
                    dropout_rate=args.dropout_rate,
                    embedding_size=args.embedding_size)
    model.train(data_stream=data_stream(feature_path, label_path, data_maker.vocab_size),
                epochs=args.epochs,
                model_path=os.path.join(model_path, 'checkpoint'))
def main(_):
    if not os.path.exists(FLAGS.model_name):
        os.mkdir(FLAGS.model_name)
    converter = TextTransform(FLAGS.input_file, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(FLAGS.model_name, 'converter.pkl'))
    arr = converter.text_to_arr(converter.text)
    g = batch_generate(arr, FLAGS.num_seqs, FLAGS.num_steps, FLAGS.epoch_size)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.model_name, FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with open(FLAGS.input_file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    tc = util.TextConverter(text, FLAGS.max_vocab)
    tc.save_vocab(os.path.join('vocab', FLAGS.name))
    output_size = tc.vocab_size
    batch_generator = util.batch_generator(tc.text_to_arr(text),
                                           FLAGS.batch_size, FLAGS.seq_size)
    model = CharRNN(output_size=output_size,
                    batch_size=FLAGS.batch_size,
                    seq_size=FLAGS.seq_size,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob)
    model.train(batch_generator,
                max_steps=FLAGS.max_steps,
                model_save_path=model_path,
                save_with_steps=FLAGS.save_every_n_steps,
                log_with_steps=FLAGS.log_every_n_steps)
def main(_):
    ## Preprocess the data: the TextConverter class from the read_utils.py module
    ## selects characters by frequency and maps each one to an index; the
    ## batch_generator function then yields training batches.
    model_path = os.path.join('model', FLAGS.name)  # join the path
    print("Model save path: ", model_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)  # create the directory recursively
    # To read Chinese characters, open the file via codecs with encoding='utf-8'.
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        print("Training data source:", FLAGS.input_file)
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)  # build the vocabulary
    # Pickle the frequency-selected characters.
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    arr = converter.text_to_arr(text)  # the index of every character
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)  # batch generator
    print(converter.vocab_size)  # print the vocabulary size
    ## With the data prepared, build the recurrent network with the CharRNN class
    ## from model.py, then call train() to fit it.
    model = CharRNN(converter.vocab_size,  # number of character classes
                    num_seqs=FLAGS.num_seqs,  # sequences per batch
                    num_steps=FLAGS.num_steps,  # characters per sequence
                    lstm_size=FLAGS.lstm_size,  # units per cell
                    num_layers=FLAGS.num_layers,  # number of RNN layers
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    arr, converter = initialize_converter(model_path)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    model = CharRNN(num_classes=converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size,
                    text_converter=converter)
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
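# The snippet above delegates corpus loading to an initialize_converter()
# helper that is not shown. Judging from the sibling snippets in this section,
# it bundles the directory setup, the corpus read, and the converter
# construction; a sketch under that assumption:
def initialize_converter(model_path):
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    return converter.text_to_arr(text), converter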
class Trainer(object):
    def __init__(self, args):
        self.args = args
        self.device = torch.device('cuda' if self.args.cuda else 'cpu')
        self.convert = None
        self.model = None
        self.optimizer = None
        self.criterion = self.get_loss
        self.meter = AverageValueMeter()
        self.train_loader = None
        self.get_data()
        self.get_model()
        self.get_optimizer()

    def get_data(self):
        self.convert = TextConverter(self.args.txt, max_vocab=self.args.max_vocab)
        dataset = TextDataset(self.args.txt, self.args.len, self.convert.text_to_arr)
        self.train_loader = DataLoader(dataset,
                                       self.args.batch_size,
                                       shuffle=True,
                                       num_workers=self.args.num_workers)

    def get_model(self):
        self.model = CharRNN(self.convert.vocab_size, self.args.embed_dim,
                             self.args.hidden_size, self.args.num_layers,
                             self.args.dropout, self.args.cuda).to(self.device)
        if self.args.cuda:
            cudnn.benchmark = True

    def get_optimizer(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.lr)
        self.optimizer = ScheduledOptim(optimizer)

    @staticmethod
    def get_loss(score, label):
        return nn.CrossEntropyLoss()(score, label.view(-1))

    def save_checkpoint(self, epoch):
        if (epoch + 1) % self.args.save_interval == 0:
            model_out_path = self.args.save_file + "epoch_{}_model.pth".format(epoch + 1)
            torch.save(self.model, model_out_path)
            print("Checkpoint saved to {}".format(model_out_path))

    def save(self):
        model_out_path = self.args.save_file + "final_model.pth"
        torch.save(self.model, model_out_path)
        print("Final model saved to {}".format(model_out_path))

    @staticmethod
    def pick_top_n(predictions, top_n=5):
        top_predict_prob, top_predict_label = torch.topk(predictions, top_n, 1)
        top_predict_prob /= torch.sum(top_predict_prob)
        top_predict_prob = top_predict_prob.squeeze(0).cpu().numpy()
        top_predict_label = top_predict_label.squeeze(0).cpu().numpy()
        c = np.random.choice(top_predict_label, size=1, p=top_predict_prob)
        return c

    def train(self):
        self.meter.reset()
        self.model.train()
        for x, y in tqdm(self.train_loader):
            y = y.long()
            x, y = x.to(self.device), y.to(self.device)
            # Forward.
            score, _ = self.model(x)
            loss = self.criterion(score, y)
            # Backward.
            self.optimizer.zero_grad()
            loss.backward()
            # Clip gradients.
            nn.utils.clip_grad_norm_(self.model.parameters(), 5)
            self.optimizer.step()
            self.meter.add(loss.item())
        print('perplexity: {}'.format(np.exp(self.meter.value()[0])))

    def test(self):
        self.model.eval()
        begin = np.array([i for i in self.args.begin])
        begin = np.random.choice(begin, size=1)
        text_len = self.args.predict_len
        samples = [self.convert.word_to_int(c) for c in begin]
        input_txt = torch.LongTensor(samples)[None]
        input_txt = input_txt.to(self.device)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]
        with torch.no_grad():
            for i in range(text_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(self.device)
                result.append(prediction[0])
        print(self.convert.arr_to_text(result))

    def predict(self):
        self.model.eval()
        samples = [self.convert.word_to_int(c) for c in self.args.begin]
        input_txt = torch.LongTensor(samples)[None].to(self.device)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]
        with torch.no_grad():
            for i in range(self.args.predict_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(self.device)
                result.append(prediction[0])
        print(self.convert.arr_to_text(result))

    def run(self):
        for e in range(self.args.max_epoch):
            print('===> EPOCH: {}/{}'.format(e + 1, self.args.max_epoch))
            self.train()
            self.test()
            self.save_checkpoint(e)
        self.save()
def train(opt, x_train, x_val, dictionary_len):
    '''
    Train a CharRNN network.

    Arguments
    ---------
    opt: parsed command-line options (batch size, sequence length,
         learning rate, dropout, scheduler and logging flags, ...)
    x_train: training data (text)
    x_val: validation data (text)
    dictionary_len: size of the character dictionary
    '''
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)

    # Hyperparameters.
    batch_size = opt.batch_size
    seq_length = int(opt.seq_length)
    epochs = 50

    if torch.cuda.is_available():
        device = "cuda"
        torch.cuda.manual_seed_all(0)
    else:
        device = "cpu"
    print(device)

    date = datetime.now().strftime('%y%m%d%H%M%S')
    if opt.nologs:
        writer = SummaryWriter(log_dir='logs/nologs/')
    else:
        writer = SummaryWriter(log_dir=f'logs/logs_{date}/')

    y_train = get_labels_text_prediction(x_train)
    train_dataset = TextDataset(x_train, y_train, max_len=seq_length)
    if not opt.onlytrain:
        y_val = get_labels_text_prediction(x_val)
        val_dataset = TextDataset(x_val, y_val, max_len=seq_length)
        val_loader = DataLoader(dataset=val_dataset,
                                pin_memory=device == 'cuda',
                                batch_size=batch_size,
                                shuffle=False)
    train_loader = DataLoader(dataset=train_dataset,
                              pin_memory=device == 'cuda',
                              batch_size=batch_size,
                              shuffle=True)

    model_params = {
        'dictionary_len': dictionary_len,
        'dropout': opt.dropout,
        'hidden_size': opt.hidden_size,
        'layers': opt.layers,
        'embedding_len': 32,
        'device': device,
        'lr': opt.lr
    }
    model = CharRNN(**model_params).to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.scheduler:
        scheduler = ReduceLROnPlateau(optimizer, 'min', cooldown=3,
                                      factor=0.5, patience=10)

    global_step = 0
    for j in trange(epochs, desc='Training LSTM...'):
        for i, (x, y) in enumerate(train_loader):
            # Skip the last (ragged) batch; padding is not implemented yet.
            if i == len(train_loader) - 1:
                continue
            model.train()
            x = x.to(device)
            y = y.to(device)

            # Delete past gradients.
            optimizer.zero_grad()

            # Forward pass.
            pred, (state_h, state_c) = model(x)

            # Calculate the loss over the flattened sequence.
            pred2 = pred.view(-1, dictionary_len)
            y2 = y.view(-1)
            loss = criterion(pred2, y2)
            loss_value = loss.item()

            # Backward pass.
            loss.backward()

            # Minimize the loss.
            optimizer.step()
            global_step += 1

            if i % 100 == 0:
                writer.add_scalar('train/loss', loss_value, global_step)
                print('[Training epoch {}: {}/{}] Loss: {}'.format(
                    j, i, len(train_loader), loss_value))

        if not opt.onlytrain:
            val_loss = []
            for i, (x, y) in enumerate(val_loader):
                # Skip the last (ragged) batch here as well.
                if i == len(val_loader) - 1:
                    continue
                model.eval()
                x = x.to(device)
                y = y.to(device)

                # Forward pass only; no backpropagation.
                pred, (state_h, state_c) = model(x)

                # Calculate the loss.
                # pred: [batch x seq_length x dictionary_len], y: [batch x seq_length]
                pred2 = pred.view(-1, dictionary_len)
                y2 = y.view(-1)
                loss = criterion(pred2, y2)
                val_loss.append(loss.item())

                if i % 50 == 0:
                    print('[Validation epoch {}: {}/{}] Loss: {}'.format(
                        j, i, len(val_loader), loss.item()))

            writer.add_scalar('val/loss', np.mean(val_loss), j)
            if opt.scheduler:
                scheduler.step(np.mean(val_loss))
            writer.add_scalar("lr", optimizer.param_groups[0]["lr"], j)

            predicted_words = inference_prediction(model, device, 500)
            print(predicted_words)
            writer.add_text('val/Generated_Samples', predicted_words, j)

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        os.makedirs("weights/{}".format(date), exist_ok=True)
        torch.save(checkpoint, "weights/{}/checkpoint_{}.pt".format(date, j))
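# The train() function above leans on a get_labels_text_prediction() helper
# that is not shown. For next-character prediction the labels are normally the
# inputs shifted one position to the left; a sketch under that assumption (the
# helper name comes from the snippet, the body and the assumption that x is an
# already-encoded index array are guesses):
import numpy as np


def get_labels_text_prediction(x):
    # Shift the sequence left by one so y[t] is the character that follows x[t].
    return np.roll(np.asarray(x), -1)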