parser.add_argument('--smooth_weight', default=0.5, type=float,
                    help="Smooth weight for loss")
parser.add_argument('--mask_weight', default=0.5, type=float,
                    help="Explainability mask weight for regularization")
parser.add_argument('--exp_regularization_weight', default=0.0, type=float,
                    help="Weight for the regularization term for the exp mask")

if __name__ == '__main__':
    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    args = parser.parse_args()
    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)
    args = vars(args)
    args['seed'] = seed
    model = Model(**args)
    model.train()
print('quantile 25 percent: %.2f' % np.quantile(cc, 0.25))
print('quantile 50 percent: %.2f' % np.quantile(cc, 0.50))
print('quantile 75 percent: %.2f' % np.quantile(cc, 0.75))

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

sampler = WarpSampler(user_train, usernum, itemnum,
                      batch_size=args.batch_size, maxlen=args.maxlen,
                      n_workers=3)
model = Model(usernum, itemnum, args)
# sess.run(tf.global_variables_initializer())

aug_data_signature = './aug_data/{}/lr_{}_maxlen_{}_hsize_{}_nblocks_{}_drate_{}_l2_{}_nheads_{}_gen_num_{}_M_{}'.format(
    args.dataset, args.lr, args.maxlen, args.hidden_units, args.num_blocks,
    args.dropout_rate, args.l2_emb, args.num_heads, args.reversed_gen_number, args.M)
print(aug_data_signature)

model_signature = 'lr_{}_maxlen_{}_hsize_{}_nblocks_{}_drate_{}_l2_{}_nheads_{}_gen_num_{}'.format(
    args.lr, args.maxlen, args.hidden_units, args.num_blocks,
    args.dropout_rate, args.l2_emb, args.num_heads, 5)

if not os.path.isdir('./aug_data/' + args.dataset):
    os.mkdir('./aug_data/' + args.dataset)
import utils

trial_data = utils.load_data('./data/trial-data-processed.json')
train_data = utils.load_data('./data/train-data-processed.json')
dev_data = utils.load_data('./data/dev-data-processed.json')
test_data = utils.load_data('./data/test-data-processed.json')
utils.build_vocab(trial_data + train_data + dev_data + test_data)

# The trial split is folded into the training set.
train_data = utils.load_data('./data/train-data-processed.json')
train_data += utils.load_data('./data/trial-data-processed.json')
dev_data = utils.load_data('./data/dev-data-processed.json')
if args.test_mode:
    # use validation data as training data
    train_data += dev_data
    dev_data = []

model = Model(args)
best_dev_acc = 0.0
os.makedirs('./checkpoint', exist_ok=True)
checkpoint_path = './checkpoint/%d-%s.mdl' % (args.seed, datetime.now().isoformat())
print('Trained model will be saved to %s' % checkpoint_path)

for i in range(args.epoch):
    print('Epoch %d...' % i)
    if i == 0:
        dev_acc = model.evaluate(dev_data)
        print('Dev accuracy: %f' % dev_acc)
    start_time = time.time()
    np.random.shuffle(train_data)
    cur_train_data = train_data
# The fragment below begins mid-function; the enclosing definition is assumed,
# and the earlier lines (e.g. the initialization of score_arr) are truncated in
# the source.
def _test(sess, model):
    predicted_users_num = 0
    print("test sub items")
    for _, uij in DataInputTest(test_set, predict_batch_size):
        if predicted_users_num >= predict_users_num:
            break
        score_ = model.test(sess, uij)
        score_arr.append(score_)
        predicted_users_num += predict_batch_size
    return score_[0]


tf.reset_default_graph()
gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    model = Model(user_count, item_count, cate_count, cate_list,
                  predict_batch_size, predict_ads_num)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    print('test_gauc: %.4f\t test_auc: %.4f' % _eval(sess, model))
    sys.stdout.flush()

    lr = 1.0
    start_time = time.time()
    for _ in range(50):
        random.shuffle(train_set)
        epoch_size = round(len(train_set) / train_batch_size)
        loss_sum = 0.0
        for _, uij in DataInput(train_set, train_batch_size):
            loss = model.train(sess, uij, lr)
def add_nml_entry(self, name, text, attributes):
    # `text` avoids shadowing the builtin `str`
    self.project_info.append(
        Model(text.strip(), attributes, diag_specific_model=False))
# Everyone's cell
# https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
import gym
import numpy as np
from tqdm import tqdm

from memory import Memory
from model import Model

env = gym.make('CartPole-v1')
model = Model()
memory = Memory()
games = 1000
average_steps = []

# Play random games to fill the memory
for game in range(games):
    observation = env.reset()
    plays = []
    actions = []
    step = 0
    while True:
        step += 1
        action = env.action_space.sample()
        plays.append(observation)
        actions.append(action)
        next_observation, reward, done, _ = env.step(action)
        if done:
            memory.addGame(plays, actions)
            break
        observation = next_observation
    average_steps.append(step)
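
# The imported Memory class is not shown in this excerpt; a minimal sketch
# consistent with the single memory.addGame(plays, actions) call above could
# look like the following (the class body itself is an assumption):
class Memory:
    def __init__(self):
        self.games = []

    def addGame(self, plays, actions):
        # Store one episode as parallel lists of observations and actions.
        self.games.append((list(plays), list(actions)))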
# The fragment below begins mid-loop; the function header, the loop, and the
# model.eval call are assumptions reconstructed from the surviving body.
def _eval(sess, model):
    auc_sum = 0.0
    score_arr = []
    for _, uij in DataInputTest(test_set, test_batch_size):
        auc_, score_ = model.eval(sess, uij)
        score_arr.append(score_)
        auc_sum += auc_ * len(uij[0])
    test_gauc = auc_sum / len(test_set)
    Auc = calc_auc(score_arr)
    global best_auc
    if best_auc < test_gauc:
        best_auc = test_gauc
        model.save(sess, 'D://data/tf/din/ckpt')
    return test_gauc, Auc


with tf.Session() as sess:
    model = Model(user_count, item_count, cate_count, cate_list)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    lr = 1.0
    start_time = time.time()
    for _ in range(50):
        random.shuffle(train_set)
        epoch_size = round(len(train_set) / train_batch_size)
        loss_sum = 0.0
def synthesize(session_config):
    Hp.numgpus = 1

    # Load data
    # transcripts
    char2idx, idx2char = data.load_vocab()
    lines = codecs.open(Hp.synthes_data_text, 'r', 'utf-8').readlines()[1:]  # skip the header line
    # skip the first field (number), normalize the text, append E (EOS)
    lines_normalize = [
        data.text_normalize(line.split(" ", 1)[-1]).strip() + u"␃"
        for line in lines
    ]
    transcripts = np.zeros((len(lines), Hp.num_charac), np.int32)
    for i, line in enumerate(lines_normalize):
        transcripts[i, :len(line)] = [char2idx[char] for char in line]

    # tile each transcript to a batch; the batch size is the number of ref kinds (16)
    transcripts_num = transcripts.shape[0]  # number of transcripts
    transcripts = np.tile(transcripts, (1, Hp.synthes_batch_size))
    transcripts = transcripts.reshape(transcripts_num, Hp.synthes_batch_size, Hp.num_charac)

    # reference audios
    mels, maxlen = [], 0
    files = [
        os.path.join(Hp.synthes_ref_audio_dir, x)
        for x in os.listdir(Hp.synthes_ref_audio_dir)
    ]
    for f_path in files:
        _, mel, _ = signal_process.load_spectrograms(f_path)
        # mel = np.reshape(mel, (-1, Hp.num_mels))
        maxlen = max(maxlen, mel.shape[0])
        mels.append(mel)
    assert len(mels) == Hp.synthes_batch_size

    ref = np.zeros((len(mels), maxlen, Hp.num_mels * Hp.reduction_factor), np.float32)
    for i, m in enumerate(mels):
        ref[i, :m.shape[0], :] = m
    ref_lens = np.ones((len(mels), 1), np.int32) * maxlen
    speaker = np.ones((len(mels), 1), np.int32)

    # Load graph
    model = Model(mode="synthes")
    print("Synthesize Graph Loaded")
    saver = tf.train.Saver()

    save_sample_dir = os.path.join(Hp.logdir, "synthesize")
    if not os.path.exists(save_sample_dir):
        os.mkdir(save_sample_dir)

    with tf.Session(config=session_config) as sess:
        latest_model = tf.train.latest_checkpoint(os.path.join(Hp.logdir, "models"))
        if Hp.restore_model is not None and Hp.restore_model != latest_model:
            print("Restore Model from Specific Model")
            restore_model = Hp.restore_model
        else:
            print("Restore Model from Last Checkpoint")
            restore_model = latest_model
        saver.restore(sess, restore_model)

        for text_idx in range(transcripts_num):
            mag_hats, aligns = sess.run(
                [model.mag_hat, model.alignments], {
                    model.inputs_transcript[0]: transcripts[text_idx],
                    model.inputs_reference[0]: ref,
                    model.inputs_ref_lens[0]: ref_lens,
                    model.inputs_speaker[0]: speaker
                })
            save_sample_path = os.path.join(save_sample_dir, "sample_{}".format(text_idx + 1))
            if not os.path.exists(save_sample_path):
                os.mkdir(save_sample_path)
            for i in range(len(mag_hats)):
                wav_hat = signal_process.spectrogrom2wav(mag_hats[i])
                write(os.path.join(save_sample_path, 'style_{}.wav'.format(i + 1)),
                      Hp.sample_rate, wav_hat)
                signal_process.plot_alignment(aligns[i], gs=i + 1,
                                              mode="save_fig",
                                              path=save_sample_path)
            print("Done! Synthesize for sample {}".format(text_idx + 1))
        print("All jobs Done!")
def train(opt):
    plotDir = os.path.join(opt.exp_dir, opt.exp_name, 'plots')
    if not os.path.exists(plotDir):
        os.makedirs(plotDir)

    lib.print_model_settings(locals().copy())

    """ dataset preparation """
    if not opt.data_filtering_off:
        print('Filtering the images containing characters which are not in opt.character')
        print('Filtering the images whose label is longer than opt.batch_max_length')
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')

    # considering the real images for the discriminator
    opt.batch_size = opt.batch_size * 2

    train_dataset = Batch_Balanced_Dataset(opt)

    log = open(os.path.join(opt.exp_dir, opt.exp_name, 'log_dataset.txt'), 'a')
    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    valid_dataset, valid_dataset_log = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=False,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    log.write(valid_dataset_log)
    print('-' * 80)
    log.write('-' * 80 + '\n')
    log.close()

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3

    model = AdaINGenV2(opt)
    ocrModel = Model(opt)
    disModel = MsImageDis(opt)

    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size, opt.num_class,
          opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for currModel in [model, ocrModel, disModel]:
        for name, param in currModel.named_parameters():
            if 'localization_fc2' in name:
                print(f'Skip {name} as it is already initialized')
                continue
            try:
                if 'bias' in name:
                    init.constant_(param, 0.0)
                elif 'weight' in name:
                    init.kaiming_normal_(param)
            except Exception:  # for batchnorm.
                if 'weight' in name:
                    param.data.fill_(1)
                continue

    # data parallel for multi-GPU
    ocrModel = torch.nn.DataParallel(ocrModel).to(device)
    if not opt.ocrFixed:
        ocrModel.train()
    else:
        ocrModel.module.Transformation.eval()
        ocrModel.module.FeatureExtraction.eval()
        ocrModel.module.AdaptiveAvgPool.eval()
        # ocrModel.module.SequenceModeling.eval()
        ocrModel.module.Prediction.eval()

    model = torch.nn.DataParallel(model).to(device)
    model.train()
    disModel = torch.nn.DataParallel(disModel).to(device)
    disModel.train()

    # loading pre-trained models
    if opt.saved_ocr_model != '' and opt.saved_ocr_model != 'None':
        print(f'loading pretrained ocr model from {opt.saved_ocr_model}')
        if opt.FT:
            ocrModel.load_state_dict(torch.load(opt.saved_ocr_model), strict=False)
        else:
            ocrModel.load_state_dict(torch.load(opt.saved_ocr_model))
    print("OCRModel:")
    print(ocrModel)

    if opt.saved_synth_model != '' and opt.saved_synth_model != 'None':
        print(f'loading pretrained synth model from {opt.saved_synth_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_synth_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_synth_model))
    print("SynthModel:")
    print(model)

    if opt.saved_dis_model != '' and opt.saved_dis_model != 'None':
        print(f'loading pretrained discriminator model from {opt.saved_dis_model}')
        if opt.FT:
            disModel.load_state_dict(torch.load(opt.saved_dis_model), strict=False)
        else:
            disModel.load_state_dict(torch.load(opt.saved_dis_model))
    print("DisModel:")
    print(disModel)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        ocrCriterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        ocrCriterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0

    recCriterion = torch.nn.L1Loss()
    styleRecCriterion = torch.nn.L1Loss()

    # loss averagers
    loss_avg_ocr = Averager()
    loss_avg = Averager()
    loss_avg_dis = Averager()
    loss_avg_ocrRecon_1 = Averager()
    loss_avg_ocrRecon_2 = Averager()
    loss_avg_gen = Averager()
    loss_avg_imgRecon = Averager()
    loss_avg_styRecon = Averager()

    ##---------------------------------------##
    # filter the parameters that require gradient descent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.optim == 'adam':
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr,
                               betas=(opt.beta1, opt.beta2), weight_decay=opt.weight_decay)
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr,
                                   rho=opt.rho, eps=opt.eps, weight_decay=opt.weight_decay)
    print("SynthOptimizer:")
    print(optimizer)

    # filter the generator parameters that require gradient descent
    gen_filtered_parameters = []
    gen_params_num = []
    for name, p in model.named_parameters():
        if p.requires_grad and not ('enc_style' in name):
            gen_filtered_parameters.append(p)
            gen_params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(gen_params_num))

    # setup optimizer
    if opt.optim == 'adam':
        gen_optimizer = optim.Adam(gen_filtered_parameters, lr=opt.lr,
                                   betas=(opt.beta1, opt.beta2), weight_decay=opt.weight_decay)
    else:
        gen_optimizer = optim.Adadelta(gen_filtered_parameters, lr=opt.lr,
                                       rho=opt.rho, eps=opt.eps, weight_decay=opt.weight_decay)
    print("GenOptimizer:")
    print(gen_optimizer)

    # filter the parameters for OCR training
    ocr_filtered_parameters = []
    ocr_params_num = []
    for p in filter(lambda p: p.requires_grad, ocrModel.parameters()):
        ocr_filtered_parameters.append(p)
        ocr_params_num.append(np.prod(p.size()))
    print('OCR Trainable params num : ', sum(ocr_params_num))

    # setup optimizer
    if opt.optim == 'adam':
        ocr_optimizer = optim.Adam(ocr_filtered_parameters, lr=opt.lr,
                                   betas=(opt.beta1, opt.beta2), weight_decay=opt.weight_decay)
    else:
        ocr_optimizer = optim.Adadelta(ocr_filtered_parameters, lr=opt.lr,
                                       rho=opt.rho, eps=opt.eps, weight_decay=opt.weight_decay)
    print("OCROptimizer:")
    print(ocr_optimizer)

    # filter the parameters for discriminator training
    dis_filtered_parameters = []
    dis_params_num = []
    for p in filter(lambda p: p.requires_grad, disModel.parameters()):
        dis_filtered_parameters.append(p)
        dis_params_num.append(np.prod(p.size()))
    print('Dis Trainable params num : ', sum(dis_params_num))

    # setup optimizer
    if opt.optim == 'adam':
        dis_optimizer = optim.Adam(dis_filtered_parameters, lr=opt.lr,
                                   betas=(opt.beta1, opt.beta2), weight_decay=opt.weight_decay)
    else:
        dis_optimizer = optim.Adadelta(dis_filtered_parameters, lr=opt.lr,
                                       rho=opt.rho, eps=opt.eps, weight_decay=opt.weight_decay)
    print("DisOptimizer:")
    print(dis_optimizer)
    ##---------------------------------------##

    """ final options """
    with open(os.path.join(opt.exp_dir, opt.exp_name, 'opt.txt'), 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.saved_synth_model != '':
        try:
            start_iter = int(opt.saved_synth_model.split('_')[-1].split('.')[0])
            print(f'continue to train, start_iter: {start_iter}')
        except:
            pass

    # get schedulers
    scheduler = get_scheduler(optimizer, opt)
    ocr_scheduler = get_scheduler(ocr_optimizer, opt)
    dis_scheduler = get_scheduler(dis_optimizer, opt)
    gen_scheduler = get_scheduler(gen_optimizer, opt)

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1
    best_accuracy_ocr = -1
    best_norm_ED_ocr = -1
    iteration = start_iter
    cntr = 0

    while True:
        # train part
        if opt.lr_policy != "None":
            scheduler.step()
            ocr_scheduler.step()
            dis_scheduler.step()
            gen_scheduler.step()

        image_tensors_all, labels_1_all, labels_2_all = train_dataset.get_batch()
        cntr += 1
        disCnt = int(image_tensors_all.size(0) / 2)
        image_tensors, image_tensors_real, labels_gt, labels_2 = (
            image_tensors_all[:disCnt], image_tensors_all[disCnt:disCnt + disCnt],
            labels_1_all[:disCnt], labels_2_all[:disCnt])

        image = image_tensors.to(device)
        image_real = image_tensors_real.to(device)
        batch_size = image.size(0)

        ##-----------------------------------##
        # generate text (labels) from ocr.forward
        if opt.ocrFixed:
            # ocrModel.eval()
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
            if 'CTC' in opt.Prediction:
                preds = ocrModel(image, text_for_pred)
                preds = preds[:, :text_for_pred.shape[1] - 1, :]  # the source sliced with an undefined name (text_for_loss); text_for_pred is assumed here
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                labels_1 = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = ocrModel(image, text_for_pred, is_train=False)
                _, preds_index = preds.max(2)
                labels_1 = converter.decode(preds_index, length_for_pred)
                for idx, pred in enumerate(labels_1):
                    pred_EOS = pred.find('[s]')
                    labels_1[idx] = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
            # ocrModel.train()
        else:
            labels_1 = labels_gt
        ##-----------------------------------##

        text_1, length_1 = converter.encode(labels_1, batch_max_length=opt.batch_max_length)
        text_2, length_2 = converter.encode(labels_2, batch_max_length=opt.batch_max_length)

        # forward pass through the style and word generator
        images_recon_1, images_recon_2, style = model(image, text_1, text_2)

        if 'CTC' in opt.Prediction:
            if not opt.ocrFixed:
                # OCR training with the original image
                preds_ocr = ocrModel(image, text_1)
                preds_size_ocr = torch.IntTensor([preds_ocr.size(1)] * batch_size)
                preds_ocr = preds_ocr.log_softmax(2).permute(1, 0, 2)
                ocrCost_train = ocrCriterion(preds_ocr, text_1, preds_size_ocr, length_1)

            # content loss for the reconstructed images
            preds_1 = ocrModel(images_recon_1, text_1)
            preds_size_1 = torch.IntTensor([preds_1.size(1)] * batch_size)
            preds_1 = preds_1.log_softmax(2).permute(1, 0, 2)

            preds_2 = ocrModel(images_recon_2, text_2)
            preds_size_2 = torch.IntTensor([preds_2.size(1)] * batch_size)
            preds_2 = preds_2.log_softmax(2).permute(1, 0, 2)

            ocrCost_1 = ocrCriterion(preds_1, text_1, preds_size_1, length_1)
            ocrCost_2 = ocrCriterion(preds_2, text_2, preds_size_2, length_2)
            # ocrCost = 0.5 * (ocrCost_1 + ocrCost_2)
        else:
            if not opt.ocrFixed:
                # OCR training with the original image
                preds_ocr = ocrModel(image, text_1[:, :-1])  # align with Attention.forward
                target_ocr = text_1[:, 1:]  # without [GO] symbol
                ocrCost_train = ocrCriterion(preds_ocr.view(-1, preds_ocr.shape[-1]),
                                             target_ocr.contiguous().view(-1))

            # content loss for the reconstructed images
            preds_1 = ocrModel(images_recon_1, text_1[:, :-1], is_train=False)  # align with Attention.forward
            target_1 = text_1[:, 1:]  # without [GO] symbol
            preds_2 = ocrModel(images_recon_2, text_2[:, :-1], is_train=False)  # align with Attention.forward
            target_2 = text_2[:, 1:]  # without [GO] symbol

            ocrCost_1 = ocrCriterion(preds_1.view(-1, preds_1.shape[-1]), target_1.contiguous().view(-1))
            ocrCost_2 = ocrCriterion(preds_2.view(-1, preds_2.shape[-1]), target_2.contiguous().view(-1))
            # ocrCost = 0.5 * (ocrCost_1 + ocrCost_2)

        if not opt.ocrFixed:
            # training OCR
            ocrModel.zero_grad()
            ocrCost_train.backward()
            # torch.nn.utils.clip_grad_norm_(ocrModel.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
            ocr_optimizer.step()
            loss_avg_ocr.add(ocrCost_train)
        else:
            # if OCR is fixed, ignore this loss
            loss_avg_ocr.add(torch.tensor(0.0))

        # Domain discriminator: Dis update
        disCost = opt.disWeight * 0.5 * (
            disModel.module.calc_dis_loss(images_recon_1.detach(), image_real)
            + disModel.module.calc_dis_loss(images_recon_2.detach(), image))
        disModel.zero_grad()
        disCost.backward()
        # torch.nn.utils.clip_grad_norm_(disModel.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        dis_optimizer.step()
        loss_avg_dis.add(disCost)

        # [Style Encoder] + [Word Generator] update
        # adversarial loss
        disGenCost = 0.5 * (disModel.module.calc_gen_loss(images_recon_1)
                            + disModel.module.calc_gen_loss(images_recon_2))
        # input reconstruction loss
        recCost = recCriterion(images_recon_1, image)
        # OCR content cost
        ocrCost = 0.5 * (ocrCost_1 + ocrCost_2)

        cost = opt.ocrWeight * ocrCost + opt.reconWeight * recCost + opt.disWeight * disGenCost
        model.zero_grad()
        ocrModel.zero_grad()
        disModel.zero_grad()
        cost.backward(retain_graph=True)
        # torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        # pair style reconstruction loss
        if opt.styleReconWeight == 0.0:
            styleRecCost = torch.tensor(0.0)
        else:
            # if opt.styleDetach:
            #     styleRecCost = styleRecCriterion(model(images_recon_2, None, None, styleFlag=True), style.detach())
            # else:
            #     styleRecCost = styleRecCriterion(model(images_recon_2, None, None, styleFlag=True), style)
            predStyle = model(images_recon_2, None, None, styleFlag=True)
            styleRecCost = styleRecCriterion(predStyle, style.detach())

            model.zero_grad()
            styleRecCost.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
            gen_optimizer.step()

        loss_avg.add(cost)

        # individual losses
        loss_avg_ocrRecon_1.add(opt.ocrWeight * 0.5 * ocrCost_1)
        loss_avg_ocrRecon_2.add(opt.ocrWeight * 0.5 * ocrCost_2)
        loss_avg_gen.add(opt.disWeight * disGenCost)
        loss_avg_imgRecon.add(opt.reconWeight * recCost)
        loss_avg_styRecon.add(styleRecCost)

        # validation part
        if (iteration + 1) % opt.valInterval == 0 or iteration == 0:
            # To see training progress, we also conduct validation when 'iteration == 0'

            # save training images
            os.makedirs(os.path.join(opt.exp_dir, opt.exp_name, 'trainImages', str(iteration)), exist_ok=True)
            for trImgCntr in range(batch_size):
                try:
                    save_image(tensor2im(image[trImgCntr].detach()),
                               os.path.join(opt.exp_dir, opt.exp_name, 'trainImages', str(iteration),
                                            str(trImgCntr) + '_input_' + labels_gt[trImgCntr] + '.png'))
                    save_image(tensor2im(images_recon_1[trImgCntr].detach()),
                               os.path.join(opt.exp_dir, opt.exp_name, 'trainImages', str(iteration),
                                            str(trImgCntr) + '_recon_' + labels_1[trImgCntr] + '.png'))
                    save_image(tensor2im(images_recon_2[trImgCntr].detach()),
                               os.path.join(opt.exp_dir, opt.exp_name, 'trainImages', str(iteration),
                                            str(trImgCntr) + '_pair_' + labels_2[trImgCntr] + '.png'))
                except:
                    print('Warning while saving training image')

            elapsed_time = time.time() - start_time
            # for log
            with open(os.path.join(opt.exp_dir, opt.exp_name, 'log_train.txt'), 'a') as log:
                model.eval()
                ocrModel.module.Transformation.eval()
                ocrModel.module.FeatureExtraction.eval()
                ocrModel.module.AdaptiveAvgPool.eval()
                ocrModel.module.SequenceModeling.eval()
                ocrModel.module.Prediction.eval()
                disModel.eval()

                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation_synth_lrw_res(
                        iteration, model, ocrModel, disModel, recCriterion,
                        styleRecCriterion, ocrCriterion, valid_loader, converter, opt)

                model.train()
                if not opt.ocrFixed:
                    ocrModel.train()
                else:
                    # ocrModel.module.Transformation.eval()
                    # ocrModel.module.FeatureExtraction.eval()
                    # ocrModel.module.AdaptiveAvgPool.eval()
                    ocrModel.module.SequenceModeling.train()
                    # ocrModel.module.Prediction.eval()
                disModel.train()

                # training loss and validation loss
                loss_log = f'[{iteration+1}/{opt.num_iter}] Train OCR loss: {loss_avg_ocr.val():0.5f}, Train Synth loss: {loss_avg.val():0.5f}, Train Dis loss: {loss_avg_dis.val():0.5f}, Valid OCR loss: {valid_loss[0]:0.5f}, Valid Synth loss: {valid_loss[1]:0.5f}, Valid Dis loss: {valid_loss[2]:0.5f}, Elapsed_time: {elapsed_time:0.5f}'

                current_model_log_ocr = f'{"Current_accuracy_OCR":17s}: {current_accuracy[0]:0.3f}, {"Current_norm_ED_OCR":17s}: {current_norm_ED[0]:0.2f}'
                current_model_log_1 = f'{"Current_accuracy_recon":17s}: {current_accuracy[1]:0.3f}, {"Current_norm_ED_recon":17s}: {current_norm_ED[1]:0.2f}'
                current_model_log_2 = f'{"Current_accuracy_pair":17s}: {current_accuracy[2]:0.3f}, {"Current_norm_ED_pair":17s}: {current_norm_ED[2]:0.2f}'

                # plotting
                lib.plot.plot(os.path.join(plotDir, 'Train-OCR-Loss'), loss_avg_ocr.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-Synth-Loss'), loss_avg.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-Dis-Loss'), loss_avg_dis.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-OCR-Recon1-Loss'), loss_avg_ocrRecon_1.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-OCR-Recon2-Loss'), loss_avg_ocrRecon_2.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-Gen-Loss'), loss_avg_gen.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-ImgRecon1-Loss'), loss_avg_imgRecon.val().item())
                lib.plot.plot(os.path.join(plotDir, 'Train-StyRecon2-Loss'), loss_avg_styRecon.val().item())

                lib.plot.plot(os.path.join(plotDir, 'Valid-OCR-Loss'), valid_loss[0].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-Synth-Loss'), valid_loss[1].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-Dis-Loss'), valid_loss[2].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-OCR-Recon1-Loss'), valid_loss[3].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-OCR-Recon2-Loss'), valid_loss[4].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-Gen-Loss'), valid_loss[5].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-ImgRecon1-Loss'), valid_loss[6].item())
                lib.plot.plot(os.path.join(plotDir, 'Valid-StyRecon2-Loss'), valid_loss[7].item())

                lib.plot.plot(os.path.join(plotDir, 'Orig-OCR-WordAccuracy'), current_accuracy[0])
                lib.plot.plot(os.path.join(plotDir, 'Recon-OCR-WordAccuracy'), current_accuracy[1])
                lib.plot.plot(os.path.join(plotDir, 'Pair-OCR-WordAccuracy'), current_accuracy[2])
                lib.plot.plot(os.path.join(plotDir, 'Orig-OCR-CharAccuracy'), current_norm_ED[0])
                lib.plot.plot(os.path.join(plotDir, 'Recon-OCR-CharAccuracy'), current_norm_ED[1])
                lib.plot.plot(os.path.join(plotDir, 'Pair-OCR-CharAccuracy'), current_norm_ED[2])

                # keep best accuracy model (on valid dataset)
                if current_accuracy[1] > best_accuracy:
                    best_accuracy = current_accuracy[1]
                    torch.save(model.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_accuracy.pth'))
                    torch.save(disModel.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_accuracy_dis.pth'))
                if current_norm_ED[1] > best_norm_ED:
                    best_norm_ED = current_norm_ED[1]
                    torch.save(model.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_norm_ED.pth'))
                    torch.save(disModel.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_norm_ED_dis.pth'))
                best_model_log = f'{"Best_accuracy_Recon":17s}: {best_accuracy:0.3f}, {"Best_norm_ED_Recon":17s}: {best_norm_ED:0.2f}'

                # keep best accuracy OCR model (on valid dataset)
                if current_accuracy[0] > best_accuracy_ocr:
                    best_accuracy_ocr = current_accuracy[0]
                    if not opt.ocrFixed:
                        torch.save(ocrModel.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_accuracy_ocr.pth'))
                if current_norm_ED[0] > best_norm_ED_ocr:
                    best_norm_ED_ocr = current_norm_ED[0]
                    if not opt.ocrFixed:
                        torch.save(ocrModel.state_dict(), os.path.join(opt.exp_dir, opt.exp_name, 'best_norm_ED_ocr.pth'))
                best_model_log_ocr = f'{"Best_accuracy_ocr":17s}: {best_accuracy_ocr:0.3f}, {"Best_norm_ED_ocr":17s}: {best_norm_ED_ocr:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log_ocr}\n{current_model_log_1}\n{current_model_log_2}\n{best_model_log_ocr}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":32s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'

                for gt_ocr, pred_ocr, confidence_ocr, gt_1, pred_1, confidence_1, gt_2, pred_2, confidence_2 in zip(
                        labels[0][:5], preds[0][:5], confidence_score[0][:5],
                        labels[1][:5], preds[1][:5], confidence_score[1][:5],
                        labels[2][:5], preds[2][:5], confidence_score[2][:5]):
                    if 'Attn' in opt.Prediction:
                        # gt_ocr = gt_ocr[:gt_ocr.find('[s]')]
                        pred_ocr = pred_ocr[:pred_ocr.find('[s]')]
                        # gt_1 = gt_1[:gt_1.find('[s]')]
                        pred_1 = pred_1[:pred_1.find('[s]')]
                        # gt_2 = gt_2[:gt_2.find('[s]')]
                        pred_2 = pred_2[:pred_2.find('[s]')]

                    predicted_result_log += f'{"ocr"}: {gt_ocr:27s} | {pred_ocr:25s} | {confidence_ocr:0.4f}\t{str(pred_ocr == gt_ocr)}\n'
                    predicted_result_log += f'{"recon"}: {gt_1:25s} | {pred_1:25s} | {confidence_1:0.4f}\t{str(pred_1 == gt_1)}\n'
                    predicted_result_log += f'{"pair"}: {gt_2:26s} | {pred_2:25s} | {confidence_2:0.4f}\t{str(pred_2 == gt_2)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

                loss_avg_ocr.reset()
                loss_avg.reset()
                loss_avg_dis.reset()
                loss_avg_ocrRecon_1.reset()
                loss_avg_ocrRecon_2.reset()
                loss_avg_gen.reset()
                loss_avg_imgRecon.reset()
                loss_avg_styRecon.reset()

            lib.plot.flush()

        lib.plot.tick()

        # save model every 1e+5 iterations
        if (iteration + 1) % 1e+5 == 0:
            torch.save(model.state_dict(),
                       os.path.join(opt.exp_dir, opt.exp_name, 'iter_' + str(iteration + 1) + '.pth'))
            if not opt.ocrFixed:
                torch.save(ocrModel.state_dict(),
                           os.path.join(opt.exp_dir, opt.exp_name, 'iter_' + str(iteration + 1) + '_ocr.pth'))
            torch.save(disModel.state_dict(),
                       os.path.join(opt.exp_dir, opt.exp_name, 'iter_' + str(iteration + 1) + '_dis.pth'))

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            sys.exit()
        iteration += 1
if trial.parameters['dataset'] == 'cifar10':  # assumed: the opening branch is truncated in the source
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.reshape(-1, 32, 32, 3)
    x_test = x_test.reshape(-1, 32, 32, 3)
elif trial.parameters['dataset'] == 'mnist':
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

x_train = x_train.astype('float64')
x_test = x_test.astype('float64')
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
x_train /= 255.
x_test /= 255.

model = Model(trial.parameters)
# sherpa_callback = client.keras_send_metrics(trial, objective_name='val_acc',
#                                             context_names=['acc', 'val_acc', 'loss', 'val_loss'])
history = model.fit(x_train, y_train, x_test, y_test)

# send metrics to sherpa
for epoch in range(len(history['acc'])):
    context = {
        'acc': history['acc'][epoch],
        'loss': history['loss'][epoch],
        'val_acc': history['val_acc'][epoch],
        'val_loss': history['val_loss'][epoch],
    }
    client.send_metrics(trial, epoch + 1, context['val_loss'], context)

model.save_model('SherpaResults/{dataset}/Models/{id}.h5'.format(...))  # format arguments truncated in the source
def train(opt): """ dataset preparation """ opt.select_data = opt.select_data.split('-') opt.batch_ratio = opt.batch_ratio.split('-') train_dataset = Batch_Balanced_Dataset(opt) AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD) valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt) valid_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=opt.batch_size, shuffle= True, # 'True' to check training progress with validation function. num_workers=int(opt.workers), collate_fn=AlignCollate_valid, pin_memory=True) print('-' * 80) """ model configuration """ if 'CTC' in opt.Prediction: converter = CTCLabelConverter(opt.character) else: converter = AttnLabelConverter(opt.character) opt.num_class = len(converter.character) if opt.rgb: opt.input_channel = 3 model = Model(opt) print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel, opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction) # weight initialization for name, param in model.named_parameters(): if 'localization_fc2' in name: print(f'Skip {name} as it is already initialized') continue try: if 'bias' in name: init.constant_(param, 0.0) elif 'weight' in name: init.kaiming_normal_(param) except Exception as e: # for batchnorm. if 'weight' in name: param.data.fill_(1) continue # data parallel for multi-GPU model = torch.nn.DataParallel(model).to(device) model.train() if opt.continue_model != '': print(f'loading pretrained model from {opt.continue_model}') model.load_state_dict(torch.load(opt.continue_model)) print("Model:") print(model) """ setup loss """ if 'CTC' in opt.Prediction: criterion = torch.nn.CTCLoss(zero_infinity=True).to(device) else: criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to( device) # ignore [GO] token = ignore index 0 # loss averager loss_avg = Averager() # filter that only require gradient decent filtered_parameters = [] params_num = [] for p in filter(lambda p: p.requires_grad, model.parameters()): filtered_parameters.append(p) params_num.append(np.prod(p.size())) print('Trainable params num : ', sum(params_num)) # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())] # setup optimizer if opt.adam: optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999)) else: optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps) print("Optimizer:") print(optimizer) """ final options """ # print(opt) with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file: opt_log = '------------ Options -------------\n' args = vars(opt) for k, v in args.items(): opt_log += f'{str(k)}: {str(v)}\n' opt_log += '---------------------------------------\n' print(opt_log) opt_file.write(opt_log) """ start training """ start_iter = 0 if opt.continue_model != '': start_iter = int(opt.continue_model.split('_')[-1].split('.')[0]) print(f'continue to train, start_iter: {start_iter}') start_time = time.time() best_accuracy = -1 best_norm_ED = 1e+6 i = start_iter while (True): # train part image_tensors, labels = train_dataset.get_batch() image = image_tensors.to(device) text, length = converter.encode(labels, batch_max_length=opt.batch_max_length) batch_size = image.size(0) if 'CTC' in opt.Prediction: preds = model(image, text).log_softmax(2) preds_size = torch.IntTensor([preds.size(1)] * batch_size) preds = preds.permute(1, 0, 2) # to use 
CTCLoss format cost = criterion(preds, text, preds_size, length) else: preds = model(image, text[:, :-1]) # align with Attention.forward target = text[:, 1:] # without [GO] Symbol cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1)) model.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm_( model.parameters(), opt.grad_clip) # gradient clipping with 5 (Default) optimizer.step() loss_avg.add(cost) # validation part if i % opt.valInterval == 0: elapsed_time = time.time() - start_time print( f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}' ) # for log with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log: log.write( f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n' ) loss_avg.reset() model.eval() with torch.no_grad(): valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation( model, criterion, valid_loader, converter, opt) model.train() for pred, gt in zip(preds[:5], labels[:5]): if 'Attn' in opt.Prediction: pred = pred[:pred.find('[s]')] gt = gt[:gt.find('[s]')] print(f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}') log.write( f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}\n') valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}' valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}' print(valid_log) log.write(valid_log + '\n') # keep best accuracy model if current_accuracy > best_accuracy: best_accuracy = current_accuracy torch.save( model.state_dict(), f'./saved_models/{opt.experiment_name}/best_accuracy.pth' ) if current_norm_ED < best_norm_ED: best_norm_ED = current_norm_ED torch.save( model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED.pth' ) best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}' print(best_model_log) log.write(best_model_log + '\n') # save model per 1e+5 iter. if (i + 1) % 1e+5 == 0: torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth') if i == opt.num_iter: print('end the training') sys.exit() i += 1
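
# Both training loops above rely on an Averager class that is not defined in
# these excerpts. A minimal sketch consistent with the add()/val()/reset()
# calls they make (the body itself is an assumption) could be:
import torch

class Averager:
    """Running average of scalar losses; only add(), val(), and reset()
    are exercised above."""

    def __init__(self):
        self.reset()

    def add(self, v):
        # keep a detached 0-dim tensor so formatting and .item() both work
        self.sum = self.sum + v.detach()
        self.n_count += 1

    def val(self):
        return self.sum / float(self.n_count)

    def reset(self):
        self.sum = torch.tensor(0.0)
        self.n_count = 0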
def train(config):
    """
    Train the model.
    :param config: Contains the configurations to be used.
    """
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.dev_meta, "r") as fh:
        meta = json.load(fh)

    dev_total = meta["total"]
    print("Building model...")
    parser = get_record_parser(config)
    graph = tf.Graph()
    with graph.as_default() as g:
        train_dataset = get_batch_dataset(config.train_record_file, parser, config)
        dev_dataset = get_dataset(config.dev_record_file, parser, config)
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        train_iterator = train_dataset.make_one_shot_iterator()
        dev_iterator = dev_dataset.make_one_shot_iterator()

        model = Model(config, iterator, word_mat, char_mat, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        loss_save = 100.0
        patience = 0
        best_f1 = 0.
        best_em = 0.

        with tf.Session(config=sess_config) as sess:
            writer = tf.summary.FileWriter(config.log_dir)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            train_handle = sess.run(train_iterator.string_handle())
            dev_handle = sess.run(dev_iterator.string_handle())
            if os.path.exists(os.path.join(config.save_dir, "checkpoint")):
                print('Restoring last saved model.')
                saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            else:
                print('Starting a fresh model.')
            global_step = max(sess.run(model.global_step), 1)

            for _ in tqdm(range(global_step, config.num_steps + 1)):
                global_step = sess.run(model.global_step) + 1
                loss, train_op = sess.run([model.loss, model.train_op],
                                          feed_dict={handle: train_handle,
                                                     model.dropout: config.dropout})
                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss", simple_value=loss), ])
                    writer.add_summary(loss_sum, global_step)
                if global_step % config.checkpoint == 0:
                    _, summ = evaluate_batch(model, config.val_num_batches,
                                             train_eval_file, sess, "train",
                                             handle, train_handle)
                    for s in summ:
                        writer.add_summary(s, global_step)

                    metrics, summ = evaluate_batch(model, dev_total // config.batch_size + 1,
                                                   dev_eval_file, sess, "dev",
                                                   handle, dev_handle)

                    dev_f1 = metrics["f1"]
                    dev_em = metrics["exact_match"]
                    if dev_f1 < best_f1 and dev_em < best_em:
                        patience += 1
                        if patience > config.early_stop:
                            print('Exited due to early stop.')
                            break
                    else:
                        patience = 0
                        best_em = max(best_em, dev_em)
                        best_f1 = max(best_f1, dev_f1)

                    for s in summ:
                        writer.add_summary(s, global_step)
                    writer.flush()
                    filename = os.path.join(config.save_dir,
                                            "model_{}.ckpt".format(global_step))
                    saver.save(sess, filename)
# pylint: disable=import-error
"""Contains the DenseNet model."""
import torchvision
from torchvision import transforms as T

from model import Model
from imagenet_classes import IMAGENET_CLASSES

DENSENET = Model(
    torchvision.models.densenet161(pretrained=True),
    'DenseNet is a convolutional neural network introduced '
    'in the paper "Densely Connected Convolutional Networks". '
    'Accepts 224x224 RGB images.',
    IMAGENET_CLASSES,
    lambda img: T.Compose([T.Resize(256), T.CenterCrop(224)])(img[:3]))
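
# The preprocessing closure above is self-contained, so it can be exercised
# directly. This sketch assumes a torchvision version (>= 0.8) whose transforms
# accept tensors; the 4-channel input is just a stand-in to show why the
# closure keeps only img[:3].
import torch

raw = torch.rand(4, 300, 400)  # e.g. an RGBA tensor; img[:3] keeps the RGB planes
transform = T.Compose([T.Resize(256), T.CenterCrop(224)])
x = transform(raw[:3])
print(x.shape)  # torch.Size([3, 224, 224])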
import os

import numpy as np
import torch
import torch.utils.data
import torch.optim as optim

from model import Model

os.environ['CUDA_VISIBLE_DEVICES'] = '3'

model = Model().cuda()
model.load_state_dict(torch.load('./models/model_epoch0_iter2100.pth'))

train_data = np.load('../data/first/train_full.npy')
train_data = torch.Tensor(train_data)
data = train_data
data = data.unsqueeze(2)
print(data.shape)
data = (data - torch.mean(data, dim=1, keepdim=True)) / torch.std(data, dim=1, keepdim=True)
print(data.mean())

length = train_data.shape[0]
print(length)
batch_size = 1024
predict = []
for i in range(length // batch_size + 1):
    if (i + 1) * batch_size >= length:
        last = length
    else:
        last = (i + 1) * batch_size
    batch_data = data[i * batch_size:last, :, :]
    batch_data = batch_data.cuda()
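
# The loop body is cut off right after the batch is moved to the GPU. A
# continuation along these lines is plausible but entirely an assumption: the
# model's forward signature and output handling are not shown in the source.
#     with torch.no_grad():
#         out = model(batch_data)
#     predict.append(out.cpu())
# and after the loop, something like:
#     predictions = torch.cat(predict, dim=0).numpy()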
n_image_features, train_data, valid_data, test_data = load_pretrained_features(
    'data/mscoco', BATCH_SIZE, K)
print('\nUsing {} image features\n'.format(n_image_features))

# Settings
dumps_dir = './dumps'
if should_dump and not os.path.exists(dumps_dir):
    os.mkdir(dumps_dir)

current_model_dir = '{}/{}'.format(dumps_dir, model_id)
if should_dump and not os.path.exists(current_model_dir):
    os.mkdir(current_model_dir)

model = Model(n_image_features, vocab_size,
              EMBEDDING_DIM, HIDDEN_SIZE,
              bound_idx, max_sentence_length,
              vl_loss_weight, bound_weight,
              should_train_visual, rsa_sampling,
              use_gpu)

if use_gpu:
    model = model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
es = EarlyStopping(mode="max", patience=10,
                   threshold=0.005, threshold_mode="rel")  # not 30 patience

# Init metric trackers
losses_meters = []
eval_losses_meters = []
tokenizer = load_tokenizer(args.src_vocab, args.tgt_vocab)

print("Prepare data")
train_ds = MaskDataset(args.train_file, tokenizer)
test_ds = MaskDataset(args.test_file, tokenizer, use_mask=False)
train_dl = DataLoader(train_ds, shuffle=True, batch_size=args.batch_size)
test_dl = DataLoader(test_ds, shuffle=False, batch_size=args.batch_size)

print("Init model")
src_vocab_len = len(tokenizer.src_stoi)
tgt_vocab_len = len(tokenizer.tgt_stoi)
if args.model_config:
    with open(args.model_config) as f:
        config = json.load(f)
else:
    config = {}
model = Model(src_vocab_len, tgt_vocab_len, **config)
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                             betas=(0.9, 0.98), eps=1e-9)
sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=len(train_dl))
trainer = Trainer(
    model,
    optimizer,
    train_dl,
    test_dl,
    device=args.device,
    scheduler=sched,
    log_dir=args.log_dir,
    weight_dir=args.weight_dir
)

print("Start training")
trainer.train(args.num_epoch)
def test_algorithm(algorithm):
    global results
    global random_seed
    case_name = "test"
    num_layers = 1
    optimizer = "adam"
    learning_rate = 0.01
    batch_size = 256
    num_callbacks = 50
    hidden_dim_size = 32
    num_iterations_between_reports = 100000
    grad_clipping = 100
    predict_only_outcome = True
    final_trace_only = True
    trace_length_modifier = 1.0
    truncate_unknowns = False
    max_num_words = 50

    # Shared keyword arguments; each run below overrides at most two of them.
    base_kwargs = dict(
        case_name=case_name,
        dataset_name=dataset_name,
        algorithm=algorithm,
        num_layers=num_layers,
        optimizer=optimizer,
        learning_rate=learning_rate,
        batch_size=batch_size,
        num_callbacks=num_callbacks,
        hidden_dim_size=hidden_dim_size,
        num_iterations_between_reports=num_iterations_between_reports,
        grad_clipping=grad_clipping,
        predict_only_outcome=predict_only_outcome,
        final_trace_only=final_trace_only,
        trace_length_modifier=trace_length_modifier,
        max_num_words=max_num_words,
        truncate_unknowns=truncate_unknowns)

    overrides = [
        {},  # baseline
        {'trace_length_modifier': 0.5},
        {'final_trace_only': False},
        {'predict_only_outcome': False},
        {'predict_only_outcome': False, 'final_trace_only': False},
        {'hidden_dim_size': 16},
    ]
    for override in overrides:
        kwargs = dict(base_kwargs, **override)
        results = Model(rng=np.random.RandomState(random_seed), **kwargs)
def main():
    c = Controller(Model(0), View())

    # ////////////// DEBUG //////////////
    isLogged = False
    username = ""
    op = 10

    # Loop that displays the menu
    while op != 0:
        if isLogged:
            # Menu when logged in
            c.principal(username)
            op = int(input())
            if op == 1:
                c.show_movies()
                c.buy_a_ticket(username)
            if op == 2:
                c.show_my_tickets(username)
            if op == 9:
                isLogged = False
        else:
            # Menu when not logged in
            c.principal_not_logged()
            op = int(input())
            # Show movies
            if op == 1:
                c.show_movies()
            # Login
            if op == 2:
                # login attempt
                while isLogged == False:
                    logged, username = c.login()
                    if logged == 1:
                        isLogged = True
                    if logged == 2:
                        c.error_handler(1)
                    if logged == 3:
                        c.error_handler(2)
            if op == 3:
                if c.sing_up():
                    isLogged = True
import torch.optim as optim

if __name__ == '__main__':
    args = options.parser.parse_args()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = Dataset(args)
    if not os.path.exists('./ckpt/'):
        os.makedirs('./ckpt/')
    if not os.path.exists('./logs/' + args.model_name):
        os.makedirs('./logs/' + args.model_name)
    logger = Logger('./logs/' + args.model_name)

    model = Model(dataset.feature_size, dataset.num_class).to(device)
    if args.pretrained_ckpt is not None:
        model.load_state_dict(torch.load(args.pretrained_ckpt, map_location=device))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)

    for itr in range(args.max_iter):
        if itr == 10000:
            optimizer = optim.Adam(model.parameters(), lr=args.lr / 10, weight_decay=0.0005)
        train(itr, dataset, args, model, optimizer, logger, device)
        if itr % 1 == 0 and itr != 0:
            torch.save(model.state_dict(), ...)  # destination truncated in the source
def get_model():
    model = Model(input_steps, embedding_size, hidden_size,
                  vocab_size, slot_size, intent_size,
                  epoch_num, None, batch_size, n_layers)
    model.build()
    return model
def train(args):
    datasets = list(range(2))  # range objects have no remove()
    # Remove the leaveDataset from datasets
    datasets.remove(args.leaveDataset)

    # Create the data loader object. This object preprocesses the data in
    # batches of size args.batch_size, each of length args.seq_length
    data_loader = DataLoader(args.batch_size, args.seq_length, datasets, forcePreProcess=True)

    # Save the arguments in the config file
    with open(os.path.join('save', 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Create a vanilla LSTM model with the arguments
    model = Model(args)

    # Initialize a TensorFlow session
    with tf.Session() as sess:
        # Initialize all the variables in the graph
        sess.run(tf.initialize_all_variables())
        # Add all the variables to the list of variables to be saved
        saver = tf.train.Saver(tf.all_variables())

        # For each epoch
        for e in range(args.num_epochs):
            # Assign the learning rate (decayed according to the epoch number)
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            # Reset the pointers in the data loader object
            data_loader.reset_batch_pointer()
            # Get the initial cell state of the LSTM
            state = sess.run(model.initial_state)

            # For each batch in this epoch
            for b in range(data_loader.num_batches):
                # Tic
                start = time.time()
                # Get the source and target data of the current batch:
                # x has the source data, y has the target data
                x, y = data_loader.next_batch()
                # Feed the source, target data and the initial LSTM state to the model
                feed = {
                    model.input_data: x,
                    model.target_data: y,
                    model.initial_state: state
                }
                # Fetch the loss of the model on this batch and the final LSTM state from the session
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                # Toc
                end = time.time()
                # Print epoch, batch, loss and time taken
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))

                # Save the model if the current epoch and batch number match the frequency
                if (e * data_loader.num_batches + b) % args.save_every == 0 and \
                        (e * data_loader.num_batches + b) > 0:
                    checkpoint_path = os.path.join('save', 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def __init__(self, config):
    self.logger = ModelLogger(config, dirname=config['dir'], pretrained=config['pretrained'])
    self.dirname = self.logger.dirname
    cuda = config['cuda']
    cuda_id = config['cuda_id']
    if not cuda:
        self.device = torch.device('cpu')
    else:
        self.device = torch.device('cuda' if cuda_id < 0 else 'cuda:%d' % cuda_id)

    datasets = prepare_datasets(config)
    train_set = datasets['train']
    dev_set = datasets['dev']
    test_set = datasets['test']

    # Evaluation metrics:
    self._train_loss = AverageMeter()
    self._train_f1 = AverageMeter()
    self._train_em = AverageMeter()
    self._dev_f1 = AverageMeter()
    self._dev_em = AverageMeter()

    if train_set:
        self.train_loader = DataLoader(train_set, batch_size=config['batch_size'],
                                       shuffle=config['shuffle'],
                                       collate_fn=lambda x: x, pin_memory=True)
        self._n_train_batches = len(train_set) // config['batch_size']
    else:
        self.train_loader = None

    if dev_set:
        self.dev_loader = DataLoader(dev_set, batch_size=config['batch_size'],
                                     shuffle=False,
                                     collate_fn=lambda x: x, pin_memory=True)
        self._n_dev_batches = len(dev_set) // config['batch_size']
    else:
        self.dev_loader = None

    if test_set:
        self.test_loader = DataLoader(test_set, batch_size=config['batch_size'],
                                      shuffle=False,
                                      collate_fn=lambda x: x, pin_memory=True)
        self._n_test_batches = len(test_set) // config['batch_size']
        self._n_test_examples = len(test_set)
    else:
        self.test_loader = None

    self._n_train_examples = 0
    self.model = Model(config, train_set)
    self.model.network = self.model.network.to(self.device)
    self.config = self.model.config
    self.is_test = False
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from a previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "words_vocab.pkl")), \
            "words_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None

            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state,
                    model.train_op, model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start

                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                          .format(e * data_loader.num_batches + b,
                                  args.num_epochs * data_loader.num_batches,
                                  e, train_loss, speed))

                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

    train_writer.close()
def main():
    # Define the parser
    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length', type=int, default=5,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    parser.add_argument('--pred_length', type=int, default=3,
                        help='Predicted length of the trajectory')
    # Test dataset
    parser.add_argument('--test_dataset', type=int, default=1,
                        help='Dataset to be tested on')

    # Read the arguments
    sample_args = parser.parse_args()

    # Load the saved arguments to the model from the config file
    with open(os.path.join('save', 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    # Initialize with the saved args.
    # yzn: when infer is True, batch_size and seq_length are fixed to 1
    # instead of taking the saved values.
    model = Model(saved_args, True)
    # Initialize a TensorFlow session
    sess = tf.InteractiveSession()
    # Initialize a TensorFlow saver
    saver = tf.train.Saver()

    # Get the checkpoint state to load the model from
    ckpt = tf.train.get_checkpoint_state('save')
    print('loading model: ', ckpt.model_checkpoint_path)

    # Restore the model at the checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)

    # Dataset to get data from
    dataset = [sample_args.test_dataset]

    # Initialize the dataloader object to
    # get sequences of length obs_length + pred_length
    data_loader = DataLoader(1, sample_args.pred_length + sample_args.obs_length, dataset, True)

    # Reset the data pointers of the data loader object
    data_loader.reset_batch_pointer()

    # Maintain the total error until now
    total_error = 0.
    counter = 0.
    for b in range(data_loader.num_batches):
        # Get the source and target data for the next batch
        x, y = data_loader.next_batch()

        # The observed part of the trajectory
        obs_traj = x[0][:sample_args.obs_length]
        # Get the complete trajectory, both the observed and the predicted parts, from the model
        complete_traj = model.sample(sess, obs_traj, num=sample_args.pred_length)

        # Compute the mean error between the predicted part and the true trajectory
        total_error += get_mean_error(complete_traj, x[0], sample_args.obs_length)
        print("Processed trajectory number : ", b, "out of ",
              data_loader.num_batches, " trajectories")

    # Print the mean error across all the batches
    print("Total mean error of the model is ", total_error / data_loader.num_batches)
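
# get_mean_error is not defined in this excerpt. For trajectories stored as
# sequences of (x, y) points, the usual choice is the average Euclidean
# displacement over the predicted timesteps; a sketch under that assumption
# (only the call signature appears above):
import numpy as np

def get_mean_error(predicted_traj, true_traj, observed_length):
    # Average Euclidean distance between predicted and true positions,
    # computed only over the predicted part of the trajectory.
    errors = []
    for t in range(observed_length, len(true_traj)):
        pred_pos = np.asarray(predicted_traj[t])
        true_pos = np.asarray(true_traj[t])
        errors.append(np.linalg.norm(pred_pos - true_pos))
    return np.mean(errors)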
from model import Model
import torch.nn as nn
from torchvision import models

model_name = "densenet_161"
weights = [.5, .5]
print(model_name, weights)

model = Model(models.densenet161, model_name, class_weights=weights)
model.build_model()
model.train_model()
model.test_model()
with open("data/vocab.txt", "r") as f: vocab = [(v.split(" ")[0], i) for i, v in enumerate(f.readlines())][:vocab_size] gloveList = [] with open("data/gloveEmbs200.txt", "r") as f: for row in f.readlines(): arr = row.strip().split() gloveList += [[float(x) for x in arr[1:]]] if len(gloveList) == settings.vocab_size: break gloveArr = np.array(gloveList) gloveU = gloveArr[:, :50] gloveV = gloveArr[:, 51:-1] relmat["co"] = comat m = Model(vocab, relmat, embedding_dimension=50, lambdaB=1e-3, lambdaUV=1e-3, logistic=False) if __name__ == "__main__": m.U = torch.FloatTensor(gloveU).cuda() m.V = torch.FloatTensor(gloveV).cuda() m.updateB() print(m.estimateLL()) m.save("data/gloveRel200.pkl") else: m.load("data/gloveRel200.pkl")
args = {  # leading entries (e.g. 'batch_size') are truncated in the source
    'epochs': 50,  # maximum number of epochs to run
    'init_wt': 0.01,  # standard deviation of the initial random weights
    'context_len': 3,  # number of context words used
    'embedding_dim': 16,  # number of dimensions in embedding
    'vocab_size': 250,  # number of words in vocabulary
    'num_hid': 128,  # number of hidden units
    'model_file': 'model.pk',  # filename to save best model
}

# dataloaders
loader_test = data.DataLoader(args['batch_size'], 'Test')
loader_valid = data.DataLoader(args['batch_size'], 'Valid')
loader_train = data.DataLoader(args['batch_size'], 'Train')

# create model
model = Model(args, loader_train.vocab)

# load weights
# model.load('./provided_model.pk')
model.load('./model.pk')

# testing
total_acc, total_loss = 0, 0
for batch in trange(math.ceil(loader_test.get_size() / args['batch_size']), leave=False):
    model.model.zero_grad()
    input, label = loader_test.get_batch()
    output = model.model.forward(input)
    loss, acc = model.criterion.forward(output, label)
    total_acc += acc
    total_loss += loss
total_acc = total_acc / loader_test.get_size()
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size, opt.num_class,
          opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # for max-length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)
                # select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)
                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open('./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

            log.close()
parser.add_argument('--n_resblocks', type=int, default=32,
                    help='number of residual blocks')
parser.add_argument('--scale', type=int, default=4,
                    help='by how much the image is to be scaled')
parser.add_argument('--epochs', type=int, default=1000,
                    help='number of iterations')
opt = parser.parse_args()

model = Model(ni=opt.ni, nf=opt.nf, n_resblocks=opt.n_resblocks, scale=opt.scale)
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0, amsgrad=False)
model.cuda()

X = pickle.load(open("X.pickle", "rb"))
y = pickle.load(open("y.pickle", "rb"))
LQ_train, LQ_test, HQ_train, HQ_test = train_test_split(X, y, test_size=0.20)  # trailing arguments truncated in the source
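
# The script stops right after the data split. A plausible epoch loop over the
# low-/high-quality pairs is sketched below; it is entirely an assumption, since
# the batching, loss choice, and tensor conversion are not shown in the source.
import torch
import torch.nn as nn

criterion = nn.L1Loss()  # L1 is a common super-resolution loss; assumed here
for epoch in range(opt.epochs):
    running_loss = 0.0
    for lq, hq in zip(LQ_train, HQ_train):  # per-sample loop; batching is an assumption
        lq = torch.as_tensor(lq, dtype=torch.float32).unsqueeze(0).cuda()
        hq = torch.as_tensor(hq, dtype=torch.float32).unsqueeze(0).cuda()
        optimizer.zero_grad()
        sr = model(lq)
        loss = criterion(sr, hq)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('epoch {}: loss {:.4f}'.format(epoch, running_loss / len(LQ_train)))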
#!/usr/bin/env python
import os

import brain
from model import Model

DIR = os.path.dirname(__file__)


class MockCard(object):
    def __init__(self, card_id):
        self.id = card_id

    def log_name(self):
        return self.id


if __name__ == "__main__":
    print("initializing...")
    with open(os.path.join(DIR, "test_state.json")) as f:
        raw = f.read()
    game = brain.Status.parse(raw).game_state
    game.screen_type = "CARD_REWARD"
    game.screen_state.cards = list(
        map(MockCard, ["Demon Form", "Whirlwind", "Bloodletting"]))
    Model("ironclad")
    assert game.can_predict_card_choice()
    print(list(game.predict_card_choice()))