def main():
    np.random.seed(0)

    (train_x, train_y), (valid_x, valid_y) = create_dataset()

    transformer = tf.keras.Model(*juxt(
        identity,
        op(NUM_BLOCKS, D_MODEL, NUM_HEADS, D_FF, X_VOCAB_SIZE, Y_VOCAB_SIZE,
           X_MAXIMUM_POSITION, Y_MAXIMUM_POSITION, DROPOUT_RATE)
    )([tf.keras.Input(shape=(None,)), tf.keras.Input(shape=(None,))]))

    transformer.compile(
        tf.keras.optimizers.Adam(LearningRateSchedule(D_MODEL),
                                 beta_1=0.9, beta_2=0.98, epsilon=1e-9),
        loss=Loss(),
        metrics=('accuracy',))
    transformer.fit((train_x, train_y[:, :-1]), train_y[:, 1:],
                    batch_size=64, epochs=100,
                    validation_data=((valid_x, valid_y[:, :-1]), valid_y[:, 1:]))
    transformer.save_weights('./model/transformer_weights')
def main():
    np.random.seed(0)

    _, (valid_x, valid_y) = create_dataset()

    transformer = tf.keras.Model(*juxt(
        identity,
        op(NUM_BLOCKS, D_MODEL, NUM_HEADS, D_FF, X_VOCAB_SIZE, Y_VOCAB_SIZE,
           X_MAXIMUM_POSITION, Y_MAXIMUM_POSITION, DROPOUT_RATE)
    )([tf.keras.Input(shape=(None,)), tf.keras.Input(shape=(None,))]))

    transformer.compile(
        tf.keras.optimizers.Adam(LearningRateSchedule(D_MODEL),
                                 beta_1=0.9, beta_2=0.98, epsilon=1e-9),
        loss=Loss(),
        metrics=('accuracy',))
    transformer.load_weights('./model/transformer_weights')
    # transformer = tf.keras.models.load_model('./model')  # tf.linalg.band_part fails here; fixed in TF 2.4.

    c = 0

    for x, y in zip(valid_x, valid_y):
        y_pred = translate(transformer, x)

        print('question: {}'.format(decode(x).replace('^', '').replace('$', '')))
        print('answer: {}'.format(decode(y).replace('^', '').replace('$', '')))
        print('prediction: {}'.format(decode(y_pred).replace('^', '').replace('$', '')))

        if np.shape(y_pred) == np.shape(y[y != 0]) and all(y_pred == y[y != 0]):
            c += 1
        else:
            print('NG')

        print()

    print('{:0.3f}'.format(c / len(valid_x)))
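# A minimal sketch of what a Loss like the one used in the two snippets above
# could look like: a sparse categorical cross-entropy that ignores padded
# target positions (token id 0). The class body and the masking convention are
# assumptions, not taken from the original project.
import tensorflow as tf


class Loss(tf.keras.losses.Loss):
    def call(self, y_true, y_pred):
        # Per-token cross-entropy on the decoder logits.
        loss = tf.keras.losses.sparse_categorical_crossentropy(
            y_true, y_pred, from_logits=True)
        # Mask out positions where the target is the padding id 0.
        mask = tf.cast(tf.not_equal(y_true, 0), loss.dtype)
        return tf.reduce_sum(loss * mask) / tf.reduce_sum(mask)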
def prepare_model(V, P, d_embed, d_lstm, layers, nhead,
                  dropout=.2, lr=.001, l2=0, smoothing=.1, device='cpu'):
    model = TransformerSentiment(V, P, d_embed, d_lstm, layers,
                                 nhead=nhead, dropout=dropout).to(device)
    criterion = Loss(smoothing=smoothing, n_classes=150)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2)
    return model, criterion, optimizer
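# A minimal sketch of a criterion matching the Loss(smoothing=..., n_classes=...)
# signature above, assuming it implements label-smoothed cross-entropy; the
# internals are an assumption, not the project's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Loss(nn.Module):
    def __init__(self, smoothing=0.1, n_classes=150):
        super().__init__()
        self.smoothing = smoothing
        self.n_classes = n_classes

    def forward(self, logits, target):
        log_probs = F.log_softmax(logits, dim=-1)
        # Smoothed targets: (1 - smoothing) on the true class, the rest spread
        # uniformly over the remaining classes.
        with torch.no_grad():
            true_dist = torch.full_like(log_probs,
                                        self.smoothing / (self.n_classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))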
def build_graph(self, *inputs):
    is_training = get_current_tower_context().is_training
    if is_training:
        image, weight, label = inputs
    else:
        image = inputs[0]

    image = self.preprocess(image)
    featuremap = unet3d('unet3d', image)  # final upsampled featuremap

    if is_training:
        loss = Loss(featuremap, weight, label)
        wd_cost = regularize_cost('(?:unet3d)/.*kernel',
                                  l2_regularizer(1e-5), name='wd_cost')
        total_cost = tf.add_n([loss, wd_cost], 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_probs = tf.nn.softmax(featuremap, name="final_probs")  # [b, d, h, w, num_class]
        final_pred = tf.argmax(final_probs, axis=-1, name="final_pred")
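# A plausible sketch of the Loss(featuremap, weight, label) call used above,
# assuming a per-voxel weighted softmax cross-entropy; the real definition is
# not part of this snippet and may differ (e.g. it may add Dice loss terms).
import tensorflow as tf


def Loss(featuremap, weight, label):
    # featuremap: [b, d, h, w, num_class] logits, label: [b, d, h, w] int ids,
    # weight: [b, d, h, w] per-voxel weights.
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                        logits=featuremap)
    return tf.reduce_mean(ce * weight, name='loss')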
def train():
    scores = []
    for train, test in data:
        model = Model()
        loss = Loss()
        optimizer = Optimizer(model)
        batch_num = 0
        print("\n\nStarting new K-fold")
        for epoch in range(1, config["num_epoch"]):
            print("\n\nStarting epoch", epoch)
            for X, y in train:
                optimizer.zero_grad()
                y_hat = model(X)
                error = loss(y_hat, y)
                if batch_num == 0 or batch_num % config["display_rate"] == 0:
                    if torch.cuda.is_available():
                        cost = "Cost: %.4f" % (error.detach().cpu().numpy())
                    else:
                        cost = "Cost: %.4f" % (error.detach().numpy())
                    print(cost)
                batch_num += 1
                error.backward()
                optimizer.step()
            evaluate(model, test)
        scores.append(evaluate(model, test))
    return scores
def train(args):
    start_time = time.time()
    device = torch.device('cuda' if args.cuda else 'cpu')
    pprint(args.__dict__)

    interface = FileInterface(**args.__dict__)
    piqa_model = Baseline(**args.__dict__).to(device)
    loss_model = Loss().to(device)
    optimizer = torch.optim.Adam(p for p in piqa_model.parameters() if p.requires_grad)

    batch_size = args.batch_size
    char_vocab_size = args.char_vocab_size
    glove_vocab_size = args.glove_vocab_size
    word_vocab_size = args.word_vocab_size
    glove_size = args.glove_size
    elmo = args.elmo
    draft = args.draft

    def preprocess(interface_):
        # get data
        print('Loading train and dev data')
        train_examples = load_squad(interface_.train_path, draft=draft)
        dev_examples = load_squad(interface_.test_path, draft=draft)

        # iff creating processor
        print('Loading GloVe')
        glove_words, glove_emb_mat = load_glove(
            glove_size, vocab_size=args.glove_vocab_size - 2,
            glove_dir=interface_.glove_dir, draft=draft)

        print('Constructing processor')
        processor = SquadProcessor(char_vocab_size, glove_vocab_size,
                                   word_vocab_size, elmo=elmo)
        processor.construct(train_examples, glove_words)

        # data loader
        print('Preprocessing datasets')
        train_dataset = tuple(processor.preprocess(example) for example in train_examples)
        dev_dataset = tuple(processor.preprocess(example) for example in dev_examples)

        print('Creating data loaders')
        train_sampler = SquadSampler(train_dataset, max_context_size=256,
                                     max_question_size=32, bucket=True, shuffle=True)
        train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                  collate_fn=processor.collate, sampler=train_sampler)
        dev_sampler = SquadSampler(dev_dataset, bucket=True)
        dev_loader = DataLoader(dev_dataset, batch_size=batch_size,
                                collate_fn=processor.collate, sampler=dev_sampler)

        if args.preload:
            train_loader = tuple(train_loader)
            dev_loader = tuple(dev_loader)

        out = {'glove_emb_mat': glove_emb_mat,
               'processor': processor,
               'train_dataset': train_dataset,
               'dev_dataset': dev_dataset,
               'train_loader': train_loader,
               'dev_loader': dev_loader}
        return out

    out = interface.cache(preprocess, interface_=interface) if args.cache else preprocess(interface)
    glove_emb_mat = out['glove_emb_mat']
    processor = out['processor']
    train_dataset = out['train_dataset']
    dev_dataset = out['dev_dataset']
    train_loader = out['train_loader']
    dev_loader = out['dev_loader']

    print("Initializing model weights")
    piqa_model.load_glove(torch.tensor(glove_emb_mat))

    bind_model(interface, processor, piqa_model, optimizer=optimizer)

    step = 0
    best_report = None

    print('Training')
    piqa_model.train()
    for epoch_idx in range(args.epochs):
        for i, train_batch in enumerate(train_loader):
            train_batch = {key: val.to(device) for key, val in train_batch.items()}
            model_output = piqa_model(step=step, **train_batch)
            train_results = processor.postprocess_batch(train_dataset, train_batch, model_output)
            train_loss = loss_model(step=step, **model_output, **train_batch)
            train_f1 = float(np.mean([result['f1'] for result in train_results]))
            train_em = float(np.mean([result['em'] for result in train_results]))

            # optimize
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            step += 1

            # report & eval & save
            if step % args.report_period == 1:
                report = OrderedDict(step=step, train_loss=train_loss.item(),
                                     train_f1=train_f1, train_em=train_em,
                                     time=time.time() - start_time)
                interface.report(**report)
                print(', '.join('%s=%.5r' % (s, r) for s, r in report.items()))
            if step % args.eval_save_period == 1:
                with torch.no_grad():
                    piqa_model.eval()
                    loss_model.eval()
                    pred = {}
                    dev_losses, dev_results = [], []
                    for dev_batch, _ in zip(dev_loader, range(args.eval_steps)):
                        dev_batch = {key: val.to(device) for key, val in dev_batch.items()}
                        model_output = piqa_model(**dev_batch)
                        results = processor.postprocess_batch(dev_dataset, dev_batch, model_output)
                        dev_loss = loss_model(step=step, **dev_batch, **model_output)
                        for result in results:
                            pred[result['id']] = result['pred']
                        dev_results.extend(results)
                        dev_losses.append(dev_loss.item())

                    dev_loss = float(np.mean(dev_losses))
                    dev_f1 = float(np.mean([result['f1'] for result in dev_results]))
                    dev_em = float(np.mean([result['em'] for result in dev_results]))

                    report = OrderedDict(step=step, dev_loss=dev_loss, dev_f1=dev_f1,
                                         dev_em=dev_em, time=time.time() - start_time)
                    summary = False
                    if best_report is None or report['dev_f1'] > best_report['dev_f1']:
                        best_report = report
                        summary = True
                        interface.save(iteration=step)
                        interface.pred(pred)
                    interface.report(summary=summary, **report)
                    print(', '.join('%s=%.5r' % (s, r) for s, r in report.items()),
                          '(dev_f1_best=%.5r @%d)' % (best_report['dev_f1'], best_report['step']))

                    piqa_model.train()
                    loss_model.train()

            if step == args.train_steps:
                break
        if step == args.train_steps:
            break
valid_features = torch.load('valid_features.pt')  # list of tensors
train_vals = torch.load('train_vals.pt')          # list of tensors
valid_vals = torch.load('valid_vals.pt')          # list of tensors

# model, optimizer, loss function
feature_size = 2048
learning_rate = 0.0001
model = LSTM(feature_size).cuda()
model = torch.load("../problem2/best_rnnbased.pth")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.5, patience=5,
                                                       verbose=True)
loss_function = Loss()
for param_group in optimizer.param_groups:
    param_group['lr'] = learning_rate

# some training parameters
BATCH_SIZE = 32
EPOCH_NUM = 500
datalen = len(train_features)
datalen_valid = len(valid_features)
max_accuracy = 0

# start training
model.train()
train_loss = []
valid_acc = []
from data import DataLoader
from data import Epoch
from model import Model
from model import Loss
from model import Optimizer
from visual import Plot
import calc

data = DataLoader()
model = Model()
loss = Loss()
optimizer = Optimizer(model)

plot = Plot("Baseline")
plot.clear()
plot.line("Loss", "Epoch", "Loss", "loss")
plot.line("Accuracy", "Epoch", "Accuracy (%)", "acc")
plot.line("F1 Score", "Epoch", "F1 Score", "score")
plot.line("Recall", "Epoch", "Recall", "recall")
plot.cm("Confusion Matrix (Train)")
plot.cm("Confusion Matrix (Val)")

for epoch in Epoch():
    print("Epoch", epoch)

    # train model
    for X, y in data.train:
        optimizer.zero_grad()
        y_hat = model(X)
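# The Model, Loss and Optimizer wrappers imported from `model` are not shown in
# this snippet. A minimal sketch of what the two training helpers might wrap,
# assuming a plain classification setup (these bodies are guesses, not the
# project's actual code):
import torch


class Loss(torch.nn.CrossEntropyLoss):
    """Thin alias over cross-entropy for classification."""
    pass


class Optimizer(torch.optim.Adam):
    """Adam bound to a model's parameters, matching the Optimizer(model) call."""
    def __init__(self, model, lr=1e-3):
        super().__init__(model.parameters(), lr=lr)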
def main():
    # Configurations
    lr = 0.00000001     # learning rate
    batch_size = 64     # batch size
    last_epoch = 1      # the last training epoch (default: 1)
    max_epoch = 553     # maximum epoch for the training
    num_boxes = 2       # the number of boxes for each grid in YOLO v1
    num_classes = 20    # the number of classes in Pascal VOC Detection
    grid_size = 7       # a 3x224x224 image is reduced to (5*num_boxes+num_classes)x7x7
    lambda_coord = 7    # weight for coordinate regression loss
    lambda_noobj = 0.5  # weight for no-objectness confidence loss

    """ dataset load """
    train_dset = VOCDetection(root=data_root, split='train')
    # drop_last discards the leftover samples that do not fill a full batch
    train_dloader = DataLoader(train_dset, batch_size=batch_size, shuffle=True,
                               drop_last=True, num_workers=8)

    test_dset = VOCDetection(root=data_root, split='test')
    test_dloader = DataLoader(test_dset, batch_size=batch_size, shuffle=False,
                              drop_last=False, num_workers=8)

    """ model load """
    model = Yolo(grid_size, num_boxes, num_classes)
    # model = nn.DataParallel(model, device_ids=[5, 6, 7])
    model = model.to(DEVICE)
    # pretrained_weights = torch.load(pretrained_backbone_path)
    # model.load_state_dict(pretrained_weights)

    """ optimizer / loss """
    model.features.requires_grad_(False)
    model_params = [v for v in model.parameters() if v.requires_grad is True]
    optimizer = optim.Adam(model_params, lr=lr, betas=[0.9, 0.999])

    # Load the last checkpoint if it exists.
    ckpt_path = os.path.join(ckpt_dir, 'last_best.pth')
    if os.path.exists(ckpt_path):
        ckpt = torch.load(ckpt_path, map_location='cuda:3')
        model.load_state_dict(ckpt['model'])
        optimizer.load_state_dict(ckpt['optimizer'])
        last_epoch = ckpt['epoch'] + 1
        print('Last checkpoint is loaded. start_epoch:', last_epoch)
    else:
        print('No checkpoint is found.')

    Yolov1Loss = Loss(7, 2, 20)
    # ckpt_path = os.path.join(ckpt_dir, 'last_best.pth')

    """ training """
    # Training & Testing.
    model = model.to(DEVICE)
    best_loss = 1
    for epoch in range(1, max_epoch):
        step = 0

        # Learning rate scheduling
        if epoch in [50, 150, 550, 600]:
            lr *= 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        if epoch < last_epoch:
            continue

        model.train()
        for x, y in train_dloader:
            step += 1
            imgs = Variable(x)
            gt_outs = Variable(y)
            imgs, gt_outs = imgs.to(DEVICE), gt_outs.to(DEVICE)

            model_outs = model(imgs)
            loss = Yolov1Loss(model_outs, gt_outs)

            if loss < best_loss:
                best_loss = loss
                ckpt = {'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': epoch}
                torch.save(ckpt, ckpt_path)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('step:{}/{} | loss:{:.8f}'.format(step, len(train_dloader), loss.item()))

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in test_dloader:
                imgs = Variable(x)
                gt_outs = Variable(y)
                imgs, gt_outs = imgs.to(DEVICE), gt_outs.to(DEVICE)

                model_outs = model(imgs)
                loss = Yolov1Loss(model_outs, gt_outs)
                loss_iter = loss.item()
                print('Epoch [%d/%d], Val Loss: %.4f' % (epoch, max_epoch, loss_iter))

        ckpt = {'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch}
        torch.save(ckpt, ckpt_path)

    ''' test '''
    test_image_dir = os.path.join(root, 'test_images')
    image_path_list = [os.path.join(test_image_dir, path)
                       for path in os.listdir(test_image_dir)]

    for image_path in image_path_list:
        inference(model, image_path)
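# Note: lambda_coord and lambda_noobj are defined in the configuration above but
# never passed to Loss(7, 2, 20); the literals 7, 2, 20 presumably correspond to
# grid_size, num_boxes and num_classes, so the criterion is assumed to fall back
# on its own default loss weights. A clearer (hypothetical) equivalent call:
Yolov1Loss = Loss(grid_size, num_boxes, num_classes)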
def train_net(args, writer, dtype='train'):
    is_shuffle = dtype == 'train'
    dataloader = data.DataLoader(Dataset(num_spixel=100, patch_size=[200, 200],
                                         root=args.root_dir, dtype=dtype),
                                 batch_size=16, shuffle=is_shuffle, num_workers=4)
    # build model
    model = create_ssn_net(num_spixels=100, num_iter=args.num_steps,
                           num_spixels_h=10, num_spixels_w=10, dtype=dtype)
    # loss function
    criten = Loss()
    device = torch.device('cpu')
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        model.cuda()
        device = torch.device('cuda')
    optim = torch.optim.Adam(model.parameters(), lr=args.l_rate)

    if dtype == 'train' or dtype == 'test':
        if dtype == 'train':
            model.train()
        logger = loss_logger()
        for epoch in range(100000):
            logger.clear()
            for iter, [inputs, num_h, num_w, init_index, cir_index,
                       p2sp_index_, invisible] in enumerate(dataloader):
                with torch.autograd.set_detect_anomaly(True):
                    t0 = time.time()
                    img = inputs['img'].to(device)
                    label = inputs['label'].to(device)
                    problabel = inputs['problabel'].to(device)
                    num_h = num_h.to(device)
                    num_w = num_w.to(device)
                    init_index = [x.to(device) for x in init_index]
                    cir_index = [x.to(device) for x in cir_index]
                    p2sp_index_ = p2sp_index_.to(device)
                    invisible = invisible.to(device)
                    t1 = time.time()
                    recon_feat2, recon_label = model(img, p2sp_index_, invisible,
                                                     init_index, cir_index, problabel,
                                                     num_h, num_w, device)
                    loss, loss_1, loss_2 = criten(recon_feat2, img, recon_label, label)
                    t2 = time.time()

                    # optimizer
                    optim.zero_grad()
                    loss.backward()
                    optim.step()
                    t3 = time.time()

                    print(f'epoch:{epoch}, iter:{iter}, total_loss:{loss}, pos_loss:{loss_1}, rec_loss:{loss_2}')
                    print(f'forward time:{t2-t1:.3f}, backward time:{t3-t2:.3f}, total time:{t3-t0:.3f}')
                    logger.add(loss.data, loss_1.data, loss_2.data)

            logger.ave()
            writer.add_scalar('train/total_loss', logger.loss, epoch)
            writer.add_scalar('train/pos_loss', logger.loss1, epoch)
            writer.add_scalar('train/rec_loss', logger.loss2, epoch)
            if epoch % 100 == 0 and epoch != 0:
                torch.save(model.state_dict(),
                           f'./checkpoints/checkpoints/{epoch}_{loss:.3f}_model.pt')
            else:
                pass
    else:
        pass
def compute_spixels(num_spixel, num_steps, pre_model, out_folder):
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
        # os.makedirs(out_folder + 'png')
        # os.makedirs(out_folder + 'mat')
    dtype = 'test'
    dataloader = data.DataLoader(Dataset_T(num_spixel=num_spixel),
                                 batch_size=1, shuffle=False, num_workers=1)
    model = create_ssn_net(num_spixels=num_spixel, num_iter=num_steps,
                           num_spixels_h=10, num_spixels_w=10, dtype=dtype, ssn=0)
    model = torch.nn.DataParallel(model)
    if pre_model is not None:
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(pre_model))
        else:
            model.load_state_dict(torch.load(pre_model, map_location='cpu'))
    else:
        raise ValueError('no model')
    criten = Loss()
    device = torch.device('cpu')
    if torch.cuda.is_available():
        model.cuda()
        device = torch.device('cuda')

    for iter, [inputs, num_h, num_w, init_index, cir_index,
               p2sp_index_, invisible, file_name] in enumerate(dataloader):
        with torch.no_grad():
            img = inputs['img'].to(device)
            label = inputs['label'].to(device)
            problabel = inputs['problabel'].to(device)
            num_h = num_h.to(device)
            num_w = num_w.to(device)
            init_index = [x.to(device) for x in init_index]
            cir_index = [x.to(device) for x in cir_index]
            p2sp_index_ = p2sp_index_.to(device)
            invisible = invisible.to(device)
            recon_feat2, recon_label, new_spix_indices = model(
                img, p2sp_index_, invisible, init_index, cir_index,
                problabel, num_h, num_w, device)
            # loss, loss_1, loss_2 = criten(recon_feat2, img, recon_label, label)

            given_img = np.asarray(Image.open(file_name[0]))
            h, w = given_img.shape[0], given_img.shape[1]
            new_spix_indices = new_spix_indices[:, :h, :w].contiguous()
            spix_index = new_spix_indices.cpu().numpy()[0]
            spix_index = spix_index.astype(int)

            if enforce_connectivity:
                segment_size = (given_img.shape[0] * given_img.shape[1]) / (int(num_h * num_w) * 1.0)
                min_size = int(0.06 * segment_size)
                max_size = int(3 * segment_size)
                spix_index = enforce_connectivity(spix_index[np.newaxis, :, :],
                                                  min_size, max_size)[0]

            # given_img_ = np.zeros([spix_index.shape[0], spix_index.shape[1], 3], dtype=np.int)
            # h, w = given_img.shape[0], given_img.shape[1]
            # given_img_[:h, :w] = given_img

            counter_image = np.zeros_like(given_img)
            counter_image = get_spixel_image(counter_image, spix_index)
            spixel_image = get_spixel_image(given_img, spix_index)

            imgname = file_name[0].split('/')[-1][:-4]
            out_img_file = out_folder + imgname + '_bdry_.jpg'
            imageio.imwrite(out_img_file, spixel_image)

            # out_file = out_folder + imgname + '.npy'
            # np.save(out_file, spix_index)

            # validation code only for sp_pix 400
            # out_file_mat = out_folder + 'mat/' + imgname + '.mat'
            # scio.savemat(out_file_mat, {'segs': spix_index})
            # out_count_file = out_folder + 'png/' + imgname + '.png'
            # imageio.imwrite(out_count_file, counter_image)
            print(iter)
def train(
    dataset,
    train_loader,
    checkpoint_dir,
    log_event_path,
    nepochs,
    learning_rate,
    eval_per_step,
    generator_step,
    discriminator_step,
    lambda_adv,
    checkpoint_path,
    seed,
):
    torch.manual_seed(seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    criterion = Loss(device, **loss_config)

    # Model
    model = Model(**network_config["nsf_config"]).to(device)
    discriminator = Discriminator(
        **network_config["discriminator_config"]).to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    discriminator_optim = optim.Adam(discriminator.parameters(), lr=learning_rate)

    writer = SummaryWriter(log_event_path)

    # train
    epoch = 1
    total_step = 0
    current_lr = learning_rate
    os.makedirs(checkpoint_dir, exist_ok=True)
    if checkpoint_path != "":
        model, discriminator, total_step, epoch = load_checkpoint(
            checkpoint_path, model, optimizer, discriminator, discriminator_optim)
        current_lr = optimizer.param_groups[0]["lr"]

    while epoch <= nepochs:
        running_loss = 0
        print("{}epoch:".format(epoch))
        for step, (wav, mel, f0) in tqdm(enumerate(train_loader)):
            model.train()
            discriminator.train()

            # TODO: make this decay schedule controllable from the config
            if total_step > 0 and current_lr > 1e-6 and total_step % 100000 == 0:
                current_lr = current_lr / 2
                for g_param_group, d_param_group in zip(optimizer.param_groups,
                                                        discriminator_optim.param_groups):
                    g_param_group["lr"] = current_lr
                    d_param_group["lr"] = current_lr

            optimizer.zero_grad()
            discriminator_optim.zero_grad()
            wav, mel, f0 = wav.to(device), mel.to(device), f0.to(device)

            # Generator
            if (total_step < generator_step
                    or total_step > generator_step + discriminator_step):
                outputs = model(mel, f0)
                stft_loss = criterion.stft_loss(outputs[:, :wav.size(-1)], wav)
                if total_step < generator_step:
                    loss = stft_loss
                    adv_loss = None
                else:
                    adv = discriminator(outputs.unsqueeze(1))
                    adv_loss = criterion.adversarial_loss(adv)
                    loss = stft_loss + lambda_adv * adv_loss
                loss.backward()
                optimizer.step()
            else:
                loss = None
                stft_loss = None
                adv_loss = None

            # Discriminator
            if total_step > generator_step:
                with torch.no_grad():
                    outputs = model(mel, f0)
                real = discriminator(wav.unsqueeze(1))
                fake = discriminator(outputs.unsqueeze(1).detach())
                real_loss, fake_loss = criterion.discriminator_loss(real, fake)
                dis_loss = real_loss + fake_loss
                dis_loss.backward()
                discriminator_optim.step()
            else:
                dis_loss = None

            if loss is not None:
                writer.add_scalar("loss", float(loss.item()), total_step)
                writer.add_scalar("stft_loss", float(stft_loss.item()), total_step)
            if adv_loss is not None:
                writer.add_scalar("adv_loss", float(adv_loss.item()), total_step)
            if dis_loss is not None:
                writer.add_scalar("dis_loss", float(dis_loss.item()), total_step)
                writer.add_scalar("real_loss", float(real_loss.item()), total_step)
                writer.add_scalar("fake_loss", float(fake_loss.item()), total_step)
            writer.add_scalar("learning_rate", current_lr, total_step)

            total_step += 1
            # running_loss += loss.item()

            if total_step % eval_per_step == 0:
                idx = np.random.randint(0, len(dataset.val_wav))
                eval_model(
                    total_step,
                    writer,
                    device,
                    model,
                    dataset.get_all_length_data(idx),
                    checkpoint_dir,
                    data_config["mel_config"],
                )
                save_checkpoint(
                    model,
                    optimizer,
                    discriminator,
                    discriminator_optim,
                    total_step,
                    checkpoint_dir,
                    epoch,
                )

        # averaged_loss = running_loss / (len(train_loader))
        # writer.add_scalar("loss (per epoch)", averaged_loss, epoch)
        # print("Loss: {}".format(running_loss / (len(train_loader))))
        epoch += 1
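# A rough sketch of the criterion interface used above (stft_loss,
# adversarial_loss, discriminator_loss). A single-resolution STFT loss and a
# least-squares GAN objective are assumptions; the real Loss is built from
# loss_config and is likely richer (e.g. multi-resolution STFT).
import torch
import torch.nn.functional as F


class Loss:
    def __init__(self, device, fft_size=1024, hop_size=256, win_size=1024, **kwargs):
        self.fft_size, self.hop_size, self.win_size = fft_size, hop_size, win_size
        self.window = torch.hann_window(win_size).to(device)

    def stft_loss(self, pred_wav, real_wav):
        def magnitude(x):
            spec = torch.stft(x, self.fft_size, self.hop_size, self.win_size,
                              window=self.window, return_complex=True)
            return spec.abs().clamp(min=1e-7)

        pred_mag, real_mag = magnitude(pred_wav), magnitude(real_wav)
        # Spectral convergence + log-magnitude distance.
        sc = torch.norm(real_mag - pred_mag, p='fro') / torch.norm(real_mag, p='fro')
        log_mag = F.l1_loss(torch.log(pred_mag), torch.log(real_mag))
        return sc + log_mag

    def adversarial_loss(self, fake_scores):
        # Generator wants the discriminator to score fakes as real (LSGAN-style).
        return F.mse_loss(fake_scores, torch.ones_like(fake_scores))

    def discriminator_loss(self, real_scores, fake_scores):
        real_loss = F.mse_loss(real_scores, torch.ones_like(real_scores))
        fake_loss = F.mse_loss(fake_scores, torch.zeros_like(fake_scores))
        return real_loss, fake_loss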