Exemplo n.º 1
0
def main():
    """Train the transformer on the generated dataset and save its weights.

    NumPy is seeded with 0, a two-input Keras model is built from ``op(...)``,
    compiled with Adam driven by ``LearningRateSchedule(D_MODEL)``, and fitted
    for 100 epochs. The second model input is ``y[:, :-1]`` and the target is
    ``y[:, 1:]``. Weights are written to ``./model/transformer_weights``.
    """
    np.random.seed(0)

    train_set, valid_set = create_dataset()
    train_x, train_y = train_set
    valid_x, valid_y = valid_set

    # The result of juxt(identity, network)(inputs) is unpacked as the
    # positional arguments of tf.keras.Model (presumably (inputs, outputs)).
    build = juxt(
        identity,
        op(NUM_BLOCKS, D_MODEL, NUM_HEADS, D_FF, X_VOCAB_SIZE, Y_VOCAB_SIZE,
           X_MAXIMUM_POSITION, Y_MAXIMUM_POSITION, DROPOUT_RATE))
    model_inputs = [tf.keras.Input(shape=(None, )),
                    tf.keras.Input(shape=(None, ))]
    transformer = tf.keras.Model(*build(model_inputs))

    optimizer = tf.keras.optimizers.Adam(LearningRateSchedule(D_MODEL),
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)
    transformer.compile(optimizer, loss=Loss(), metrics=('accuracy', ))

    # Inputs drop the last target position; labels drop the first one.
    train_inputs = (train_x, train_y[:, :-1])
    train_labels = train_y[:, 1:]
    validation = ((valid_x, valid_y[:, :-1]), valid_y[:, 1:])
    transformer.fit(train_inputs,
                    train_labels,
                    batch_size=64,
                    epochs=100,
                    validation_data=validation)

    transformer.save_weights('./model/transformer_weights')
Exemplo n.º 2
0
def main():
    """Evaluate the saved transformer on the validation split.

    Rebuilds and compiles the model, loads the weights from
    ``./model/transformer_weights``, translates every validation sample and
    prints question / answer / prediction. A prediction counts as correct
    when it matches the non-zero entries of the target exactly; mismatches
    are flagged with ``NG``. The exact-match ratio is printed at the end.
    """
    np.random.seed(0)

    _, (valid_x, valid_y) = create_dataset()

    build = juxt(
        identity,
        op(NUM_BLOCKS, D_MODEL, NUM_HEADS, D_FF, X_VOCAB_SIZE, Y_VOCAB_SIZE,
           X_MAXIMUM_POSITION, Y_MAXIMUM_POSITION, DROPOUT_RATE))
    model_inputs = [tf.keras.Input(shape=(None, )),
                    tf.keras.Input(shape=(None, ))]
    transformer = tf.keras.Model(*build(model_inputs))

    optimizer = tf.keras.optimizers.Adam(LearningRateSchedule(D_MODEL),
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)
    transformer.compile(optimizer, loss=Loss(), metrics=('accuracy', ))
    transformer.load_weights('./model/transformer_weights')

    # Loading the whole model via tf.keras.models.load_model('./model') is not
    # used here because tf.linalg.band_part fails; this is fixed in TF 2.4.

    def _readable(tokens):
        # Decode and strip the '^' / '$' marker characters for display.
        return decode(tokens).replace('^', '').replace('$', '')

    n_correct = 0

    for x, y in zip(valid_x, valid_y):
        y_pred = translate(transformer, x)

        print('question:   {}'.format(_readable(x)))
        print('answer:     {}'.format(_readable(y)))
        print('prediction: {}'.format(_readable(y_pred)))

        # Zeros in the target are excluded before comparing.
        target = y[y != 0]
        if np.shape(y_pred) == np.shape(target) and all(y_pred == target):
            n_correct += 1
        else:
            print('NG')

        print()

    print('{:0.3f}'.format(n_correct / len(valid_x)))
Exemplo n.º 3
0
def prepare_model(V,
                  P,
                  d_embed,
                  d_lstm,
                  layers,
                  nhead,
                  dropout=.2,
                  lr=.001,
                  l2=0,
                  smoothing=.1,
                  device='cpu'):
    """Create the sentiment model together with its loss and optimizer.

    Args:
        V, P, d_embed, d_lstm, layers: forwarded positionally to
            ``TransformerSentiment`` (meaning defined by that class).
        nhead: forwarded as ``nhead`` to ``TransformerSentiment``.
        dropout: forwarded as ``dropout`` to ``TransformerSentiment``.
        lr: learning rate for Adam.
        l2: passed to Adam as ``weight_decay``.
        smoothing: passed to ``Loss`` as ``smoothing``.
        device: device the model is moved to.

    Returns:
        tuple: ``(model, criterion, optimizer)``.
    """
    network = TransformerSentiment(
        V, P, d_embed, d_lstm, layers, nhead=nhead, dropout=dropout)
    network = network.to(device)

    # NOTE(review): n_classes is fixed at 150 here, not taken from arguments.
    loss_fn = Loss(smoothing=smoothing, n_classes=150)
    adam = torch.optim.Adam(network.parameters(), lr=lr, weight_decay=l2)
    return network, loss_fn, adam
Exemplo n.º 4
0
    def build_graph(self, *inputs):
        """Build the tower graph: a training cost, or named inference tensors.

        When the current tower is training, ``inputs`` is unpacked as
        ``(image, weight, label)`` and the sum of the loss and the
        weight-decay cost is returned. Otherwise only ``inputs[0]`` is used,
        the ``final_probs`` and ``final_pred`` tensors are created by name
        and nothing is returned.
        """
        training = get_current_tower_context().is_training
        if training:
            image, weight, label = inputs
        else:
            image = inputs[0]

        # Final upsampled feature map.
        featuremap = unet3d('unet3d', self.preprocess(image))

        if not training:
            # Original note on the layout: [b, d, h, w, num_class].
            final_probs = tf.nn.softmax(featuremap, name="final_probs")
            tf.argmax(final_probs, axis=-1, name="final_pred")
            return None

        loss = Loss(featuremap, weight, label)
        # L2 penalty over variables matching the unet3d kernel pattern.
        wd_cost = regularize_cost('(?:unet3d)/.*kernel',
                                  l2_regularizer(1e-5),
                                  name='wd_cost')
        total_cost = tf.add_n([loss, wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
Exemplo n.º 5
0
def train():
    """Train a fresh model on every ``(train, test)`` split of ``data``.

    For each split a new model, loss and optimizer are created. Epochs run
    over ``range(1, config["num_epoch"])``; the batch cost is printed for the
    first batch and every ``config["display_rate"]`` batches, and
    ``evaluate`` is called after each epoch.

    Returns:
        list: one ``evaluate(model, test)`` result per split, taken after
        the split's last epoch.
    """
    scores = []
    for train_split, test_split in data:

        model = Model()
        loss = Loss()
        optimizer = Optimizer(model)
        # Counts batches across all epochs of this split.
        batches_seen = 0

        print("\n\nStarting new K-fold")

        for epoch in range(1, config["num_epoch"]):

            print("\n\nStarting epoch", epoch)

            for X, y in train_split:
                optimizer.zero_grad()
                error = loss(model(X), y)

                if batches_seen == 0 or batches_seen % config["display_rate"] == 0:
                    value = error.detach()
                    if torch.cuda.is_available():
                        # Bring the value to host memory before converting.
                        value = value.cpu()
                    print("Cost: %.4f" % (value.numpy()))
                batches_seen += 1

                error.backward()
                optimizer.step()

            evaluate(model, test_split)
        scores.append(evaluate(model, test_split))
    return scores
Exemplo n.º 6
0
def train(args):
    """Train the PIQA baseline model with periodic reporting and dev evaluation.

    Builds the file interface, model, loss and Adam optimizer from ``args``,
    prepares (or loads from cache) the datasets and loaders, initializes the
    model's GloVe weights, then runs the training loop. Whenever the dev F1
    improves, the model is saved and the predictions are written through the
    interface. Training stops once ``step`` equals ``args.train_steps`` or
    after ``args.epochs`` epochs.

    Args:
        args: namespace-like object; its ``__dict__`` is forwarded to
            ``FileInterface`` and ``Baseline``. Attributes read here include
            ``cuda``, ``batch_size``, ``char_vocab_size``,
            ``glove_vocab_size``, ``word_vocab_size``, ``glove_size``,
            ``elmo``, ``draft``, ``preload``, ``cache``, ``epochs``,
            ``report_period``, ``eval_save_period``, ``eval_steps`` and
            ``train_steps``.
    """
    start_time = time.time()
    device = torch.device('cuda' if args.cuda else 'cpu')

    pprint(args.__dict__)
    interface = FileInterface(**args.__dict__)
    piqa_model = Baseline(**args.__dict__).to(device)

    loss_model = Loss().to(device)
    # Only parameters that require gradients are handed to the optimizer.
    optimizer = torch.optim.Adam(p for p in piqa_model.parameters()
                                 if p.requires_grad)

    batch_size = args.batch_size
    char_vocab_size = args.char_vocab_size
    glove_vocab_size = args.glove_vocab_size
    word_vocab_size = args.word_vocab_size
    glove_size = args.glove_size
    elmo = args.elmo
    draft = args.draft

    def preprocess(interface_):
        """Load data and GloVe, build the processor, datasets and loaders.

        Returns a dict with the keys ``glove_emb_mat``, ``processor``,
        ``train_dataset``, ``dev_dataset``, ``train_loader`` and
        ``dev_loader``.
        """
        # get data
        print('Loading train and dev data')
        train_examples = load_squad(interface_.train_path, draft=draft)
        dev_examples = load_squad(interface_.test_path, draft=draft)

        # pretrained GloVe words and embedding matrix
        print('Loading GloVe')
        glove_words, glove_emb_mat = load_glove(
            glove_size,
            # presumably leaves two vocabulary slots for special tokens -- TODO confirm
            vocab_size=args.glove_vocab_size - 2,
            glove_dir=interface_.glove_dir,
            draft=draft)

        print('Constructing processor')
        processor = SquadProcessor(char_vocab_size,
                                   glove_vocab_size,
                                   word_vocab_size,
                                   elmo=elmo)
        processor.construct(train_examples, glove_words)

        # data loader
        print('Preprocessing datasets')
        train_dataset = tuple(
            processor.preprocess(example) for example in train_examples)
        dev_dataset = tuple(
            processor.preprocess(example) for example in dev_examples)

        print('Creating data loaders')
        train_sampler = SquadSampler(train_dataset,
                                     max_context_size=256,
                                     max_question_size=32,
                                     bucket=True,
                                     shuffle=True)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=processor.collate,
                                  sampler=train_sampler)

        dev_sampler = SquadSampler(dev_dataset, bucket=True)
        dev_loader = DataLoader(dev_dataset,
                                batch_size=batch_size,
                                collate_fn=processor.collate,
                                sampler=dev_sampler)

        # Materialize every batch once so later passes reuse the same tuples.
        if args.preload:
            train_loader = tuple(train_loader)
            dev_loader = tuple(dev_loader)

        out = {
            'glove_emb_mat': glove_emb_mat,
            'processor': processor,
            'train_dataset': train_dataset,
            'dev_dataset': dev_dataset,
            'train_loader': train_loader,
            'dev_loader': dev_loader
        }

        return out

    # Go through the interface's cache when requested, otherwise preprocess now.
    out = interface.cache(
        preprocess,
        interface_=interface) if args.cache else preprocess(interface)
    glove_emb_mat = out['glove_emb_mat']
    processor = out['processor']
    train_dataset = out['train_dataset']
    dev_dataset = out['dev_dataset']
    train_loader = out['train_loader']
    dev_loader = out['dev_loader']

    print("Initializing model weights")
    piqa_model.load_glove(torch.tensor(glove_emb_mat))

    bind_model(interface, processor, piqa_model, optimizer=optimizer)

    # Global step counter (shared across epochs) and best dev report so far.
    step = 0
    best_report = None

    print('Training')
    piqa_model.train()
    for epoch_idx in range(args.epochs):
        for i, train_batch in enumerate(train_loader):
            # Move every entry of the batch to the training device.
            train_batch = {
                key: val.to(device)
                for key, val in train_batch.items()
            }
            model_output = piqa_model(step=step, **train_batch)
            train_results = processor.postprocess_batch(
                train_dataset, train_batch, model_output)
            train_loss = loss_model(step=step, **model_output, **train_batch)
            train_f1 = float(
                np.mean([result['f1'] for result in train_results]))
            train_em = float(
                np.mean([result['em'] for result in train_results]))

            # optimize
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            step += 1

            # report & eval & save
            # step was already incremented, so this fires at steps 1,
            # 1 + period, 1 + 2 * period, ...
            # NOTE(review): with a period of 1 the remainder is always 0 and
            # this never fires.
            if step % args.report_period == 1:
                report = OrderedDict(step=step,
                                     train_loss=train_loss.item(),
                                     train_f1=train_f1,
                                     train_em=train_em,
                                     time=time.time() - start_time)
                interface.report(**report)
                print(', '.join('%s=%.5r' % (s, r) for s, r in report.items()))

            if step % args.eval_save_period == 1:
                with torch.no_grad():
                    piqa_model.eval()
                    loss_model.eval()
                    pred = {}
                    dev_losses, dev_results = [], []
                    # zip with range caps evaluation at args.eval_steps batches.
                    for dev_batch, _ in zip(dev_loader,
                                            range(args.eval_steps)):
                        dev_batch = {
                            key: val.to(device)
                            for key, val in dev_batch.items()
                        }
                        model_output = piqa_model(**dev_batch)
                        results = processor.postprocess_batch(
                            dev_dataset, dev_batch, model_output)

                        dev_loss = loss_model(step=step,
                                              **dev_batch,
                                              **model_output)

                        for result in results:
                            pred[result['id']] = result['pred']
                        dev_results.extend(results)
                        dev_losses.append(dev_loss.item())

                    # Aggregate over the evaluated dev batches.
                    dev_loss = float(np.mean(dev_losses))
                    dev_f1 = float(
                        np.mean([result['f1'] for result in dev_results]))
                    dev_em = float(
                        np.mean([result['em'] for result in dev_results]))

                    report = OrderedDict(step=step,
                                         dev_loss=dev_loss,
                                         dev_f1=dev_f1,
                                         dev_em=dev_em,
                                         time=time.time() - start_time)
                    summary = False
                    # Save and write predictions only when dev F1 improves.
                    if best_report is None or report['dev_f1'] > best_report[
                            'dev_f1']:
                        best_report = report
                        summary = True
                        interface.save(iteration=step)
                        interface.pred(pred)
                    interface.report(summary=summary, **report)
                    print(
                        ', '.join('%s=%.5r' % (s, r)
                                  for s, r in report.items()),
                        '(dev_f1_best=%.5r @%d)' %
                        (best_report['dev_f1'], best_report['step']))
                    # Switch both modules back to training mode.
                    piqa_model.train()
                    loss_model.train()

            # Leave the batch loop once the step budget is reached ...
            if step == args.train_steps:
                break
        # ... and the epoch loop as well.
        if step == args.train_steps:
            break
Exemplo n.º 7
0
# Pre-extracted features and labels are loaded from disk.
# NOTE(review): train_features is used below but not loaded in this fragment;
# it is expected to be defined earlier in the file.
valid_features = torch.load('valid_features.pt')  # presumably a list of tensors
train_vals = torch.load('train_vals.pt')  # presumably a list of tensors
valid_vals = torch.load('valid_vals.pt')  # presumably a list of tensors

# model, optimizer, loss function
feature_size = 2048
learning_rate = 0.0001
model = LSTM(feature_size).cuda()
# The freshly built model above is immediately replaced by the saved one.
model = torch.load("../problem2/best_rnnbased.pth")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Halve the learning rate when the monitored value has not decreased for
# 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=5,
                                                       verbose=True)
loss_function = Loss()

# Explicitly (re)set the learning rate on every parameter group.
for param_group in optimizer.param_groups:
    param_group['lr'] = learning_rate

# some training parameters
BATCH_SIZE = 32
EPOCH_NUM = 500
datalen = len(train_features)
datalen_valid = len(valid_features)
max_accuracy = 0

# start training
model.train()
train_loss = []  # filled later in the file (not shown in this fragment)
valid_acc = []  # filled later in the file (not shown in this fragment)
Exemplo n.º 8
0
from data import DataLoader
from data import Epoch
from model import Model
from model import Loss
from model import Optimizer
from visual import Plot
import calc

# Project-level data, model, loss and optimizer objects.
data = DataLoader()
model = Model()
loss = Loss()
optimizer = Optimizer(model)

# Plot named "Baseline": cleared, then one line chart per tracked metric.
plot = Plot("Baseline")
plot.clear()
plot.line("Loss", "Epoch", "Loss", "loss")
plot.line("Accuracy", "Epoch", "Accuracy (%)", "acc")
plot.line("F1 Score", "Epoch", "F1 Score", "score")
plot.line("Recall", "Epoch", "Recall", "recall")

# One confusion-matrix panel each for the train and validation data.
plot.cm("Confusion Matrix (Train)")
plot.cm("Confusion Matrix (Val)")

for epoch in Epoch():

    print("Epoch", epoch)

    # train model
    # NOTE(review): the listing ends inside this loop; the backward pass and
    # optimizer step are not visible here.
    for X, y in data.train:
        optimizer.zero_grad()
        y_hat = model(X)
Exemplo n.º 9
0
def main():
    """Train YOLO v1 on Pascal VOC, validate every epoch, then run inference.

    The backbone (``model.features``) is frozen and only the remaining
    parameters are optimized with Adam. If ``last_best.pth`` exists in
    ``ckpt_dir`` the model, optimizer and epoch are restored from it. The
    checkpoint is rewritten whenever a training batch reaches a new lowest
    loss and again at the end of every epoch (same file). After training,
    ``inference`` is run on every file in ``<root>/test_images``.

    The validation figure printed per epoch is the mean loss over all
    validation batches.
    """
    # Configurations
    lr = 0.00000001  # learning rate
    batch_size = 64  # batch size
    last_epoch = 1  # first epoch that is actually trained (default: 1)
    max_epoch = 553  # training runs for epochs 1 .. max_epoch - 1

    num_boxes = 2  # the number of boxes for each grid in Yolo v1.
    num_classes = 20  # the number of classes in Pascal VOC Detection.
    grid_size = 7  # 3x224x224 image is reduced to (5*num_boxes+num_classes)x7x7.
    # NOTE(review): the two loss weights below are not passed to Loss().
    lambda_coord = 7  # weight for coordinate regression loss.
    lambda_noobj = 0.5  # weight for no-objectness confidence loss.

    # dataset load
    train_dset = VOCDetection(root=data_root, split='train')
    # drop_last discards the leftover samples that do not fill a final batch.
    train_dloader = DataLoader(train_dset,
                               batch_size=batch_size,
                               shuffle=True,
                               drop_last=True,
                               num_workers=8)
    test_dset = VOCDetection(root=data_root, split='test')
    test_dloader = DataLoader(test_dset,
                              batch_size=batch_size,
                              shuffle=False,
                              drop_last=False,
                              num_workers=8)

    # model load
    model = Yolo(grid_size, num_boxes, num_classes)
    model = model.to(DEVICE)

    # optimizer / loss
    model.features.requires_grad_(False)
    model_params = [v for v in model.parameters() if v.requires_grad]
    optimizer = optim.Adam(model_params, lr=lr, betas=[0.9, 0.999])

    # Load the last checkpoint if it exists. Tensors are mapped onto DEVICE so
    # resuming does not depend on one specific GPU index being present.
    ckpt_path = os.path.join(ckpt_dir, 'last_best.pth')
    if os.path.exists(ckpt_path):
        ckpt = torch.load(ckpt_path, map_location=DEVICE)
        model.load_state_dict(ckpt['model'])
        optimizer.load_state_dict(ckpt['optimizer'])
        last_epoch = ckpt['epoch'] + 1
        print('Last checkpoint is loaded. start_epoch:', last_epoch)
    else:
        print('No checkpoint is found.')

    # Built from the same settings as the network (7, 2, 20).
    Yolov1Loss = Loss(grid_size, num_boxes, num_classes)

    def snapshot(epoch_):
        # State needed to resume training from the given epoch.
        return {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch_
        }

    # training
    best_loss = 1
    for epoch in range(1, max_epoch):
        # Learning rate scheduling. This runs for skipped epochs too, so a
        # resumed run replays the decay steps it has already passed.
        if epoch in [50, 150, 550, 600]:
            lr *= 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        if epoch < last_epoch:
            continue

        model.train()
        for step, (x, y) in enumerate(train_dloader, start=1):
            imgs, gt_outs = x.to(DEVICE), y.to(DEVICE)
            model_outs = model(imgs)
            loss = Yolov1Loss(model_outs, gt_outs)
            # Track the best loss as a plain float so the autograd graph of
            # an old batch is not kept alive.
            loss_value = loss.item()

            if loss_value < best_loss:
                best_loss = loss_value
                torch.save(snapshot(epoch), ckpt_path)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('step:{}/{} | loss:{:.8f}'.format(step, len(train_dloader),
                                                    loss_value))

        model.eval()
        val_loss = 0.0
        val_batches = 0
        with torch.no_grad():
            for x, y in test_dloader:
                imgs, gt_outs = x.to(DEVICE), y.to(DEVICE)
                model_outs = model(imgs)
                val_loss += Yolov1Loss(model_outs, gt_outs).item()
                val_batches += 1
        # Mean over batches; stays 0.0 when the loader yields nothing.
        if val_batches:
            val_loss /= val_batches
        print('Epoch [%d/%d], Val Loss: %.4f' % (epoch, max_epoch, val_loss))

        torch.save(snapshot(epoch), ckpt_path)

    # test
    test_image_dir = os.path.join(root, 'test_images')
    image_path_list = [
        os.path.join(test_image_dir, path)
        for path in os.listdir(test_image_dir)
    ]

    for image_path in image_path_list:
        inference(model, image_path)
Exemplo n.º 10
0
def train_net(args, writer, dtype='train'):
    """Train the SSN network and log the per-epoch losses to ``writer``.

    The dataset, network, loss and optimizer are always constructed; the
    training loop only runs when ``dtype == 'train'``. For any other value
    the function returns after the setup.

    Args:
        args: provides ``root_dir``, ``num_steps`` and ``l_rate``.
        writer: summary writer; receives ``train/total_loss``,
            ``train/pos_loss`` and ``train/rec_loss`` once per epoch.
        dtype: forwarded to the dataset and the network; also decides
            whether the data is shuffled and whether training runs.
    """
    dataset = Dataset(num_spixel=100, patch_size=[200, 200], root=args.root_dir, dtype=dtype)
    dataloader = data.DataLoader(dataset,
                                 batch_size=16,
                                 shuffle=(dtype == 'train'),
                                 num_workers=4)

    # build model
    model = create_ssn_net(num_spixels=100,
                           num_iter=args.num_steps,
                           num_spixels_h=10,
                           num_spixels_w=10,
                           dtype=dtype)
    # loss function
    criterion = Loss()

    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        model.cuda()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate)

    # Nothing else happens for any mode other than training.
    if dtype != 'train':
        return

    model.train()
    logger = loss_logger()
    for epoch in range(100000):
        logger.clear()
        for batch_idx, batch in enumerate(dataloader):
            inputs, num_h, num_w, init_index, cir_index, p2sp_index_, invisible = batch
            with torch.autograd.set_detect_anomaly(True):
                t_start = time.time()
                img = inputs['img'].to(device)
                label = inputs['label'].to(device)
                problabel = inputs['problabel'].to(device)
                num_h = num_h.to(device)
                num_w = num_w.to(device)
                init_index = [x.to(device) for x in init_index]
                cir_index = [x.to(device) for x in cir_index]
                p2sp_index_ = p2sp_index_.to(device)
                invisible = invisible.to(device)

                t_forward = time.time()
                recon_feat2, recon_label = model(img, p2sp_index_, invisible,
                                                 init_index, cir_index,
                                                 problabel, num_h, num_w,
                                                 device)
                loss, loss_1, loss_2 = criterion(recon_feat2, img, recon_label, label)
                t_backward = time.time()

                # optimizer
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                t_end = time.time()
                print(f'epoch:{epoch}, iter:{batch_idx}, total_loss:{loss}, pos_loss:{loss_1}, rec_loss:{loss_2}')
                print(f'forward time:{t_backward-t_forward:.3f}, backward time:{t_end-t_backward:.3f}, total time:{t_end-t_start:.3f}')
                logger.add(loss.data, loss_1.data, loss_2.data)

        logger.ave()
        writer.add_scalar('train/total_loss', logger.loss, epoch)
        writer.add_scalar('train/pos_loss', logger.loss1, epoch)
        writer.add_scalar('train/rec_loss', logger.loss2, epoch)

        # Checkpoint every 100 epochs (never at epoch 0); the file name
        # carries the loss of the epoch's last batch.
        if epoch != 0 and epoch % 100 == 0:
            torch.save(model.state_dict(), f'./checkpoints/checkpoints/{epoch}_{loss:.3f}_model.pt')
Exemplo n.º 11
0
def compute_spixels(num_spixel, num_steps, pre_model, out_folder):
    """Run a trained SSN model over the test set and save boundary images.

    For every sample the predicted superpixel index map is cropped to the
    size of the original image, optionally post-processed with
    ``enforce_connectivity``, rendered with ``get_spixel_image`` and written
    to ``<out_folder>/<image name>_bdry_.jpg``. The batch index is printed
    after each image.

    Args:
        num_spixel: number of superpixels, forwarded to the dataset and
            the network.
        num_steps: forwarded to the network as ``num_iter``.
        pre_model: path of the saved ``state_dict`` to load. Required.
        out_folder: output directory; created if it does not exist.

    Raises:
        ValueError: if ``pre_model`` is None. Raised before any directory,
            dataset or network is created.
    """
    # Fail fast with a real exception (raising a plain string is a TypeError).
    if pre_model is None:
        raise ValueError('no model')

    os.makedirs(out_folder, exist_ok=True)

    dtype = 'test'
    dataloader = data.DataLoader(Dataset_T(num_spixel=num_spixel),
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=1)
    model = create_ssn_net(num_spixels=num_spixel,
                           num_iter=num_steps,
                           num_spixels_h=10,
                           num_spixels_w=10,
                           dtype=dtype,
                           ssn=0)
    # The network is wrapped in DataParallel before the weights are loaded.
    model = torch.nn.DataParallel(model)

    use_cuda = torch.cuda.is_available()
    # Without CUDA, saved tensors are remapped onto the CPU.
    map_location = None if use_cuda else 'cpu'
    model.load_state_dict(torch.load(pre_model, map_location=map_location))

    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        model.cuda()

    for batch_idx, batch in enumerate(dataloader):
        (inputs, num_h, num_w, init_index, cir_index, p2sp_index_, invisible,
         file_name) = batch
        with torch.no_grad():
            img = inputs['img'].to(device)
            problabel = inputs['problabel'].to(device)
            num_h = num_h.to(device)
            num_w = num_w.to(device)
            init_index = [x.to(device) for x in init_index]
            cir_index = [x.to(device) for x in cir_index]
            p2sp_index_ = p2sp_index_.to(device)
            invisible = invisible.to(device)
            # Only the third output (the index map) is needed here.
            _, _, new_spix_indices = model(
                img, p2sp_index_, invisible, init_index, cir_index, problabel,
                num_h, num_w, device)

            # Crop the prediction back to the original image size.
            given_img = np.asarray(Image.open(file_name[0]))
            h, w = given_img.shape[0], given_img.shape[1]
            new_spix_indices = new_spix_indices[:, :h, :w].contiguous()
            spix_index = new_spix_indices.cpu().numpy()[0].astype(int)

            # NOTE(review): enforce_connectivity is called below, so this
            # only skips the step when the name is bound to something falsy.
            if enforce_connectivity:
                segment_size = (h * w) / (int(num_h * num_w) * 1.0)
                min_size = int(0.06 * segment_size)
                max_size = int(3 * segment_size)
                spix_index = enforce_connectivity(spix_index[np.newaxis, :, :],
                                                  min_size, max_size)[0]

            spixel_image = get_spixel_image(given_img, spix_index)

            # Build the output path with os.path so it does not depend on a
            # trailing slash in out_folder or on a three-character extension.
            imgname = os.path.splitext(os.path.basename(file_name[0]))[0]
            out_img_file = os.path.join(out_folder, imgname + '_bdry_.jpg')
            imageio.imwrite(out_img_file, spixel_image)
            print(batch_idx)
Exemplo n.º 12
0
def train(
    dataset,
    train_loader,
    checkpoint_dir,
    log_event_path,
    nepochs,
    learning_rate,
    eval_per_step,
    generator_step,
    discriminator_step,
    lambda_adv,
    checkpoint_path,
    seed,
):
    """Train the generator and discriminator in staged GAN fashion.

    Schedule, driven by the global ``total_step``:
      * ``total_step < generator_step``: generator only, STFT loss.
      * ``generator_step < total_step <= generator_step + discriminator_step``:
        discriminator only.
      * ``total_step > generator_step + discriminator_step``: generator
        (STFT + ``lambda_adv`` * adversarial loss), then discriminator.
    NOTE(review): at ``total_step == generator_step`` neither network is
    updated -- confirm this one-step gap is intended.

    The learning rate of both optimizers is halved every 100000 steps while
    it is above 1e-6. Losses and the learning rate are logged per step.

    Args:
        dataset: provides ``val_wav`` and ``get_all_length_data(idx)`` for
            the periodic evaluation.
        train_loader: yields ``(wav, mel, f0)`` batches.
        checkpoint_dir: directory for checkpoints; created if missing.
        log_event_path: path handed to ``SummaryWriter``.
        nepochs: last epoch to run (epochs are counted from 1).
        learning_rate: initial learning rate for both Adam optimizers.
        eval_per_step: evaluate and checkpoint every this many steps.
        generator_step: see schedule above.
        discriminator_step: see schedule above.
        lambda_adv: weight of the adversarial term in the generator loss.
        checkpoint_path: checkpoint to resume from; ``""`` starts fresh.
        seed: seed for ``torch.manual_seed``.
    """
    torch.manual_seed(seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    criterion = Loss(device, **loss_config)

    # Model
    model = Model(**network_config["nsf_config"]).to(device)
    discriminator = Discriminator(
        **network_config["discriminator_config"]).to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    discriminator_optim = optim.Adam(discriminator.parameters(),
                                     lr=learning_rate)

    writer = SummaryWriter(log_event_path)

    # train
    epoch = 1
    total_step = 0
    current_lr = learning_rate

    os.makedirs(checkpoint_dir, exist_ok=True)

    if checkpoint_path != "":
        model, discriminator, total_step, epoch = load_checkpoint(
            checkpoint_path, model, optimizer, discriminator,
            discriminator_optim)
        # Resume with the learning rate stored in the restored optimizer.
        current_lr = optimizer.param_groups[0]["lr"]
    while epoch <= nepochs:
        running_loss = 0
        print("{}epoch:".format(epoch))
        for step, (wav, mel, f0) in tqdm(enumerate(train_loader)):
            model.train()
            discriminator.train()
            # TODO: make this decay schedule configurable from the config.
            if total_step > 0 and current_lr > 1e-6 and total_step % 100000 == 0:
                current_lr = current_lr / 2
                for g_param_group, d_param_group in zip(
                        optimizer.param_groups,
                        discriminator_optim.param_groups):
                    g_param_group["lr"] = current_lr
                    d_param_group["lr"] = current_lr
            optimizer.zero_grad()
            discriminator_optim.zero_grad()

            wav, mel, f0 = wav.to(device), mel.to(device), f0.to(device)

            # Generator
            if (total_step < generator_step
                    or total_step > generator_step + discriminator_step):
                outputs = model(mel, f0)

                # The output is cut to the reference length before the
                # spectral comparison.
                stft_loss = criterion.stft_loss(outputs[:, :wav.size(-1)], wav)
                if total_step < generator_step:
                    loss = stft_loss
                    adv_loss = None
                else:
                    adv = discriminator(outputs.unsqueeze(1))
                    adv_loss = criterion.adversarial_loss(adv)
                    loss = stft_loss + lambda_adv * adv_loss
                loss.backward()
                optimizer.step()
            else:
                loss = None
                stft_loss = None
                adv_loss = None

            # Discriminator
            if total_step > generator_step:
                # The adversarial generator backward pass above also leaves
                # gradients on the discriminator's parameters. Clear them so
                # this update is driven by dis_loss alone.
                discriminator_optim.zero_grad()
                with torch.no_grad():
                    outputs = model(mel, f0)
                real = discriminator(wav.unsqueeze(1))
                fake = discriminator(outputs.unsqueeze(1).detach())
                real_loss, fake_loss = criterion.discriminator_loss(real, fake)
                dis_loss = real_loss + fake_loss
                dis_loss.backward()
                discriminator_optim.step()
            else:
                dis_loss = None

            # Log only the losses that were computed in this step.
            if loss is not None:
                writer.add_scalar("loss", float(loss.item()), total_step)
                writer.add_scalar("stft_loss", float(stft_loss.item()),
                                  total_step)
            if adv_loss is not None:
                writer.add_scalar("adv_loss", float(adv_loss.item()),
                                  total_step)
            if dis_loss is not None:
                writer.add_scalar("dis_loss", float(dis_loss.item()),
                                  total_step)
                writer.add_scalar("real_loss", float(real_loss.item()),
                                  total_step)
                writer.add_scalar("fake_loss", float(fake_loss.item()),
                                  total_step)
            writer.add_scalar("learning_rate", current_lr, total_step)
            total_step += 1
            # running_loss += loss.item()

            # Periodic evaluation on one random validation item, followed by
            # a checkpoint.
            if total_step % eval_per_step == 0:
                idx = np.random.randint(0, len(dataset.val_wav))
                eval_model(
                    total_step,
                    writer,
                    device,
                    model,
                    dataset.get_all_length_data(idx),
                    checkpoint_dir,
                    data_config["mel_config"],
                )
                save_checkpoint(
                    model,
                    optimizer,
                    discriminator,
                    discriminator_optim,
                    total_step,
                    checkpoint_dir,
                    epoch,
                )

        # averaged_loss = running_loss / (len(train_loader))
        # writer.add_scalar("loss (per epoch)", averaged_loss, epoch)
        # print("Loss: {}".format(running_loss / (len(train_loader))))
        epoch += 1