Example #1
def loss_plot(in_path, out_path):
    """
    Generate a plot of train and valid loss from the log
    at <in_path>. Store a .png at <out_path>.

    Assumes matplotlib.pyplot is imported as `plt` and that the
    repo-level `parse_log` and `utils` helpers are available.
    """
    df = parse_log(in_path)  # parse the training log into a pandas DataFrame

    fig = plt.figure(figsize=(8, 5))
    plt.title(in_path)
    plt.xlabel('Time Step')
    plt.ylabel('Loss')
    plt.grid(which='both')

    train = df[~df['train_loss'].isnull()]
    plt.plot(train['time_step'].values,
             train['train_loss'].values,
             color='green',
             label='Train')

    valid = df[~df['valid_loss'].isnull()]
    plt.plot(valid['time_step'].values,
             valid['valid_loss'].values,
             color='orange',
             label='Valid')

    plt.legend(loc='upper right')
    utils.create_dir_if_not_exists(out_path)  # make sure the target directory exists
    plt.savefig(out_path)
    plt.close(fig)  # free the figure so repeated calls don't accumulate memory
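A minimal usage sketch for the plotting helper above; the log and output paths are hypothetical and only illustrate the expected call shape.

# Hypothetical paths: parse logs/run01.log and write the loss curve as a .png.
loss_plot('logs/run01.log', 'plots/run01_loss.png')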
Example #2
def note_sequence_to_midi_file(note_sequence, path):
    """
    Save <note_sequence> to a .midi file at <path>.
    """
    create_dir_if_not_exists(path)  # make sure the target directory exists
    mm.sequence_proto_to_midi_file(note_sequence, path)  # `mm` is expected to be magenta.music
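A sketch of how this helper might be called, assuming `mm` is `magenta.music` and that the note_seq package provides the NoteSequence proto (both assumptions; neither import is shown in the snippet):

import note_seq

seq = note_seq.NoteSequence()
# a single kick-drum hit (GM pitch 36), purely illustrative
seq.notes.add(pitch=36, velocity=100, start_time=0.0, end_time=0.5)
seq.total_time = 0.5
note_sequence_to_midi_file(seq, 'output/drums.mid')  # hypothetical output path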
Example #3
    def train():
        # Turn on training mode which enables dropout.
        # These counters live in the enclosing function's scope, so they must
        # be declared nonlocal (not global) for the updates below to persist.
        nonlocal train_step, train_loss, best_val_loss, eval_start_time, log_start_time
        model.train()
        if model_conf['batch_chunk'] > 1:
            mems = [tuple() for _ in range(model_conf['batch_chunk'])]
        else:
            mems = tuple()
        train_iter = tr_iter.get_varlen_iter(
        ) if model_conf['varlen'] else tr_iter
        for batch, (data, target, seq_len) in enumerate(train_iter):
            model.zero_grad()
            # Gradient accumulation: when batch_chunk > 1, split the batch into
            # chunks, backprop each chunk's scaled loss, then take a single
            # optimizer step afterwards.
            if model_conf['batch_chunk'] > 1:
                data_chunks = torch.chunk(data, model_conf['batch_chunk'], 1)
                target_chunks = torch.chunk(target, model_conf['batch_chunk'],
                                            1)
                for i in range(model_conf['batch_chunk']):
                    data_i = data_chunks[i].contiguous()
                    target_i = target_chunks[i].contiguous()
                    ret = para_model(data_i, target_i, *mems[i])
                    loss, mems[i] = ret[0], ret[1:]
                    loss = loss.float().mean().type_as(
                        loss) / model_conf['batch_chunk']
                    if model_conf['fp16']:
                        optimizer.backward(loss)
                    else:
                        loss.backward()
                    train_loss += loss.float().item()
            else:
                ret = para_model(data, target, *mems)
                loss, mems = ret[0], ret[1:]
                loss = loss.float().mean().type_as(loss)
                if model_conf['fp16']:
                    optimizer.backward(loss)
                else:
                    loss.backward()
                train_loss += loss.float().item()

            if model_conf['fp16']:
                optimizer.clip_master_grads(model_conf['clip'])
            else:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               model_conf['clip'])

            optimizer.step()
            if model_conf['sample_softmax'] > 0:
                optimizer_sparse.step()

            # step-wise learning rate annealing
            train_step += 1
            if model_conf['scheduler'] in ['cosine', 'constant', 'dev_perf']:
                # linear warmup stage
                if train_step < model_conf['warmup_steps']:
                    curr_lr = model_conf[
                        'learning_rate'] * train_step / model_conf[
                            'warmup_steps']
                    optimizer.param_groups[0]['lr'] = curr_lr
                    if model_conf['sample_softmax'] > 0:
                        # param_groups use the key 'lr', not 'learning_rate'
                        optimizer_sparse.param_groups[0]['lr'] = curr_lr * 2
                else:
                    if model_conf['scheduler'] == 'cosine':
                        scheduler.step(train_step)
                        if model_conf['sample_softmax'] > 0:
                            scheduler_sparse.step(train_step)
            elif model_conf['scheduler'] == 'inv_sqrt':
                scheduler.step(train_step)

            if train_step % model_conf['log_interval'] == 0:
                cur_loss = train_loss / model_conf['log_interval']
                elapsed = time.time() - log_start_time
                log_str = '| epoch {:3d} step {:>8d} | {:>6d} batches | lr {:.3g} ' \
                          '| ms/batch {:5.2f} | loss {:5.2f}'.format(
                    epoch, train_step, batch+1, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / model_conf['log_interval'], cur_loss)
                log_str += ' | ppl {:9.3f}'.format(math.exp(cur_loss))
                logging(log_str)
                train_loss = 0
                log_start_time = time.time()

            if train_step == 1 or train_step % model_conf['eval_interval'] == 0:
                val_loss = evaluate(va_iter)
                logging('-' * 100)
                log_str = '| Eval {:3d} at step {:>8d} | time: {:5.2f}s ' \
                          '| valid loss {:5.2f}'.format(
                    train_step // model_conf['eval_interval'], train_step,
                    (time.time() - eval_start_time), val_loss)
                log_str += ' | valid ppl {:9.3f}'.format(math.exp(val_loss))
                logging(log_str)
                logging('-' * 100)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    create_dir_if_not_exists(
                        os.path.join(model_conf['work_dir'],
                                     f'train_step_{train_step}', ''))
                    if not model_conf['debug']:
                        with open(
                                os.path.join(model_conf['work_dir'],
                                             f'train_step_{train_step}',
                                             'model.pt'), 'wb') as f:
                            torch.save(model, f)
                        with open(
                                os.path.join(model_conf['work_dir'],
                                             f'train_step_{train_step}',
                                             'optimizer.pt'), 'wb') as f:
                            torch.save(optimizer.state_dict(), f)
                    best_val_loss = val_loss

                # dev-performance based learning rate annealing
                if model_conf['scheduler'] == 'dev_perf':
                    scheduler.step(val_loss)
                    if model_conf['sample_softmax'] > 0:
                        scheduler_sparse.step(val_loss)

                eval_start_time = time.time()

            if train_step == model_conf['max_step']:
                break
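The `batch_chunk > 1` branch above is a gradient-accumulation pattern: the batch is split into chunks, each chunk's loss is scaled by 1/batch_chunk and backpropagated, and a single clipped optimizer step follows. Below is a self-contained sketch of that pattern with a generic model, not the Transformer-XL specifics above.

import torch
import torch.nn as nn

model = nn.Linear(16, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
data, target = torch.randn(8, 16), torch.randn(8, 1)  # one full batch
num_chunks = 4

model.zero_grad()
for data_i, target_i in zip(torch.chunk(data, num_chunks, 0),
                            torch.chunk(target, num_chunks, 0)):
    # scale so the accumulated gradient matches the full-batch gradient
    loss = nn.functional.mse_loss(model(data_i), target_i) / num_chunks
    loss.backward()  # gradients accumulate across chunks
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
optimizer.step()  # one step for the whole batch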
Example #4
def model_main(conf, pitch_classes, time_steps_vocab):
    """
    Run model pipeline from setup specified in <conf>

    Params
    ======
    conf: dict
        config from conf/train_conf.yaml 
    pitch_classes: dict
        Dict of drum pitch mappings (from conf/drum_pitches.yaml)
    time_steps_vocab: dict
        Dict of tick:token mappings (from conf/time_steps_vocab.yaml)
    """
    model_conf = conf['model']
    data_conf = conf['data']

    if model_conf['d_embed'] < 0:
        model_conf['d_embed'] = model_conf['d_model']

    assert model_conf[
        'ext_len'] >= 0, 'extended context length must be non-negative'
    assert model_conf['train_batch_size'] % model_conf['batch_chunk'] == 0, \
        'train_batch_size must be divisible by batch_chunk'

    model_conf['work_dir'] = '{}-{}'.format(model_conf['work_dir'],
                                            data_conf['dataset'])
    model_conf['work_dir'] = os.path.join(model_conf['work_dir'],
                                          time.strftime('%Y%m%d-%H%M%S'))
    #logging = create_exp_dir(model_conf['work_dir'],
    #   scripts_to_save=['train.py', 'mem_transformer.py'], debug=model_conf['debug'])
    logging = create_exp_dir(model_conf['work_dir'],
                             scripts_to_save=None,
                             debug=model_conf['debug'])

    # Random seeding is currently disabled; uncomment the lines below
    # to make runs reproducible.
    #np.random.seed(model_conf['seed'])
    #torch.manual_seed(model_conf['seed'])
    if torch.cuda.is_available():
        if not model_conf['cuda']:
            print('WARNING: You have a CUDA device; consider enabling '
                  '`cuda` in the model config')
        else:
            pass
            #torch.cuda.manual_seed_all(model_conf['seed'])

    # Validate the fp16 setting
    if model_conf['fp16']:
        if not model_conf['cuda']:
            print('WARNING: fp16 requires cuda; ignoring the fp16 setting')
            model_conf['fp16'] = False
        else:
            try:
                from apex.fp16_utils import FP16_Optimizer
            except ImportError:
                print('WARNING: apex not installed, ignoring the fp16 setting')
                model_conf['fp16'] = False

    device = torch.device('cuda' if model_conf['cuda'] else 'cpu')

    ###############################################################################
    # Load data
    ###############################################################################
    corpus = get_corpus(data_conf['dataset'], data_conf['data_dir'],
                        pitch_classes, time_steps_vocab, conf['processing'])
    ntokens = corpus.vocab_size
    model_conf['n_token'] = ntokens

    cutoffs, tie_projs = [], [False]

    eval_batch_size = 10
    tr_iter = corpus.get_iterator('train',
                                  model_conf['train_batch_size'],
                                  model_conf['tgt_len'],
                                  device=device,
                                  ext_len=model_conf['ext_len'])
    va_iter = corpus.get_iterator('valid',
                                  eval_batch_size,
                                  model_conf['tgt_len'],
                                  device=device,
                                  ext_len=model_conf['ext_len'])
    te_iter = corpus.get_iterator('test',
                                  eval_batch_size,
                                  model_conf['tgt_len'],
                                  device=device,
                                  ext_len=model_conf['ext_len'])

    ###############################################################################
    # Build the model
    ###############################################################################
    def init_weight(weight):
        if model_conf['init'] == 'uniform':
            nn.init.uniform_(weight, -model_conf['init_range'],
                             model_conf['init_range'])
        elif model_conf['init'] == 'normal':
            nn.init.normal_(weight, 0.0, model_conf['init_std'])

    def init_bias(bias):
        nn.init.constant_(bias, 0.0)

    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Linear') != -1:
            if hasattr(m, 'weight') and m.weight is not None:
                init_weight(m.weight)
            if hasattr(m, 'bias') and m.bias is not None:
                init_bias(m.bias)
        elif classname.find('AdaptiveEmbedding') != -1:
            if hasattr(m, 'emb_projs'):
                for i in range(len(m.emb_projs)):
                    if m.emb_projs[i] is not None:
                        nn.init.normal_(m.emb_projs[i], 0.0,
                                        model_conf['proj_init_std'])
        elif classname.find('Embedding') != -1:
            if hasattr(m, 'weight'):
                init_weight(m.weight)
        elif classname.find('ProjectedAdaptiveLogSoftmax') != -1:
            if hasattr(m, 'cluster_weight') and m.cluster_weight is not None:
                init_weight(m.cluster_weight)
            if hasattr(m, 'cluster_bias') and m.cluster_bias is not None:
                init_bias(m.cluster_bias)
            if hasattr(m, 'out_projs'):
                for i in range(len(m.out_projs)):
                    if m.out_projs[i] is not None:
                        nn.init.normal_(m.out_projs[i], 0.0,
                                        model_conf['proj_init_std'])
        elif classname.find('LayerNorm') != -1:
            if hasattr(m, 'weight'):
                nn.init.normal_(m.weight, 1.0, model_conf['init_std'])
            if hasattr(m, 'bias') and m.bias is not None:
                init_bias(m.bias)
        elif classname.find('TransformerLM') != -1:
            if hasattr(m, 'r_emb'):
                init_weight(m.r_emb)
            if hasattr(m, 'r_w_bias'):
                init_weight(m.r_w_bias)
            if hasattr(m, 'r_r_bias'):
                init_weight(m.r_r_bias)
            if hasattr(m, 'r_bias'):
                init_bias(m.r_bias)

    def update_dropout(m):
        classname = m.__class__.__name__
        if classname.find('Dropout') != -1:
            if hasattr(m, 'p'):
                m.p = model_conf['dropout']

    def update_dropatt(m):
        if hasattr(m, 'dropatt'):
            m.dropatt.p = model_conf['dropatt']

    if model_conf['restart']:
        with open(os.path.join(model_conf['restart_dir'], 'model.pt'),
                  'rb') as f:
            model = torch.load(f)
        if not model_conf['fp16']:
            model = model.float()
        model.apply(update_dropout)
        model.apply(update_dropatt)
    else:
        model = MemTransformerLM(ntokens,
                                 model_conf['n_layer'],
                                 model_conf['n_head'],
                                 model_conf['d_model'],
                                 model_conf['d_head'],
                                 model_conf['d_inner'],
                                 model_conf['dropout'],
                                 model_conf['dropatt'],
                                 tie_weight=not model_conf['not_tied'],  # tie unless not_tied is set
                                 d_embed=model_conf['d_embed'],
                                 div_val=model_conf['div_val'],
                                 tie_projs=tie_projs,
                                 pre_lnorm=model_conf['pre_lnorm'],
                                 tgt_len=model_conf['tgt_len'],
                                 ext_len=model_conf['ext_len'],
                                 mem_len=model_conf['mem_len'],
                                 cutoffs=cutoffs,
                                 same_length=model_conf['same_length'],
                                 attn_type=model_conf['attn_type'],
                                 clamp_len=model_conf['clamp_len'],
                                 sample_softmax=model_conf['sample_softmax'])
        model.apply(weights_init)
        # ensure embedding init is not overridden by out_layer in case of weight sharing
        model.word_emb.apply(weights_init)
    model_conf['n_all_param'] = sum([p.nelement() for p in model.parameters()])
    model_conf['n_nonemb_param'] = sum(
        [p.nelement() for p in model.layers.parameters()])

    if model_conf['fp16']:
        model = model.half()

    if model_conf['multi_gpu']:
        model = model.to(device)
        if model_conf['gpu0_bsz'] >= 0:
            para_model = BalancedDataParallel(model_conf['gpu0_bsz'] //
                                              model_conf['batch_chunk'],
                                              model,
                                              dim=1).to(device)
        else:
            para_model = nn.DataParallel(model, dim=1).to(device)
    else:
        para_model = model.to(device)

    #### optimizer
    if model_conf['optim'].lower() == 'sgd':
        if model_conf['sample_softmax'] > 0:
            dense_params, sparse_params = [], []
            for param in model.parameters():
                if param.size() == model.word_emb.weight.size():
                    sparse_params.append(param)
                else:
                    dense_params.append(param)
            optimizer_sparse = optim.SGD(sparse_params,
                                         lr=model_conf['learning_rate'] * 2)
            optimizer = optim.SGD(dense_params,
                                  lr=model_conf['learning_rate'],
                                  momentum=model_conf['mom'])
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=model_conf['learning_rate'],
                                  momentum=model_conf['mom'])
    elif model_conf['optim'].lower() == 'adam':
        if model_conf['sample_softmax'] > 0:
            dense_params, sparse_params = [], []
            for param in model.parameters():
                if param.size() == model.word_emb.weight.size():
                    sparse_params.append(param)
                else:
                    dense_params.append(param)
            optimizer_sparse = optim.SparseAdam(sparse_params,
                                                lr=model_conf['learning_rate'])
            optimizer = optim.Adam(dense_params,
                                   lr=model_conf['learning_rate'])
        else:
            optimizer = optim.Adam(model.parameters(),
                                   lr=model_conf['learning_rate'])
    elif model_conf['optim'].lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=model_conf['learning_rate'])

    #### scheduler
    if model_conf['scheduler'] == 'cosine':
        # eta_min comes from the config; it historically defaulted to 0 rather
        # than lr_min (1e-6) and is kept that way for backward compatibility.
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, model_conf['max_step'],
            eta_min=model_conf['eta_min'])
        if model_conf['sample_softmax'] > 0:
            scheduler_sparse = optim.lr_scheduler.CosineAnnealingLR(
                optimizer_sparse,
                model_conf['max_step'],
                eta_min=model_conf['eta_min'])
    elif model_conf['scheduler'] == 'inv_sqrt':
        # originally used for Transformer (in Attention is all you need)
        def lr_lambda(step):
            # return a multiplier instead of a learning rate
            if step == 0 and model_conf['warmup_steps'] == 0:
                return 1.
            else:
                return 1. / (step ** 0.5) if step > model_conf['warmup_steps'] \
                       else step / (model_conf['warmup_steps'] ** 1.5)

        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)
    elif model_conf['scheduler'] == 'dev_perf':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=model_conf['decay_rate'],
            patience=model_conf['patience'],
            min_lr=model_conf['lr_min'])
        if model_conf['sample_softmax'] > 0:
            scheduler_sparse = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer_sparse,
                factor=model_conf['decay_rate'],
                patience=model_conf['patience'],
                min_lr=model_conf['lr_min'])
    elif model_conf['scheduler'] == 'constant':
        pass

    if model_conf['cuda'] and model_conf['fp16']:
        # If model_conf['dynamic_loss_scale'] is False, static_loss_scale will be used.
        # If model_conf['dynamic_loss_scale'] is True, it will take precedence over static_loss_scale.
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=model_conf['static_loss_scale'],
            dynamic_loss_scale=model_conf['dynamic_loss_scale'],
            dynamic_loss_args={'init_scale': 2**16})

    if model_conf['restart']:
        if os.path.exists(
                os.path.join(model_conf['restart_dir'], 'optimizer.pt')):
            with open(os.path.join(model_conf['restart_dir'], 'optimizer.pt'),
                      'rb') as f:
                opt_state_dict = torch.load(f)
                optimizer.load_state_dict(opt_state_dict)
        else:
            print('Optimizer was not saved. Start from scratch.')

    logging('=' * 100)
    for k, v in model_conf.items():
        logging('    - {} : {}'.format(k, v))
    logging('=' * 100)
    logging('#params = {}'.format(model_conf['n_all_param']))
    logging('#non emb params = {}'.format(model_conf['n_nonemb_param']))

    ###############################################################################
    # Training code
    ###############################################################################

    def evaluate(eval_iter):
        # Turn on evaluation mode which disables dropout.
        model.eval()

        # If the model does not use memory at all, make the ext_len longer.
        # Otherwise, make the mem_len longer and keep the ext_len the same.
        if model_conf['mem_len'] == 0:
            model.reset_length(
                model_conf['eval_tgt_len'], model_conf['ext_len'] +
                model_conf['tgt_len'] - model_conf['eval_tgt_len'],
                model_conf['mem_len'])
        else:
            model.reset_length(
                model_conf['eval_tgt_len'], model_conf['ext_len'],
                model_conf['mem_len'] + model_conf['tgt_len'] -
                model_conf['eval_tgt_len'])

        # Evaluation
        total_len, total_loss = 0, 0.
        with torch.no_grad():
            mems = tuple()
            for i, (data, target, seq_len) in enumerate(eval_iter):
                if model_conf['max_eval_steps'] > 0 and i >= model_conf[
                        'max_eval_steps']:
                    break
                ret = model(data, target, *mems)
                loss, mems = ret[0], ret[1:]
                loss = loss.mean()
                total_loss += seq_len * loss.float().item()
                total_len += seq_len

        # Switch back to the training mode
        model.reset_length(model_conf['tgt_len'], model_conf['ext_len'],
                           model_conf['mem_len'])
        model.train()

        return total_loss / total_len

    def train():
        # Turn on training mode which enables dropout.
        # These counters live in the enclosing function's scope, so they must
        # be declared nonlocal (not global) for the updates below to persist.
        nonlocal train_step, train_loss, best_val_loss, eval_start_time, log_start_time
        model.train()
        if model_conf['batch_chunk'] > 1:
            mems = [tuple() for _ in range(model_conf['batch_chunk'])]
        else:
            mems = tuple()
        train_iter = tr_iter.get_varlen_iter(
        ) if model_conf['varlen'] else tr_iter
        for batch, (data, target, seq_len) in enumerate(train_iter):
            model.zero_grad()
            # Gradient accumulation: when batch_chunk > 1, split the batch into
            # chunks, backprop each chunk's scaled loss, then take a single
            # optimizer step afterwards.
            if model_conf['batch_chunk'] > 1:
                data_chunks = torch.chunk(data, model_conf['batch_chunk'], 1)
                target_chunks = torch.chunk(target, model_conf['batch_chunk'],
                                            1)
                for i in range(model_conf['batch_chunk']):
                    data_i = data_chunks[i].contiguous()
                    target_i = target_chunks[i].contiguous()
                    ret = para_model(data_i, target_i, *mems[i])
                    loss, mems[i] = ret[0], ret[1:]
                    loss = loss.float().mean().type_as(
                        loss) / model_conf['batch_chunk']
                    if model_conf['fp16']:
                        optimizer.backward(loss)
                    else:
                        loss.backward()
                    train_loss += loss.float().item()
            else:
                ret = para_model(data, target, *mems)
                loss, mems = ret[0], ret[1:]
                loss = loss.float().mean().type_as(loss)
                if model_conf['fp16']:
                    optimizer.backward(loss)
                else:
                    loss.backward()
                train_loss += loss.float().item()

            if model_conf['fp16']:
                optimizer.clip_master_grads(model_conf['clip'])
            else:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               model_conf['clip'])

            optimizer.step()
            if model_conf['sample_softmax'] > 0:
                optimizer_sparse.step()

            # step-wise learning rate annealing
            train_step += 1
            if model_conf['scheduler'] in ['cosine', 'constant', 'dev_perf']:
                # linear warmup stage
                if train_step < model_conf['warmup_steps']:
                    curr_lr = model_conf[
                        'learning_rate'] * train_step / model_conf[
                            'warmup_steps']
                    optimizer.param_groups[0]['lr'] = curr_lr
                    if model_conf['sample_softmax'] > 0:
                        # param_groups use the key 'lr', not 'learning_rate'
                        optimizer_sparse.param_groups[0]['lr'] = curr_lr * 2
                else:
                    if model_conf['scheduler'] == 'cosine':
                        scheduler.step(train_step)
                        if model_conf['sample_softmax'] > 0:
                            scheduler_sparse.step(train_step)
            elif model_conf['scheduler'] == 'inv_sqrt':
                scheduler.step(train_step)

            if train_step % model_conf['log_interval'] == 0:
                cur_loss = train_loss / model_conf['log_interval']
                elapsed = time.time() - log_start_time
                log_str = '| epoch {:3d} step {:>8d} | {:>6d} batches | lr {:.3g} ' \
                          '| ms/batch {:5.2f} | loss {:5.2f}'.format(
                    epoch, train_step, batch+1, optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / model_conf['log_interval'], cur_loss)
                log_str += ' | ppl {:9.3f}'.format(math.exp(cur_loss))
                logging(log_str)
                train_loss = 0
                log_start_time = time.time()

            if train_step == 1 or train_step % model_conf['eval_interval'] == 0:
                val_loss = evaluate(va_iter)
                logging('-' * 100)
                log_str = '| Eval {:3d} at step {:>8d} | time: {:5.2f}s ' \
                          '| valid loss {:5.2f}'.format(
                    train_step // model_conf['eval_interval'], train_step,
                    (time.time() - eval_start_time), val_loss)
                log_str += ' | valid ppl {:9.3f}'.format(math.exp(val_loss))
                logging(log_str)
                logging('-' * 100)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    create_dir_if_not_exists(
                        os.path.join(model_conf['work_dir'],
                                     f'train_step_{train_step}', ''))
                    if not model_conf['debug']:
                        with open(
                                os.path.join(model_conf['work_dir'],
                                             f'train_step_{train_step}',
                                             'model.pt'), 'wb') as f:
                            torch.save(model, f)
                        with open(
                                os.path.join(model_conf['work_dir'],
                                             f'train_step_{train_step}',
                                             'optimizer.pt'), 'wb') as f:
                            torch.save(optimizer.state_dict(), f)
                    best_val_loss = val_loss

                # dev-performance based learning rate annealing
                if model_conf['scheduler'] == 'dev_perf':
                    scheduler.step(val_loss)
                    if model_conf['sample_softmax'] > 0:
                        scheduler_sparse.step(val_loss)

                eval_start_time = time.time()

            if train_step == model_conf['max_step']:
                break

    # Loop over epochs.
    train_step = 0
    train_loss = 0
    best_val_loss = None

    log_start_time = time.time()
    eval_start_time = time.time()

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in itertools.count(start=1):
            train()
            if train_step == model_conf['max_step']:
                logging('-' * 100)
                logging('End of training')
                break
    except KeyboardInterrupt:
        logging('-' * 100)
        logging('Exiting from training early')

    create_dir_if_not_exists(model_conf['work_dir'])
    # Load the best saved model. Checkpoints are only written (to per-step
    # subdirectories) when validation loss improves, so the highest-numbered
    # train_step_* directory holds the best model.
    step_dirs = [d for d in os.listdir(model_conf['work_dir'])
                 if d.startswith('train_step_')]
    best_dir = max(step_dirs, key=lambda d: int(d.split('_')[-1]))
    with open(os.path.join(model_conf['work_dir'], best_dir, 'model.pt'),
              'rb') as f:
        model = torch.load(f)
    para_model = model.to(device)

    # Run on test data.
    test_loss = evaluate(te_iter)
    logging('=' * 100)
    logging('| End of training | test loss {:5.2f} | test ppl {:9.3f}'.format(
        test_loss, math.exp(test_loss)))
    logging('=' * 100)
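For context, `model_main` might be driven by a small entry point like the one below. The YAML file names come from the docstring of `model_main`; reading them with `yaml.safe_load` is an assumption about how the repo loads its configs.

import yaml

with open('conf/train_conf.yaml') as f:
    conf = yaml.safe_load(f)
with open('conf/drum_pitches.yaml') as f:
    pitch_classes = yaml.safe_load(f)
with open('conf/time_steps_vocab.yaml') as f:
    time_steps_vocab = yaml.safe_load(f)

model_main(conf, pitch_classes, time_steps_vocab)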