Example #1
def train():
    args = get_train_args()
    current_device = torch.device("cuda" if args.with_cuda else "cpu")
    criterion = nn.CrossEntropyLoss(ignore_index=0, reduction='sum')
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    if args.use_logging:
        log_filename = str(dt.datetime.now(tz=TIMEZONE))
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        logger = logging.getLogger()
        logger.addHandler(logging.FileHandler(log_filename + '.log', 'a'))
        print = logger.info

    model = pretrained_model(args.pretrained_model, args.num_epochs,
                             args.batch_size, args.max_sentence_length,
                             current_device, args.save_dir, args.patience)
    train_dataset = model.tokenize_data('./train_lyrics.jsonl',
                                        args.max_sentence_length)
    valid_dataset = model.tokenize_data('./valid_lyrics.jsonl',
                                        args.max_sentence_length, 'valid')
    optimizer = AdamW(model.model.parameters(),
                      lr=args.learning_rate,
                      eps=args.eps,
                      weight_decay=args.weight_decay)
    model.train(train_dataset, valid_dataset, optimizer, args.step_size,
                args.gamma)
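
All of these examples read their hyperparameters from a project-specific get_train_args(). As rough orientation only, a minimal argparse-based sketch that would satisfy the attributes accessed in Example #1 might look like the following; every flag name and default below is an assumption, not the original implementation:

import argparse


def get_train_args():
    # Hypothetical sketch: the flags mirror the attributes used in Example #1.
    parser = argparse.ArgumentParser(description='Training arguments')
    parser.add_argument('--with_cuda', action='store_true')
    parser.add_argument('--use_logging', action='store_true')
    parser.add_argument('--save_dir', type=str, default='./checkpoints')
    parser.add_argument('--pretrained_model', type=str, default='gpt2')
    parser.add_argument('--num_epochs', type=int, default=3)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--max_sentence_length', type=int, default=128)
    parser.add_argument('--patience', type=int, default=3)
    parser.add_argument('--learning_rate', type=float, default=5e-5)
    parser.add_argument('--eps', type=float, default=1e-8)
    parser.add_argument('--weight_decay', type=float, default=0.01)
    parser.add_argument('--step_size', type=int, default=1)
    parser.add_argument('--gamma', type=float, default=0.9)
    return parser.parse_args()
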
Example #2
def main():
    global logs_path

    args = get_train_args()

    train_id = args.train_id
    num_processes = args.num_processes
    num_timesteps = args.timesteps
    game = args.game
    level = args.level
    model_save_path = args.save_dir + train_id + ".pkl"
    logs_path = os.path.join(
        args.logs_dir, check_subfolder_availability(args.logs_dir, train_id))
    is_joint = args.joint
    load_model_path = args.load_model
    algo_name = args.algo
    policy_name = args.policy

    print(
        "\n\n===============================================================")
    print("Num processes:\t\t", num_processes)
    print("Train timesteps:\t", num_timesteps)
    print("Model save path:\t", model_save_path)
    print("Logs path:\t\t", logs_path)
    if not is_joint:
        print("Game:\t\t\t", game)
        print("Level:\t\t\t", level)
    else:
        print("Joint Training")
    if load_model_path:
        print("Loading model:\t\t", load_model_path)
    else:
        print("Creating new model")
    print(
        "===============================================================\n\n")

    train(
        train_id=train_id,
        game=game,
        level=level,
        num_processes=num_processes,
        num_timesteps=num_timesteps,
        algo_name=algo_name,
        policy_name=policy_name,
        is_joint=is_joint,
        model_save_path=model_save_path,
        load_model_path=load_model_path,
        logs_path=logs_path,
        hyper_opt=args.hyper_opt,
        short_life=args.short_life,
    )
Example #3
            preds, _ = util.convert_tokens(gold_dict,
                                           ids.tolist(),
                                           starts.tolist(),
                                           ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict


if __name__ == '__main__':
    args = get_train_args()
    if args.model_name == 'BiDAF':
        print('BiDAF Model')
        main(args)
    else:
        print('QANet Model')
        train_QaNet(args)

Example #4
                                 save_best_only=True,
                                 mode='auto')
    return [checkpoint]


def build_model(args):
    if args.model == "VGG19":
        model = VGG19(num_classes=7, input_shape=(48, 48, 3), dropout=0.5)

    else:
        model = build_resnet(args.model, input_shape=(48, 48, 3), classes=7)
    return model


if __name__ == "__main__":
    args_ = get_train_args()
    # Load data
    X_train, y_train, X_dev, y_dev, X_test, y_test, weight = data_loader(args_)

    # Optimizer, image augmentation and add class weight
    sgd = optimizer(args_)
    train_generator = train_generator()
    class_weights = class_weights(weight)
    checkpoint = callback(args_)
    model = build_model(args_)

    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit_generator(train_generator.flow(X_train, y_train,
                                             args_.batch_size),
Example #5
def main():
    opts = get_train_args()
    print("load data ...")
    data = ELMoDataset(opts)
    dataloader = DataLoader(data, shuffle=True, batch_size=opts.batch_size)
    valid_data = ELMoDataset(opts, split='validation')
    validloader = DataLoader(valid_data,
                             shuffle=True,
                             batch_size=opts.batch_size)
    print("load model ...")
    model = ELMo(opts, [data.word_vocab_size, data.char_size])
    optimizer = optim.Adam(model.parameters(), lr=opts.learning_rate)
    early_stopping = EarlyStopping(5, 0.0)
    if opts.multi:
        model = torch.nn.DataParallel(model)
    if opts.resume:
        print("resume training")
        model.load_state_dict(torch.load('model.pt'))
    model.cuda()
    loss = torch.nn.CrossEntropyLoss(ignore_index=0)
    train_batch_num = math.ceil(data.data_size / opts.batch_size)
    valid_batch_num = math.ceil(valid_data.data_size / opts.batch_size)

    print("start training")
    for epoch in range(1, opts.epochs + 1):
        print("epoch : " + str(epoch))
        model.train()
        epoch_start = time.time()
        epoch_loss = 0
        tot = 0
        for i, batch_data in enumerate(dataloader):
            optimizer.zero_grad()
            word_idx, char_idx = batch_data
            pred = model(word_idx, char_idx)
            train_loss = loss(pred, word_idx[:, 1:].reshape(-1))
            train_loss.backward()
            optimizer.step()
            batch_loss = train_loss.item()
            tot += word_idx.size(0)
            print('\r{:>10} epoch {} progress {} loss: {} perplexity : {}\n'.
                  format('', epoch, tot / data.__len__(), batch_loss,
                         2**batch_loss),
                  end='')
            epoch_loss += batch_loss
        end = time.time()
        time_used = end - epoch_start
        print('one epoch time: {} minutes'.format(time_used / 60))
        print('{} epochs'.format(epoch))
        print('epoch {} loss : {} perplexity : {}'.format(
            epoch, epoch_loss / train_batch_num,
            2**(epoch_loss / train_batch_num)))

        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for i, batch_data in enumerate(validloader):
                word_idx, char_idx = batch_data
                pred = model(word_idx, char_idx)
                batch_loss = loss(pred, word_idx[:, 1:].reshape(-1))
                valid_loss += batch_loss.item()

        print('valid loss : {} perplexity : {}'.format(
            valid_loss / valid_batch_num, 2**(valid_loss / valid_batch_num)))

        with open('log.txt', 'a') as f:
            f.write(
                str(epoch) + ' epoch :' + str(epoch_loss / train_batch_num) +
                ' ' + str(2**(epoch_loss / train_batch_num)) + '\n')
            f.write(
                str(epoch) + ' valid :' + str(valid_loss / valid_batch_num) +
                ' ' + str(2**(valid_loss / valid_batch_num)) + '\n')

        # check early stopping
        if early_stopping(valid_loss):
            print("[Training is early stopped in %d Epoch.]" % epoch)
            if not os.path.exists(opts.model_path):
                os.mkdir(opts.model_path)
            state_dict = model.state_dict()
            torch.save(state_dict,
                       os.path.abspath(opts.model_path + '/model.pt'))
            print("[Saved the trained model successfully.]")
            break

        if epoch % opts.save_step == 0:
            print("save model...")
            torch.save(model.state_dict(), 'model.pt')
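
Examples #5 and #17 construct EarlyStopping(5, 0.0) and call it with the validation loss, stopping once it returns True. The helper itself is not shown; a minimal sketch consistent with that usage (the class body here is an assumption) could be:

class EarlyStopping:
    # Hypothetical sketch: stop after `patience` checks without improvement.
    def __init__(self, patience, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float('inf')
        self.num_bad_checks = 0

    def __call__(self, val_loss):
        if val_loss < self.best - self.min_delta:
            self.best = val_loss
            self.num_bad_checks = 0
        else:
            self.num_bad_checks += 1
        return self.num_bad_checks >= self.patience
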
Example #6
def train():
    """
    Main script for training.
    """
    args, train_config = get_train_args()

    num_classes = args.num_classes

    # Communicator and Context
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"  # TODO: Hard coded!!!
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    # To utilize TensorCore in FP16
    channels = 4 if args.type_config == 'half' else 3

    from nnabla_ext.cuda import StreamEventHandler
    stream_event_handler = StreamEventHandler(int(comm.ctx.device_id))

    # Create data iterator
    data, vdata = get_data_iterators(args, comm, channels)

    # Create mixup object
    mixup = create_mixup_or_none(train_config.mixup, num_classes, comm)

    # Network for training
    t_model = get_model(args,
                        num_classes,
                        test=False,
                        channel_last=args.channel_last,
                        mixup=mixup,
                        channels=channels,
                        label_smoothing=train_config.label_smoothing,
                        ctx_for_loss=comm.ctx_float)

    # Network for validation
    v_model = get_model(args,
                        num_classes,
                        test=True,
                        channel_last=args.channel_last,
                        channels=channels)

    # Solver
    # lr will be set later
    solver = MomentumNoWeightDecayBn(1, train_config.momentum)
    solver.set_parameters(nn.get_parameters())

    # Learning rate scheduler
    learning_rate_scheduler = create_learning_rate_scheduler(train_config)

    # Monitors
    monitor = None
    if comm.rank == 0:
        if not os.path.isdir(args.monitor_path):
            os.makedirs(args.monitor_path)
        monitor = M.Monitor(args.monitor_path)

    # Epoch runner
    loss_scaling = train_config.loss_scaling if args.type_config == 'half' else 1
    train_epoch = EpochTrainer(t_model, solver, learning_rate_scheduler, data,
                               comm, monitor, loss_scaling,
                               train_config.weight_decay, stream_event_handler,
                               mixup)
    val_epoch = None
    if args.val_interval > 0:
        val_epoch = EpochValidator(v_model, vdata, comm, monitor,
                                   stream_event_handler)

    # Epoch loop
    for epoch in range(train_config.epochs):
        # Save parameters
        if epoch > 0 and epoch % (
                args.model_save_interval) == 0 and comm.rank == 0:
            nn.save_parameters(
                os.path.join(args.monitor_path, 'param_%03d.h5' % epoch))

        # Run validation for examples in an epoch
        if val_epoch is not None \
           and epoch > 0 \
           and epoch % args.val_interval == 0:
            val_epoch.run(epoch)

        # Run training for examples in an epoch
        train_epoch.run(epoch)

    # Run final validation
    if val_epoch is not None:
        val_epoch.run(train_config.epochs)

    # Save the final model.
    if comm.rank == 0:
        nn.save_parameters(
            os.path.join(args.monitor_path,
                         'param_%03d.h5' % (train_config.epochs)))
Example #7
def main():
    opts = get_train_args()
    print("load data ...")
    train_data = datasets.ImageFolder(
        root="data/train",
        transform=transforms.Compose([
            transforms.Resize((256, 256)),  # resize one axis to 128, and
            #transforms.CenterCrop(256),  # after making it square,
            transforms.ToTensor(),  # convert to Tensor (automatically normalized to 0~1)
            transforms.Normalize(
                (0.5, 0.5, 0.5),  # normalize to between -1 and 1
                (0.5, 0.5, 0.5)),  # because it computes (c - m)/s...
        ]))

    test_data = datasets.ImageFolder(
        root="data/test",
        transform=transforms.Compose([
            transforms.Resize((256, 256)),  # resize one axis to 128, and
            #transforms.CenterCrop(256),  # after making it square,
            transforms.ToTensor(),  # convert to Tensor (automatically normalized to 0~1)
            transforms.Normalize(
                (0.5, 0.5, 0.5),  # normalize to between -1 and 1
                (0.5, 0.5, 0.5)),  # because it computes (c - m)/s...
        ]))
    test_loader = DataLoader(test_data,
                             batch_size=opts.batch_size,
                             shuffle=False,
                             num_workers=opts.num_processes)

    classes = train_data.classes
    print(classes)

    print("load model ...")
    if opts.model == 'resnet':
        model = models.resnet50(progress=True)
        model.load_state_dict(torch.load('resnet_model.pt'))
    elif opts.model == 'vggnet':
        model = models.vgg13_bn(progress=True)
        model.load_state_dict(torch.load('vggnet_model.pt'))
    elif opts.model == 'googlenet':
        model = models.googlenet(progress=True)
        model.load_state_dict(torch.load('googlenet_model.pt'))
    elif opts.model == 'densenet':
        model = models.densenet121(progress=True)
        model.load_state_dict(torch.load('densenet_model.pt'))
    else:
        model = models.resnext50_32x4d(progress=True)
        model.load_state_dict(torch.load('resnext_model.pt'))
    print(opts.model)
    model.cuda()

    print("start inference")

    idx = 0
    with torch.no_grad():
        with open(opts.model + '_result.txt', 'a') as f:
            for i, (inputs, labels) in enumerate(test_loader):
                inputs = inputs.cuda()
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                for j, meta in enumerate(predicted):
                    predicted_class = classes[meta]
                    plant_class = predicted_class.split('_')[0]
                    disease_class = predicted_class.split('_')[1]
                    f.write(
                        str(test_loader.dataset.samples[idx][0].split('/')
                            [-1].split('.')[0]) + '\t' + plant_class + '\t' +
                        disease_class + '\n')
                    idx += 1
Example #8
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict


if __name__ == '__main__':
    parser = argparse.ArgumentParser('Test a trained model on SQuAD')
    parser.add_argument(
        '--use_adv',
        default="no",
        help='Whether or not to test/train on adversarial dataset.')
    args, unknown = parser.parse_known_args()
    use_adv = (args.use_adv == 'yes')

    main(get_train_args(use_adv))
Example #9
def main():
    args, log = get_train_args()
    log.info('[Program starts. Loading data...]')
    train, dev, dev_y, embedding, opt = load_data(vars(args))
    device, args.gpu_ids = util.get_available_devices()
    log.info('[Data loaded.]')
    if args.save_dawn_logs:
        dawn_start = datetime.now()
        log.info('dawn_entry: epoch\tf1Score\thours')
    
    model = DRQA(opt, embedding=embedding)
    model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(device)
    
    epoch_0 = 0
    best_val_score = 0.0
    # ema = util.EMA(model, args.ema_decay)
    
    ## get optimizer and scheduler
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adamax(parameters, weight_decay=opt['weight_decay'])
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)
    train_loss = AverageMeter()

    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(train, batch_size=args.batch_size, gpu=args.cuda)
        start = datetime.now()
        updates = 0
        model.train()
        for i, batch in enumerate(batches):
            # Transfer to GPU
            inputs = [e.to(device) for e in batch[:7]]
            target_s = batch[7].to(device)
            target_e = batch[8].to(device)
            optimizer.zero_grad()
            
            # Forward
            score_s,score_e = model(*inputs)
            loss = F.nll_loss(score_s, target_s) + F.nll_loss(score_e, target_e)
            train_loss.update(loss.item())
            
            # Backward pass, gradient clipping, and optimizer step
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt['grad_clipping'])
            optimizer.step()
            updates += 1

            if i % args.log_per_updates == 0:
                log.info('> epoch [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]'.format(
                    epoch, updates, train_loss.value,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
        log.debug('\n')
        # eval
        batches = BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda)
        predictions = []
        for i, batch in enumerate(batches):
            model.eval()
            inputs = [e.to(device) for e in batch[:7]]
            
            # Run forward
            with torch.no_grad():
                score_s, score_e = model(*inputs)
                
            # Get argmax test spans
            text = batch[-2]
            spans = batch[-1]
            pred = []
            max_len = opt['max_len'] or score_s.size(1)
            # Use a separate index j so the outer batch counter i is not shadowed
            for j in range(score_s.size(0)):
                scores = torch.ger(score_s[j], score_e[j])
                scores.triu_().tril_(max_len - 1)
                scores = scores.cpu().clone().numpy()
                s_idx, e_idx = np.unravel_index(np.argmax(scores), scores.shape)
                s_offset, e_offset = spans[j][s_idx][0], spans[j][e_idx][1]
                pred.append(text[j][s_offset:e_offset])
            predictions.extend(pred)
            log.debug('> evaluating [{}/{}]'.format(i, len(batches)))
        em, f1 = score(predictions, dev_y)
        log.warning("dev EM: {} F1: {}".format(em, f1))
        if args.save_dawn_logs:
            time_diff = datetime.now() - dawn_start
            log.warning("dawn_entry: {}\t{}\t{}".format(epoch, f1/100.0, float(time_diff.total_seconds() / 3600.0)))
        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            model_file = os.path.join(args.model_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
            params = {
                'state_dict': {
                    'network': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'updates': updates,
                    'loss': train_loss.state_dict()
                },
                'config': opt,
                'epoch': epoch,
                'em': em,
                'f1': f1,
                'best_eval': best_val_score,
                'random_state': random.getstate(),
                'torch_state': torch.random.get_rng_state(),
                #'torch_cuda_state': torch.cuda.get_rng_state()
            }
            torch.save(params, model_file)
            log.info('model saved to {}'.format(model_file))
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(
                    model_file,
                    os.path.join(args.model_dir, 'best_model.pt'))
                log.info('[new best model saved.]')
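
Example #9 tracks the running training loss with an AverageMeter (update(), .value, .state_dict()), and Examples #11 and #16 use a similar utils.AverageMeter with reset()/update()/avg. The class itself is not shown; a minimal sketch of the common pattern (attribute names are assumptions, and the repositories above differ slightly) might be:

class AverageMeter:
    # Hypothetical sketch of a running-average tracker.
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n

    @property
    def avg(self):
        # Mean of all values seen since the last reset.
        return self.sum / max(self.count, 1)
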
Example #10
def train():
    """
    Main script for training.
    """
    args, train_config = get_train_args()

    num_classes = args.num_classes

    # Communicator and Context
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"  # TODO: Hard coded!!!
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    # To utilize TensorCore in FP16
    channels = 4 if args.type_config == 'half' else 3

    from nnabla_ext.cuda import StreamEventHandler
    stream_event_handler = StreamEventHandler(int(comm.ctx.device_id))

    # Create data iterator
    data, vdata = get_data_iterators(args, comm, channels, args.spatial_size)

    # Create mixup object
    mixup = create_mixup_or_none(train_config.mixup, num_classes, comm)

    # Load model for fine-tuning
    if args.finetune:
        assert args.model_load_path is not None, "`--model-load-path` must be set in finetuning mode."
        if comm.rank == 0:
            logger.info(f'Loading parameter file `{args.model_load_path}.`')
            logger.info(
                "NOTE: It doesn't verify the compatibility between the parameter file and the architecture you choose."
            )
        nn.load_parameters(args.model_load_path)
        # Strong assumption that the last two parameters are the classification layer.
        param_keys = list(nn.get_parameters().keys())
        bkey = param_keys[-1]
        wkey = param_keys[-2]
        if comm.rank == 0:
            logger.info(
                f'Removing the last two parameters for fine-tuning, under the assumption that they correspond to the final affine layer parameters: `{wkey}` and `{bkey}`.'
            )
        nn.parameter.pop_parameter(wkey)
        nn.parameter.pop_parameter(bkey)

    # Network for training
    t_model = get_model(args,
                        num_classes,
                        test=False,
                        channel_last=args.channel_last,
                        mixup=mixup,
                        channels=channels,
                        spatial_size=args.spatial_size,
                        label_smoothing=train_config.label_smoothing,
                        ctx_for_loss=comm.ctx_float)

    # Network for validation
    v_model = get_model(args,
                        num_classes,
                        test=True,
                        channel_last=args.channel_last,
                        spatial_size=args.spatial_size,
                        channels=channels)

    # Solver
    # lr will be set later
    solver = MomentumNoWeightDecayBn(1, train_config.momentum)
    solver.set_parameters(nn.get_parameters())

    # Learning rate scheduler
    learning_rate_scheduler = create_learning_rate_scheduler(train_config)

    # Monitors
    monitor = None
    if comm.rank == 0:
        if not os.path.isdir(args.monitor_path):
            os.makedirs(args.monitor_path)
        monitor = M.Monitor(args.monitor_path)
        save_args(args, train_config)

    # Epoch runner
    loss_scaling = train_config.loss_scaling if args.type_config == 'half' else 1
    train_epoch = EpochTrainer(t_model, solver, learning_rate_scheduler, data,
                               comm, monitor, loss_scaling,
                               train_config.weight_decay, stream_event_handler,
                               mixup)
    val_epoch = None
    if args.val_interval > 0:
        val_epoch = EpochValidator(v_model, vdata, comm, monitor,
                                   stream_event_handler)

    # Epoch loop
    for epoch in range(train_config.epochs):
        # Save parameters
        if epoch > 0 and epoch % (
                args.model_save_interval) == 0 and comm.rank == 0:
            nn.save_parameters(
                os.path.join(args.monitor_path, 'param_%03d.h5' % epoch))

        # Run validation for examples in an epoch
        if val_epoch is not None \
           and epoch > 0 \
           and epoch % args.val_interval == 0:
            val_epoch.run(epoch)

        # Run training for examples in an epoch
        train_epoch.run(epoch)

    # Run final validation
    if val_epoch is not None:
        val_epoch.run(train_config.epochs)

    # Save the final model.
    if comm.rank == 0:
        nn.save_parameters(
            os.path.join(args.monitor_path,
                         'param_%03d.h5' % (train_config.epochs)))
Example #11
def train():
    # Check NNabla version
    if utils.get_nnabla_version_integer() < 11900:
        raise ValueError(
            'Please update the nnabla version to v1.19.0 or latest version since memory efficiency of core engine is improved in v1.19.0'
        )

    parser, args = get_train_args()

    # Get context.
    ctx = get_extension_context(args.context, device_id=args.device_id)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    ext = import_extension_module(args.context)

    # Monitors
    # setting up monitors for logging
    monitor_path = args.output
    monitor = Monitor(monitor_path)

    monitor_best_epoch = MonitorSeries('Best epoch', monitor, interval=1)
    monitor_traing_loss = MonitorSeries('Training loss', monitor, interval=1)
    monitor_validation_loss = MonitorSeries('Validation loss',
                                            monitor,
                                            interval=1)
    monitor_lr = MonitorSeries('learning rate', monitor, interval=1)
    monitor_time = MonitorTimeElapsed("training time per iteration",
                                      monitor,
                                      interval=1)

    if comm.rank == 0:
        if not os.path.isdir(args.output):
            os.makedirs(args.output)

    # Initialize DataIterator for MUSDB18.
    train_source, valid_source, args = load_datasources(parser, args)

    train_iter = data_iterator(
        train_source,
        args.batch_size,
        RandomState(args.seed),
        with_memory_cache=False,
    )

    valid_iter = data_iterator(
        valid_source,
        1,
        RandomState(args.seed),
        with_memory_cache=False,
    )

    if comm.n_procs > 1:
        train_iter = train_iter.slice(rng=None,
                                      num_of_slices=comm.n_procs,
                                      slice_pos=comm.rank)

        valid_iter = valid_iter.slice(rng=None,
                                      num_of_slices=comm.n_procs,
                                      slice_pos=comm.rank)

    # Calculate maxiter per GPU device.
    # Change max_iter, learning_rate and weight_decay according to the number of GPU devices for multi-GPU training.
    default_batch_size = 16
    train_scale_factor = (comm.n_procs * args.batch_size) / default_batch_size
    max_iter = int((train_source._size // args.batch_size) // comm.n_procs)
    weight_decay = args.weight_decay * train_scale_factor
    args.lr = args.lr * train_scale_factor

    # Calculate the statistics (mean and variance) of the dataset
    scaler_mean, scaler_std = utils.get_statistics(args, train_source)

    # clear cache memory
    ext.clear_memory_cache()

    max_bin = utils.bandwidth_to_max_bin(train_source.sample_rate, args.nfft,
                                         args.bandwidth)

    # Get X-UMX/UMX computation graph and variables as namedtuple
    model = get_model(args, scaler_mean, scaler_std, max_bin=max_bin)

    # Create Solver and set parameters.
    solver = S.Adam(args.lr)
    solver.set_parameters(nn.get_parameters())

    # Initialize Early Stopping
    es = utils.EarlyStopping(patience=args.patience)

    # Initialize LR Scheduler (ReduceLROnPlateau)
    lr_scheduler = ReduceLROnPlateau(lr=args.lr,
                                     factor=args.lr_decay_gamma,
                                     patience=args.lr_decay_patience)
    best_epoch = 0

    # AverageMeter for mean loss calculation over the epoch
    losses = utils.AverageMeter()

    # Training loop.
    for epoch in trange(args.epochs):
        # TRAINING
        losses.reset()
        for batch in range(max_iter):
            model.mixture_audio.d, model.target_audio.d = train_iter.next()
            solver.zero_grad()
            model.loss.forward(clear_no_need_grad=True)
            if comm.n_procs > 1:
                all_reduce_callback = comm.get_all_reduce_callback()
                model.loss.backward(clear_buffer=True,
                                    communicator_callbacks=all_reduce_callback)
            else:
                model.loss.backward(clear_buffer=True)
            solver.weight_decay(weight_decay)
            solver.update()
            losses.update(model.loss.d.copy(), args.batch_size)
        training_loss = losses.get_avg()

        # clear cache memory
        ext.clear_memory_cache()

        # VALIDATION
        losses.reset()
        for batch in range(int(valid_source._size // comm.n_procs)):
            x, y = valid_iter.next()
            dur = int(valid_source.sample_rate * args.valid_dur)
            sp, cnt = 0, 0
            loss_tmp = nn.NdArray()
            loss_tmp.zero()
            while 1:
                model.vmixture_audio.d = x[Ellipsis, sp:sp + dur]
                model.vtarget_audio.d = y[Ellipsis, sp:sp + dur]
                model.vloss.forward(clear_no_need_grad=True)
                cnt += 1
                sp += dur
                loss_tmp += model.vloss.data
                if x[Ellipsis,
                     sp:sp + dur].shape[-1] < dur or x.shape[-1] == cnt * dur:
                    break
            loss_tmp = loss_tmp / cnt
            if comm.n_procs > 1:
                comm.all_reduce(loss_tmp, division=True, inplace=True)
            losses.update(loss_tmp.data.copy(), 1)
        validation_loss = losses.get_avg()

        # clear cache memory
        ext.clear_memory_cache()

        lr = lr_scheduler.update_lr(validation_loss, epoch=epoch)
        solver.set_learning_rate(lr)
        stop = es.step(validation_loss)

        if comm.rank == 0:
            monitor_best_epoch.add(epoch, best_epoch)
            monitor_traing_loss.add(epoch, training_loss)
            monitor_validation_loss.add(epoch, validation_loss)
            monitor_lr.add(epoch, lr)
            monitor_time.add(epoch)

            if validation_loss == es.best:
                best_epoch = epoch
                # save best model
                if args.umx_train:
                    nn.save_parameters(os.path.join(args.output,
                                                    'best_umx.h5'))
                else:
                    nn.save_parameters(
                        os.path.join(args.output, 'best_xumx.h5'))

        if args.umx_train:
            # Early stopping for UMX after `args.patience` (140) epochs
            if stop:
                print("Apply Early Stopping")
                break
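
Examples #11 and #16 drive the learning rate with a ReduceLROnPlateau(lr, factor, patience) object whose update_lr(validation_loss, epoch=...) returns the (possibly decayed) rate. That scheduler is project code, not shown here; a minimal sketch consistent with the call pattern (an assumption, not the actual utility) could be:

class ReduceLROnPlateau:
    # Hypothetical sketch: decay lr by `factor` when the metric plateaus.
    def __init__(self, lr, factor=0.1, patience=10):
        self.lr = lr
        self.factor = factor
        self.patience = patience
        self.best = float('inf')
        self.num_bad_epochs = 0

    def update_lr(self, metric, epoch=None):
        if metric < self.best:
            self.best = metric
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1
        if self.num_bad_epochs > self.patience:
            self.lr *= self.factor
            self.num_bad_epochs = 0
        return self.lr
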
Example #12
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from args import get_train_args

config = get_train_args()
D = config.connector_dim
Nh = config.num_heads
Dword = config.glove_dim
Dchar = config.char_dim
batch_size = config.batch_size
dropout = config.dropout
dropout_char = config.dropout_char

Lc = config.para_limit
Lq = config.ques_limit


def mask_logits(inputs, mask):
    mask = mask.type(torch.float32)
    return inputs + (-1e30) * (1 - mask)


class Initialized_Conv1d(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 relu=False,
                 stride=1,
Example #13
def train():
    # Check NNabla version
    if get_nnabla_version_integer() < 11900:
        raise ValueError(
            'Please update the nnabla version to v1.19.0 or latest version since memory efficiency of core engine is improved in v1.19.0'
        )

    parser, args = get_train_args()

    # Get context.
    ctx = get_extension_context(args.context, device_id=args.device_id)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    ext = import_extension_module(args.context)

    # Monitors
    # setting up monitors for logging
    monitor_path = os.path.join(args.output, args.target)
    monitor = Monitor(monitor_path)

    monitor_traing_loss = MonitorSeries('Training loss', monitor, interval=1)
    monitor_lr = MonitorSeries('learning rate', monitor, interval=1)
    monitor_time = MonitorTimeElapsed("training time per epoch",
                                      monitor,
                                      interval=1)

    if comm.rank == 0:
        if not os.path.isdir(args.output):
            os.makedirs(args.output)

    # Initialize DataIterator for MUSDB.
    train_source, args = load_datasources(parser, args)

    train_iter = data_iterator(train_source,
                               args.batch_size,
                               RandomState(args.seed),
                               with_memory_cache=False)

    if comm.n_procs > 1:
        train_iter = train_iter.slice(rng=None,
                                      num_of_slices=comm.n_procs,
                                      slice_pos=comm.rank)

    # Change max_iter, learning_rate and weight_decay according to the number of GPU devices for multi-GPU training.
    default_batch_size = 6
    train_scale_factor = (comm.n_procs * args.batch_size) / default_batch_size

    max_iter = int(train_source._size // (comm.n_procs * args.batch_size))
    weight_decay = args.weight_decay * train_scale_factor
    args.lr = args.lr * train_scale_factor

    print(f"max_iter per GPU-device:{max_iter}")

    # Calculate the statistics (mean and variance) of the dataset
    scaler_mean, scaler_std = get_statistics(args, train_source)

    # clear cache memory
    ext.clear_memory_cache()

    # Create input variables.
    mixture_audio = nn.Variable([args.batch_size] +
                                list(train_source._get_data(0)[0].shape))
    target_audio = nn.Variable([args.batch_size] +
                               list(train_source._get_data(0)[1].shape))

    with open(f"./configs/{args.target}.yaml") as file:
        # Load target-specific hyperparameters
        hparams = yaml.load(file, Loader=yaml.FullLoader)

    # create training graph
    mix_spec = spectogram(*stft(mixture_audio,
                                n_fft=hparams['fft_size'],
                                n_hop=hparams['hop_size'],
                                patch_length=256),
                          mono=(hparams['n_channels'] == 1))
    target_spec = spectogram(*stft(target_audio,
                                   n_fft=hparams['fft_size'],
                                   n_hop=hparams['hop_size'],
                                   patch_length=256),
                             mono=(hparams['n_channels'] == 1))

    with nn.parameter_scope(args.target):
        d3net = D3NetMSS(hparams,
                         comm=comm.comm,
                         input_mean=scaler_mean,
                         input_scale=scaler_std,
                         init_method='xavier')
        pred_spec = d3net(mix_spec)

    loss = F.mean(F.squared_error(pred_spec, target_spec))
    loss.persistent = True

    # Create Solver and set parameters.
    solver = S.Adam(args.lr)
    solver.set_parameters(nn.get_parameters())

    # Initialize LR Scheduler (AnnealingScheduler)
    lr_scheduler = AnnealingScheduler(init_lr=args.lr,
                                      anneal_steps=[40],
                                      anneal_factor=0.1)

    # AverageMeter for mean loss calculation over the epoch
    losses = AverageMeter()

    for epoch in range(args.epochs):
        # TRAINING
        losses.reset()
        for batch in range(max_iter):
            mixture_audio.d, target_audio.d = train_iter.next()
            solver.zero_grad()
            loss.forward(clear_no_need_grad=True)
            if comm.n_procs > 1:
                all_reduce_callback = comm.get_all_reduce_callback()
                loss.backward(clear_buffer=True,
                              communicator_callbacks=all_reduce_callback)
            else:
                loss.backward(clear_buffer=True)
            solver.weight_decay(weight_decay)
            solver.update()
            losses.update(loss.d.copy(), args.batch_size)
        training_loss = losses.get_avg()

        # clear cache memory
        ext.clear_memory_cache()

        lr = lr_scheduler.get_learning_rate(epoch)
        solver.set_learning_rate(lr)

        if comm.rank == 0:
            monitor_traing_loss.add(epoch, training_loss)
            monitor_lr.add(epoch, lr)
            monitor_time.add(epoch)

            # save intermediate weights
            nn.save_parameters(f"{os.path.join(args.output, args.target)}.h5")

    if comm.rank == 0:
        # save final weights
        nn.save_parameters(
            f"{os.path.join(args.output, args.target)}_final.h5")
Example #14
def main():
    opts = get_train_args()
    print("load data ...")
    data = DataSet('data/modified_triples.txt')
    dataloader = DataLoader(data, shuffle=True, batch_size=opts.batch_size)
    print("load model ...")
    if opts.model_type == 'transe':
        model = TransE(opts, data.ent_tot, data.rel_tot)
    elif opts.model_type == "distmult":
        model = DistMult(opts, data.ent_tot, data.rel_tot)
    if opts.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=opts.lr)
    elif opts.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=opts.lr)
    model.cuda()
    model.relation_normalize()
    loss = torch.nn.MarginRankingLoss(margin=opts.margin)

    print("start training")
    for epoch in range(1, opts.epochs + 1):
        print("epoch : " + str(epoch))
        model.train()
        epoch_start = time.time()
        epoch_loss = 0
        tot = 0
        cnt = 0
        for i, batch_data in enumerate(dataloader):
            optimizer.zero_grad()
            batch_h, batch_r, batch_t, batch_n = batch_data
            batch_h = torch.LongTensor(batch_h).cuda()
            batch_r = torch.LongTensor(batch_r).cuda()
            batch_t = torch.LongTensor(batch_t).cuda()
            batch_n = torch.LongTensor(batch_n).cuda()
            pos_score, neg_score, dist = model.forward(batch_h, batch_r,
                                                       batch_t, batch_n)
            pos_score = pos_score.cpu()
            neg_score = neg_score.cpu()
            dist = dist.cpu()
            train_loss = loss(pos_score, neg_score,
                              torch.ones(pos_score.size(-1))) + dist
            train_loss.backward()
            optimizer.step()
            batch_loss = train_loss.item()  # detach so the running sum does not hold the graph
            epoch_loss += batch_loss
            batch_size = batch_h.size(0)
            tot += batch_size
            cnt += 1
            print('\r{:>10} epoch {} progress {} loss: {}\n'.format(
                '', epoch, tot / data.__len__(), train_loss),
                  end='')
        end = time.time()
        time_used = end - epoch_start
        epoch_loss /= cnt
        print('one epoch time: {} minutes'.format(time_used / 60))
        print('{} epochs'.format(epoch))
        print('epoch {} loss: {}'.format(epoch, epoch_loss))

        if epoch % opts.save_step == 0:
            print("save model...")
            model.entity_normalize()
            torch.save(model.state_dict(), 'model.pt')

    print("save model...")
    model.entity_normalize()
    torch.save(model.state_dict(), 'model.pt')
    print("[Saving embeddings of whole entities & relations...]")
    save_embeddings(model, opts, data.id2ent, data.id2rel)
    print("[Embedding results are saved successfully.]")
Example #15
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)






if __name__ == '__main__':
    main(get_train_args())
Example #16
def train():
    # Check NNabla version
    if utils.get_nnabla_version_integer() < 11900:
        raise ValueError(
            'Please update the nnabla version to v1.19.0 or latest version since memory efficiency of core engine is improved in v1.19.0'
        )

    parser, args = get_train_args()

    # Get context.
    ctx = get_extension_context(args.context, device_id=args.device_id)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    ext = import_extension_module(args.context)

    # Monitors
    # setting up monitors for logging
    monitor_path = args.output
    monitor = Monitor(monitor_path)

    monitor_best_epoch = MonitorSeries('Best epoch', monitor, interval=1)
    monitor_traing_loss = MonitorSeries('Training loss', monitor, interval=1)
    monitor_validation_loss = MonitorSeries('Validation loss',
                                            monitor,
                                            interval=1)
    monitor_lr = MonitorSeries('learning rate', monitor, interval=1)
    monitor_time = MonitorTimeElapsed("training time per iteration",
                                      monitor,
                                      interval=1)

    if comm.rank == 0:
        print("Mixing coef. is {}, i.e., MDL = {}*TD-Loss + FD-Loss".format(
            args.mcoef, args.mcoef))
        if not os.path.isdir(args.output):
            os.makedirs(args.output)

    # Initialize DataIterator for MUSDB.
    train_source, valid_source, args = load_datasources(parser, args)

    train_iter = data_iterator(train_source,
                               args.batch_size,
                               RandomState(args.seed),
                               with_memory_cache=False,
                               with_file_cache=False)

    valid_iter = data_iterator(valid_source,
                               1,
                               RandomState(args.seed),
                               with_memory_cache=False,
                               with_file_cache=False)

    if comm.n_procs > 1:
        train_iter = train_iter.slice(rng=None,
                                      num_of_slices=comm.n_procs,
                                      slice_pos=comm.rank)

        valid_iter = valid_iter.slice(rng=None,
                                      num_of_slices=comm.n_procs,
                                      slice_pos=comm.rank)

    # Calculate maxiter per GPU device.
    max_iter = int((train_source._size // args.batch_size) // comm.n_procs)
    weight_decay = args.weight_decay * comm.n_procs

    print("max_iter", max_iter)

    # Calculate the statistics (mean and variance) of the dataset
    scaler_mean, scaler_std = utils.get_statistics(args, train_source)

    max_bin = utils.bandwidth_to_max_bin(train_source.sample_rate, args.nfft,
                                         args.bandwidth)

    unmix = OpenUnmix_CrossNet(input_mean=scaler_mean,
                               input_scale=scaler_std,
                               nb_channels=args.nb_channels,
                               hidden_size=args.hidden_size,
                               n_fft=args.nfft,
                               n_hop=args.nhop,
                               max_bin=max_bin)

    # Create input variables.
    mixture_audio = nn.Variable([args.batch_size] +
                                list(train_source._get_data(0)[0].shape))
    target_audio = nn.Variable([args.batch_size] +
                               list(train_source._get_data(0)[1].shape))

    vmixture_audio = nn.Variable(
        [1] + [2, valid_source.sample_rate * args.valid_dur])
    vtarget_audio = nn.Variable([1] +
                                [8, valid_source.sample_rate * args.valid_dur])

    # create training graph
    mix_spec, M_hat, pred = unmix(mixture_audio)
    Y = Spectrogram(*STFT(target_audio, n_fft=unmix.n_fft, n_hop=unmix.n_hop),
                    mono=(unmix.nb_channels == 1))
    loss_f = mse_loss(mix_spec, M_hat, Y)
    loss_t = sdr_loss(mixture_audio, pred, target_audio)
    loss = args.mcoef * loss_t + loss_f
    loss.persistent = True

    # Create Solver and set parameters.
    solver = S.Adam(args.lr)
    solver.set_parameters(nn.get_parameters())

    # create validation graph
    vmix_spec, vM_hat, vpred = unmix(vmixture_audio, test=True)
    vY = Spectrogram(*STFT(vtarget_audio, n_fft=unmix.n_fft,
                           n_hop=unmix.n_hop),
                     mono=(unmix.nb_channels == 1))
    vloss_f = mse_loss(vmix_spec, vM_hat, vY)
    vloss_t = sdr_loss(vmixture_audio, vpred, vtarget_audio)
    vloss = args.mcoef * vloss_t + vloss_f
    vloss.persistent = True

    # Initialize Early Stopping
    es = utils.EarlyStopping(patience=args.patience)

    # Initialize LR Scheduler (ReduceLROnPlateau)
    lr_scheduler = ReduceLROnPlateau(lr=args.lr,
                                     factor=args.lr_decay_gamma,
                                     patience=args.lr_decay_patience)
    best_epoch = 0

    # Training loop.
    for epoch in trange(args.epochs):
        # TRAINING
        losses = utils.AverageMeter()
        for batch in range(max_iter):
            mixture_audio.d, target_audio.d = train_iter.next()
            solver.zero_grad()
            loss.forward(clear_no_need_grad=True)
            if comm.n_procs > 1:
                all_reduce_callback = comm.get_all_reduce_callback()
                loss.backward(clear_buffer=True,
                              communicator_callbacks=all_reduce_callback)
            else:
                loss.backward(clear_buffer=True)
            solver.weight_decay(weight_decay)
            solver.update()
            losses.update(loss.d.copy(), args.batch_size)
        training_loss = losses.avg

        # clear cache memory
        ext.clear_memory_cache()

        # VALIDATION
        vlosses = utils.AverageMeter()
        for batch in range(int(valid_source._size // comm.n_procs)):
            x, y = valid_iter.next()
            dur = int(valid_source.sample_rate * args.valid_dur)
            sp, cnt = 0, 0
            loss_tmp = nn.NdArray()
            loss_tmp.zero()
            while 1:
                vmixture_audio.d = x[Ellipsis, sp:sp + dur]
                vtarget_audio.d = y[Ellipsis, sp:sp + dur]
                vloss.forward(clear_no_need_grad=True)
                cnt += 1
                sp += dur
                loss_tmp += vloss.data
                if x[Ellipsis,
                     sp:sp + dur].shape[-1] < dur or x.shape[-1] == cnt * dur:
                    break
            loss_tmp = loss_tmp / cnt
            if comm.n_procs > 1:
                comm.all_reduce(loss_tmp, division=True, inplace=True)
            vlosses.update(loss_tmp.data.copy(), 1)
        validation_loss = vlosses.avg

        # clear cache memory
        ext.clear_memory_cache()

        lr = lr_scheduler.update_lr(validation_loss, epoch=epoch)
        solver.set_learning_rate(lr)
        stop = es.step(validation_loss)

        if comm.rank == 0:
            monitor_best_epoch.add(epoch, best_epoch)
            monitor_traing_loss.add(epoch, training_loss)
            monitor_validation_loss.add(epoch, validation_loss)
            monitor_lr.add(epoch, lr)
            monitor_time.add(epoch)

            if validation_loss == es.best:
                # save best model
                nn.save_parameters(os.path.join(args.output, 'best_xumx.h5'))
                best_epoch = epoch

        if stop:
            print("Apply Early Stopping")
            break
Example #17
def main():
    early_stopping = EarlyStopping(5, 0.0)
    opts = get_train_args()
    print("load data ...")
    train_data = datasets.ImageFolder(
        root="data/train",
        transform=transforms.Compose([
            transforms.Resize((256, 256)),  # resize one axis to 128, and
            #transforms.CenterCrop(256),  # after making it square,
            transforms.ToTensor(),  # convert to Tensor (automatically normalized to 0~1)
            transforms.Normalize(
                (0.5, 0.5, 0.5),  # normalize to between -1 and 1
                (0.5, 0.5, 0.5)),  # because it computes (c - m)/s...
        ]))

    valid_data = datasets.ImageFolder(
        root="data/val",
        transform=transforms.Compose([
            transforms.Resize((256, 256)),  # resize one axis to 128, and
            #transforms.CenterCrop(128),  # after making it square,
            transforms.ToTensor(),  # convert to Tensor (automatically normalized to 0~1)
            transforms.Normalize(
                (0.5, 0.5, 0.5),  # normalize to between -1 and 1
                (0.5, 0.5, 0.5)),  # because it computes (c - m)/s...
        ]))
    train_loader = DataLoader(train_data,
                              batch_size=opts.batch_size,
                              shuffle=True,
                              num_workers=opts.num_processes)

    valid_loader = DataLoader(valid_data,
                              batch_size=opts.batch_size,
                              shuffle=True,
                              num_workers=opts.num_processes)

    classes = train_data.classes
    print(classes)

    print("load model ...")
    if opts.model == 'resnet':
        model = models.resnet50(progress=True)
    elif opts.model == 'vggnet':
        model = models.vgg13_bn(progress=True)
    elif opts.model == 'googlenet':
        model = models.googlenet(progress=True)
    elif opts.model == 'densenet':
        model = models.densenet121(progress=True)
    else:
        model = models.resnext50_32x4d(progress=True)
    print(opts.model)
    optimizer = optim.Adam(model.parameters(), lr=opts.lr)
    model.cuda()
    loss = torch.nn.CrossEntropyLoss()
    batch_nums = np.round(14400 / opts.batch_size)
    valid_nums = np.round(1600 / opts.batch_size)

    print("start training")
    for epoch in range(1, opts.epochs + 1):
        print("epoch : " + str(epoch))
        model.train()
        epoch_loss = 0
        tot = 0
        cnt = 0
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            inputs, labels = inputs.cuda(), labels.cuda()
            train_loss = loss(model(inputs), labels)
            train_loss.backward()
            optimizer.step()
            batch_loss = train_loss.item()
            epoch_loss += batch_loss
            cnt += 1
            tot += inputs.size(0)  # advance the sample counter so the printed progress ratio moves
            print('\r{:>10} epoch {} progress {} loss: {}\n'.format(
                '', epoch, tot / 14400, train_loss))

        with open(str(opts.model) + ' log.txt', 'a') as f:
            f.write(
                str(epoch) + ' loss : ' + str(epoch_loss / batch_nums) + '\n')
        model.eval()
        valid_loss = 0

        total = 0
        correct = 0
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(valid_loader):
                inputs, labels = inputs.cuda(), labels.cuda()
                outputs = model(inputs)
                batch_loss = loss(outputs, labels)
                batch_loss = batch_loss.item()
                valid_loss += batch_loss
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

            acc = 100 * correct / total

        with open(str(opts.model) + ' log.txt', 'a') as f:
            f.write(
                str(epoch) + ' loss : ' + str(valid_loss / valid_nums) +
                ' acc : ' + str(acc) + '\n')

        # check early stopping
        if early_stopping(valid_loss):
            print("[Training is early stopped in %d Epoch.]" % epoch)
            torch.save(model.state_dict(), str(opts.model) + '_model.pt')
            print("[Saved the trained model successfully.]")
            break

        if epoch % opts.save_step == 0:
            print("save model...")
            torch.save(model.state_dict(), str(opts.model) + '_model.pt')

    print("save model...")
    torch.save(model.state_dict(), str(opts.model) + '_model.pt')