def train_fold(save_dir, train_folds, val_folds, folds_data):
    train_dataset = StackingDataset(folds_data, train_folds,
                                    get_transforms(True), DATASET_SIZE)
    val_dataset = StackingDataset(folds_data, val_folds, get_transforms(False))

    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE * 2,
                            shuffle=False,
                            num_workers=NUM_WORKERS)

    model = StackingModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=1),
        ReduceLROnPlateau(monitor='val_lwlrap',
                          patience=RS_PARAMS['patience'],
                          factor=RS_PARAMS['factor'],
                          min_lr=1e-8),
        EarlyStopping(monitor='val_lwlrap', patience=30),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])
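Several snippets below reload the best checkpoint after training (e.g. get_best_model_path followed by load_model in the birdsong example). A minimal sketch of such a helper, assuming the model-{epoch:03d}-{value:.6f}.pth naming that the checkpoint tests below exercise; the parsing is illustrative, not part of the argus API:

from pathlib import Path


def get_best_model_path(dir_path, minimize=False):
    # Pick the checkpoint whose filename embeds the best monitored value,
    # e.g. 'model-012-0.123456.pth' -> 0.123456 (assumed naming scheme).
    scored = []
    for path in Path(dir_path).glob('*.pth'):
        try:
            scored.append((float(path.stem.split('-')[-1]), path))
        except ValueError:
            continue  # skip files like 'model-last.pth'
    if not scored:
        return None
    # Highest value wins for metrics like lwlrap; lowest for losses.
    return sorted(scored, reverse=not minimize)[0][1]


# from argus import load_model
# model = load_model(get_best_model_path(save_dir))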
Example #2
def train_fold(save_dir, train_folds, val_folds, model_path):
    depth_trns = SimpleDepthTransform()
    train_trns = SaltTransform(IMAGE_SIZE, True, 'crop')
    val_trns = SaltTransform(IMAGE_SIZE, False, 'crop')
    train_dataset = SaltDataset(TRAIN_FOLDS_PATH, train_folds, train_trns,
                                depth_trns)
    val_dataset = SaltDataset(TRAIN_FOLDS_PATH, val_folds, val_trns,
                              depth_trns)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=8)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=8)

    model = load_model(model_path)
    model.loss.lovasz_weight = 0.5
    model.loss.prob_weight = 0.5

    callbacks = [
        MonitorCheckpoint(save_dir,
                          monitor='val_crop_iout',
                          max_saves=3,
                          copy_last=False),
        LoggingToFile(os.path.join(save_dir, 'log.txt')),
        update_lr,
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=500,
              callbacks=callbacks,
              metrics=['crop_iout'])
Example #3
    def test_monitor_checkpoint(self, tmpdir, test_engine, optimizer_state):
        path = Path(tmpdir.join("path/to/monitor_checkpoints/"))
        checkpoint = MonitorCheckpoint(dir_path=path,
                                       max_saves=3,
                                       monitor='val_loss',
                                       optimizer_state=optimizer_state)
        checkpoint.attach(test_engine)
        checkpoint.start(test_engine.state)

        decreasing_seq = list(range(30))[::-1]
        for i in range(1, len(decreasing_seq), 2):
            decreasing_seq[i] = 100

        for epoch, val_loss in enumerate(decreasing_seq, 1):
            checkpoint_step_epoch(checkpoint, test_engine, epoch, val_loss)
            expected_path = path / f'model-{epoch:03d}-{val_loss:.6f}.pth'
            if val_loss != 100:
                assert check_checkpoint(path,
                                        test_engine,
                                        epoch,
                                        val_loss,
                                        optimizer_state=optimizer_state)
            else:
                assert not expected_path.exists()

        assert len(list(path.glob('*.pth'))) == 3
Example #4
def train_fold(base_model_path, save_dir, train_folds, val_folds, folds_data,
               noisy_data):
    train_transform = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data,
                                       train_folds,
                                       transform=train_transform,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transform,
                                          mixer=mixer)
    train_dataset = RandomDataset([noisy_dataset, curated_dataset],
                                  p=[NOISY_PROB, 1 - NOISY_PROB],
                                  size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE * 2,
                            shuffle=False,
                            num_workers=NUM_WORKERS)

    model = load_model(base_model_path, device=DEVICE)
    model.set_lr(BASE_LR)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=3),
        CosineAnnealing(T_0=10, T_mult=2, eta_min=0.00001),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=150,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])
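With T_0=10 and T_mult=2, the CosineAnnealing warm restarts above double in length each cycle, so within max_epochs=150 the learning rate resets at epochs 10, 30, 70, and 150. A quick check of that arithmetic, assuming SGDR-style cycle lengths T_i = T_0 * T_mult**i:

# Cumulative restart epochs for SGDR-style warm restarts.
T_0, T_mult, max_epochs = 10, 2, 150

restart_epochs, epoch, cycle = [], 0, T_0
while epoch + cycle <= max_epochs:
    epoch += cycle
    restart_epochs.append(epoch)
    cycle *= T_mult

print(restart_epochs)  # [10, 30, 70, 150]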
Example #5
    def test_checkpoint_exceptions(self, tmpdir, test_engine, recwarn):
        path = Path(tmpdir.join("path/to/exception_checkpoints/"))
        with pytest.raises(ValueError):
            Checkpoint(dir_path=path, max_saves=-3)

        path.mkdir(parents=True)
        Checkpoint(dir_path=path)
        assert len(recwarn) == 1
        warn = recwarn.pop()
        assert f"Directory '{path}' already exists" == str(warn.message)

        with pytest.raises(ValueError):
            MonitorCheckpoint(dir_path=path, monitor='qwerty')

        checkpoint = MonitorCheckpoint(dir_path=path, monitor='train_loss')
        checkpoint.attach(test_engine)
        with pytest.raises(ValueError):
            checkpoint.epoch_complete(test_engine.state)
Example #6
def train_fold(save_path, train_folds, val_folds):
    train_loader, val_loader = get_data_loaders(BATCH_SIZE, train_folds,
                                                val_folds)
    model = ShipMetaModel(params)
    callbacks = [
        MonitorCheckpoint(save_path,
                          monitor='val_iout',
                          max_saves=2,
                          copy_last=True),
        EarlyStopping(monitor='val_iout', patience=40),
        ReduceLROnPlateau(monitor='val_iout',
                          patience=10,
                          factor=0.2,
                          min_lr=1e-8),
        LoggingToFile(os.path.join(save_path, 'log.txt'))
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=EPOCHS,
              callbacks=callbacks,
              metrics=['iout'])
Example #7
def test_pipeline(tmpdir, get_batch_function, linear_argus_model_instance):
    model = linear_argus_model_instance
    experiment_dir = Path(tmpdir.join("path/to/pipeline_experiment/"))
    train_dataset = TensorDataset(*get_batch_function(batch_size=4096))
    val_dataset = TensorDataset(*get_batch_function(batch_size=512))
    train_loader = DataLoader(train_dataset,
                              shuffle=True,
                              drop_last=True,
                              batch_size=32)
    val_loader = DataLoader(val_dataset, shuffle=False, batch_size=64)

    monitor_checkpoint = MonitorCheckpoint(dir_path=experiment_dir,
                                           monitor='val_loss',
                                           max_saves=1)
    callbacks = [
        monitor_checkpoint,
        EarlyStopping(monitor='val_loss', patience=9),
        ReduceLROnPlateau(monitor='val_loss', factor=0.64, patience=3),
        LoggingToFile(experiment_dir / 'log.txt'),
        LoggingToCSV(experiment_dir / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=100,
              callbacks=callbacks)

    val_loss = model.validate(val_loader)['val_loss']
    assert val_loss < 0.1

    model_paths = sorted(experiment_dir.glob('*.pth'))
    assert len(model_paths) == 1

    loaded_model = load_model(model_paths[0])
    loaded_val_loss = loaded_model.validate(val_loader)['val_loss']
    assert loaded_val_loss == monitor_checkpoint.best_value

    assert (experiment_dir / 'log.txt').exists()
    assert (experiment_dir / 'log.csv').exists()
Example #8
def train_fold(save_dir, train_folds, val_folds):
    depth_trns = SimpleDepthTransform()
    train_trns = SaltTransform(IMAGE_SIZE, True, 'crop')
    val_trns = SaltTransform(IMAGE_SIZE, False, 'crop')
    train_dataset = SaltDataset(TRAIN_FOLDS_PATH, train_folds, train_trns,
                                depth_trns)
    val_dataset = SaltDataset(TRAIN_FOLDS_PATH, val_folds, val_trns,
                              depth_trns)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=4)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=4)

    model = SaltMetaModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir,
                          monitor='val_crop_iout',
                          max_saves=3,
                          copy_last=False),
        EarlyStopping(monitor='val_crop_iout', patience=100),
        ReduceLROnPlateau(monitor='val_crop_iout',
                          patience=30,
                          factor=0.64,
                          min_lr=1e-8),
        LoggingToFile(os.path.join(save_dir, 'log.txt')),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['crop_iout'])
Example #9
                                  depth_trns)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  drop_last=True,
                                  num_workers=4)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=4)

        model = SaltMetaModel(params)

        callbacks = [
            MonitorCheckpoint(experiment_dir,
                              monitor='val_crop_iout',
                              max_saves=1,
                              copy_last=False),
            EarlyStopping(monitor='val_crop_iout', patience=100),
            ReduceLROnPlateau(monitor='val_crop_iout',
                              patience=30,
                              factor=0.7,
                              min_lr=1e-8),
            LoggingToFile(os.path.join(experiment_dir, 'log.txt'))
        ]

        with open(os.path.join(experiment_dir, 'random_params.json'),
                  'w') as outfile:
            json.dump(random_params, outfile)

        model.fit(train_loader,
                  val_loader=val_loader,
Example #10
                               train=True)  # define your dataset

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=4,
    )

    val_dataset = OcrDataset(CV_CONFIG.get("data_path"), transforms=transforms)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=4)
    model = CRNNModel(MODEL_PARAMS)

    callbacks = [
        MonitorCheckpoint(EXPERIMENT_DIR, monitor="val_cer", max_saves=6),
    ]
    # You can implement different metrics to see how many correct predictions you have.
    metrics = [CER()]
    model.fit(
        train_loader,
        val_loader=val_loader,
        max_epochs=NUM_EPOCHS,
        metrics=metrics,
        callbacks=callbacks,
        metrics_on_train=True,
    )
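CER above is a custom metric class. A minimal sketch of one, assuming argus's Metric interface (name/better attributes plus reset, update, and compute, with a step_output dict carrying 'prediction' and 'target'); the ExactAccuracy class and its metric name are hypothetical:

from argus.metrics import Metric


class ExactAccuracy(Metric):  # hypothetical example metric
    name = 'exact_accuracy'
    better = 'max'

    def reset(self):
        self.correct = 0
        self.count = 0

    def update(self, step_output: dict):
        # Count samples whose argmax prediction matches the target.
        pred = step_output['prediction'].argmax(dim=1)
        target = step_output['target']
        self.correct += (pred == target).sum().item()
        self.count += target.size(0)

    def compute(self):
        return self.correct / max(self.count, 1)

Passed in the metrics list, such a metric can then be monitored by name, e.g. MonitorCheckpoint(..., monitor='val_exact_accuracy').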
Example #11
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=4,
    )
    val_dataset = OcrDataset(DATASET_PATHS[0],
                             transforms=val_transforms,
                             train=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=4)

    model = CRNNModel(MODEL_PARAMS)

    callbacks = [
        MonitorCheckpoint(EXPERIMENT_DIR,
                          monitor="val_char_error_rate",
                          max_saves=6),
    ]

    metrics = [CER()]
    model.fit(
        train_loader,
        val_loader=val_loader,
        max_epochs=NUM_EPOCHS,
        metrics=metrics,
        callbacks=callbacks,
        metrics_on_train=True,
    )
Example #12
File: mnist.py Project: vfdev-5/argus
    params = {
        'nn_module': {
            'n_classes': 10,
            'p_dropout': args.dropout
        },
        'optimizer': {
            'lr': args.lr
        },
        'device': args.device
    }
    model = MnistModel(params)

    callbacks = [
        MonitorCheckpoint(dir_path='mnist/',
                          monitor='val_accuracy',
                          max_saves=3),
        EarlyStopping(monitor='val_accuracy', patience=9),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3),
        LoggingToCSV('mnist/log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=args.epochs,
              metrics=['accuracy'],
              callbacks=callbacks,
              metrics_on_train=True)

    del model
    model_path = Path("mnist/").glob("*.pth")
Example #13
    )
    # It is better to split the data into train/val and compute metrics on val.
    val_dataset_paths = [p / "val" for p in DATASET_PATHS]
    val_dataset = ConcatDataset(
        [OcrDataset(p, transforms=transforms) for p in val_dataset_paths])
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=4)

    model = CRNNModel(MODEL_PARAMS)
    # You can add callbacks if needed; find more in argus.callbacks.
    callbacks = [
        MonitorCheckpoint(EXPERIMENT_DIR,
                          monitor="val_str_accuracy_letter",
                          max_saves=6),
        EarlyStopping(monitor='val_loss', patience=200),
    ]
    # You can implement different metrics to see how many correct predictions you have.
    metrics = [StringAccuracy(), StringAccuracyLetters()]

    model.fit(
        train_loader,
        val_loader=val_loader,
        max_epochs=NUM_EPOCHS,
        metrics=metrics,
        callbacks=callbacks,
        metrics_on_train=True,
    )
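The callbacks list above can also hold custom callbacks. A minimal sketch, assuming argus's Callback base class with event-handler methods that receive the engine state, as the checkpoint tests above exercise via start and epoch_complete; the state.metrics/state.epoch layout is an assumption and may differ across argus versions:

from argus.callbacks import Callback


class PrintMonitor(Callback):  # hypothetical example callback
    def __init__(self, monitor='val_loss'):
        self.monitor = monitor

    def epoch_complete(self, state):
        # state.metrics is assumed to hold this epoch's metric values.
        value = state.metrics.get(self.monitor)
        print(f"epoch {state.epoch}: {self.monitor}={value}")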
Example #14
def train_folds(save_dir, folds_data):
    random_params = {
        'base_size': int(np.random.choice([64, 128, 256, 512])),
        'reduction_scale': int(np.random.choice([2, 4, 8, 16])),
        'p_dropout': float(np.random.uniform(0.0, 0.5)),
        'lr': float(np.random.uniform(0.00001, 0.0001)),
        'patience': int(np.random.randint(3, 12)),
        'factor': float(np.random.uniform(0.5, 0.8)),
        'batch_size': int(np.random.choice([32, 64, 128])),
    }
    pprint(random_params)

    save_dir.mkdir(parents=True, exist_ok=True)
    with open(save_dir / 'random_params.json', 'w') as outfile:
        json.dump(random_params, outfile)

    params = {
        'nn_module': ('FCNet', {
            'in_channels': len(config.classes) * len(EXPERIMENTS),
            'num_classes': len(config.classes),
            'base_size': random_params['base_size'],
            'reduction_scale': random_params['reduction_scale'],
            'p_dropout': random_params['p_dropout']
        }),
        'loss': 'BCEWithLogitsLoss',
        'optimizer': ('Adam', {
            'lr': random_params['lr']
        }),
        'device': 'cuda',
    }

    for fold in config.folds:
        val_folds = [fold]
        train_folds = list(set(config.folds) - set(val_folds))
        save_fold_dir = save_dir / f'fold_{fold}'
        print(f"Val folds: {val_folds}, Train folds: {train_folds}")
        print(f"Fold save dir {save_fold_dir}")

        train_dataset = StackingDataset(folds_data, train_folds,
                                        get_transforms(True), DATASET_SIZE)
        val_dataset = StackingDataset(folds_data, val_folds,
                                      get_transforms(False))

        train_loader = DataLoader(train_dataset,
                                  batch_size=random_params['batch_size'],
                                  shuffle=True,
                                  drop_last=True,
                                  num_workers=NUM_WORKERS)
        val_loader = DataLoader(val_dataset,
                                batch_size=random_params['batch_size'] * 2,
                                shuffle=False,
                                num_workers=NUM_WORKERS)

        model = StackingModel(params)

        callbacks = [
            MonitorCheckpoint(save_fold_dir, monitor='val_lwlrap',
                              max_saves=1),
            ReduceLROnPlateau(monitor='val_lwlrap',
                              patience=random_params['patience'],
                              factor=random_params['factor'],
                              min_lr=1e-8),
            EarlyStopping(monitor='val_lwlrap', patience=20),
            LoggingToFile(save_fold_dir / 'log.txt'),
        ]

        model.fit(train_loader,
                  val_loader=val_loader,
                  max_epochs=300,
                  callbacks=callbacks,
                  metrics=['multi_accuracy', 'lwlrap'])
Example #15
                              image_transform=val_trns)

    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=8,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            num_workers=8,
                            shuffle=False)

    model = IterSizeMetaModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(f'/workdir/data/experiments/{EXPERIMENT_NAME}',
                          monitor='val_map_at_k',
                          max_saves=10),
        EarlyStopping(monitor='val_map_at_k', patience=50),
        ReduceLROnPlateau(monitor='val_map_at_k',
                          factor=0.64,
                          patience=1,
                          min_lr=0.000001),
        LoggingToFile(f'/workdir/data/experiments/{EXPERIMENT_NAME}/log.txt')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=1000,
              callbacks=callbacks,
              metrics=['accuracy', MAPatK(k=3)])
Example #16
    params = {
        'nn_module': {
            'model_name': 'tf_efficientnet_b0_ns',
            'pretrained': True,
            'num_classes': 10,
            'drop_rate': 0.2,
            'drop_path_rate': 0.2,
        },
        'optimizer': ('AdamW', {
            'lr': args.lr
        }),
        'loss': 'CrossEntropyLoss',
        'device': args.device
    }
    model = CifarModel(params)

    callbacks = [
        MonitorCheckpoint(dir_path=EXPERIMENT_DIR,
                          monitor='val_accuracy',
                          max_saves=3),
        EarlyStopping(monitor='val_accuracy', patience=9),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.64, patience=3),
        LoggingToCSV(EXPERIMENT_DIR / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=args.epochs,
              metrics=['accuracy'],
              callbacks=callbacks)
Example #17
def train_fold(save_dir, train_folds, val_folds, folds_data):
    train_transform = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=0.0,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.0,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.0)
    val_transform = get_transforms(train=False, size=CROP_SIZE)

    if MIXER_PROB:
        mixer = get_mixer(mixer_prob=MIXER_PROB,
                          sigmoid_range=(3, 12),
                          alpha_dist='uniform',
                          random_prob=(0.6, 0.4))
    else:
        mixer = None

    train_dataset = BirdsongDataset(folds_data,
                                    folds=train_folds,
                                    transform=train_transform,
                                    mixer=mixer)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds,
                                  transform=val_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False,
                            num_workers=NUM_WORKERS)

    model = BirdsongModel(PARAMS)
    if 'pretrained' in model.params['nn_module'][1]:
        model.params['nn_module'][1]['pretrained'] = False

    if USE_AMP:
        initialize_amp(model)

    model.set_device(DEVICES)

    num_iterations = (5 * len(train_dataset)) // BATCH_SIZE
    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_loss', max_saves=1),
        CosineAnnealingLR(T_max=num_iterations,
                          eta_min=0,
                          step_on_iteration=True),
        EarlyStopping(monitor='val_loss', patience=12),
        LoggingToFile(save_dir / 'log.txt'),
        LoggingToCSV(save_dir / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=EPOCHS,
              callbacks=callbacks,
              metrics=['f1_score'])

    del model

    model_path = get_best_model_path(save_dir)
    model = load_model(model_path)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds + [config.n_folds],
                                  transform=val_transform)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False,
                            num_workers=NUM_WORKERS)
    model.set_device(DEVICES[0])
    model.validate(val_loader,
                   metrics=['f1_score'],
                   callbacks=[
                       LoggingToFile(save_dir / 'log.txt'),
                       LoggingToCSV(save_dir / 'log.csv')
                   ])
Example #18
    params = {
        'nn_module': {
            'n_classes': 10,
            'p_dropout': args.dropout
        },
        'optimizer': {
            'lr': args.lr
        },
        'device': args.device
    }
    model = MnistModel(params)

    callbacks = [
        MonitorCheckpoint(dir_path='mnist/',
                          monitor='val_accuracy',
                          max_saves=3,
                          copy_last=True),
        EarlyStopping(monitor='val_accuracy', patience=9),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3),
        LoggingToCSV('mnist/log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=args.epochs,
              metrics=['accuracy'],
              callbacks=callbacks,
              metrics_on_train=True)

    del model
    model = load_model('mnist/model-last.pth')
Example #19
        'loss': 'CrossEntropyLoss',
        'device': 'cuda'
    }
    print("Model params:", params)
    model = ArcfaceModel(params)

    train_metric_dataset = WhaleDataset(train_val_csv_path, True,
                                        **val_transforms)
    monitor_metric = CosMAPatK(train_metric_dataset,
                               k=5,
                               batch_size=batch_size,
                               num_workers=num_workers)
    monitor_metric_name = 'val_' + monitor_metric.name
    callbacks = [
        MonitorCheckpoint(experiment_dir,
                          monitor=monitor_metric_name,
                          max_saves=3),
        EarlyStopping(monitor=monitor_metric_name, patience=50),
        ReduceLROnPlateau(monitor=monitor_metric_name,
                          patience=10,
                          factor=0.64,
                          min_lr=1e-8),
        LoggingToFile(join(experiment_dir, 'log.txt'))
    ]

    with open(join(experiment_dir, 'source.py'), 'w') as outfile:
        with open(__file__) as infile:
            outfile.write(infile.read())

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=1000,
Example #20
        num_workers=4,
    )

    val_dataset = OcrDataset(val_files, transforms)

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    model = CRNNModel(MODEL_PARAMS)

    callbacks = [
        MonitorCheckpoint(EXPERIMENT_DIR, monitor="train_loss", max_saves=6),
        # Checkpoint(EXPERIMENT_DIR),
    ]

    metrics = [
        StringAccuracy(),
    ]

    model.fit(
        train_loader,
        val_loader=val_loader,
        max_epochs=NUM_EPOCHS,
        metrics=metrics,
        callbacks=callbacks,
        metrics_on_train=True,
    )
Example #21
def train_fold(save_dir, train_folds, val_folds, folds_data, noisy_data,
               corrected_noisy_data):
    train_transform = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.33,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.5)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data,
                                       train_folds,
                                       transform=train_transform,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transform,
                                          mixer=mixer)
    corr_noisy_dataset = FreesoundCorrectedNoisyDataset(
        corrected_noisy_data, transform=train_transform, mixer=mixer)
    dataset_probs = [
        NOISY_PROB, CORR_NOISY_PROB, 1 - NOISY_PROB - CORR_NOISY_PROB
    ]
    print("Dataset probs", dataset_probs)
    print("Dataset lens", len(noisy_dataset), len(corr_noisy_dataset),
          len(curated_dataset))
    train_dataset = RandomDataset(
        [noisy_dataset, corr_noisy_dataset, curated_dataset],
        p=dataset_probs,
        size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE * 2,
                            shuffle=False,
                            num_workers=NUM_WORKERS)

    model = FreesoundModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=1),
        ReduceLROnPlateau(monitor='val_lwlrap',
                          patience=6,
                          factor=0.6,
                          min_lr=1e-8),
        EarlyStopping(monitor='val_lwlrap', patience=18),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])
Example #22
def train_experiment(folds_data, noisy_data, num):
    experiment_dir = SAVE_DIR / f'{num:04}'
    np.random.seed(num)
    random.seed(num)

    random_params = {
        'p_dropout': float(np.random.uniform(0.1, 0.3)),
        'batch_size': int(np.random.choice([128])),
        'lr': float(np.random.choice([0.001, 0.0006, 0.0003])),
        'add_prob': float(np.random.uniform(0.0, 1.0)),
        'noisy_prob': float(np.random.uniform(0.0, 1.0)),
        'lsoft_beta': float(np.random.uniform(0.2, 0.8)),
        'noisy_weight': float(np.random.uniform(0.3, 0.7)),
        'patience': int(np.random.randint(2, 10)),
        'factor': float(np.random.uniform(0.5, 0.8))
    }
    pprint(random_params)

    params = {
        'nn_module': ('SimpleKaggle', {
            'num_classes': len(config.classes),
            'dropout': random_params['p_dropout'],
            'base_size': 64
        }),
        'loss': ('OnlyNoisyLSoftLoss', {
            'beta': random_params['lsoft_beta'],
            'noisy_weight': random_params['noisy_weight'],
            'curated_weight': 1 - random_params['noisy_weight']
        }),
        'optimizer': ('Adam', {'lr': random_params['lr']}),
        'device': 'cuda',
        'amp': {
            'opt_level': 'O2',
            'keep_batchnorm_fp32': True,
            'loss_scale': "dynamic"
        }
    }
    pprint(params)
    try:
        train_transform = get_transforms(True, CROP_SIZE)
        curated_dataset = FreesoundDataset(folds_data, TRAIN_FOLDS,
                                           transform=train_transform,
                                           add_prob=random_params['add_prob'])
        noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                              transform=train_transform)
        train_dataset = CombinedDataset(noisy_dataset, curated_dataset,
                                        noisy_prob=random_params['noisy_prob'],
                                        size=DATASET_SIZE)

        val_dataset = FreesoundDataset(folds_data, VAL_FOLDS,
                                       get_transforms(False, CROP_SIZE))
        train_loader = DataLoader(train_dataset, batch_size=random_params['batch_size'],
                                  shuffle=True, drop_last=True,
                                  num_workers=NUM_WORKERS)
        val_loader = DataLoader(val_dataset, batch_size=random_params['batch_size'] * 2,
                                shuffle=False, num_workers=NUM_WORKERS)

        model = FreesoundModel(params)

        callbacks = [
            MonitorCheckpoint(experiment_dir, monitor='val_lwlrap', max_saves=1),
            ReduceLROnPlateau(monitor='val_lwlrap',
                              patience=random_params['patience'],
                              factor=random_params['factor'],
                              min_lr=1e-8),
            EarlyStopping(monitor='val_lwlrap', patience=20),
            LoggingToFile(experiment_dir / 'log.txt'),
        ]

        with open(experiment_dir / 'random_params.json', 'w') as outfile:
            json.dump(random_params, outfile)

        model.fit(train_loader,
                  val_loader=val_loader,
                  max_epochs=100,
                  callbacks=callbacks,
                  metrics=['multi_accuracy', 'lwlrap'])
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        print(f"Exception '{e}' with random params '{random_params}'")