Example #1
def train_model(learn, lr=0.001, lr_decay=0.8, batch_size=512, n_epochs=20, model_name='fastai_'):
    # one TrainingPhase per epoch, each `n` batches long, with the lr decayed exponentially per epoch
    n = len(learn.data.train_dl)
    phases = [TrainingPhase(n).schedule_hp('lr', lr * (lr_decay ** i)) for i in range(n_epochs)]
    sched = GeneralScheduler(learn, phases)
    learn.callbacks.append(sched)

    # checkpoint on improvement and stop early once validation loss stops improving
    learn.fit(n_epochs,
              callbacks=[SaveModelCallback(learn, name=model_name),
                         EarlyStoppingCallback(learn, min_delta=0.001, patience=5)])
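A minimal driver for the helper above (a sketch, not from the original source): it assumes fastai v1 and an ImageDataBunch named `data` that has already been built; the cnn_learner/resnet34 choice and the model_name value are illustrative only.

from fastai.vision import cnn_learner, models, accuracy
from fastai.callbacks import (TrainingPhase, GeneralScheduler,
                              SaveModelCallback, EarlyStoppingCallback)

learn = cnn_learner(data, models.resnet34, metrics=[accuracy])  # `data`: an existing ImageDataBunch
train_model(learn, lr=1e-3, lr_decay=0.8, n_epochs=20, model_name='resnet34_decay')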
Example #2
    def fit(self, learner: Learner, weight_decay):
        if self.early_stop:
            learner.callbacks.append(
                EarlyStoppingCallback(learner,
                                      patience=self.early_stop.patience))
        fit_one_cycle(learner,
                      cyc_len=self.cyc_len,
                      tot_epochs=self.max_epochs,
                      max_lr=self.max_lr,
                      wd=weight_decay)
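The wrapper above assumes a config object exposing early_stop.patience, cyc_len, max_epochs, and max_lr. A stand-alone sketch of the same call without the wrapper (fastai v1; the learner and the concrete numbers are assumptions):

from fastai.train import fit_one_cycle
from fastai.callbacks import EarlyStoppingCallback

learner.callbacks.append(EarlyStoppingCallback(learner, patience=3))  # hypothetical patience
fit_one_cycle(learner, cyc_len=10, max_lr=3e-3, wd=1e-2)              # hypothetical schedule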
Example #3
    def fit(self,
            epochs=10,
            lr=slice(1e-4, 3e-3),
            one_cycle=True,
            early_stopping=False,
            checkpoint=True,
            **kwargs):
        """
        Train the model for the specified number of epochs using the
        specified learning rates.
        
        =====================   ===========================================
        **Argument**            **Description**
        ---------------------   -------------------------------------------
        epochs                  Required integer. Number of cycles of training
                                on the data. Increase it if underfitting.
        ---------------------   -------------------------------------------
        lr                      Required float or slice of floats. Learning rate
                                to be used for training the model. Select from
                                the `lr_find` plot.
        ---------------------   -------------------------------------------
        one_cycle               Optional boolean. Parameter to select 1cycle
                                learning rate schedule. If set to `False` no 
                                learning rate schedule is used.       
        ---------------------   -------------------------------------------
        early_stopping          Optional boolean. Parameter to add early stopping.
                                If set to `True` training will stop if validation
                                loss stops improving for 5 epochs.       
        ---------------------   -------------------------------------------
        checkpoint              Optional boolean. Parameter to save the best model
                                during training. If set to `True` the best model 
                                based on validation loss will be saved during 
                                training.
        =====================   ===========================================
        """
        callbacks = kwargs['callbacks'] if 'callbacks' in kwargs.keys() else []
        kwargs.pop('callbacks', None)
        if early_stopping:
            callbacks.append(
                EarlyStoppingCallback(learn=self.learn,
                                      monitor='valid_loss',
                                      min_delta=0.01,
                                      patience=5))
        if checkpoint:
            callbacks.append(
                SaveModelCallback(self,
                                  monitor='val_loss',
                                  every='improvement',
                                  name='checkpoint'))

        if one_cycle:
            self.learn.fit_one_cycle(epochs, lr, callbacks=callbacks, **kwargs)
        else:
            self.learn.fit(epochs, lr, callbacks=callbacks, **kwargs)
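A hypothetical call to the fit method above, forwarding an extra callback through **kwargs (the model object and the CSVLogger file name are assumptions, not part of the original code):

from fastai.callbacks import CSVLogger

model.fit(epochs=20,
          lr=slice(1e-4, 3e-3),
          early_stopping=True,
          checkpoint=True,
          callbacks=[CSVLogger(model.learn, filename='fit_trace')])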
Example #4
def fastai_random_data_run_with_callback(iris_data, fit_variant, manual_run, callback, patience):
    # pylint: disable=unused-argument
    mlflow.fastai.autolog()

    model = fastai_model(iris_data)

    callbacks = []
    if callback == "early":
        callback = EarlyStoppingCallback(learn=model, patience=patience, min_delta=MIN_DELTA)
        callbacks.append(callback)

    if fit_variant == "fit_one_cycle":
        model.fit_one_cycle(NUM_EPOCHS, callbacks=callbacks)
    else:
        model.fit(NUM_EPOCHS, callbacks=callbacks)

    client = mlflow.tracking.MlflowClient()
    return model, client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
Example #5
def fastai_random_data_run_with_callback(iris_data, fit_variant, manual_run,
                                         callback, patience):
    mlflow.fastai.autolog()
    callbacks = []

    if callback == 'early':
        # min_delta is set as such to guarantee early stopping
        callbacks.append(lambda learn: EarlyStoppingCallback(
            learn, patience=patience, min_delta=MIN_DELTA))

    model = fastai_model(iris_data, callback_fns=callbacks)

    if fit_variant == 'fit_one_cycle':
        model.fit_one_cycle(NUM_EPOCHS)
    else:
        model.fit(NUM_EPOCHS)

    client = mlflow.tracking.MlflowClient()
    return model, client.get_run(
        client.list_run_infos(experiment_id='0')[0].run_id)
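Examples #4 and #5 appear to be two revisions of the same mlflow autologging test helper: #4 passes callback instances directly to fit/fit_one_cycle, while #5 registers callback factories through the Learner's callback_fns. The same registration can be written with functools.partial instead of a lambda (a sketch; MIN_DELTA, NUM_EPOCHS, iris_data, and fastai_model are taken from the snippet above, and the patience value is illustrative):

from functools import partial
from fastai.callbacks import EarlyStoppingCallback

callback_fns = [partial(EarlyStoppingCallback, patience=2, min_delta=MIN_DELTA)]
model = fastai_model(iris_data, callback_fns=callback_fns)
model.fit(NUM_EPOCHS)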
Example #6
def get_callbacks(learner,
                  mod_name,
                  early_stop=True,
                  patience=5,
                  monitor='accuracy',
                  min_delta=0.01):
    callbacks = [
        SaveModelCallback(learner,
                          every='improvement',
                          name=f'{mod_name}-opt_accuracy',
                          monitor='accuracy'),
        SaveModelCallback(learner,
                          every='improvement',
                          name=f'{mod_name}-opt_val_loss'),
        WandbCallback(learner, monitor=monitor, input_type='images', log='all')
    ]
    if early_stop:
        callbacks.append(
            EarlyStoppingCallback(learner,
                                  patience=patience,
                                  min_delta=min_delta,
                                  monitor=monitor))
    return callbacks
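A sketch of wiring the callback factory above into a training run (fastai v1 plus Weights & Biases; the project name, learner, and schedule are assumptions). Note that WandbCallback requires wandb.init() to have been called first:

import wandb

wandb.init(project='my-project')                       # hypothetical project name
learn.fit_one_cycle(20, max_lr=slice(1e-5, 1e-3),
                    callbacks=get_callbacks(learn, 'resnet50', monitor='accuracy'))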
Example #7
def train(sacred_conf):
    valid_fold = sacred_conf.fold
    image_size = sacred_conf.image_size
    conf = sacred_conf


    # class_cnt = 5
    backbone_name = conf.backbone
    unfreeze = True #conf.unfreeze if 'unfreeze' in conf else False
    epoch = 50

    assert int(valid_fold) <= 4
    # batch_id = str(round(time.time()))

    df = pd.read_csv('./input/train.csv', names=['file_name', 'label'])
    df['fold'] = df.file_name % 5
    df['file_name'] = df.file_name.astype('str') + '.jpg'

    # #print(df.head(), df.shape)
    # if class_cnt <= 2:
    #     df.label = np.where(df.label>=1, 1, 0)

    # def get_transforms(do_flip: bool = True, flip_vert: bool = False, max_rotate: float = 10., max_zoom: float = 1.1,
    #                    max_lighting: float = 0.2, max_warp: float = 0.2, p_affine: float = 0.75,
    #                    p_lighting: float = 0.75, xtra_tfms: Optional[Collection[Transform]] = None) -> Collection[Transform]:
    #     "Utility func to easily create a list of flip, rotate, `zoom`, warp, lighting transforms."
    #     res = [rand_crop()]
    #     if do_flip:    res.append(dihedral_affine() if flip_vert else flip_lr(p=0.5))
    #     if max_warp:   res.append(symmetric_warp(magnitude=(-max_warp, max_warp), p=p_affine))
    #     if max_rotate: res.append(rotate(degrees=(-max_rotate, max_rotate), p=p_affine))
    #     if max_zoom > 1: res.append(rand_zoom(scale=(1., max_zoom), p=p_affine))
    #     if max_lighting:
    #         res.append(brightness(change=(0.5 * (1 - max_lighting), 0.5 * (1 + max_lighting)), p=p_lighting))
    #         res.append(contrast(scale=(1 - max_lighting, 1 / (1 - max_lighting)), p=p_lighting))
    #     #       train                   , valid
    #     return (res + listify(xtra_tfms)+zoom_crop(scale=0.1), zoom_crop(scale=0.1))

    data = (ImageList.from_df(df, './input/train/', )
             .split_by_idx(df.loc[df.fold == valid_fold].index)
             #.split_from_df('label')
             # split_by_valid_func(lambda o: int(os.path.basename(o).split('.')[0])%5==i)
             .label_from_df()
             # .add_test_folder('./input/test')
             .transform(get_transforms(), size=image_size)
             .databunch(bs=16)).normalize(imagenet_stats)
    test_data = ImageList.from_folder(path="./input/test")
    data.add_test(test_data)

    # backbone = get_backbone(backbone_name)

    #print(to_models.resnet34())
    # model_fun = to_models.resnet34
    # model_name = model_fun.__name__

    if ',' in backbone_name or isinstance(backbone_name, list):
        learn = get_ens_learn(sacred_conf, data,)
        print(learn.model)
    elif 'raw' == conf.model_type:
        learn = get_cus_learner(sacred_conf, data, )
    else:
        learn = get_fastai_learn(sacred_conf, data)

    # learn = get_test_learn(data, backbone_name)


    model_name = backbone_name
    #print(model_name, learn.model)

    # ch_prefix = os.path.basename(file_name)
    # checkpoint_name = f'{model_name}_f{valid_fold}'
    callbacks = [EarlyStoppingCallback(learn, monitor='accuracy', min_delta=1e-5, patience=5),
                 # SaveModelCallback(learn, monitor='accuracy', name=checkpoint_name, every='improvement'),
                 Recorder_scared(ex, learn)]

    print(f'=====Fold:{valid_fold}, Total epoch:{epoch}, type#{conf.model_type}, lock#{conf.lock_layer}, model:{model_name}, image:{image_size} =========')

    learn.fit_one_cycle(epoch, callbacks=callbacks)

    oof_val = get_oof_df(learn, DatasetType.Valid)

    oof_test = get_oof_df(learn, DatasetType.Test)

    os.makedirs('./output/stacking/', exist_ok=True)
    import socket
    host_name = socket.gethostname()
    # score_list = np.array(learn.recorder.metrics)
    # best_epoch = np.argmax(score_list)
    # best_score = np.max(score_list)
    val_len = len(learn.data.valid_ds.items)
    train_len = len(learn.data.train_ds.items)

    from sklearn.metrics import accuracy_score
    best_score = accuracy_score(oof_val.iloc[:, :-1].idxmax(axis=1), oof_val.iloc[:, -1])

    conf_name_base = backbone_name
    oof_file = f'./output/stacking/{version}_{host_name[:5]}_s{best_score:6.5f}_{conf_name_base}_{conf.model_type}_f{valid_fold}_val{val_len}_trn{train_len}.h5'

    print(f'Stacking file save to:{oof_file}')
    save_stack_feature(oof_val, oof_test, oof_file)
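The best score above is recomputed from the out-of-fold predictions with scikit-learn; the commented-out lines hint at reading it from the fastai recorder instead. A sketch of that alternative (assuming `accuracy` is the first metric registered on the learner):

import numpy as np

scores = np.array([float(m[0]) for m in learn.recorder.metrics])   # one entry per epoch
best_epoch, best_score = int(np.argmax(scores)), float(np.max(scores))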
Example #8
                           metrics=[METRIC],
                           device=DEVICE,
                           model_dir=LOGGING_FOLDER)

    if HS_MODEL is not None:
        learn.model.load_state_dict(torch.load(HS_MODEL)['model'])

    set_BN_momentum(learn.model, batch_size=BATCH_SIZE)
    learn.clip_grad(1.)

    # callbacks
    csv_logger = CSVLogger(learn=learn,
                           filename=f'{LOGGING_FOLDER}/fit_trace',
                           append=True)
    early_stopping = EarlyStoppingCallback(learn=learn,
                                           monitor='dice',
                                           patience=PATIENCE)
    save_model = SaveModelCallback(learn=learn,
                                   monitor='dice',
                                   name='best_model')
    acc_grad = AccumulateStep(learn, 64 // BATCH_SIZE)

    # # find optimal LR
    # learn.lr_find(stop_div=True, num_it=100)
    # learn.recorder.plot(suggestion=True)
    # opt_lr = learn.recorder.min_grad_lr
    # print(f'Initial optimal lr: {opt_lr}')

    if TRAIN_MODE:

        if HS_MODEL is None:
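The snippet above breaks off inside the TRAIN_MODE branch. The commented-out learning-rate search it contains can be run as-is; a hypothetical continuation (not from the original source) would then feed the result into fit_one_cycle together with the callbacks defined above:

    # hypothetical continuation: find a starting lr, then fit with the callbacks above
    learn.lr_find(stop_div=True, num_it=100)
    learn.recorder.plot(suggestion=True)
    opt_lr = learn.recorder.min_grad_lr

    learn.fit_one_cycle(cyc_len=10,     # epoch count is an assumption
                        max_lr=opt_lr,
                        callbacks=[acc_grad, csv_logger, early_stopping, save_model])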
Example #9
data = (ImageList.from_csv(path, 'train_fastai_format.csv',
                           folder='preprocessed/224/train')
        .split_by_rand_pct(seed=42)
        .label_from_df(label_delim=' ')
        .transform(tfms, size=(sz, sz))
        .add_test(str(path) + '/preprocessed/224/test/' + test_fns)
        .databunch(bs=bs, num_workers=8)
        .normalize(imagenet_stats))

model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=6)

learn = Learner(data,
                model,
                metrics=[accuracy_thresh],
                model_dir=path / 'models/eff_net').to_fp16()

learn.unfreeze()
learn.load(pretrained_model)

learn.fit_one_cycle(10,
                    lr,
                    callbacks=[
                        EarlyStoppingCallback(learn,
                                              min_delta=0.001,
                                              patience=3),
                        SaveModelCallback(learn,
                                          every='epoch',
                                          name='effb0-224')
                    ])
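With every='epoch', fastai v1's SaveModelCallback writes one checkpoint per epoch named '<name>_<epoch>' under the learner's model_dir. A sketch of reloading one of them and predicting on the test set (the epoch index is an assumption):

from fastai.basic_data import DatasetType

learn.load('effb0-224_5')                              # hypothetical: checkpoint from epoch 5
preds, _ = learn.get_preds(ds_type=DatasetType.Test)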
Example #10
    def fit(self,
            epochs=10,
            lr=None,
            one_cycle=True,
            early_stopping=False,
            checkpoint=True,
            tensorboard=False,
            **kwargs):
        """
        Train the model for the specified number of epochs using the
        specified learning rates.
        
        =====================   ===========================================
        **Argument**            **Description**
        ---------------------   -------------------------------------------
        epochs                  Required integer. Number of cycles of training
                                on the data. Increase it if underfitting.
        ---------------------   -------------------------------------------
        lr                      Optional float or slice of floats. Learning rate
                                to be used for training the model. If ``lr=None``, 
                                an optimal learning rate is automatically deduced 
                                for training the model.
        ---------------------   -------------------------------------------
        one_cycle               Optional boolean. Parameter to select 1cycle
                                learning rate schedule. If set to `False` no 
                                learning rate schedule is used.       
        ---------------------   -------------------------------------------
        early_stopping          Optional boolean. Parameter to add early stopping.
                                If set to `True` training will stop if validation
                                loss stops improving for 5 epochs.        
        ---------------------   -------------------------------------------
        checkpoint              Optional boolean. Parameter to save the best model
                                during training. If set to `True` the best model 
                                based on validation loss will be saved during 
                                training.
        ---------------------   -------------------------------------------
        tensorboard             Optional boolean. Parameter to write the training log.
                                If set to `True` the log will be saved at
                                <dataset-path>/training_log, which can be visualized in
                                TensorBoard. Requires tensorboardx version 1.7 (experimental support).

                                The default value is `False`.
        =====================   ===========================================
        """
        self._check_requisites()

        if lr is None:
            print('Finding optimum learning rate.')

            lr = self.lr_find(allow_plot=False)
            lr = slice(lr / 10, lr)

        self._learning_rate = lr

        if arcgis.env.verbose:
            logger.info('Fitting the model.')

        if getattr(self, '_backend', 'pytorch') == 'tensorflow':
            checkpoint = False

        callbacks = kwargs['callbacks'] if 'callbacks' in kwargs.keys() else []
        kwargs.pop('callbacks', None)
        if early_stopping:
            callbacks.append(
                EarlyStoppingCallback(learn=self.learn,
                                      monitor='valid_loss',
                                      min_delta=0.01,
                                      patience=5))
        if checkpoint:
            from datetime import datetime
            now = datetime.now()
            callbacks.append(
                SaveModelCallback(
                    self,
                    monitor='valid_loss',
                    every='improvement',
                    name=now.strftime("checkpoint_%Y-%m-%d_%H-%M-%S")))

        # If tensorboardx is installed write a log with name as timestamp
        if tensorboard and HAS_TENSORBOARDX:
            training_id = time.strftime("log_%Y-%m-%d_%H-%M-%S")
            log_path = Path(os.path.dirname(self._data.path)) / 'training_log'
            callbacks.append(
                LearnerTensorboardWriter(learn=self.learn,
                                         base_dir=log_path,
                                         name=training_id))
            hostname = socket.gethostname()
            print(
                "Monitor training using TensorBoard with the following command: 'tensorboard --host={} --logdir={}'"
                .format(hostname, log_path))
        # Send out a warning if tensorboardX is not installed
        elif tensorboard:
            warn(
                "Install tensorboardX 1.7 ('pip install tensorboardx==1.7') to write the training log"
            )

        if one_cycle:
            self.learn.fit_one_cycle(epochs, lr, callbacks=callbacks, **kwargs)
        else:
            self.learn.fit(epochs, lr, callbacks=callbacks, **kwargs)
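A hypothetical call exercising the automatic learning-rate search and TensorBoard logging described in the docstring above (the model object and epoch count are assumptions):

model.fit(epochs=30, lr=None, early_stopping=True, checkpoint=True, tensorboard=True)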
Example #11
    def __init__(self,
                 data_path: str = 'lang_model',
                 emb_sz: int = 800,
                 qrnn: bool = False,
                 bidir: bool = False,
                 n_layers: int = 4,
                 n_hid: int = 2500,
                 bs: int = 104,
                 bptt: int = 67,
                 lr: float = 0.0013,
                 wd: float = .012,
                 one_cycle: bool = True,
                 cycle_len: int = 1) -> None:
        """ Instantiate AWD_LSTM Language Model with hyper-parameters.
        
        data_path: str
            path where databunch is loaded from
        emb_sz: int
            size of word embeddings
        qrnn: bool
            whether or not to use qrnn (requires cuDNN)
        bidir: bool
            if RNN should be bi-directional
        n_layers: int
            number of layers in lang model
        n_hid: int
            number of hidden units in model
        lr: float
            learning rate
        bptt: int
            back-propagation-through-time; max sequence length through which gradients will be accumulated.
        bs: int
            batch size
        
        The hyper-parameters are stored in a fastai dict called `fastai.text.models.awd_lstm_lm_config`:
           {'emb_sz': 400, 'n_hid': 1150, 'n_layers': 3, 'pad_token': 1, 'qrnn': False, 'bidir': False, 'output_p': 0.1,
            'hidden_p': 0.15, 'input_p': 0.25, 'embed_p': 0.02,'weight_p': 0.2, 'tie_weights': True, 'out_bias': True}
        """
        self.lr, self.wd, self.one_cycle, self.cycle_len = lr, wd, one_cycle, cycle_len
        awd_lstm_lm_config.update(
            dict(emb_sz=emb_sz,
                 qrnn=qrnn,
                 bidir=bidir,
                 n_layers=n_layers,
                 n_hid=n_hid))
        #log params
        wb_handle = wandb.init(config=awd_lstm_lm_config)
        wandb.config.update({
            'data_path': str(data_path),
            'bs': bs,
            'bptt': bptt,
            'lr': lr
        })
        self.csv_name = 'history_' + wb_handle.name
        wandb.config.update({'csvlog_save_path': self.csv_name})

        # instantiate databunch
        self.data_lm = load_data(data_path, bs=bs, bptt=bptt)

        # instantiate language model
        self.learn = language_model_learner(data=self.data_lm,
                                            arch=AWD_LSTM,
                                            pretrained=False,
                                            model_dir=Path('models_' +
                                                           wb_handle.name),
                                            config=awd_lstm_lm_config)
        self.full_model_path = str(self.learn.path / self.learn.model_dir)
        wandb.config.update({'model_save_path': self.full_model_path})

        # prepare callbacks
        escb = EarlyStoppingCallback(learn=self.learn, patience=2)
        smcb = SaveModelCallback(learn=self.learn,
                                 name='best_' + wb_handle.name)
        rpcb = ReduceLROnPlateauCallback(learn=self.learn, patience=1)
        csvcb = CSVLogger(learn=self.learn, filename=self.csv_name)
        wb = wandbCallback(self.learn)
        self.callbacks = [escb, smcb, rpcb, csvcb, wb]

        self.fit()
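The self.fit() call at the end refers to a method not shown in this snippet. A minimal sketch of what such a method might do with the stored hyper-parameters and callbacks (an assumption, not the original code):

    def fit(self):
        # sketch only: one-cycle or constant-lr training with the callbacks prepared in __init__
        if self.one_cycle:
            self.learn.fit_one_cycle(self.cycle_len, max_lr=self.lr, wd=self.wd,
                                     callbacks=self.callbacks)
        else:
            self.learn.fit(self.cycle_len, lr=self.lr, wd=self.wd, callbacks=self.callbacks)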
Example #12
                          base_arch=ENCODER,
                          pretrained=PRETRAINED,
                          is_se_resnext=IS_SE_RESNEXT,
                          metrics=[METRIC],
                          device=DEVICE,
                          model_dir=LOGGING_FOLDER)

    set_BN_momentum(learn.model, batch_size=BATCH_SIZE)
    learn.clip_grad(1.)

    # callbacks
    csv_logger = CSVLogger(learn=learn,
                           filename=f'{LOGGING_FOLDER}/fit_trace',
                           append=True)
    early_stopping = EarlyStoppingCallback(learn=learn,
                                           monitor='accuracy',
                                           patience=PATIENCE)
    save_model = SaveModelCallback(learn=learn,
                                   monitor='accuracy',
                                   name='best_model')
    acc_grad = AccumulateStep(learn, 64 // BATCH_SIZE)

    opt_lr = 0.001

    # fit with frozen
    learn.fit_one_cycle(
        cyc_len=3,
        max_lr=opt_lr,
        callbacks=[acc_grad, csv_logger, early_stopping, save_model])

    # fit entire model with saving on the best epoch
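The snippet ends at the comment about fitting the entire model; a hypothetical continuation (not from the original source) would unfreeze and run a second cycle with the same callbacks:

    # hypothetical continuation: unfreeze and refit, keeping the best checkpoint via save_model
    learn.unfreeze()
    learn.fit_one_cycle(cyc_len=10,                    # epoch count is an assumption
                        max_lr=opt_lr / 10,
                        callbacks=[acc_grad, csv_logger, early_stopping, save_model])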
Example #13
def train(valid_fold, conf_name):

    with open(f'./configs/{conf_name}.yaml') as f:
        conf = edict(yaml.safe_load(f))

    class_cnt = conf.class_cnt
    backbone_name = conf.backbone
    unfreeze = True  #conf.unfreeze if 'unfreeze' in conf else False
    epoch = 50

    assert int(valid_fold) <= 4
    # batch_id = str(round(time.time()))
    backbone = get_backbone(backbone_name)

    df = pd.read_csv('./input/train.csv', names=['file_name', 'label'])
    df['fold'] = df.file_name % 5
    df['file_name'] = df.file_name.astype('str') + '.jpg'

    # #print(df.head(), df.shape)
    # if class_cnt <= 2:
    #     df.label = np.where(df.label>=1, 1, 0)

    data = (
        ImageList.from_df(
            df,
            './input/train/',
        ).split_by_idx(df.loc[df.fold == valid_fold].index)
        # split_by_valid_func(lambda o: int(os.path.basename(o).split('.')[0])%5==i)
        .label_from_df(cols='label', label_cls=FloatList)
        # .add_test_folder('./input/test')
        .transform(get_transforms(),
                   size=200).databunch(bs=16)).normalize(imagenet_stats)

    test_data = ImageList.from_folder(path="./input/test")

    data.add_test(test_data)

    #data.show_batch(rows=3, figsize=(15,15))

    #head = create_head(nf, nc, lin_ftrs, ps=ps, concat_pool=concat_pool, bn_final=bn_final)

    learn = cnn_learner(data,
                        backbone,
                        metrics=[root_mean_squared_error],
                        loss_func=nn.MSELoss(),
                        custom_head=None)

    print(learn.model)

    checkpoint_name = f'{backbone()._get_name()}_rf{valid_fold}'
    callbacks = [
        EarlyStoppingCallback(learn,
                              monitor='root_mean_squared_error',
                              min_delta=1e-5,
                              patience=5),
        SaveModelCallback(learn,
                          monitor='root_mean_squared_error',
                          name=checkpoint_name,
                          every='improvement')
    ]

    print(
        f'=====Fold:{valid_fold}, Total epoch:{epoch}, {conf_name}, backbone:{backbone_name}========='
    )

    if unfreeze:
        learn.freeze_to(-2)

    learn.fit_one_cycle(epoch, callbacks=callbacks)

    oof_val = get_oof_df(learn, DatasetType.Valid)

    oof_test = get_oof_df(learn, DatasetType.Test)

    os.makedirs('./output/stacking/', exist_ok=True)
    import socket
    host_name = socket.gethostname()
    # score_list = np.array(learn.recorder.metrics)
    # best_epoch = np.argmax(score_list)
    # best_score = np.max(score_list)
    val_len = len(learn.data.valid_ds.items)
    train_len = len(learn.data.train_ds.items)

    from sklearn.metrics import accuracy_score
    best_score = accuracy_score(oof_val.iloc[:, 0].astype(int),
                                oof_val.iloc[:, -1].astype(int))

    oof_file = f'./output/stacking/{version}_{host_name[:5]}_s{best_score:6.5f}_{conf_name}_f{valid_fold}_val{val_len}_trn{train_len}.h5'

    print(f'Stacking file save to:{oof_file}')
    save_stack_feature(oof_val, oof_test, oof_file)
Example #14
                           loss_func=CRITERION,
                           metrics=[METRIC],
                           opt_func=OPTIMIZER,
                           wd=WD)
    set_BN_momentum(learn.model, n_acc=N_ACC)
    learn.clip_grad(1.)

    learn.model = convert_model(learn.model)
    learn.model = nn.DataParallel(learn.model).to(DEVICE)

    # init callbacks
    csv_logger = CSVLogger(learn=learn,
                           filename=f'{LOGGING_FOLDER}/fit_trace',
                           append=True)
    early_stopping = EarlyStoppingCallback(learn=learn,
                                           monitor='valid_loss',
                                           patience=PATIENCE)
    save_model = SaveModelCallback(learn=learn,
                                   monitor='valid_loss',
                                   name='inter_model',
                                   every='epoch')
    acc_grad = AccumulateStep(learn, N_ACC)

    # fit one cycle
    learn.fit_one_cycle(
        cyc_len=NUM_EPOCHS,
        max_lr=LEARNING_RATE,
        div_factor=DIV_FACTOR,
        final_div=DIV_FACTOR,
        annealing_func=ANNEALING,
        start_epoch=START_EPOCH,
Example #15
def cb_estop(learner, patience=5, min_delta=0.01, monitor='accuracy'):
    return EarlyStoppingCallback(learner,
                                 patience=patience,
                                 min_delta=min_delta,
                                 monitor=monitor)
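A sketch of plugging the helper above into a training run (fastai v1; the learner, epoch count, and monitored metric are assumptions):

learn.fit_one_cycle(40, max_lr=1e-3,
                    callbacks=[cb_estop(learn, patience=8, monitor='valid_loss')])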