Example #1
    def fit(self,
            df_path,
            model_name="lm_5_ep_lr2-3_5_stlr",
            dict_name="itos",
            num_epochs=5,
            is_fit_topics=False):
        # Load the corpus: one text per row of a tab-separated file.
        df = pd.read_csv(df_path, sep="\t")
        bs = 16
        texts = pd.DataFrame(list(df.text))
        # Build a language-model DataBunch: tokenize, numericalize with a
        # previously saved vocabulary, and hold out 10% for validation.
        processors = [
            TokenizeProcessor(tokenizer=Tokenizer(lang="xx")),
            NumericalizeProcessor(vocab=Vocab.load("models/{}.pkl".format(dict_name)))
        ]
        self.data = (TextList.from_df(texts, processor=processors)
                     .random_split_by_pct(.1)
                     .label_for_lm()
                     .databunch(bs=bs))
        # AWD_LSTM language model initialised from the saved weights and vocabulary.
        self.learn = language_model_learner(
            self.data,
            AWD_LSTM,
            pretrained=False,
            drop_mult=0.7,
            pretrained_fnames=[model_name, dict_name])
        self.learn.unfreeze()
        # Run the LR finder and take the learning rate at the steepest gradient.
        self.learn.lr_find(start_lr=slice(10e-7, 10e-5), end_lr=slice(0.4, 10))
        _ = self.learn.recorder.plot(skip_end=10, suggestion=True)
        best_lm_lr = self.learn.recorder.min_grad_lr
        # print(best_lm_lr)
        # Fine-tune with one-cycle scheduling, lowering the LR on plateaus.
        self.learn.fit_one_cycle(
            num_epochs,
            best_lm_lr,
            callbacks=[ReduceLROnPlateauCallback(self.learn, factor=0.8)])
        # TODO: fit LDA topic model
        if is_fit_topics:
            pass
        return self
Example #2
def fit_model(learn, epoch, learner_saved, encoder_saved):
    # Resume from previously saved learner/encoder weights if both are given.
    if learner_saved and encoder_saved:
        learn.load(learner_saved)
        learn.load_encoder(encoder_saved)
    # One-cycle training; keep the best model and cut the LR on plateaus.
    learn.fit_one_cycle(epoch,
                        2e-3,
                        moms=(0.8, 0.7),
                        callbacks=[
                            SaveModelCallback(learn),
                            ReduceLROnPlateauCallback(learn, factor=0.8)
                        ])
    # Save the fine-tuned language model and its encoder for later reuse.
    learner_to_save = "lm_" + str(epoch) + "_ep_lr2-3Px"
    encoder_to_save = "lm_" + str(epoch) + "_ep_lr2-3_encx"
    learn.save(learner_to_save)
    learn.save_encoder(encoder_to_save)
    return learn, learner_to_save, encoder_to_save
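
A minimal usage sketch (hypothetical, not from the source): `learn` is assumed to be a fastai language-model `Learner` created elsewhere, e.g. with `language_model_learner`. Each round reloads the checkpoint names returned by the previous round and trains for more epochs.

# Hypothetical usage of fit_model: chain several training rounds, feeding the
# saved learner/encoder names from one round into the next so the weights are
# reloaded before training continues.
learner_name, encoder_name = None, None
for n_epochs in (1, 2, 4):
    learn, learner_name, encoder_name = fit_model(
        learn, n_epochs, learner_name, encoder_name)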
Example #3
    def __init__(self,
                 data_path: str = 'lang_model',
                 emb_sz: int = 800,
                 qrnn: bool = False,
                 bidir: bool = False,
                 n_layers: int = 4,
                 n_hid: int = 2500,
                 bs: int = 104,
                 bptt: int = 67,
                 lr: float = 0.0013,
                 wd: float = .012,
                 one_cycle: bool = True,
                 cycle_len: int = 1) -> None:
        """ Instantiate AWD_LSTM Language Model with hyper-parameters.
        
        data_path: str
            path where databunch is loaded from
        emb_sz: int
            size of word embeddings
        qrnn: bool
            whether or not to use qrnn (requires CudNN)
        bidir: bool
            if RNN should be bi-directional
        n_layers: int
            number of layers in lang model
        n_hid: int
            number of hidden units in model
        lr: float
            learning rate
        bptt: int
            back-propigation-through-time; max sequence length through which gradients will be accumulated.
        bs: int
            batch size
        
        The hyper-parameters are stored in a fastai dict called `fastai.text.models.awd_lstm_lm_config`:
           {'emb_sz': 400, 'n_hid': 1150, 'n_layers': 3, 'pad_token': 1, 'qrnn': False, 'bidir': False, 'output_p': 0.1,
            'hidden_p': 0.15, 'input_p': 0.25, 'embed_p': 0.02,'weight_p': 0.2, 'tie_weights': True, 'out_bias': True}
        """
        self.lr, self.wd, self.one_cycle, self.cycle_len = lr, wd, one_cycle, cycle_len
        awd_lstm_lm_config.update(
            dict(emb_sz=emb_sz,
                 qrnn=qrnn,
                 bidir=bidir,
                 n_layers=n_layers,
                 n_hid=n_hid))
        # Log hyper-parameters and run settings to Weights & Biases.
        wb_handle = wandb.init(config=awd_lstm_lm_config)
        wandb.config.update({
            'data_path': str(data_path),
            'bs': bs,
            'bptt': bptt,
            'lr': lr
        })
        self.csv_name = 'history_' + wb_handle.name
        wandb.config.update({'csvlog_save_path': self.csv_name})

        # instantiate databunch
        self.data_lm = load_data(data_path, bs=bs, bptt=bptt)

        # instantiate language model
        self.learn = language_model_learner(data=self.data_lm,
                                            arch=AWD_LSTM,
                                            pretrained=False,
                                            model_dir=Path('models_' +
                                                           wb_handle.name),
                                            config=awd_lstm_lm_config)
        self.full_model_path = str(self.learn.path / self.learn.model_dir)
        wandb.config.update({'model_save_path': self.full_model_path})

        # prepare callbacks
        escb = EarlyStoppingCallback(learn=self.learn, patience=2)
        smcb = SaveModelCallback(learn=self.learn,
                                 name='best_' + wb_handle.name)
        rpcb = ReduceLROnPlateauCallback(learn=self.learn, patience=1)
        csvcb = CSVLogger(learn=self.learn, filename=self.csv_name)
        wb = wandbCallback(self.learn)
        self.callbacks = [escb, smcb, rpcb, csvcb, wb]

        self.fit()
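
A minimal usage sketch (hypothetical: the wrapper class name `LMTrainer` is invented here, since the snippet only shows its `__init__`). Note that instantiation immediately starts training, because `__init__` ends with `self.fit()`.

# Hypothetical: assume the __init__ above belongs to a class named LMTrainer.
# Constructing it loads the databunch, overrides awd_lstm_lm_config, wires up
# the W&B / early-stopping / LR callbacks, and then trains via self.fit().
trainer = LMTrainer(data_path='lang_model',
                    emb_sz=400, n_hid=1150, n_layers=3,   # fastai defaults
                    bs=64, bptt=70, lr=1e-3,
                    one_cycle=True, cycle_len=1)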
Example #4
        # (Fragment: the opening of this fit call is not shown above.)
        1e-4,
        # max_lr=1e-4,
        # div_factor=1e2,
        # final_div=1e3,
        # pct_start=0.1,
        callbacks=[
            # Checkpoint the best weights of this fold by kappa score.
            SaveModelCallback(learn,
                              name=f"model_best_{fold}",
                              monitor="kappa_score"),
            # Abort training if the loss becomes NaN.
            TerminateOnNaNCallback(),
            # Custom progress logger sized for train + valid batches.
            LoggerCallback(
                len(data.dl(DatasetType.Train)) +
                len(data.dl(DatasetType.Valid)), learn),
            logger,
            # Halve the LR after 2 epochs without improvement.
            ReduceLROnPlateauCallback(learn,
                                      patience=2,
                                      factor=0.5,
                                      min_lr=1e-7),
        ],
    )

    # Persist the raw PyTorch weights for this fold.
    torch.save(learn.model.state_dict(), f"{fname}_{fold}.pth")

    # Collect out-of-fold predictions on the validation set.
    learn.model.eval()
    with torch.no_grad():
        valid_dl = data.dl(DatasetType.Valid)
        for step, (x, y) in progress_bar(enumerate(valid_dl), total=len(valid_dl)):
            p = learn.model(*x)
            pred.append(p.float().cpu())
            target.append(y.cpu())
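
A natural follow-up (hypothetical, assuming `pred` and `target` are the lists filled in the loop above) is to merge the per-batch outputs into single tensors for fold-level metrics.

# Hypothetical follow-up: concatenate the per-batch predictions and targets
# collected above into tensors covering the whole validation fold.
all_preds = torch.cat(pred)
all_targets = torch.cat(target)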