Пример #1
0
def train_lm(n_cycles: int = 3,
             cycle_len: int = 1,
             cycle_mult: int = 2,
             momentum: float = 0.8,
             bptt: int = 40,
             lr: float = 1e-3,
             wd: float = 1e-7):
    """Train the language model on the unsupervised splits and save it.

    The schedule consists of ``n_cycles`` cosine-annealed phases; phase ``i``
    spans ``cycle_len * cycle_mult**i`` epochs.  Whatever happens during
    fitting, the current model and its encoder are written out afterwards as
    ``lm_final`` and ``lm_final_enc``.

    Args:
        n_cycles: Number of annealing phases.
        cycle_len: Length of the first phase, in epochs.
        cycle_mult: Factor by which every following phase is longer than
            the previous one.
        momentum: Momentum value handed to every phase.
        bptt: Back-propagation-through-time length used for both the data
            bunch and the learner.
        lr: Learning rate handed to every phase.
        wd: Weight decay passed to ``learner.fit``.
    """
    datasets = create_or_restore(DATA_ROOT)
    lm_data = [
        fastai_patch(ds)
        for ds in (datasets['train_unsup'], datasets['test_unsup'])
    ]
    bunch = TextLMDataBunch.create(lm_data, path=LM_PATH, bptt=bptt)

    n = sum(len(ds) for ds in lm_data)

    # Epochs per cycle, computed once so that the scheduler phases and the
    # epoch count given to fit() are derived from the same numbers.
    cycle_epochs = [cycle_len * cycle_mult**i for i in range(n_cycles)]

    # NOTE(review): phase lengths are scaled by the number of dataset items
    # `n`; confirm this is the unit TrainingPhase expects.
    phases = [
        TrainingPhase(n * epochs,
                      lrs=lr,
                      moms=momentum,
                      lr_anneal=annealing_cos) for epochs in cycle_epochs
    ]
    learner = RNNLearner.language_model(bunch, bptt)
    cbs = [
        EarlyStopping(learner, patience=2),
        GeneralScheduler(learner, phases),
        SaveModel(learner)
    ]

    # Exact sum of the per-cycle lengths.  This replaces the closed-form
    # geometric series, which went through float division and needed a
    # special case for cycle_mult == 1.  int() keeps the old truncating
    # result should a non-integer length ever be passed in.
    total_epochs = int(sum(cycle_epochs))

    print(f'Total number of epochs: {total_epochs:d}')
    try:
        learner.fit(total_epochs, wd=wd, callbacks=cbs)
    except RuntimeError as e:
        # Best effort: report the failure and still fall through to the
        # save below so the latest weights are not lost.
        print(f'Model training error: {e}')
    finally:
        folder = learner.path / learner.model_dir
        print(f'Saving latest model state into {folder}')
        learner.save('lm_final')
        learner.save_encoder('lm_final_enc')
Пример #2
0
    'PersonalStories', 'PossiblyFeedback', 'SentimentNegative',
    'SentimentNeutral', 'SentimentPositive'
]

# Fold number and category selected on the command line.
fold = str(args.fold)
cat = all_cats[args.cl]

# Name of the language-model checkpoint that is loaded further down.
# The script used to assign the following ids one after another, each
# overwritten by the next, so only the last one ever took effect:
#   2019_ 4_04_20_02_39_766228
#   2019_ 4_05_02_05_04_069084
#   2019_ 4_05_11_54_23_327097
#   2019_ 4_06_18_53_42_028549
#   2019_ 4_07_20_36_23_662822
#   2019_ 4_07_20_35_19_533996
model_id = '2019_ 4_07_23_55_08_571313'

# Per-category, per-fold experiment directory (kept as a plain string).
exp_path = '/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat + '_' + fold

data_lm_ft = TextLMDataBunch.load(
    Path('/mnt/data/group07/johannes/ompc/pplmexp_short4'))

if True or not Path('/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat +
                    '_' + fold + '/models/enc5.pth').is_file():
    # NOTE(review): the leading `True or` short-circuits this condition, so
    # the is_file() check is never evaluated and the encoder is re-exported
    # on every run.  Presumably a deliberate override -- confirm before
    # removing it.  The path checked here is under `pplmexp_short`, not
    # `pplmexp_short4`; verify which one is intended.
    if True or not Path(
            '/mnt/data/group07/johannes/ompc/pplmexp_short/models/enc5.pth'
    ).is_file():
        print('need to save enc')
        # Build a learner on the LM data, load the chosen checkpoint and
        # write its encoder out under the name 'enc5'.
        learn_lm = language_model_learner(data_lm_ft)
        learn_lm.load(model_id)
        learn_lm.save_encoder('enc5')
        # The learner is only needed for the export; drop the reference.
        del learn_lm
    print('need to copy enc')
    os.makedirs(
        os.path.dirname('/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat +
                        '_' + fold + '/models/enc5.pth'),
Пример #3
0
import news_utils.fastai
import news_utils.clean.german


# In[ ]:


# Command-line interface.  Both options are required: without --cl the
# category lookup below fails with a TypeError, and without --fold the
# experiment folder would be named "<category>_None".
parser = argparse.ArgumentParser()
parser.add_argument("--cl", type=int, required=True,
                    help="index into all_cats of the category to process")
parser.add_argument("--fold", type=int, required=True,
                    help="number of the data fold to process")
args = parser.parse_args()

bpemb_de = BPEmb(lang="de", vs=25000, dim=300)

data_lm_ft = TextLMDataBunch.load(path='/mnt/data/group07/johannes/ompc/lmexp', cache_name='whatever')

all_cats = [
    'ArgumentsUsed', 'Discriminating', 'Inappropriate', 'OffTopic',
    'PersonalStories', 'PossiblyFeedback', 'SentimentNegative',
    'SentimentNeutral', 'SentimentPositive',
]

# fold is kept as a string because it is used as a path component.
fold = str(args.fold)
cat = all_cats[args.cl]
model_id = '2019_ 4_01_00_11_32_066215'
exp_path = '/mnt/data/group07/johannes/ompc/exp/' + cat + '_' + fold

# Copy the language-model encoder into this experiment's models folder,
# creating the folder first if needed.  The destination is derived from
# exp_path instead of being rebuilt by hand at every use.
encoder_dst = exp_path + '/models/enc5.pth'
os.makedirs(os.path.dirname(encoder_dst), exist_ok=True)
shutil.copy('/mnt/data/group07/johannes/ompc/lmexp/models/enc5.pth', encoder_dst)


def run_for_class(it=1):
    train_df = pd.read_pickle(Path('/mnt/data/group07/johannes/ompc/data_ann')/cat/fold/'train.pkl')
    test_df = pd.read_pickle(Path('/mnt/data/group07/johannes/ompc/data_ann')/cat/fold/'test.pkl')