def train_on_split(silo_to_use, n_fold, save_dir):
    logger.info(f"############ Crossvalidation: Fold {n_fold} ############")

    # Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => text classification
    prediction_head = TextClassificationHead(
        layer_dims=[768, len(processor.tasks["text_classification"]["label_list"])],
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=0.2,
                          lm_output_types=["per_sequence"],
                          device=device)

    # Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=0.5e-5,
        device=device,
        n_batches=len(silo_to_use.loaders["train"]),
        n_epochs=n_epochs,
        use_amp=use_amp)

    # Feed everything to the Trainer, which takes care of growing our model into a powerful plant
    # and evaluates it from time to time.
    # Also create an EarlyStopping instance and pass it on to the trainer.
    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    # NOTE: Using a different save directory for each fold allows us to use the
    # n_folds best models in an ensemble afterwards!
    save_dir += f"-{n_fold}"
    earlystopping = EarlyStopping(
        metric="f1_offense", mode="max",  # use the metric from our own metrics function instead of loss
        save_dir=save_dir,  # where to save the best model
        patience=5  # number of evaluations to wait for improvement before terminating the training
    )

    trainer = Trainer(optimizer=optimizer,
                      data_silo=silo_to_use,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device,
                      early_stopping=earlystopping,
                      evaluator_test=False)

    # train it
    model = trainer.train(model)
    return model
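# Usage sketch (assumption, modelled on FARM's cross-validation example): train_on_split() above
# is meant to be called once per fold on silos produced by DataSiloForCrossVal. The names
# data_silo, xval_folds and save_dir are assumed to exist in the surrounding script.
import torch
from farm.data_handler.data_silo import DataSiloForCrossVal

silos = DataSiloForCrossVal.make(data_silo, n_splits=xval_folds)
for num_fold, silo in enumerate(silos):
    model = train_on_split(silo, num_fold, save_dir)
    # free GPU memory between folds
    del model
    torch.cuda.empty_cache()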
def train_on_split(silo_to_use, n_fold, save_dir, dev):
    # Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => text classification
    prediction_head = MultiLabelTextClassificationHead(
        # there is still an error with class weights ...
        # class_weights=data_silo.calculate_class_weights(task_name="text_classification"),
        num_labels=len(label_list))

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.2,
        lm_output_types=["per_sequence"],
        device=dev)

    # Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=0.5e-5,
        device=dev,
        n_batches=len(silo_to_use.loaders["train"]),
        n_epochs=n_epochs)

    # Feed everything to the Trainer, which takes care of growing our model into a powerful plant
    # and evaluates it from time to time.
    # Also create an EarlyStopping instance and pass it on to the trainer.
    save_dir = Path(str(save_dir) + f"-{n_fold}")
    # unfortunately, early stopping is still not working
    earlystopping = EarlyStopping(
        metric="f1_macro", mode="max",
        save_dir=save_dir,  # where to save the best model
        patience=5  # number of evaluations to wait for improvement before terminating the training
    )

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=silo_to_use,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=dev,
                      evaluator_test=False,
                      # early_stopping=earlystopping,
                      )

    # train it
    trainer.train()
    trainer.model.save(save_dir)
    return trainer.model
def doc_classification(task_config, model_name_or_path, cache_dir, data_dir, save_dir, model_dir,
                       run_name="0", lr=1e-05, warmup_steps=5000, balance_classes=True,
                       embeds_dropout=0.1,
                       epochs=200,  # large because we use early stopping by default
                       batch_size=20, grad_acc_steps=1,
                       early_stopping_metric="roc_auc", early_stopping_mode="max",
                       early_stopping_patience=10,
                       model_class="Bert", tokenizer_class="BertTokenizer", do_lower_case=False,
                       do_train=True, do_eval=True, do_hpo=False,
                       print_preds=False, print_dev_preds=False,
                       max_seq_len=512, seed=11, eval_every=500,
                       use_amp=False, use_cuda=True):
    # Load task config
    task_config = yaml.safe_load(open(task_config))

    data_dir = data_dir
    save_dir = save_dir
    model_dir = model_dir

    # Create label list from args list or (for large label lists) create from file by splitting by space
    if isinstance(task_config["data"]["label_list"], list):
        label_list = task_config["data"]["label_list"]
    else:
        with open(data_dir / 'labels' / task_config["data"]["label_list"]) as code_file:
            label_list = code_file.read().split(" ")

    # Register Outcome Metrics
    register_task_metrics(label_list)

    # General Settings
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=use_amp)

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=model_name_or_path,
                               tokenizer_class=tokenizer_class,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        data_dir=data_dir,
        label_list=label_list,
        metric=task_config["metric"],
        multilabel=task_config["multilabel"],
        train_filename=task_config["data"]["train_filename"],
        dev_filename=task_config["data"]["dev_filename"],
        dev_split=task_config["data"]["dev_split"] if "dev_split" in task_config["data"] else None,
        test_filename=task_config["data"]["test_filename"],
        delimiter=task_config["data"]["parsing"]["delimiter"],
        quote_char=task_config["data"]["parsing"]["quote_char"],
        label_column_name=task_config["data"]["parsing"]["label_column"])

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor,
                         caching=True,
                         cache_path=Path(cache_dir),
                         batch_size=batch_size)

    if do_train:
        # Setup MLFlow logger
        ml_logger = MLFlowLogger(tracking_uri=task_config["log_dir"])
        ml_logger.init_experiment(experiment_name=task_config["experiment_name"],
                                  run_name=f'{task_config["experiment_name"]}_{run_name}')

        # 4. Create an AdaptiveModel
        # a) which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(model_name_or_path, language_model_class=model_class)

        # b) and a prediction head on top that is suited for our task
        # Define class weights
        if balance_classes:
            class_weights = data_silo.calculate_class_weights(task_name=task_config["task_type"])
        else:
            class_weights = None

        # Create Multi- or Single-Label Classification Heads
        if task_config["multilabel"]:
            prediction_head = MultiLabelTextClassificationHead(class_weights=class_weights,
                                                               num_labels=len(label_list))
        else:
            prediction_head = ExtendedTextClassificationHead(class_weights=class_weights,
                                                             num_labels=len(label_list))

        model = ExtendedAdaptiveModel(language_model=language_model,
                                      prediction_heads=[prediction_head],
                                      embeds_dropout_prob=embeds_dropout,
                                      lm_output_types=[task_config["output_type"]],
                                      device=device)

        # 5. Create an optimizer
        schedule_opts = {"name": "LinearWarmup", "num_warmup_steps": warmup_steps}
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=lr,
            device=device,
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=epochs,
            use_amp=use_amp,
            grad_acc_steps=grad_acc_steps,
            schedule_opts=schedule_opts)

        # 6. Create an early stopping instance
        early_stopping = None
        if early_stopping_mode != "none":
            early_stopping = EarlyStopping(mode=early_stopping_mode,
                                           min_delta=0.0001,
                                           save_dir=model_dir,
                                           metric=early_stopping_metric,
                                           patience=early_stopping_patience)

        # 7. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
        # and evaluates it from time to time
        trainer = ExtendedTrainer(model=model,
                                  optimizer=optimizer,
                                  data_silo=data_silo,
                                  epochs=epochs,
                                  n_gpu=n_gpu,
                                  lr_schedule=lr_schedule,
                                  evaluate_every=eval_every,
                                  early_stopping=early_stopping,
                                  device=device,
                                  grad_acc_steps=grad_acc_steps,
                                  evaluator_test=do_eval)

        def score_callback(eval_score, train_loss):
            tune.report(roc_auc_dev=eval_score, train_loss=train_loss)

        # 8. Train the model
        trainer.train(score_callback=score_callback if do_hpo else None)

        # 9. Save model if not saved in early stopping
        model.save(model_dir + "/final_model")
        processor.save(model_dir + "/final_model")

    if do_eval:
        # Load newly trained model or existing model
        if do_train:
            model_dir = model_dir
        else:
            model_dir = Path(model_name_or_path)

        logger.info("###### Eval on TEST SET #####")
        evaluator_test = ExtendedEvaluator(data_loader=data_silo.get_data_loader("test"),
                                           tasks=data_silo.processor.tasks,
                                           device=device)

        # Load trained model for evaluation
        model = ExtendedAdaptiveModel.load(model_dir, device)
        model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

        # Evaluate
        results = evaluator_test.eval(model, return_preds_and_labels=True)

        # Log results
        utils.log_results(results,
                          dataset_name="test",
                          steps=len(evaluator_test.data_loader),
                          save_path=model_dir + "/eval_results.txt")

        if print_preds:
            # Print model test predictions
            utils.save_predictions(results, save_dir=model_dir, multilabel=task_config["multilabel"])

        if print_dev_preds:
            # Evaluate on dev set, e.g. for threshold tuning
            evaluator_dev = Evaluator(data_loader=data_silo.get_data_loader("dev"),
                                      tasks=data_silo.processor.tasks,
                                      device=device)
            dev_results = evaluator_dev.eval(model, return_preds_and_labels=True)
            utils.log_results(dev_results,
                              dataset_name="dev",
                              steps=len(evaluator_dev.data_loader),
                              save_path=model_dir + "/eval_dev_results.txt")
            # Print model dev predictions
            utils.save_predictions(dev_results,
                                   save_dir=model_dir,
                                   multilabel=task_config["multilabel"],
                                   dataset_name="dev")
def train_on_split(args, silo, processor, fold=None):
    if args.folds > 1:
        args.logger.info(f"############ Crossvalidation: Fold {fold} ############")

    language_model = CustomLanguageModel.load(args.model_name,
                                              language_model_class=args.model_class_name)
    if args.prediction_layer > 0:
        language_model.enable_hidden_states_output()

    prediction_heads, out_types = load_prediction_heads(args, silo)

    # Sum all by default
    loss_fct = None if args.task_weights is None else compute_weighted_loss(args.task_weights,
                                                                             args.label_columns)

    # Create an AdaptiveModel = LM + prediction head(s)
    model = CustomAdaptiveModel(
        language_model=language_model,
        prediction_heads=prediction_heads,
        embeds_dropout_prob=args.embed_dropout_prob,
        lm_output_types=out_types,
        device=args.device,
        loss_aggregation_fn=loss_fct,
        head_feats=args.do_feat_embeds,
        freeze_model=args.freeze_model,
        custom_pooling_strategy=args.pooling_strategy,
        prediction_layer=args.prediction_layer,
    )

    # Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=args.learning_rate,
        n_batches=len(silo.loaders["train"]),
        n_epochs=args.num_train_epochs,
        device=args.device,
        grad_acc_steps=args.grad_acc_steps,
    )

    # Setup early stopping
    earlystopping = None
    fold_save_dir = f"{args.save_dir}_{fold}" if args.folds > 1 else args.save_dir
    if args.patience is not None:
        earlystopping = EarlyStopping(metric="loss",
                                      mode="min",
                                      save_dir=fold_save_dir,
                                      patience=args.patience)

    # Feed everything to the trainer
    trainer = MultitaskTrainer(
        model=model,
        optimizer=optimizer,
        data_silo=silo,
        epochs=args.num_train_epochs,
        n_gpu=args.n_gpu,
        device=args.device,
        lr_schedule=lr_schedule,
        evaluate_every=args.evaluate_every,
        early_stopping=earlystopping,
        evaluator_test=False,
        eval_report=False,
    )

    # Let it grow
    trainer.train()

    if args.patience is None:
        # Store the model, only if it wasn't already saved by early stopping
        model.save(fold_save_dir)
    processor.save(fold_save_dir)
    return trainer.model
def doc_classification_with_earlystopping():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)

    ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    # for local logging instead:
    # ml_logger = MLFlowLogger(tracking_uri="logs")
    ml_logger.init_experiment(experiment_name="Public_FARM",
                              run_name="DocClassification_ES_f1_1")

    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    use_amp = None
    device, n_gpu = initialize_device_settings(use_cuda=True)
    n_epochs = 20
    batch_size = 32
    evaluate_every = 100
    lang_model = "bert-base-german-cased"
    do_lower_case = False

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    # Here we load the GermEval 2018 data automatically if it is not available.
    # GermEval 2018 only has train.tsv and test.tsv datasets - no dev.tsv

    # The processor wants to know the possible labels ...
    label_list = ["OTHER", "OFFENSE"]

    # The evaluation on the dev-set can be done with one of the predefined metrics or with a
    # metric defined as a function from (preds, labels) to a dict that contains all the actual
    # metrics values. The function must get registered under a string name and the string name must
    # be used.
    def mymetrics(preds, labels):
        acc = simple_accuracy(preds, labels)
        f1other = f1_score(y_true=labels, y_pred=preds, pos_label="OTHER")
        f1offense = f1_score(y_true=labels, y_pred=preds, pos_label="OFFENSE")
        f1macro = f1_score(y_true=labels, y_pred=preds, average="macro")
        f1micro = f1_score(y_true=labels, y_pred=preds, average="micro")
        return {
            "acc": acc,
            "f1_other": f1other,
            "f1_offense": f1offense,
            "f1_macro": f1macro,
            "f1_micro": f1micro
        }

    register_metrics('mymetrics', mymetrics)
    metric = 'mymetrics'

    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=64,
        data_dir=Path("../data/germeval18"),
        label_list=label_list,
        metric=metric,
        label_column_name="coarse_label")

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => text classification
    prediction_head = TextClassificationHead(
        num_labels=len(label_list),
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=0.2,
                          lm_output_types=["per_sequence"],
                          device=device)

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=0.5e-5,
        device=device,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        use_amp=use_amp)

    # 6. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
    # and evaluates it from time to time.
    # Also create an EarlyStopping instance and pass it on to the trainer.
    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    earlystopping = EarlyStopping(
        metric="f1_offense", mode="max",  # use the metric from our own metrics function instead of loss
        # metric="f1_macro", mode="max",  # use f1_macro from the dev evaluator of the trainer
        # metric="loss", mode="min",      # use loss from the dev evaluator of the trainer
        save_dir=Path("saved_models/bert-german-doc-tutorial-es"),  # where to save the best model
        patience=5  # number of evaluations to wait for improvement before terminating the training
    )

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=data_silo,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device,
                      early_stopping=earlystopping)

    # 7. Let it grow
    trainer.train()

    # 8. Hooray! You have a model.
    # NOTE: if early stopping is used, the best model has already been stored in the directory
    # defined with the EarlyStopping instance.
    # The model we have at this moment is the model from the last training epoch that was carried
    # out before early stopping terminated the training.
    save_dir = Path("saved_models/bert-german-doc-tutorial")
    model.save(save_dir)
    processor.save(save_dir)

    # 9. Load it & harvest your fruits (Inference)
    basic_texts = [
        {"text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"},
        {"text": "Martin Müller spielt Handball in Berlin"},
    ]

    # Load from the final epoch directory and apply
    print("LOADING INFERENCER FROM FINAL MODEL DURING TRAINING")
    model = Inferencer.load(save_dir)
    result = model.inference_from_dicts(dicts=basic_texts)
    print(result)
    model.close_multiprocessing_pool()

    # Load from saved best model
    print("LOADING INFERENCER FROM BEST MODEL DURING TRAINING")
    model = Inferencer.load(earlystopping.save_dir)
    result = model.inference_from_dicts(dicts=basic_texts)
    print("APPLICATION ON BEST MODEL")
    print(result)
    model.close_multiprocessing_pool()
def doc_classification():
    device, n_gpu = initialize_device_settings(use_cuda=True, use_amp=use_amp)

    tokenizer = AutoTokenizer.from_pretrained(lang_model, strip_accents=False)
    # tokenizer = Tokenizer.load(
    #     pretrained_model_name_or_path=lang_model,
    #     do_lower_case=do_lower_case)

    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=128,
        data_dir=Path("./data/germeval18"),
        label_list=label_list,
        metric=metric,
        dev_filename="test.tsv",  # we want to evaluate against test
        label_column_name="coarse_label",
    )

    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    language_model = LanguageModel.load(lang_model)
    prediction_head = TextClassificationHead(
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"),
        num_labels=len(label_list))
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_sequence"],
        device=device)

    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=3e-5,
        device=device,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        use_amp=use_amp)

    earlystopping = EarlyStopping(
        metric=metric, mode="max",
        # save_dir=Path("./saved_models"),
        patience=3
    )

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        early_stopping=earlystopping,
        device=device)

    trainer.train()

    return earlystopping.best_so_far
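# Note: the snippet above reads several module-level settings that are not defined inside the
# function. A minimal sketch of what they could look like; the concrete values are assumptions
# for illustration, not part of the original snippet:
use_amp = None
lang_model = "bert-base-german-cased"  # assumed German BERT checkpoint
do_lower_case = False
label_list = ["OTHER", "OFFENSE"]      # GermEval 2018 coarse labels
metric = "f1_macro"                    # must match the metric name EarlyStopping watches
batch_size = 32
n_epochs = 4
evaluate_every = 100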
def doc_classification(task, model_type, n_epochs, batch_size, embeds_dropout, evaluate_every,
                       use_cuda, max_seq_len, learning_rate, do_lower_case, register_model,
                       save_model=True, early_stopping=False):
    language = cu.params.get('language')

    # Check task
    if cu.tasks.get(str(task)).get('type') != 'classification':
        raise Exception('NOT A CLASSIFICATION TASK')

    # Data
    dt_task = dt.Data(task=task)
    ## Download training files
    if not os.path.isfile(dt_task.get_path('fn_train', dir='data_dir')):
        dt_task.download('data_dir', dir='data_dir', source='datastore')

    # Settings
    set_all_seeds(seed=42)
    use_amp = None
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=use_amp)
    lang_model = he.get_farm_model(model_type, language)
    save_dir = dt_task.get_path('model_dir')
    label_list = dt_task.load('fn_label', dir='data_dir', header=None)[0].to_list()

    # AML log
    try:
        aml_run.log('task', task)
        aml_run.log('language', language)
        aml_run.log('n_epochs', n_epochs)
        aml_run.log('batch_size', batch_size)
        aml_run.log('learning_rate', learning_rate)
        aml_run.log('embeds_dropout', embeds_dropout)
        aml_run.log('max_seq_len', max_seq_len)
        aml_run.log('lang_model', lang_model)
        aml_run.log_list('label_list', label_list)
    except:
        pass

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)

    # The evaluation on the dev-set can be done with one of the predefined metrics or with a
    # metric defined as a function from (preds, labels) to a dict that contains all the actual
    # metrics values. The function must get registered under a string name and the string name must
    # be used.
    def mymetrics(preds, labels):
        acc = simple_accuracy(preds, labels)
        f1macro = f1_score(y_true=labels, y_pred=preds, average="macro")
        f1micro = f1_score(y_true=labels, y_pred=preds, average="micro")
        # AML log
        try:
            aml_run.log('acc', acc.get('acc'))
            aml_run.log('f1macro', f1macro)
            aml_run.log('f1micro', f1micro)
        except:
            pass
        return {"acc": acc, "f1_macro": f1macro, "f1_micro": f1micro}

    register_metrics('mymetrics', mymetrics)
    metric = 'mymetrics'

    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        data_dir=dt_task.data_dir,
        label_list=label_list,
        metric=metric,
        label_column_name="label",
        train_filename=dt_task.get_path('fn_train', dir='data_dir'),
        test_filename=dt_task.get_path('fn_test', dir='data_dir'))

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an AdaptiveModel
    ## Pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    ## Prediction head on top that is suited for our task => text classification
    prediction_head = TextClassificationHead(
        num_labels=len(processor.tasks["text_classification"]["label_list"]),
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=embeds_dropout,
                          lm_output_types=["per_sequence"],
                          device=device)

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device,
        learning_rate=learning_rate,
        use_amp=use_amp)

    # 6. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
    # and evaluates it from time to time.
    # Also create an EarlyStopping instance and pass it on to the trainer.
    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    if early_stopping:
        earlystopping = EarlyStopping(
            metric="f1_macro", mode="max",  # use f1_macro from the dev evaluator of the trainer
            # metric="loss", mode="min",    # use loss from the dev evaluator of the trainer
            save_dir=save_dir,  # where to save the best model
            patience=2  # number of evaluations to wait for improvement before terminating the training
        )
    else:
        earlystopping = None

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=data_silo,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device,
                      early_stopping=earlystopping)

    # 7. Let it grow
    trainer.train()

    # 8. Store it:
    # NOTE: if early stopping is used, the best model has already been stored in the directory
    # defined with the EarlyStopping instance.
    # The model we have at this moment is the model from the last training epoch that was carried
    # out before early stopping terminated the training.
    if save_model:
        model.save(save_dir)
        processor.save(save_dir)

    if register_model:
        dt_task.upload('model_dir', destination='model')
def main(args):
    print(f"[INFO] PyTorch Version: {torch.__version__}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("[INFO] Devices available: {}".format(device))

    checkpoint_path = Path(args.ckpt_path) / args.run_name

    ml_logger = MLFlowLogger(tracking_uri=args.tracking_uri)
    ml_logger.init_experiment(experiment_name=args.experiment_name, run_name=args.run_name)

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=args.pretrained_model_name_or_path,
        do_lower_case=False)

    # Processor
    if args.task_name == "text_classification":
        processor = TextClassificationProcessor(tokenizer=tokenizer,
                                                train_filename=args.train_filename,
                                                dev_filename=None,
                                                test_filename=args.test_filename,
                                                header=0,
                                                max_seq_len=args.max_seq_len,
                                                data_dir=args.data_dir,
                                                label_list=args.label_list,
                                                metric=args.metric,
                                                label_column_name=args.label_column_name,
                                                text_column_name=args.text_column_name)
    elif args.task_name == "question_answering":
        processor = SquadProcessor(tokenizer=tokenizer,
                                   train_filename=args.train_filename,
                                   dev_filename=args.test_filename,
                                   test_filename=args.test_filename,
                                   max_seq_len=args.max_seq_len,
                                   data_dir=args.data_dir,
                                   label_list=args.label_list,
                                   metric=args.metric,
                                   max_query_length=64,
                                   doc_stride=128,
                                   max_answers=1)
    else:
        raise ValueError("task name error")
    processor.save(checkpoint_path)

    # DataSilo
    data_silo = DataSilo(processor=processor,
                         batch_size=args.batch_size,
                         eval_batch_size=args.eval_batch_size,
                         caching=True,
                         cache_path=checkpoint_path)

    # LanguageModel: Build pretrained language model
    language_model = LanguageModel.load(args.pretrained_model_name_or_path, language="korean")

    # PredictionHead: Build predictor layer
    if args.task_name == "text_classification":
        # If you do classification on imbalanced classes, consider using class weights.
        # They change the loss function to down-weight frequent classes.
        prediction_head = TextClassificationHead(
            num_labels=len(args.label_list),
            class_weights=data_silo.calculate_class_weights(task_name=args.task_name))
    elif args.task_name == "question_answering":
        prediction_head = QuestionAnsweringHead(
            layer_dims=[768, 2],
            task_name=args.task_name,
        )
    else:
        raise ValueError("task name error")

    # AdaptiveModel: Combine all
    if args.task_name == "text_classification":
        lm_output_types = ["per_sequence"]
    elif args.task_name == "question_answering":
        lm_output_types = ["per_token"]
    else:
        raise ValueError("task name error")

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=args.embeds_dropout_prob,
                          lm_output_types=lm_output_types,
                          device=device)

    # Initialize Optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        device=device,
        learning_rate=args.learning_rate,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=args.n_epochs)

    # EarlyStopping
    earlymetric = "f1" if args.task_name == "question_answering" else "acc"
    mode = "max" if args.task_name in ["text_classification", "question_answering"] else "min"
    earlystop = EarlyStopping(save_dir=checkpoint_path,
                              metric=earlymetric,
                              mode=mode,
                              patience=5)

    # Trainer
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        lr_schedule=lr_schedule,
        data_silo=data_silo,
        early_stopping=earlystop,
        evaluate_every=args.evaluate_every,
        checkpoints_to_keep=args.checkpoints_to_keep,
        checkpoint_root_dir=checkpoint_path,
        checkpoint_every=args.checkpoint_every,
        epochs=args.n_epochs,
        n_gpu=args.n_gpu,
        device=device,
    )

    # now train!
    model = trainer.train()
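# Sketch of a possible command-line interface for main() above. The option names mirror the
# args.* attributes the function reads; every default value below is an assumption added for
# illustration, not taken from the original snippet.
import argparse
from pathlib import Path

def build_parser():
    p = argparse.ArgumentParser()
    p.add_argument("--task_name", choices=["text_classification", "question_answering"], default="text_classification")
    p.add_argument("--pretrained_model_name_or_path", default="bert-base-multilingual-cased")
    p.add_argument("--data_dir", type=Path, default=Path("data"))
    p.add_argument("--train_filename", default="train.tsv")
    p.add_argument("--test_filename", default="test.tsv")
    p.add_argument("--label_list", nargs="+", default=["0", "1"])
    p.add_argument("--label_column_name", default="label")
    p.add_argument("--text_column_name", default="text")
    p.add_argument("--metric", default="acc")
    p.add_argument("--max_seq_len", type=int, default=128)
    p.add_argument("--batch_size", type=int, default=32)
    p.add_argument("--eval_batch_size", type=int, default=64)
    p.add_argument("--embeds_dropout_prob", type=float, default=0.1)
    p.add_argument("--learning_rate", type=float, default=3e-5)
    p.add_argument("--n_epochs", type=int, default=3)
    p.add_argument("--n_gpu", type=int, default=1)
    p.add_argument("--evaluate_every", type=int, default=500)
    p.add_argument("--checkpoint_every", type=int, default=1000)
    p.add_argument("--checkpoints_to_keep", type=int, default=3)
    p.add_argument("--ckpt_path", default="checkpoints")
    p.add_argument("--run_name", default="run_0")
    p.add_argument("--experiment_name", default="farm_experiment")
    p.add_argument("--tracking_uri", default="logs")
    return p

if __name__ == "__main__":
    main(build_parser().parse_args())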
def text_pair_classification():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)

    ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    ml_logger.init_experiment(experiment_name="Public_FARM",
                              run_name="Run_text_pair_classification")

    ##########################
    ########## Settings ######
    ##########################
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    n_epochs = 2
    batch_size = 64
    evaluate_every = 500
    lang_model = "bert-base-cased"
    label_list = ["0", "1"]

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model, do_lower_case=False)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    # We do not have a sample dataset for regression yet, add your own dataset to run the example
    processor = TextPairClassificationProcessor(
        tokenizer=tokenizer,
        label_list=label_list,
        metric="acc",
        label_column_name="label",
        max_seq_len=64,
        train_filename=training_filename,
        dev_filename=test_filename,
        test_filename=test_filename,
        data_dir=Path("../data"),
        tasks={"text_classification"},
        delimiter="\t")
        # train_filename=training_filename,
        # test_filename=test_filename,
        # dev_filename=test_filename,
        # dev_split=0.5,
        # data_dir=Path("../data/asnq_binary"),

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    # Old version before StreamingDataSilo:
    # data_silo = DataSilo(
    #     processor=processor,
    #     batch_size=batch_size, max_processes=4)

    # 4. Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task
    prediction_head = TextClassificationHead(
        num_labels=len(label_list),
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=0.1,
                          lm_output_types=["per_sequence_continuous"],
                          device=device)

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=5e-6,
        device=device,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs)

    now = datetime.now()  # current date and time

    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    earlystopping = EarlyStopping(
        # metric="f1_weighted", mode="max",  # use f1_macro from the dev evaluator of the trainer
        metric="loss", mode="min",  # use loss from the dev evaluator of the trainer
        save_dir=Path("saved_models/earlystopping/" + now.strftime("%m%d%Y%H%M%S")),  # where to save the best model
        patience=8  # number of evaluations to wait for improvement before terminating the training
    )

    # 6. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
    # and evaluates it from time to time
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=data_silo,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device,
                      early_stopping=earlystopping)
                      # model=model,
                      # optimizer=optimizer,
                      # data_silo=data_silo,
                      # epochs=n_epochs,
                      # n_gpu=n_gpu,
                      # lr_schedule=lr_schedule,
                      # evaluate_every=evaluate_every,
                      # device=device)

    # 7. Let it grow
    # comment this out if going to use a stored model
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    # When a new model is being trained and needs to be saved
    save_dir = Path("saved_models/text_pair_classification_model" + now.strftime("%m%d%Y%H%M%S"))
    model.save(save_dir)
    processor.save(save_dir)
    # When only a model needs to be loaded, change the details to load the needed model
    # save_dir = Path("saved_models/text_pair_classification_model" + "01272021103548")

    # 9. Load it & harvest your fruits (Inference)
    # Add your own text adapted to the dataset you provide
    basic_texts = [
        {
            "text": "<claim-text>The method of claim 10, wherein the indium metal layer is 10 nm to 100 µm thick.</claim-text>",
            "text_b": "<p id=" "p0001" " num=" "0001" ">The present invention is directed to metal plating compositions and methods. More specifically, the present invention is directed to metal plating compositions and methods which provide improved leveling and throwing power.</p <p id=" "p0039" " num=" "0039" ">One or more conventional surfactants may be used. Typically, surfactants include, but are not limited to, nonionic surfactants such as alkyl phenoxy polyethoxyethanols. Other suitable surfactants containing multiple oxyethylene groups also may be used. Such surfactants include compounds of polyoxyethylene polymers having from as many as 20 to 150 repeating units. Such compounds also may perform as suppressors. Also included in the class of polymers are both block and random copolymers of polyoxyethylene (EO) and polyoxypropylene (PO). Surfactants may be added in conventional amounts, such as from 0.05 g/L to 20 g/L or such as from 0.5 g/L to 5 g/L.</p <p id=" "p0040" " num=" "0040" ">Conventional levelers include, but are not limited to, one or more of alkylated polyalkyleneimines and organic sulfo sulfonates. Examples of such compounds include, 4-mercaptopyridine, 2-mercaptothiazoline, ethylene thiourea, thiourea, 1-(2-hydroxyethyl)-2-imidazolidinethion (HIT) and alkylated polyalkyleneimines. Such levelers are included in conventional amounts. Typically, such levelers are included in amounts of 1ppb to 1 g/L, or such as from 10ppb to 500ppm.</p <p id=" "p0042" " num=" "0042" ">Alkali metal salts which may be included in the plating compositions include, but are not limited to, sodium and potassium salts of halogens, such as chloride, fluoride and bromide. Typically chloride is used. Such alkali metal salts are used in conventional amounts.</p <p id=" "p0053" " num=" "0053" ">The metal plating compositions may be used to plate a metal or metal alloy on a substrate by any method known in the art and literature. Typically, the metal or metal alloy is electroplated using conventional electroplating processes with conventional apparatus. A soluble or insoluble anode may be used with the electroplating compositions.</p <p id=" "p0022" " num=" "0022" ">One or more sources of metal ions are included in metal plating compositions to plate metals. The one or more sources of metal ions provide metal ions which include, but are not limited to, copper, tin, nickel, gold, silver, palladium, platinum and indium. Alloys include, but are not limited to, binary and ternary alloys of the foregoing metals. Typically, metals chosen from copper, tin, nickel, gold, silver or indium are plated with the metal plating compositions. More typically, metals chosen from copper, tin, silver or indium are plated. Most typically, copper is plated.</p <p id=" "p0030" " num=" "0030" ">Indium salts which may be used include, but are not limited to, one or more of indium salts of alkane sulfonic acids and aromatic sulfonic acids, such as methanesulfonic acid, ethanesulfonic acid, butane sulfonic acid, benzenesulfonic acid and toluenesulfonic acid, salts of sulfamic acid, sulfate salts, chloride and bromide salts of indium, nitrate salts, hydroxide salts, indium oxides, fluoroborate salts, indium salts of carboxylic acids, such as citric acid, acetoacetic acid, glyoxylic acid, pyruvic acid, glycolic acid, malonic acid, hydroxamic acid, iminodiacetic acid, salicylic acid, glyceric acid, succinic acid, malic acid, tartaric acid, hydroxybutyric acid, indium salts of amino acids, such as arginine, aspartic acid, asparagine, glutamic acid, glycine, glutamine, leucine, lysine, threonine, isoleucine, and valine.</p"
        },
        {
            "text": "<claim-text>A toner comprising: <claim-text>toner base particles; and</claim-text> <claim-text>an external additive,</claim-text> <claim-text>the toner base particles each comprising a binder resin and a colorant,</claim-text> <claim-text>wherein the external additive comprises coalesced particles,</claim-text> <claim-text>wherein the coalesced particles are each a non-spherical secondary particle in which primary particles are coalesced together, and</claim-text> <claim-text>wherein an index of a particle size distribution of the coalesced particles is expressed by the following Formula (1): <maths id=" "math0004" " num=" "(formula (1)" "><math display=" "block" "><mfrac><msub><mi>Db</mi><mn>50</mn></msub><msub><mi>Db</mi><mn>10</mn></msub></mfrac><mo>≦</mo><mn>1.20</mn></math><img id=" "ib0008" " file=" "imgb0008.tif" " wi=" "93" " he=" "21" " img-content=" "math" " img-format=" "tif" "/></maths><br/> where, in a distribution diagram in which particle diameters in nm of the coalesced particles are on a horizontal axis and cumulative percentages in % by number of the coalesced particles are on a vertical axis and in which the coalesced particles are accumulated from the coalesced particles having smaller particle diameters to the coalesced particles having larger particle diameters, Db<sub>50</sub> denotes a particle diameter of the coalesced particle at which the cumulative percentage is 50% by number, and Db<sub>10</sub> denotes a particle diameter of the coalesced particle at which the cumulative percentage is 10% by number.</claim-text></claim-text>",
            "text_b": "<p id=" "p0177" " num=" "0177" ">For a similar reason, it is preferred that the electroconductive fine powder has a volume-average particle size of 0.5 - 5 µm, more preferably 0.8 - 5 µm, further preferably 1.1 - 5 µm and has a particle size distribution such that particles of 0.5 µm or smaller occupy at most 70 % by volume and particles of 5.0 µm or larger occupy at most 5 % by number.</p <p id=" "p0189" " num=" "0189" ">The volume-average particle size and particle size distribution of the electroconductive fine powder described herein are based on values measured in the following manner. A laser diffraction-type particle size distribution measurement apparatus (" "Model LS-230" ", available from Coulter Electronics Inc.) is equipped with a liquid module, and the measurement is performed in a particle size range of 0.04 - 2000 µm to obtain a volume-basis particle size distribution. For the measurement, a minor amount of surfactant is added to 10 cc of pure water and 10 mg of a sample electroconductive fine powder is added thereto, followed by 10 min. of dispersion by means of an ultrasonic disperser (ultrasonic homogenizer) to obtain a sample dispersion liquid, which is subjected to a single time of measurement for 90 sec.</p <p id=" "p0191" " num=" "0191" ">In the case where the electroconductive fine powder is composed of agglomerate particles, the particle size of the electroconductive fine powder is determined as the particle size of the agglomerate. The electroconductive fine powder in the form of agglomerated secondary particles can be used as well as that in the form of primary particles. Regardless of its agglomerated form, the electroconductive fine powder can exhibit its desired function of charging promotion by presence in the form of the agglomerate in the charging section at the contact position<!-- EPO <DP n=" "85" "> --> between the charging member and the image-bearing member or in a region in proximity thereto.</p"
        },
    ]

    model = Inferencer.load(save_dir)
    result = model.inference_from_dicts(dicts=basic_texts)
    print(result)
    model.close_multiprocessing_pool()
# 5. Create an optimizer
model, optimizer, lr_schedule = initialize_optimizer(
    model=model,
    learning_rate=0.5e-5,
    device=device,
    n_batches=len(data_silo.loaders["train"]),
    n_epochs=n_epochs,
    use_amp=use_amp)

# 6. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
# and evaluates it from time to time.
# Also create an EarlyStopping instance and pass it on to the trainer.
# An early stopping instance can be used to save the model that performs best on the dev set
# according to some metric and stop training when no improvement is happening for some iterations.
earlystopping = EarlyStopping(
    metric="f1_offense", mode="max",  # use the metric from our own metrics function instead of loss
    # metric="f1_macro", mode="max",  # use f1_macro from the dev evaluator of the trainer
    # metric="loss", mode="min",      # use loss from the dev evaluator of the trainer
    save_dir="saved_models/bert-german-doc-tutorial-es",  # where to save the best model
    patience=5  # number of evaluations to wait for improvement before terminating the training
)

trainer = Trainer(
    optimizer=optimizer,
    data_silo=data_silo,
    epochs=n_epochs,
    n_gpu=n_gpu,
    lr_schedule=lr_schedule,
    evaluate_every=evaluate_every,
    device=device,
    early_stopping=earlystopping)

# 7. Let it grow
def run_experiment(args):
    logger.info("\n***********************************************"
                f"\n************* Experiment: {args.task.name} ************"
                "\n************************************************")
    ml_logger = MlLogger(tracking_uri=args.logging.mlflow_url)
    ml_logger.init_experiment(
        experiment_name=args.logging.mlflow_experiment,
        run_name=args.logging.mlflow_run_name,
        nested=args.logging.mlflow_nested,
    )

    validate_args(args)
    distributed = bool(args.general.local_rank != -1)

    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=args.general.cuda,
        local_rank=args.general.local_rank,
        use_amp=args.general.use_amp,
    )

    args.parameter.batch_size = int(args.parameter.batch_size //
                                    args.parameter.gradient_accumulation_steps)
    set_all_seeds(args.general.seed)

    # Prepare Data
    tokenizer = Tokenizer.load(args.parameter.model, do_lower_case=args.parameter.lower_case)
    processor = Processor.load(
        tokenizer=tokenizer,
        max_seq_len=args.parameter.max_seq_len,
        data_dir=Path(args.general.data_dir),
        **args.task.toDict(),  # args is of type DotMap and needs conversion to std python dicts
    )
    data_silo = DataSilo(
        processor=processor,
        batch_size=args.parameter.batch_size,
        distributed=distributed,
    )

    class_weights = None
    if args.parameter.balance_classes:
        task_names = list(processor.tasks.keys())
        if len(task_names) > 1:
            raise NotImplementedError(
                f"Balancing classes is currently not supported for multitask experiments. Got tasks: {task_names} "
            )
        class_weights = data_silo.calculate_class_weights(task_name=task_names[0])

    model = get_adaptive_model(
        lm_output_type=args.parameter.lm_output_type,
        prediction_heads=args.parameter.prediction_head,
        layer_dims=args.parameter.layer_dims,
        model=args.parameter.model,
        device=device,
        class_weights=class_weights,
        embeds_dropout_prob=args.parameter.embeds_dropout_prob,
    )

    # Init optimizer
    optimizer_opts = args.optimizer.optimizer_opts.toDict() if args.optimizer.optimizer_opts else None
    schedule_opts = args.optimizer.schedule_opts.toDict() if args.optimizer.schedule_opts else None
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=args.optimizer.learning_rate,
        schedule_opts=schedule_opts,
        optimizer_opts=optimizer_opts,
        use_amp=args.general.use_amp,
        n_batches=len(data_silo.loaders["train"]),
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        n_epochs=args.parameter.epochs,
        device=device)

    model_name = f"{model.language_model.name}-{model.language_model.language}-{args.task.name}"

    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    if "early_stopping" in args:
        early_stopping = EarlyStopping(
            metric=args.task.metric,
            mode=args.early_stopping.mode,
            save_dir=Path(f"{args.general.output_dir}/{model_name}_early_stopping"),  # where to save the best model
            patience=args.early_stopping.patience  # number of evaluations to wait for improvement before terminating the training
        )
    else:
        early_stopping = None

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=args.parameter.epochs,
        n_gpu=n_gpu,
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        use_amp=args.general.use_amp,
        local_rank=args.general.local_rank,
        lr_schedule=lr_schedule,
        evaluate_every=args.logging.eval_every,
        device=device,
        early_stopping=early_stopping)

    model = trainer.train()

    processor.save(Path(f"{args.general.output_dir}/{model_name}"))
    model.save(Path(f"{args.general.output_dir}/{model_name}"))
    ml_logger.end_run()
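# Sketch of the nested configuration run_experiment() expects: args is a DotMap whose field names
# are taken from the attribute accesses above. Every concrete value here is an assumption added
# for illustration; the real experiment configs may contain additional fields (e.g. everything
# consumed via args.task.toDict() by the Processor).
from dotmap import DotMap

args = DotMap({
    "general": {"cuda": True, "use_amp": None, "local_rank": -1, "seed": 42,
                "data_dir": "data", "output_dir": "saved_models"},
    "logging": {"mlflow_url": "logs", "mlflow_experiment": "my_experiment",
                "mlflow_run_name": "run_0", "mlflow_nested": False, "eval_every": 100},
    "task": {"name": "germeval18_coarse", "metric": "f1_macro"},
    "parameter": {"model": "bert-base-german-cased", "lower_case": False, "max_seq_len": 128,
                  "batch_size": 32, "gradient_accumulation_steps": 1, "balance_classes": True,
                  "lm_output_type": "per_sequence", "prediction_head": "TextClassificationHead",
                  "layer_dims": [768, 2], "embeds_dropout_prob": 0.1, "epochs": 3},
    "optimizer": {"learning_rate": 3e-5, "optimizer_opts": None, "schedule_opts": None},
    "early_stopping": {"mode": "max", "patience": 5},
})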
def outcome_pretraining(task_config, model_name, cache_dir, run_name="0", lr=1e-05,
                        warmup_steps=5000, embeds_dropout=0.1,
                        epochs=200,  # large because we use early stopping by default
                        batch_size=20, grad_acc_steps=1,
                        early_stopping_metric="loss", early_stopping_mode="min",
                        early_stopping_patience=10,
                        model_class="Bert", tokenizer_class="BertTokenizer", do_lower_case=True,
                        do_train=True, do_eval=True, do_hpo=False,
                        max_seq_len=512, seed=11, eval_every=500,
                        use_amp=False, use_cuda=True):
    # Load task config
    task_config = yaml.safe_load(open(task_config))
    data_dir = Path(task_config["data"]["data_dir"])

    # General Settings
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=use_amp)

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=model_name,
                               tokenizer_class=tokenizer_class,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = OutcomePretrainingProcessor(tokenizer=tokenizer,
                                            max_seq_len=max_seq_len,
                                            data_dir=data_dir,
                                            train_filename=task_config["data"]["train_filename"],
                                            dev_filename=task_config["data"]["dev_filename"],
                                            seed=seed,
                                            max_size_admission=50,
                                            max_size_discharge=50,
                                            cache_dir=cache_dir)

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = OutcomePretrainingDataSilo(processor=processor,
                                           caching=True,
                                           cache_dir=cache_dir,
                                           batch_size=batch_size,
                                           max_multiprocessing_chunksize=200)

    if do_train:
        # Set save dir for experiment output
        save_dir = Path(task_config["output_dir"]) / f'{task_config["experiment_name"]}_{run_name}'

        # Use HPO config args if config is passed
        if do_hpo:
            save_dir = save_dir / tune.session.get_trial_name()
        else:
            exp_name = f"exp_{random.randint(100000, 999999)}"
            save_dir = save_dir / exp_name

        # Create save dir
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # Setup MLFlow logger
        ml_logger = MLFlowLogger(tracking_uri=task_config["log_dir"])
        ml_logger.init_experiment(experiment_name=task_config["experiment_name"],
                                  run_name=f'{task_config["experiment_name"]}_{run_name}')

        # 4. Create an AdaptiveModel
        # a) which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(model_name, language_model_class=model_class)

        # b) and a NextSentenceHead prediction head, or a TextClassificationHead if it's not a Bert model
        if model_class == "Bert":
            next_sentence_head = NextSentenceHead.load(model_class)
        else:
            next_sentence_head = TextClassificationHead(num_labels=2)

        model = AdaptiveModel(
            language_model=language_model,
            prediction_heads=[next_sentence_head],
            embeds_dropout_prob=embeds_dropout,
            lm_output_types=["per_sequence"],
            device=device,
        )

        # 5. Create an optimizer
        schedule_opts = {"name": "LinearWarmup", "num_warmup_steps": warmup_steps}
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=lr,
            device=device,
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=epochs,
            use_amp=use_amp,
            grad_acc_steps=grad_acc_steps,
            schedule_opts=schedule_opts)

        # 6. Create an early stopping instance
        early_stopping = None
        if early_stopping_mode != "none":
            early_stopping = EarlyStopping(mode=early_stopping_mode,
                                           min_delta=0.0001,
                                           save_dir=save_dir,
                                           metric=early_stopping_metric,
                                           patience=early_stopping_patience)

        # 7. Feed everything to the Trainer, which takes care of growing our model into a powerful plant
        # and evaluates it from time to time
        trainer = ExtendedTrainer(model=model,
                                  optimizer=optimizer,
                                  data_silo=data_silo,
                                  epochs=epochs,
                                  n_gpu=n_gpu,
                                  lr_schedule=lr_schedule,
                                  evaluate_every=eval_every,
                                  early_stopping=early_stopping,
                                  device=device,
                                  grad_acc_steps=grad_acc_steps,
                                  evaluator_test=do_eval)

        def score_callback(eval_score, train_loss):
            tune.report(roc_auc_dev=eval_score, train_loss=train_loss)

        # 8. Train the model
        trainer.train(score_callback=score_callback if do_hpo else None)

        # 9. Save model if not saved in early stopping
        model.save(save_dir / "final_model")
        processor.save(save_dir / "final_model")

    if do_eval:
        # Load newly trained model or existing model
        if do_train:
            model_dir = save_dir
        else:
            model_dir = Path(model_name)

        logger.info("###### Eval on TEST SET #####")
        evaluator_test = Evaluator(data_loader=data_silo.get_data_loader("test"),
                                   tasks=data_silo.processor.tasks,
                                   device=device)

        # Load trained model for evaluation
        model = AdaptiveModel.load(model_dir, device)
        model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

        # Evaluate
        results = evaluator_test.eval(model, return_preds_and_labels=True)

        # Log results
        utils.log_results(results,
                          dataset_name="test",
                          steps=len(evaluator_test.data_loader),
                          save_path=model_dir / "eval_results.txt")