Example #1
 def log_params(self):
     params = {
         "epochs": self.epochs,
         "n_gpu": self.n_gpu,
         "device": self.device
     }
     MlLogger.log_params(params)
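The examples on this page assume that `MlLogger` has already been initialized elsewhere. In FARM, `MlLogger` is commonly an alias for `farm.utils.MLFlowLogger`; the following is a minimal setup sketch under that assumption (tracking URI, experiment and run names are illustrative):

from farm.utils import MLFlowLogger as MlLogger

# Point MLflow at a tracking server (or a local directory) and open a run.
# The log_params()/log_metrics() calls in the examples then attach their
# values to this run.
ml_logger = MlLogger(tracking_uri="./mlruns")
ml_logger.init_experiment(experiment_name="example-experiment", run_name="run-1")

MlLogger.log_params({"epochs": 3, "n_gpu": 1, "device": "cuda"})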
Example #2
 def log_params(self):
     """
     Logs parameters to the generic logger MlLogger
     """
     params = {
         "lm1_type": self.language_model1.__class__.__name__,
         "lm1_name": self.language_model1.name,
         "lm1_output_types": ",".join(self.lm1_output_types),
         "lm2_type": self.language_model2.__class__.__name__,
         "lm2_name": self.language_model2.name,
         "lm2_output_types": ",".join(self.lm2_output_types),
         "prediction_heads": ",".join(head.__class__.__name__ for head in self.prediction_heads),
     }
     try:
         MlLogger.log_params(params)
     except Exception as e:
         logger.warning(f"ML logging didn't work: {e}")
Example #3
    def backward_propagate(self, loss, step):
        loss = self.adjust_loss(loss)
        if self.global_step % self.log_loss_every == 0 and self.local_rank in [-1, 0]:
            MlLogger.log_metrics(
                {"Train_loss_total": float(loss.detach().cpu().numpy())},
                step=self.global_step,
            )
            if self.log_learning_rate:
                MlLogger.log_metrics(
                    {"learning_rate": self.lr_schedule.get_last_lr()[0]},
                    step=self.global_step,
                )
        if self.use_amp:
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if step % self.grad_acc_steps == 0:
            if self.max_grad_norm is not None:
                if self.use_amp:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(self.optimizer), self.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.max_grad_norm)
            self.optimizer.step()
            self.optimizer.zero_grad()
            if self.lr_schedule:
                self.lr_schedule.step()
        return loss
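For context, here is a sketch of how a method like `backward_propagate` is typically driven from a training loop with gradient accumulation. The trainer attributes, the data loader and the `logits_to_loss` call are assumptions based on the attributes used above, not part of the example itself:

# Hypothetical training loop around backward_propagate(); `trainer` is assumed
# to expose the attributes used above (model, optimizer, grad_acc_steps, ...).
for epoch in range(n_epochs):
    for step, batch in enumerate(train_data_loader):
        logits = trainer.model.forward(**batch)
        per_sample_loss = trainer.model.logits_to_loss(logits=logits, **batch)
        # backward_propagate() scales the loss (via adjust_loss), logs it every
        # log_loss_every steps, and only calls optimizer.step() / zero_grad()
        # when step % grad_acc_steps == 0.
        loss = trainer.backward_propagate(per_sample_loss, step)
        trainer.global_step += 1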
Example #4
    def log_results(results,
                    dataset_name,
                    steps,
                    logging=True,
                    print=True,
                    num_fold=None):
        # Print a header
        header = "\n\n"
        header += BUSH_SEP + "\n"
        header += "***************************************************\n"
        if num_fold:
            header += f"***** EVALUATION | FOLD: {num_fold} | {dataset_name.upper()} SET | AFTER {steps} BATCHES *****\n"
        else:
            header += f"***** EVALUATION | {dataset_name.upper()} SET | AFTER {steps} BATCHES *****\n"
        header += "***************************************************\n"
        header += BUSH_SEP + "\n"
        logger.info(header)

        for head_num, head in enumerate(results):
            logger.info("\n _________ {} _________".format(head['task_name']))
            for metric_name, metric_val in head.items():
                # log with ML framework (e.g. Mlflow)
                if logging:
                    if metric_name not in ["preds", "labels"] and not metric_name.startswith("_"):
                        if isinstance(metric_val, numbers.Number):
                            MlLogger.log_metrics(
                                metrics={f"{dataset_name}_{metric_name}_{head['task_name']}": metric_val},
                                step=steps,
                            )
                # print via standard python logger
                if print:
                    if metric_name == "report":
                        if isinstance(metric_val, str) and len(metric_val) > 8000:
                            metric_val = metric_val[:7500] + "\n ............................. \n" + metric_val[-500:]
                        logger.info("{}: \n {}".format(metric_name, metric_val))
                    else:
                        if metric_name not in ["preds", "labels"] and not metric_name.startswith("_"):
                            logger.info("{}: {}".format(metric_name, metric_val))
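Judging from the loop above, `results` is a list with one metrics dict per prediction head, where `preds`, `labels` and keys starting with an underscore are excluded from ML logging. An illustrative call (the metric names and values are made up, and the enclosing Evaluator class is assumed):

results = [
    {
        "task_name": "text_classification",
        "loss": 0.3421,
        "acc": 0.912,
        "report": "precision    recall  f1-score ...",
        "preds": ["positive", "negative"],   # not sent to the ML logger
        "labels": ["positive", "negative"],  # not sent to the ML logger
    }
]
Evaluator.log_results(results, dataset_name="dev", steps=500)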
Example #5
def get_scheduler(optimizer, opts):
    """ Get the scheduler based on dictionary with options. Options are passed to the scheduler constructor.

    :param optimizer: optimizer whose learning rate to control
    :param opts: dictionary of args to be passed to the constructor of the schedule
    :return: created scheduler
    """
    schedule_name = opts.get('name')
    try:
        sched_constructor = getattr(import_module('torch.optim.lr_scheduler'),
                                    schedule_name)
    except AttributeError:
        try:
            # The method names in transformers became quite long and unwieldy.
            # For convenience we offer shorter aliases (e.g. "LinearWarmup").
            scheduler_translations = {
                "LinearWarmup": "get_linear_schedule_with_warmup",
                "ConstantWarmup": "get_constant_schedule_with_warmup",
                "Constant": "get_constant_schedule",
                "CosineWarmup": "get_cosine_schedule_with_warmup",
                "CosineWarmupWithRestarts": "get_cosine_with_hard_restarts_schedule_with_warmup",
            }
            if schedule_name in scheduler_translations:
                schedule_name = scheduler_translations[schedule_name]
            # In contrast to torch, we get a function here, not a class
            sched_constructor = getattr(
                import_module('transformers.optimization'), schedule_name)
        except AttributeError:
            raise AttributeError(
                f"Scheduler '{schedule_name}' not found in 'torch' or 'transformers'"
            )

    logger.info(f"Using scheduler '{schedule_name}'")

    # get supported args of constructor
    allowed_args = inspect.signature(sched_constructor).parameters.keys()

    # convert from warmup proportion to steps if required
    if 'num_warmup_steps' in allowed_args and 'num_warmup_steps' not in opts and 'warmup_proportion' in opts:
        opts['num_warmup_steps'] = int(opts["warmup_proportion"] *
                                       opts["num_training_steps"])
        MlLogger.log_params({"warmup_proportion": opts["warmup_proportion"]})

    # only pass args that are supported by the constructor
    constructor_opts = {k: v for k, v in opts.items() if k in allowed_args}

    # Logging
    logger.info(f"Loading schedule `{schedule_name}`: '{constructor_opts}'")
    MlLogger.log_params(constructor_opts)
    MlLogger.log_params({"schedule_name": schedule_name})

    scheduler = sched_constructor(optimizer, **constructor_opts)
    scheduler.opts = opts  # save the opts with the scheduler to use in load/save
    return scheduler
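A small usage sketch for `get_scheduler`, assuming an `optimizer` has already been created (for example via `_get_optim` or `initialize_optimizer` shown further below):

# "LinearWarmup" is resolved to transformers' get_linear_schedule_with_warmup;
# warmup_proportion is converted to num_warmup_steps inside get_scheduler.
schedule_opts = {
    "name": "LinearWarmup",
    "warmup_proportion": 0.1,
    "num_training_steps": 10000,
}
lr_schedule = get_scheduler(optimizer, schedule_opts)

# In the training loop (see backward_propagate above):
# optimizer.step(); optimizer.zero_grad(); lr_schedule.step()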
Example #6
    def _calculate_statistics(self):
        """ Calculate and log simple summary statistics of the datasets """
        logger.info("")
        logger.info("DATASETS SUMMARY")
        logger.info("================")

        self.counts = {}
        clipped = -1
        ave_len = -1

        if self.data["train"]:
            self.counts["train"] = len(self.data["train"])
            if "input_ids" in self.tensor_names:
                clipped, ave_len, seq_lens, max_seq_len = self._calc_length_stats_single_encoder()
            elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
                clipped, ave_len, seq_lens, max_seq_len = self._calc_length_stats_biencoder()
            else:
                logger.warning("Could not compute length statistics because neither 'input_ids' nor both "
                               "'query_input_ids' and 'passage_input_ids' are present.")
        else:
            self.counts["train"] = 0

        if self.data["dev"]:
            self.counts["dev"] = len(self.data["dev"])
        else:
            self.counts["dev"] = 0

        if self.data["test"]:
            self.counts["test"] = len(self.data["test"])
        else:
            self.counts["test"] = 0


        logger.info("Examples in train: {}".format(self.counts["train"]))
        logger.info("Examples in dev  : {}".format(self.counts["dev"]))
        logger.info("Examples in test : {}".format(self.counts["test"]))
        logger.info("")
        if self.data["train"]:
            if "input_ids" in self.tensor_names:
                logger.info("Longest sequence length observed after clipping:     {}".format(max(seq_lens)))
                logger.info("Average sequence length after clipping: {}".format(ave_len))
                logger.info("Proportion clipped:      {}".format(clipped))
                if clipped > 0.5:
                    logger.info("[Farmer's Tip] {}% of your samples got cut down to {} tokens. "
                                "Consider increasing max_seq_len. "
                                "This will lead to higher memory consumption but is likely to "
                                "improve your model performance".format(round(clipped * 100, 1), max_seq_len))
            elif "query_input_ids" in self.tensor_names and "passage_input_ids" in self.tensor_names:
                logger.info("Longest query length observed after clipping: {}   - for max_query_len: {}".format(max(seq_lens[0]),max_seq_len[0]))
                logger.info("Average query length after clipping:          {}".format(ave_len[0]))
                logger.info("Proportion queries clipped:                   {}".format(clipped[0]))
                logger.info("")
                logger.info("Longest passage length observed after clipping: {}   - for max_passage_len: {}".format(max(seq_lens[1]),max_seq_len[1]))
                logger.info("Average passage length after clipping:          {}".format(ave_len[1]))
                logger.info("Proportion passages clipped:                    {}".format(clipped[1]))

        MlLogger.log_params(
            {
                "n_samples_train": self.counts["train"],
                "n_samples_dev": self.counts["dev"],
                "n_samples_test": self.counts["test"],
                "batch_size": self.batch_size,
                "ave_seq_len": ave_len,
                "clipped": clipped,
            }
        )
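In FARM, this method runs while a `DataSilo` is being built: the silo loads the train/dev/test splits and then logs the statistics above. A minimal sketch under that assumption (the `processor` is expected to exist already):

from farm.data_handler.data_silo import DataSilo

# Building the silo converts the datasets and triggers _calculate_statistics(),
# which fills data_silo.counts and logs the summary shown above.
data_silo = DataSilo(processor=processor, batch_size=32)
print(data_silo.counts)  # e.g. {"train": 10000, "dev": 1000, "test": 1000}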
Example #7
    def __init__(
        self,
        model,
        processor,
        task_type,
        batch_size=4,
        gpu=False,
        name=None,
        return_class_probs=False,
        extraction_strategy=None,
        extraction_layer=None,
        s3e_stats=None,
        num_processes=None,
        disable_tqdm=False,
        benchmarking=False,
        dummy_ph=False
    ):
        """
        Initializes Inferencer from an AdaptiveModel and a Processor instance.

        :param model: AdaptiveModel to run in inference mode
        :type model: AdaptiveModel
        :param processor: A dataset specific Processor object which will turn input (file or dict) into a Pytorch Dataset.
        :type processor: Processor
        :param task_type: Type of task the model should be used for. Currently supporting:
                          "embeddings", "question_answering", "text_classification", "ner". More coming soon...
        :type task_type: str
        :param batch_size: Number of samples to process per batch
        :type batch_size: int
        :param gpu: Whether to use a GPU
        :type gpu: bool
        :param name: Name for the current Inferencer model, displayed in the REST API
        :type name: string
        :param return_class_probs: either return probability distribution over all labels or the prob of the associated label
        :type return_class_probs: bool
        :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean'
                               (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors),
                               's3e' (sentence vector via S3E pooling, see https://arxiv.org/abs/2002.09620)
        :type extraction_strategy: str
        :param extraction_layer: number of the layer from which the embeddings shall be extracted. Default: -1 (the last layer).
        :type extraction_layer: int
        :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                          (only needed for task_type="embeddings" and extraction_strategy = "s3e")
        :type s3e_stats: dict
        :param num_processes: the number of processes for `multiprocessing.Pool`.
                              Set to value of 1 (or 0) to disable multiprocessing.
                              Set to None to let Inferencer use all CPU cores minus one.
                              If you want to debug the Language Model, you might need to disable multiprocessing!
                              **Warning!** If you use multiprocessing you have to close the
                              `multiprocessing.Pool` again! To do so call
                              :func:`~farm.infer.Inferencer.close_multiprocessing_pool` after you are
                              done using this class. The garbage collector will not do this for you!
        :type num_processes: int
        :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
        :type disable_tqdm: bool
        :param dummy_ph: If True, methods of the prediction head will be replaced
                     with a dummy method. This is used to isolate lm run time from ph run time.
        :type dummy_ph: bool
        :param benchmarking: If True, a benchmarking object will be initialised within the class and
                             certain parts of the code will be timed for benchmarking. Should be kept
                             False if not benchmarking since these timing checkpoints require synchronization
                             of the asynchronous Pytorch operations and may slow down the model.
        :type benchmarking: bool
        :return: An instance of the Inferencer.

        """
        WANDBLogger.disable()

        # For benchmarking
        if dummy_ph:
            model.bypass_ph()

        self.benchmarking = benchmarking
        if self.benchmarking:
            self.benchmarker = Benchmarker()

        # Init device and distributed settings
        device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, use_amp=None)

        self.processor = processor
        self.model = model
        self.model.eval()
        self.batch_size = batch_size
        self.device = device
        self.language = self.model.get_language()
        self.task_type = task_type
        self.disable_tqdm = disable_tqdm
        self.problematic_sample_ids = set()

        if task_type == "embeddings":
            if not extraction_layer or not extraction_strategy:
                    logger.warning("Using task_type='embeddings', but couldn't find one of the args `extraction_layer` and `extraction_strategy`. "
                                   "Since FARM 0.4.2, you set both when initializing the Inferencer and then call inferencer.inference_from_dicts() instead of inferencer.extract_vectors()")
            self.model.prediction_heads = torch.nn.ModuleList([])
            self.model.language_model.extraction_layer = extraction_layer
            self.model.language_model.extraction_strategy = extraction_strategy
            self.model.language_model.s3e_stats = s3e_stats

        # TODO add support for multiple prediction heads

        self.name = name if name is not None else f"anonymous-{self.task_type}"
        self.return_class_probs = return_class_probs

        model.connect_heads_with_processor(processor.tasks, require_labels=False)
        set_all_seeds(42)

        self._set_multiprocessing_pool(num_processes)
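In practice an Inferencer is usually created via its `load()` classmethod rather than by calling this constructor directly; a minimal question-answering sketch under that assumption (the model path and input dict are illustrative):

from farm.infer import Inferencer

inferencer = Inferencer.load(
    "saved_models/bert-base-cased-squad2",  # hypothetical model directory
    task_type="question_answering",
    batch_size=16,
    gpu=True,
    num_processes=0,  # 0 or 1 disables multiprocessing
)

result = inferencer.inference_from_dicts(dicts=[{
    "qas": ["Who develops FARM?"],
    "context": "FARM is an open-source transfer learning framework developed by deepset.",
}])

# If multiprocessing is enabled (num_processes not 0/1), close the pool when done:
inferencer.close_multiprocessing_pool()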
Example #8
def initialize_optimizer(model,
                         n_batches,
                         n_epochs,
                         device,
                         learning_rate,
                         optimizer_opts=None,
                         schedule_opts=None,
                         distributed=False,
                         grad_acc_steps=1,
                         local_rank=-1,
                         use_amp=None):
    """
    Initializes an optimizer and a learning rate scheduler, and converts the model if needed (e.g. for mixed precision).
    By default, we use transformers' AdamW and a linear warmup schedule with a warmup ratio of 0.1.
    You can easily switch optimizer and schedule via `optimizer_opts` and `schedule_opts`.

    :param model: model to optimize (e.g. trimming weights to fp16 / mixed precision)
    :type model: AdaptiveModel
    :param n_batches: number of batches for training
    :type n_batches: int
    :param n_epochs: number of epochs for training
    :param device:
    :param learning_rate: Learning rate
    :type learning_rate: float
    :param optimizer_opts: Dict to customize the optimizer. Choose any optimizer available from torch.optim, apex.optimizers or
                           transformers.optimization by supplying the class name and the parameters for the constructor.
                           Examples:
                           1) AdamW from Transformers (Default):
                           {"name": "TransformersAdamW", "correct_bias": False, "weight_decay": 0.01}
                           2) SGD from pytorch:
                           {"name": "SGD", "momentum": 0.0}
                           3) FusedLAMB from apex:
                           {"name": "FusedLAMB", "bias_correction": True}
    :param schedule_opts: Dict to customize the learning rate schedule.
                          Choose any Schedule from Pytorch or Huggingface's Transformers by supplying the class name
                          and the parameters needed by the constructor.
                          If the dict does not contain ``num_training_steps`` it will be set by
                          calculating it from ``n_batches``, ``grad_acc_steps`` and ``n_epochs``.
                          Examples:
                          1) Linear Warmup (Default):
                          {"name": "LinearWarmup",
                          "num_warmup_steps": 0.1 * num_training_steps,
                          "num_training_steps": num_training_steps}
                          2) CosineWarmup:
                          {"name": "CosineWarmup",
                          "num_warmup_steps": 0.1 * num_training_steps,
                          "num_training_steps": num_training_steps}
                          3) CyclicLR from pytorch:
                          {"name": "CyclicLR", "base_lr": 1e-5, "max_lr":1e-4, "step_size_up": 100}
    :param distributed: Whether training on distributed machines
    :param grad_acc_steps: Number of steps to accumulate gradients for. Helpful to mimic large batch_sizes on small machines.
    :param local_rank: rank of the machine in a distributed setting
    :param use_amp: Optimization level of nvidia's automatic mixed precision (AMP). The higher the level, the faster the model.
                    Options:
                    "O0" (Normal FP32 training)
                    "O1" (Mixed Precision => Recommended)
                    "O2" (Almost FP16)
                    "O3" (Pure FP16).
                    See details on: https://nvidia.github.io/apex/amp.html
    :return: model, optimizer, scheduler
    """

    if use_amp and not AMP_AVAILABLE:
        raise ImportError(
            f'Got use_amp = {use_amp}, but cannot find apex. '
            'Please install Apex if you want to make use of automatic mixed precision. '
            'https://github.com/NVIDIA/apex')

    if (schedule_opts is not None) and (not isinstance(schedule_opts, dict)):
        raise TypeError('Parameter schedule_opts must be None or '
                        'an instance of dict but was {}!'.format(
                            type(schedule_opts)))

    num_train_optimization_steps = int(n_batches / grad_acc_steps) * n_epochs

    # Use some defaults to simplify life of inexperienced users
    if optimizer_opts is None:
        optimizer_opts = {
            "name": "TransformersAdamW",
            "correct_bias": False,
            "weight_decay": 0.01
        }
    optimizer_opts["lr"] = learning_rate

    if schedule_opts is None:
        # Default schedule: Linear Warmup with 10% warmup
        schedule_opts = {
            "name": "LinearWarmup",
            "num_warmup_steps": 0.1 * num_train_optimization_steps,
            "num_training_steps": num_train_optimization_steps
        }

        # schedule_opts = {"name": "OneCycleLR", "max_lr":learning_rate, "pct_start": 0.1,
        #                  "total_steps": num_train_optimization_steps }
    elif "num_training_steps" not in schedule_opts:
        schedule_opts["num_training_steps"] = num_train_optimization_steps

    # Log params
    MlLogger.log_params({
        "use_amp": use_amp,
        "num_train_optimization_steps": schedule_opts["num_training_steps"],
    })

    # Get optimizer from pytorch, transformers or apex
    optimizer = _get_optim(model, optimizer_opts)

    # Adjust for parallel training + amp
    model, optimizer = optimize_model(model, device, local_rank, optimizer,
                                      distributed, use_amp)

    # Get learning rate schedule - moved below to suppress warning
    scheduler = get_scheduler(optimizer, schedule_opts)

    return model, optimizer, scheduler
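An illustrative call using the defaults from the docstring; `model`, `device` and `data_silo` are assumed to exist already (the loader lookup mirrors FARM's DataSilo but is not part of this example):

model, optimizer, lr_schedule = initialize_optimizer(
    model=model,
    n_batches=len(data_silo.loaders["train"]),  # assumed DataSilo attribute
    n_epochs=3,
    device=device,
    learning_rate=3e-5,
    optimizer_opts={"name": "TransformersAdamW", "correct_bias": False, "weight_decay": 0.01},
    schedule_opts={"name": "LinearWarmup", "warmup_proportion": 0.1},
    grad_acc_steps=1,
)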
Example #9
def _get_optim(model, opts):
    """ Get the optimizer based on dictionary with options. Options are passed to the optimizer constructor.

    :param model: model to optimize
    :param opts: config dictionary that will be passed to the optimizer together with the params
    (e.g. lr, weight_decay, correct_bias ...). 'no_decay' can be given - parameters containing any of those strings
    will have weight_decay set to 0.
    :return: created optimizer
    """

    optimizer_name = opts.pop('name', None)

    # Logging
    logger.info(f"Loading optimizer `{optimizer_name}`: '{opts}'")
    MlLogger.log_params(opts)
    MlLogger.log_params({"optimizer_name": optimizer_name})

    weight_decay = opts.pop('weight_decay', None)
    no_decay = opts.pop('no_decay', None)

    if no_decay:
        optimizable_parameters = [
            {
                'params': [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay) and p.requires_grad
                ],
                **opts
            },
            {
                'params': [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay) and p.requires_grad
                ],
                'weight_decay': 0.0,
                **opts
            },
        ]
    else:
        optimizable_parameters = [
            {'params': [p for p in model.parameters() if p.requires_grad], **opts}
        ]

    # default weight decay is not the same for all optimizers, so we can't use default value
    # only explicitly add weight decay if it's given
    if weight_decay is not None:
        optimizable_parameters[0]['weight_decay'] = weight_decay

    # Import optimizer by checking in order: torch, transformers, apex and local imports
    try:
        optim_constructor = getattr(import_module('torch.optim'),
                                    optimizer_name)
    except AttributeError:
        try:
            optim_constructor = getattr(
                import_module('transformers.optimization'), optimizer_name)
        except AttributeError:
            try:
                optim_constructor = getattr(import_module('apex.optimizers'),
                                            optimizer_name)
            except (AttributeError, ImportError):
                try:
                    # Workaround to allow loading AdamW from transformers.
                    # PyTorch > 1.2 now also has an AdamW (but without the option to set bias_correction=False,
                    # which is done in the original BERT implementation)
                    optim_constructor = getattr(sys.modules[__name__],
                                                optimizer_name)
                except (AttributeError, ImportError):
                    raise AttributeError(
                        f"Optimizer '{optimizer_name}' not found in 'torch', 'transformers', 'apex' or 'local imports"
                    )

    return optim_constructor(optimizable_parameters)
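An illustrative opts dict for `_get_optim`, using the `no_decay` mechanism from the docstring. The name "TransformersAdamW" is only resolvable if the surrounding module imports transformers' AdamW under that alias, as the final local-import fallback suggests:

optimizer_opts = {
    "name": "TransformersAdamW",
    "lr": 3e-5,
    "correct_bias": False,
    "weight_decay": 0.01,
    # parameters whose names contain any of these strings get weight_decay = 0.0
    "no_decay": ["bias", "LayerNorm.weight"],
}
optimizer = _get_optim(model, optimizer_opts)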