def __init__(self, context: pytorch.PyTorchTrialContext) -> None:
        self.context = context

        model = torch.nn.Linear(1, 1, False)

        # Manually initialize the one weight to 0.
        model.weight.data.fill_(0)

        self.model = context.wrap_model(model)

        self.lr = 0.001

        opt = torch.optim.SGD(self.model.parameters(), self.lr)
        self.opt = context.wrap_optimizer(opt)

        self.loss_fn = torch.nn.MSELoss()

        self.cls_reducer = context.wrap_reducer(TriangleLabelSum(),
                                                name="cls_reducer")
        self.fn_reducer = context.wrap_reducer(triangle_label_sum,
                                               name="fn_reducer")

        self.hparams = self.context.get_hparams()
        if self.hparams.get("disable_dataset_reproducibility_checks"):
            self.context.experimental.disable_dataset_reproducibility_checks()
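The two reducers wrapped above are defined elsewhere; as a rough sketch of the class-based form (the real TriangleLabelSum may differ), a custom reducer subclasses determined.pytorch.MetricReducer and implements reset, per_slot_reduce, and cross_slot_reduce, with the trial calling its accumulation method once per batch:

from typing import Any, List

from determined import pytorch


class TriangleLabelSum(pytorch.MetricReducer):
    """Hypothetical sketch: accumulate a running sum of label values."""

    def __init__(self) -> None:
        self.sum = 0.0

    def reset(self) -> None:
        # Called automatically before each new workload.
        self.sum = 0.0

    def update(self, labels: Any) -> None:
        # Called by the trial code once per batch.
        self.sum += float(labels.sum().item())

    def per_slot_reduce(self) -> float:
        # Reduce the values accumulated on this slot (GPU).
        return self.sum

    def cross_slot_reduce(self, per_slot_metrics: List[float]) -> float:
        # Combine the per-slot results into the final metric.
        return sum(per_slot_metrics)

The function form (triangle_label_sum) would instead receive the flat list of gathered values and return the final metric directly.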
Example #2
    def __init__(self, context: PyTorchTrialContext) -> None:
        lm = mnist.LightningMNISTClassifier(
            lr=context.get_hparam('learning_rate'))
        data_dir = f"/tmp/data-rank{context.distributed.get_rank()}"
        self.dm = mnist.MNISTDataModule(context.get_data_config()["url"],
                                        data_dir)

        super().__init__(context, lightning_module=lm)
        self.dm.prepare_data()
Example #3
    def __init__(self, context: pytorch.PyTorchTrialContext):
        self.context = context

        model = nn.Linear(1, 1, False)
        model.weight.data.fill_(0)

        self.model = context.wrap_model(model)

        opt = torch.optim.SGD(self.model.parameters(), 0.1)
        self.opt = context.wrap_optimizer(opt)
Example #4
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.hparams = AttrDict(context.get_hparams())

        # Create a unique download directory for each rank so they don't overwrite each
        # other when doing distributed training.
        self.download_directory = self.data_config["data_download_dir"]
        data.download_data(self.download_directory)
        corpus = data_util.Corpus(self.download_directory)
        self.corpus = corpus
        self.ntokens = len(corpus.dictionary)
        self.hidden = None

        # This is used to store eval history and will switch to ASGD
        # once validation perplexity stops improving.
        self._last_loss = None
        self._eval_history = []
        self._last_epoch = -1

        # Define the model
        genotype = self.get_genotype_from_hps()
        self.model = self.context.wrap_model(
            RNNModel(
                self.ntokens,
                self.hparams.emsize,
                self.hparams.nhid,
                self.hparams.nhidlast,
                self.hparams.dropout,
                self.hparams.dropouth,
                self.hparams.dropoutx,
                self.hparams.dropouti,
                self.hparams.dropoute,
                genotype=genotype,
            ))
        total_params = sum(x.data.nelement() for x in self.model.parameters())
        logging.info("Model total parameters: {}".format(total_params))

        # Define the optimizer
        self._optimizer = self.context.wrap_optimizer(
            HybridSGD(
                self.model.parameters(),
                self.hparams.learning_rate,
                self.hparams.weight_decay,
                lambd=0,
                t0=0,
            ))

        # Define the LR scheduler
        self.myLR = MyLR(self._optimizer, self.hparams)
        step_mode = LRScheduler.StepMode.MANUAL_STEP
        self.wrapped_LR = self.context.wrap_lr_scheduler(self.myLR,
                                                         step_mode=step_mode)
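With StepMode.MANUAL_STEP, Determined does not advance the scheduler on its own; the trial must call step() itself. A minimal sketch of how train_batch might do that (the batch unpacking and loss computation are placeholders, and the real stepping policy lives in MyLR):

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        data, targets = batch
        loss = self.model(data, targets)  # placeholder loss computation

        self.context.backward(loss)
        self.context.step_optimizer(self._optimizer)

        # MANUAL_STEP means the trial decides when the scheduler advances.
        self.wrapped_LR.step()

        return {"loss": loss}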
Example #5
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.logger = logging.getLogger(__name__)
        self.hparams = attrdict.AttrDict(context.get_hparams())
        self.data_config = attrdict.AttrDict(context.get_data_config())
        self.context = context

        # Load dataset and get metadata.
        # This needs to be done before we initialize the HF config, tokenizer, and model
        # because we need to know num_labels before doing so.
        if self.data_config.train_language is None:
            train_dataset = datasets.load_dataset("xnli",
                                                  self.data_config.language,
                                                  split="train")
        else:
            train_dataset = datasets.load_dataset(
                "xnli", self.data_config.train_language, split="train")
        eval_dataset = datasets.load_dataset("xnli",
                                             self.data_config.language,
                                             split="validation")

        self.raw_datasets = {
            "train": train_dataset,
            "validation": eval_dataset
        }
        label_list = train_dataset.features["label"].names
        self.hparams.num_labels = len(label_list)

        super(XNLITrial, self).__init__(context)
        self.logger.info(self.config)

        # We need to create the tokenized dataset after init because we need the model and
        # tokenizer to be available.
        self.tokenized_datasets = self.build_datasets()
        train_length = len(self.tokenized_datasets["train"])
        self.logger.info("training records: {}".format(train_length))
        if ("records_per_epoch" in self.exp_config
                and train_length != self.exp_config["records_per_epoch"]):
            self.logger.warning(
                "number of train records {} does not match records_per_epoch of {}"
                .format(train_length, self.exp_config["records_per_epoch"]))

        # Create metric reducer
        metric = datasets.load_metric("xnli", timeout=200)

        def compute_metrics(pred_labels) -> Dict:
            preds, labels = zip(*pred_labels)
            preds = utils.expand_like(preds)
            labels = utils.expand_like(labels)
            preds = np.argmax(preds, axis=1)
            return metric.compute(predictions=preds, references=labels)

        self.reducer = context.experimental.wrap_reducer(compute_metrics,
                                                         for_training=False)
Example #6
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.hparams = context.get_hparams()
        self.criterion = torch.nn.functional.cross_entropy
        # The last epoch is only used for logging.
        self._last_epoch = -1
        self.results = {
            "loss": float("inf"),
            "top1_accuracy": 0,
            "top5_accuracy": 0
        }

        # Define the model
        genotype = self.get_genotype_from_hps()
        self.model = self.context.wrap_model(
            Network(
                self.hparams["init_channels"],
                10,  # num_classes
                self.hparams["layers"],
                self.hparams["auxiliary"],
                genotype,
            ))
        print("param size = {} MB".format(
            utils.count_parameters_in_MB(self.model)))
        size = 0
        for p in self.model.parameters():
            size += p.nelement()
        print("param count: {}".format(size))

        # Apply constraints if desired
        if "use_constraints" in self.hparams and self.hparams[
                "use_constraints"]:
            apply_constraints(self.hparams, size)

        # Define the optimizer
        self.optimizer = self.context.wrap_optimizer(
            torch.optim.SGD(
                self.model.parameters(),
                lr=self.context.get_hparam("learning_rate"),
                momentum=self.context.get_hparam("momentum"),
                weight_decay=self.context.get_hparam("weight_decay"),
            ))

        # Define the LR scheduler
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            self.context.get_hparam("train_epochs"),
        )
        step_mode = LRScheduler.StepMode.STEP_EVERY_EPOCH
        self.wrapped_scheduler = self.context.wrap_lr_scheduler(
            self.scheduler, step_mode=step_mode)
Example #7
    def __init__(self, context: pytorch.PyTorchTrialContext) -> None:
        self.context = context

        model = torch.nn.Linear(1, 1, False)
        model.weight.data.fill_(0)
        self.model = context.wrap_model(model)

        self.lr = 0.001

        optimizer = torch.optim.SGD(self.model.parameters(), self.lr)
        self.opt = context.wrap_optimizer(optimizer)

        self.loss_fn = torch.nn.MSELoss(reduction="mean")
Example #8
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.context = context
        # A subclass of BaseTransformerTrial may have already set the hparams and data_config
        # attributes, so we only set them if they do not exist.
        if not hasattr(self, "hparams"):
            self.hparams = attrdict.AttrDict(context.get_hparams())
        if not hasattr(self, "data_config"):
            self.data_config = attrdict.AttrDict(context.get_data_config())
        if not hasattr(self, "exp_config"):
            self.exp_config = attrdict.AttrDict(
                context.get_experiment_config())
        # Check to make sure all expected hyperparameters are set.
        self.check_hparams()

        # Parse hparams and data_config.
        (
            self.config_kwargs,
            self.tokenizer_kwargs,
            self.model_kwargs,
        ) = hf_parse.default_parse_config_tokenizer_model_kwargs(self.hparams)
        optimizer_kwargs, scheduler_kwargs = hf_parse.default_parse_optimizer_lr_scheduler_kwargs(
            self.hparams)

        self.config, self.tokenizer, self.model = build_using_auto(
            self.config_kwargs,
            self.tokenizer_kwargs,
            self.hparams.model_mode,
            self.model_kwargs,
            use_pretrained_weights=self.hparams.use_pretrained_weights,
        )
        self.model = self.context.wrap_model(self.model)

        self.optimizer = self.context.wrap_optimizer(
            build_default_optimizer(self.model, optimizer_kwargs))

        if self.hparams.use_apex_amp:
            self.model, self.optimizer = self.context.configure_apex_amp(
                models=self.model,
                optimizers=self.optimizer,
            )

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            build_default_lr_scheduler(self.optimizer, scheduler_kwargs),
            det_torch.LRScheduler.StepMode.STEP_EVERY_BATCH,
        )

        self.grad_clip_fn = None

        if optimizer_kwargs.max_grad_norm > 0:  # type: ignore
            self.grad_clip_fn = lambda x: torch.nn.utils.clip_grad_norm_(
                x, optimizer_kwargs.max_grad_norm)
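The grad_clip_fn built above is presumably handed to step_optimizer during training via its clip_grads argument, which Determined applies to the model parameters right before the step. A minimal sketch (the loss extraction is an assumption about the model's output):

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        outputs = self.model(**batch)
        loss = outputs.loss  # assumes a HF-style ModelOutput with a .loss field

        self.context.backward(loss)
        # clip_grads receives the parameters immediately before the optimizer step.
        self.context.step_optimizer(self.optimizer, clip_grads=self.grad_clip_fn)
        return {"loss": loss}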
Example #9
    def __init__(self, context: PyTorchTrialContext, *args, **kwargs) -> None:
        lm = mnist.LitMNIST(
            hidden_size=context.get_hparam('hidden_size'),
            learning_rate=context.get_hparam('learning_rate'),
        )
        data_dir = f"/tmp/data-rank{context.distributed.get_rank()}"
        self.dm = data.MNISTDataModule(
            data_url=context.get_data_config()["url"],
            data_dir=data_dir,
            batch_size=context.get_per_slot_batch_size(),
        )

        super().__init__(context, lightning_module=lm, *args, **kwargs)
        self.dm.prepare_data()
Example #10
    def __init__(self, context: PyTorchTrialContext) -> None:
        data_dir = f'/tmp/data-rank{context.distributed.get_rank()}'
        self.dm = gan.MNISTDataModule(context.get_data_config()['url'], data_dir,
                                      batch_size=context.get_per_slot_batch_size())
        channels, width, height = self.dm.size()
        lm = gan.GAN(channels, width, height,
                    batch_size=context.get_per_slot_batch_size(),
                    lr=context.get_hparam('lr'),
                    b1=context.get_hparam('b1'),
                    b2=context.get_hparam('b2'),
        )

        super().__init__(context, lightning_module=lm)
        self.dm.prepare_data()
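prepare_data() runs once per rank in __init__, but the adapter still needs Determined-native data loaders. A rough sketch of the matching hook, assuming the data module exposes a standard train_dataloader() and that DataLoader here is determined.pytorch.DataLoader:

    def build_training_data_loader(self) -> DataLoader:
        self.dm.setup()
        dl = self.dm.train_dataloader()
        # Re-wrap the PyTorch loader so Determined can shard and resume it.
        return DataLoader(dl.dataset,
                          batch_size=dl.batch_size,
                          num_workers=dl.num_workers)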
Example #11
def build_dataloader(
    cfg: mmcv.Config,
    split: "str",
    context: det_torch.PyTorchTrialContext,
    shuffle: bool,
) -> Tuple[torch_data.Dataset, det_torch.DataLoader]:
    """
    Build the dataset and dataloader according to cfg and sampler parameters.

    Arguments:
        cfg: mmcv.Config with dataset specifications.
        split: one of train, val, or test. If val or test, annotations are not loaded.
        context: PyTorchTrialContext with seed info used to seed the dataloader workers.
        shuffle: whether to shuffle indices for data loading.
    Returns:
        dataset and dataloader
    """
    assert split in ["train", "val", "test"
                     ], "argument split must be one of train, val, or test."
    num_samples_per_gpu = context.get_per_slot_batch_size()
    num_replicas = context.distributed.get_size()
    num_workers = cfg.workers_per_gpu
    test_mode = False if split == "train" else True

    cfg = eval(f"cfg.{split}")
    maybe_download_ann_file(cfg)

    dataset = mmdet.datasets.build_dataset(cfg, {"test_mode": test_mode})
    if test_mode:
        dataset = DatasetWithIndex(dataset)
    sampler = GroupSampler(dataset, num_samples_per_gpu,
                           num_replicas) if shuffle else None

    return dataset, det_torch.DataLoader(
        dataset,
        batch_size=num_samples_per_gpu,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=functools.partial(mmcv.parallel.collate,
                                     samples_per_gpu=num_samples_per_gpu),
        pin_memory=False,
        worker_init_fn=functools.partial(
            mmdet.datasets.builder.worker_init_fn,
            seed=context.get_trial_seed(),
            rank=context.distributed.get_rank(),
            num_workers=num_workers,
        ),
    )
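A sketch of how a trial might call build_dataloader from its data hooks; self.cfg holding an mmcv config with a data section is an assumption carried over from the MMDetection examples in this listing:

    def build_training_data_loader(self) -> det_torch.DataLoader:
        self.train_dataset, loader = build_dataloader(
            self.cfg.data, "train", self.context, shuffle=True)
        return loader

    def build_validation_data_loader(self) -> det_torch.DataLoader:
        self.val_dataset, loader = build_dataloader(
            self.cfg.data, "val", self.context, shuffle=False)
        return loader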
Example #12
    def __init__(self,
                 context: pytorch.PyTorchTrialContext,
                 lm_class=OneVarLM) -> None:
        self.context = context
        lm = lm_class(**context.get_hparams())
        self.dm = OneDatasetLDM()
        super().__init__(context, lm)
Example #13
    def __init__(self, context: pytorch.PyTorchTrialContext) -> None:
        self.context = context

        model = torch.nn.Linear(1, 1, False)

        # Manually initialize the one weight to 0.
        model.weight.data.fill_(0)

        self.model = context.wrap_model(model)

        self.lr = 0.001

        opt = torch.optim.SGD(self.model.parameters(), self.lr)
        self.opt = context.wrap_optimizer(opt)

        self.loss_fn = torch.nn.MSELoss()
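For this one-variable model the remaining hooks are short; a minimal sketch of train_batch under the standard PyTorchTrial API (the batch layout is assumed to be a (data, label) pair):

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        data, label = batch
        loss = self.loss_fn(self.model(data), label)

        self.context.backward(loss)
        self.context.step_optimizer(self.opt)
        return {"loss": loss}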
Example #14
def test_get_pretrained_weights(
    mmdet_config_dir: None, context: det_torch.PyTorchTrialContext
) -> None:
    mh_mmdet.utils.CONFIG_TO_PRETRAINED = mh_mmdet.utils.get_config_pretrained_url_mapping()
    path, ckpt = mh_mmdet.get_pretrained_ckpt_path("/tmp", context.get_hparam("config_file"))
    assert path is not None
    assert ckpt is not None
Example #15
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.context = context
        self.hparams = attrdict.AttrDict(context.get_hparams())
        self.data_config = attrdict.AttrDict(context.get_data_config())
        self.cfg = self.build_mmdet_config()
        # We will control how data is moved to GPU.
        self.context.experimental.disable_auto_to_device()

        # Build model and make sure it's compatible with horovod.
        self.model = mmdet.models.build_detector(self.cfg.model)

        # Initialize model
        self.model.init_weights()

        # If use_pretrained, try loading pretrained weights for the mmcv config if available.
        if self.hparams.use_pretrained:
            ckpt_path, ckpt = utils.get_pretrained_ckpt_path("/tmp", self.hparams.config_file)
            if ckpt_path is not None:
                logging.info("Loading from pretrained weights.")
                if "state_dict" in ckpt:
                    self.model.load_state_dict(ckpt["state_dict"])
                else:
                    self.model.load_state_dict(ckpt)

        # If fp16 is specified in the mmdet config, we will use torch native amp.
        fp16_cfg = self.cfg.get("fp16", None)
        if fp16_cfg is not None:
            self.setup_torch_amp(fp16_cfg)

        self.model = self.context.wrap_model(self.model)

        self.optimizer = self.context.wrap_optimizer(
            mmcv.runner.build_optimizer(self.model, self.cfg.optimizer)
        )
        self.model.zero_grad()

        self.clip_grads_fn = None
        if self.cfg.optimizer_config.grad_clip is not None:
            self.clip_grads_fn = lambda x: torch.nn.utils.clip_grad_norm_(
                x,
                self.cfg.optimizer_config.grad_clip.max_norm,
                self.cfg.optimizer_config.grad_clip.norm_type,
            )

        # mmdet sets up loggers in the package that interfere with Determined logging.
        # We reset the root logger after mmdet models are initialized.
        set_logger(bool(self.context.env.experiment_config.get("debug", False)))
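Because disable_auto_to_device() was called in __init__, batches arrive on the CPU and the trial moves them itself with context.to_device. A simplified sketch of that step (the loss aggregation below is an approximation of the usual mmdet convention and ignores list-valued losses):

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        # Auto-to-device is disabled above, so move the batch explicitly.
        batch = self.context.to_device(batch)
        losses = self.model(**batch)  # mmdet detectors return a dict of losses
        loss = sum(v.mean() for k, v in losses.items() if "loss" in k)

        self.context.backward(loss)
        self.context.step_optimizer(self.optimizer, clip_grads=self.clip_grads_fn)
        return {"loss": loss}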
Example #16
    def __init__(self, trial_context: PyTorchTrialContext) -> None:
        self.context = trial_context
        self.data_config = trial_context.get_data_config()
        self.hparams = AttrDict(trial_context.get_hparams())
        self.last_epoch = 0

        self.data_dir = os.path.join(
            self.data_config["download_dir"],
            f"data-rank{self.context.distributed.get_rank()}",
        )

        # Initialize the models.
        criterion = nn.CrossEntropyLoss()
        self.model = self.context.wrap_model(
            Network(
                self.hparams.init_channels,
                self.hparams.n_classes,
                self.hparams.layers,
                criterion,
                self.hparams.nodes,
                k=self.hparams.shuffle_factor,
            ))

        # Initialize the optimizers and learning rate scheduler.
        self.ws_opt = self.context.wrap_optimizer(
            torch.optim.SGD(
                self.model.ws_parameters(),
                self.hparams.learning_rate,
                momentum=self.hparams.momentum,
                weight_decay=self.hparams.weight_decay,
            ))
        self.arch_opt = self.context.wrap_optimizer(
            EG(
                self.model.arch_parameters(),
                self.hparams.arch_learning_rate,
                lambda p: p / p.sum(dim=-1, keepdim=True),
            ))

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            lr_scheduler=CosineAnnealingLR(
                self.ws_opt,
                self.hparams.scheduler_epochs,
                self.hparams.min_learning_rate,
            ),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )
Example #17
    def __init__(self, context: PyTorchTrialContext, lightning_module: pl.LightningModule):
        check_compatibility(lightning_module)
        override_unsupported_nud(lightning_module, context)
        context.wrap_model(lightning_module)
        optimizers, lr_schedulers = self.setup_optimizers_schedulers(context, lightning_module)
        pls = _PLAdapterState(context, lightning_module, optimizers)
        self._pls = pls

        # set lightning_module properties
        pls.lm.use_ddp = False  # type: ignore
        pls.lm.use_ddp2 = False  # type: ignore
        pls.lm.use_dp = False  # type: ignore
        pls.lm.use_tpu = False  # type: ignore
        type(pls.lm).local_rank = context.distributed.get_local_rank()  # type: ignore
        type(pls.lm).global_rank = context.distributed.get_rank()  # type: ignore
        pls.lm.use_amp = context.experimental._auto_amp or context._use_apex
        pls.lm.to(context.device)
Example #18
    def test_merge_config(
        self, context: det_torch.PyTorchTrialContext, trial: mh_mmdet.MMDetTrial
    ) -> None:
        hparams = context.get_hparams()
        hparams["merge_config"] = "./tests/fixtures/merge_config.py"
        trial.hparams = attrdict.AttrDict(hparams)
        new_cfg = trial.build_mmdet_config()
        assert new_cfg.optimizer.type == "AdamW"
        assert new_cfg.optimizer_config.grad_clip.max_norm == 0.1
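The fixture at ./tests/fixtures/merge_config.py is not shown; judging from the assertions, it is presumably a small mmcv config fragment along these lines (contents inferred, not the actual fixture):

# Hypothetical merge_config.py, inferred from the assertions above.
optimizer = dict(type="AdamW")
optimizer_config = dict(grad_clip=dict(max_norm=0.1))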
Example #19
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.logger = logging.getLogger(__name__)
        self.context = context
        self.hparams = attrdict.AttrDict(context.get_hparams())
        self.data_config = attrdict.AttrDict(context.get_data_config())

        # Load dataset and get metadata.
        # This needs to be done before we initialize the HF config, tokenizer, and model
        # because we need to know num_labels before doing so.
        self.raw_datasets = hf.default_load_dataset(self.data_config)
        datasets_metadata = ner_utils.get_dataset_metadata(self.raw_datasets, self.hparams)
        self.hparams.num_labels = datasets_metadata.num_labels

        super(NERTrial, self).__init__(context)
        self.logger.info(self.config)

        # We need to create the tokenized dataset after init because we need the model and
        # tokenizer to be available.
        self.tokenized_datasets = ner_utils.build_tokenized_datasets(
            self.raw_datasets,
            self.model,
            self.data_config,
            self.tokenizer,
            datasets_metadata.text_column_name,
            datasets_metadata.label_column_name,
            datasets_metadata.label_to_id,
        )
        train_length = len(self.tokenized_datasets["train"])
        self.logger.info("training records: {}".format(train_length))
        if (
            "records_per_epoch" in self.exp_config
            and train_length != self.exp_config["records_per_epoch"]
        ):
            self.logger.warning(
                "number of train records {} does not match records_per_epoch of {}".format(
                    train_length, self.exp_config["records_per_epoch"]
                )
            )

        # Create metric reducer
        self.reducer = context.experimental.wrap_reducer(
            functools.partial(ner_utils.compute_metrics, datasets_metadata.label_list),
            for_training=False,
        )
Example #20
    def __init__(self, context: pytorch.PyTorchTrialContext) -> None:
        self.context = context

        model = torch.nn.Linear(1, 1, False)

        # Manually initialize the one weight to 0.
        model.weight.data.fill_(0)

        self.model = context.wrap_model(model)

        self.lr = 0.001

        opt = torch.optim.SGD(self.model.parameters(), self.lr)
        self.opt = context.wrap_optimizer(opt)

        self.loss_fn = torch.nn.MSELoss()

        self.cls_reducer = context.wrap_reducer(TriangleLabelSum(),
                                                name="cls_reducer")
        self.fn_reducer = context.wrap_reducer(triangle_label_sum,
                                               name="fn_reducer")
Example #21
    def test_override_mmdet_config(
        self, context: det_torch.PyTorchTrialContext, trial: mh_mmdet.MMDetTrial
    ) -> None:
        hparams = context.get_hparams()
        hparams["override_mmdet_config"] = {
            "optimizer_config._delete_": True,
            "optimizer_config.grad_clip.max_norm": 35,
            "optimizer_config.grad_clip.norm_type": 2,
        }
        trial.hparams = attrdict.AttrDict(hparams)
        new_cfg = trial.build_mmdet_config()
        assert new_cfg.optimizer_config.grad_clip.max_norm == 35
        assert new_cfg.optimizer_config.grad_clip.norm_type == 2
Example #22
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.criterion = CrossEntropyLabelSmooth(
            context.get_hparam("num_classes"),  # num classes
            context.get_hparam("label_smoothing_rate"),
        )
        self.last_epoch_idx = -1

        self.model = self.context.wrap_model(self.build_model_from_config())

        self.optimizer = self.context.wrap_optimizer(
            torch.optim.SGD(
                self.model.parameters(),
                lr=self.context.get_hparam("learning_rate"),
                momentum=self.context.get_hparam("momentum"),
                weight_decay=self.context.get_hparam("weight_decay"),
            ))

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            self.build_lr_scheduler_from_config(self.optimizer),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )
Example #23
    def setup_optimizers_schedulers(
        self,
        context: PyTorchTrialContext,
        lightning_module: pl.LightningModule,
    ) -> Tuple[List[Optimizer], List[_LRScheduler]]:
        """
        Wrap optimizers and lr_schedulers returned by `configure_optimizers` to
        work with Determined.
        Return: Wrapped `optimizers`, and `lr_schedulers` in a tuple
        """
        optimizers, lr_scheduler_dicts, opt_frequencies = TrainerOptimizersMixin().init_optimizers(
            lightning_module,
        )
        # TODO(DET-5021) support custom frequencies with the manual step.
        for freq in opt_frequencies:
            check.eq(freq, 1, "custom optimizer frequencies are not supported")
        optimizers = cast(List[Optimizer], optimizers)
        lr_scheduler_dicts = cast(List[dict], lr_scheduler_dicts)

        def lightning_scheduler_dict_to_det(lrs: dict) -> _LRScheduler:
            """
            input_dict = {
                'scheduler': None,
                'name': None,  # no custom name
                'interval': 'epoch',  # after epoch is over
                'frequency': 1,  # every epoch/batch
                'reduce_on_plateau': False,  # most often not ReduceLROnPlateau scheduler
                'monitor': monitor,  # value to monitor for ReduceLROnPlateau
                'strict': True,  # enforce that the monitor exists for ReduceLROnPlateau
            }
            """
            if lrs["reduce_on_plateau"]:
                raise InvalidModelException("LRScheduler reduce_on_plateaue is not supported")
            if lrs["monitor"] is not None:
                raise InvalidModelException("LRScheduler monitor is not supported")

            step_mode = (
                LRScheduler.StepMode.STEP_EVERY_EPOCH
                if lrs["interval"] == "epoch"
                else LRScheduler.StepMode.STEP_EVERY_BATCH
            )
            return context.wrap_lr_scheduler(lrs["scheduler"], step_mode)

        optimizers = [context.wrap_optimizer(opt) for opt in optimizers]
        lr_schedulers = [lightning_scheduler_dict_to_det(lrs) for lrs in lr_scheduler_dicts]
        return optimizers, lr_schedulers
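On the Lightning side, the scheduler dicts consumed above come from the module's configure_optimizers; a sketch of a LightningModule method producing a compatible epoch-interval scheduler (the optimizer and scheduler choices are illustrative):

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
        return [optimizer], [{
            "scheduler": scheduler,
            "interval": "epoch",  # mapped to StepMode.STEP_EVERY_EPOCH above
            "frequency": 1,       # must stay 1; custom frequencies are rejected
        }]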
Example #24
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.num_classes = {
            "train": context.get_hparam("num_classes_train"),
            "val": context.get_hparam("num_classes_val"),
        }
        self.num_support = {
            "train": context.get_hparam("num_support_train"),
            "val": context.get_hparam("num_support_val"),
        }
        self.num_query = {
            "train": context.get_hparam("num_query_train"),
            "val":
            None,  # Use all available examples for val at meta-test time
        }
        self.get_train_valid_splits()

        x_dim = 1  # Omniglot is black and white
        hid_dim = self.context.get_hparam("hidden_dim")
        z_dim = self.context.get_hparam("embedding_dim")

        def conv_block(in_channels, out_channels):
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2),
            )

        self.model = self.context.wrap_model(
            nn.Sequential(
                conv_block(x_dim, hid_dim),
                conv_block(hid_dim, hid_dim),
                conv_block(hid_dim, hid_dim),
                conv_block(hid_dim, z_dim),
                Flatten(),
            ))

        self.optimizer = self.context.wrap_optimizer(
            torch.optim.Adam(
                self.model.parameters(),
                lr=self.context.get_hparam("learning_rate"),
                weight_decay=self.context.get_hparam("weight_decay"),
            ))

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            torch.optim.lr_scheduler.StepLR(
                self.optimizer,
                self.context.get_hparam("reduce_every"),
                gamma=self.context.get_hparam("lr_gamma"),
            ), LRScheduler.StepMode.STEP_EVERY_EPOCH)
Example #25
    def __init__(
        self,
        context: PyTorchTrialContext,
        lightning_module: pl.LightningModule,
        precision: Literal[16, 32] = 32,
        amp_backend: Literal["native", "apex"] = "native",
        amp_level: Literal["O0", "O1", "O2", "O3"] = "O2",
    ):
        """
        This performs the necessary initialization steps to:

        1. check the compatibility of the provided ``LightningModule`` with ``LightningAdapter``.
        2. define a ``PyTorchTrial`` with models, optimizers, and LR schedulers that are provided
           by ``LightningModule``.
        3. patch the ``LightningModule`` methods that depend on a ``Trainer``.

        After inheriting this class, you need to override this function to initialize the adapted
        ``PyTorchTrial``.
        Within your ``__init__`` , you should instantiate the ``LightningModule`` and call
        ``super().__init__``.

        Here is a minimal code example.

        .. code-block:: python

            def __init__(self, context: PyTorchTrialContext) -> None:
                lm = mnist.LightningMNISTClassifier(lr=context.get_hparam('learning_rate'))
                super().__init__(context, lightning_module=lm)

        Arguments:
            context (PyTorchTrialContext)
            lightning_module (``LightningModule``):
                User-defined lightning module.
            precision (int, default=32):
                Precision to use.
                Accepted values are 16 and 32.
            amp_backend (str):
                Automatic mixed precision backend to use.
                Accepted values are "native" and "apex".
            amp_level (str, optional, default="O2"):
                Apex amp optimization level.
                Accepted values are "O0", "O1", "O2", and "O3".
                https://nvidia.github.io/apex/amp.html#opt-levels-and-properties

        """

        check.check_in(precision, {16, 32},
                       "only precisions 16 & 32 are supported.")
        check.check_in(amp_backend, {"native", "apex"},
                       'only "native" and "apex" are supported')

        check_compatibility(lightning_module)
        override_unsupported_nud(lightning_module, context)

        if precision == 16 and amp_backend == "native":
            context.experimental.use_amp()

        context.wrap_model(lightning_module)

        pls = _LightningAdapterState(context, lightning_module, [], [])
        self._pls = pls
        pls.optimizers, pls.lr_schedulers = self.setup_optimizers_schedulers()

        if precision == 16 and amp_backend == "apex":
            context.configure_apex_amp(
                context.models,
                context.optimizers,
                enabled=True,
                opt_level=amp_level,
            )

        # set lightning_module properties
        pls.lm.use_ddp = False
        pls.lm.use_ddp2 = False
        pls.lm.use_dp = False
        pls.lm.use_tpu = False
        type(pls.lm).local_rank = context.distributed.get_local_rank()  # type: ignore
        type(pls.lm).global_rank = context.distributed.get_rank()  # type: ignore
        pls.lm.to(context.device)
        use_amp = context.experimental._auto_amp or context._use_apex
        pls.lm.use_amp = use_amp
        pls.lm.precision = "mixed" if use_amp else precision  # type: ignore
Example #26
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.logger = logging.getLogger(__name__)
        self.hparams = attrdict.AttrDict(context.get_hparams())
        self.data_config = attrdict.AttrDict(context.get_data_config())
        self.context = context

        # Check to make sure the dataset is configured correctly.
        if self.data_config.dataset_name is not None:
            dataset_name = self.data_config.dataset_name
            if dataset_name == "squad":
                assert (not self.data_config.version_2_with_negative
                        ), "version_2_with_negative should be false for squad"
            elif dataset_name == "squad_v2":
                assert (
                    self.data_config.version_2_with_negative
                ), "version_2_with_negative should be true for squad_v2"

        self.data_processors = data_beam_search

        # Get the datasets: you can either provide your own CSV or JSON training and evaluation
        # files (see below) or just provide the name of one of the public datasets available on the
        # hub at https://huggingface.co/datasets/ (the dataset will be downloaded automatically
        # from the datasets Hub).

        # For CSV/JSON files, this script will use the column called 'text' or the first column if
        # no column called 'text' is found. You can easily tweak this behavior (see below).

        # See more about loading any type of standard or custom dataset (from files, python dict,
        # pandas DataFrame, etc) at
        # https://huggingface.co/docs/datasets/loading_datasets.html.
        self.raw_datasets = hf.default_load_dataset(self.data_config)
        self.column_names = self.raw_datasets["train"].column_names

        # For beam search, we need to use a different model from the default model returned by
        # AutoModelForQuestionAnswering.  We will use a custom init in this case that is a slight
        # modification of the BaseTransformerTrial init method.
        self.exp_config = attrdict.AttrDict(context.get_experiment_config())

        # Check to make sure all expected hyperparameters are set.
        self.check_hparams()

        # Parse hparams and data_config.
        (
            self.config_kwargs,
            self.tokenizer_kwargs,
            self.model_kwargs,
        ) = hf.default_parse_config_tokenizer_model_kwargs(self.hparams)
        optimizer_kwargs, scheduler_kwargs = hf.default_parse_optimizer_lr_scheduler_kwargs(
            self.hparams)

        self.config = transformers.XLNetConfig.from_pretrained(
            **self.config_kwargs)
        self.tokenizer = transformers.XLNetTokenizerFast.from_pretrained(
            **self.tokenizer_kwargs)

        # We need to use XLNetForQuestionAnswering instead of XLNetForQuestionAnsweringSimple
        # which is the default returned by AutoModelForQuestionAnswering.
        if self.hparams.use_pretrained_weights:
            self.model_kwargs["config"] = self.config
            self.model = transformers.XLNetForQuestionAnswering.from_pretrained(
                **self.model_kwargs)
        else:
            self.model = transformers.XLNetForQuestionAnswering(self.config)
        self.model = self.context.wrap_model(self.model)

        # The rest is the same as the parent init method.
        self.optimizer = self.context.wrap_optimizer(
            hf.build_default_optimizer(self.model, optimizer_kwargs))

        if self.hparams.use_apex_amp:
            self.model, self.optimizer = self.context.configure_apex_amp(
                models=self.model,
                optimizers=self.optimizer,
            )

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            hf.build_default_lr_scheduler(self.optimizer, scheduler_kwargs),
            det_torch.LRScheduler.StepMode.STEP_EVERY_BATCH,
        )
        self.grad_clip_fn = (
            lambda x: torch.nn.utils.clip_grad_norm_(
                x, optimizer_kwargs.max_grad_norm)
            if optimizer_kwargs.max_grad_norm > 0  # type: ignore
            else None)

        self.logger.info(self.config)

        if not isinstance(self.tokenizer,
                          transformers.PreTrainedTokenizerFast):
            raise ValueError(
                "This example script only works for models that have a fast tokenizer. Checkout "
                "the big table of models at "
                "https://huggingface.co/transformers/index.html#bigtable to find the model types "
                "that meet this requirement")

        # We need to create the tokenized dataset after init because we need the model and
        # tokenizer to be available.
        self.tokenized_datasets = self.build_datasets()
        train_length = len(self.tokenized_datasets["train"])
        self.logger.info("training records: {}".format(train_length))
        if ("records_per_epoch" in self.exp_config
                and train_length != self.exp_config["records_per_epoch"]):
            self.logger.warning(
                "number of train records {} does not match records_per_epoch of {}"
                .format(train_length, self.exp_config["records_per_epoch"]))

        # Create metric reducer
        metric = datasets.load_metric("squad_v2" if self.data_config.
                                      version_2_with_negative else "squad")

        self.reducer = context.experimental.wrap_reducer(
            functools.partial(
                qa_utils.compute_metrics,
                self.data_config,
                self.column_names,
                self.data_processors.post_processing_function,
                self.raw_datasets,
                self.tokenized_datasets,
                self.model,
                metric,
            ),
            for_training=False,
        )
Example #27
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.logger = logging.getLogger(__name__)
        super(QATrial, self).__init__(context)
        self.logger.info(self.config)

        # Check to make sure the dataset is configured correctly.
        if self.data_config.dataset_name is not None:
            dataset_name = self.data_config.dataset_name
            if dataset_name == "squad":
                assert (
                    not self.data_config.version_2_with_negative
                ), "version_2_with_negative should be false for squad"
            elif dataset_name == "squad_v2":
                assert (
                    self.data_config.version_2_with_negative
                ), "version_2_with_negative should be true for squad_v2"

        self.data_processors = data

        # Get the datasets: you can either provide your own CSV or JSON training and evaluation
        # files (see below) or just provide the name of one of the public datasets available on the
        # hub at https://huggingface.co/datasets/ (the dataset will be downloaded automatically
        # from the datasets Hub).

        # For CSV/JSON files, this script will use the column called 'text' or the first column if
        # no column called 'text' is found. You can easily tweak this behavior (see below).

        # See more about loading any type of standard or custom dataset (from files, python dict,
        # pandas DataFrame, etc) at
        # https://huggingface.co/docs/datasets/loading_datasets.html.
        self.raw_datasets = hf.default_load_dataset(self.data_config)
        self.column_names = self.raw_datasets["train"].column_names

        if not isinstance(self.tokenizer, transformers.PreTrainedTokenizerFast):
            raise ValueError(
                "This example script only works for models that have a fast tokenizer. Checkout "
                "the big table of models at "
                "https://huggingface.co/transformers/index.html#bigtable to find the model types "
                "that meet this requirement"
            )

        # We need to create the tokenized dataset after init because we need the model and
        # tokenizer to be available.
        self.tokenized_datasets = self.build_datasets()
        train_length = len(self.tokenized_datasets["train"])
        self.logger.info("training records: {}".format(train_length))
        if (
            "records_per_epoch" in self.exp_config
            and train_length != self.exp_config["records_per_epoch"]
        ):
            self.logger.warning(
                "number of train records {} does not match records_per_epoch of {}".format(
                    train_length, self.exp_config["records_per_epoch"]
                )
            )

        # Create metric reducer
        metric = datasets.load_metric(
            "squad_v2" if self.data_config.version_2_with_negative else "squad"
        )

        self.reducer = context.wrap_reducer(
            functools.partial(
                qa_utils.compute_metrics,
                self.data_config,
                self.column_names,
                self.data_processors.post_processing_function,
                self.raw_datasets,
                self.tokenized_datasets,
                self.model,
                metric,
            ),
            for_training=False,
        )
Example #28
    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
        self.logger = logging.getLogger(__name__)
        self.hparams = attrdict.AttrDict(context.get_hparams())
        self.data_config = attrdict.AttrDict(context.get_data_config())
        self.context = context

        # Load dataset and get metadata.
        # This needs to be done before we initialize the HF config, tokenizer, and model
        # because we need to know num_labels before doing so.

        # For CSV/JSON files, this example will use as labels the column called `label`, and as
        # the pair of sentences the columns called `sentence1` and `sentence2` if such columns
        # exist, or the first two columns not named `label` if at least two columns are provided.
        #
        # If the CSVs/JSONs contain only one non-label column, the example will do single sentence
        # classification on this single column.

        # See more about loading any type of standard or custom dataset at
        # https://huggingface.co/docs/datasets/loading_datasets.html.

        self.raw_datasets = hf.default_load_dataset(self.data_config)

        if self.hparams.finetuning_task is not None:
            is_regression = self.hparams.finetuning_task == "stsb"
            if not is_regression:
                label_list = self.raw_datasets["train"].features["label"].names
                num_labels = len(label_list)
            else:
                num_labels = 1
        else:
            # Trying to have good defaults here; don't hesitate to tweak to your needs.
            is_regression = self.raw_datasets["train"].features["label"].dtype in [
                "float32",
                "float64",
            ]
            if is_regression:
                num_labels = 1
            else:
                # A useful fast method is datasets.Dataset.unique from
                # https://huggingface.co/docs/datasets/package_reference/main_classes.html
                label_list = self.raw_datasets["train"].unique("label")
                label_list.sort()  # Let's sort it for determinism
                num_labels = len(label_list)
        self.is_regression = is_regression
        self.hparams.num_labels = num_labels
        if not self.is_regression:
            self.label_list = label_list

        super(GLUETrial, self).__init__(context)
        self.logger.info(self.config)

        # We need to create the tokenized dataset after init because we need the model and
        # tokenizer to be available.
        self.tokenized_datasets = self.build_datasets()
        train_length = len(self.tokenized_datasets["train"])
        self.logger.info("training records: {}".format(train_length))
        if ("records_per_epoch" in self.exp_config
                and train_length != self.exp_config["records_per_epoch"]):
            self.logger.warning(
                "number of train records {} does not match records_per_epoch of {}"
                .format(train_length, self.exp_config["records_per_epoch"]))

        # Create metric reducer
        metric = datasets.load_metric("glue", self.hparams.finetuning_task)

        # You can define your custom compute_metrics function. It takes an `EvalPrediction` object
        # (a namedtuple with a predictions and label_ids field) and has to return a dictionary
        # mapping string to float.
        def compute_metrics(pred_labels) -> Dict:
            preds, labels = zip(*pred_labels)
            preds = utils.expand_like(preds)
            labels = utils.expand_like(labels)
            preds = np.squeeze(preds) if is_regression else np.argmax(preds,
                                                                      axis=1)
            if self.hparams.finetuning_task is not None:
                result = metric.compute(predictions=preds, references=labels)
                if len(result) > 1:
                    result["combined_score"] = np.mean(list(
                        result.values())).item()
                return result
            elif is_regression:
                return {"mse": ((preds - labels)**2).mean().item()}
            else:
                return {
                    "accuracy":
                    (preds == labels).astype(np.float32).mean().item()
                }

        self.reducer = context.wrap_reducer(compute_metrics,
                                            for_training=False)
Example #29
    def __init__(self, context: det_torch.PyTorchTrialContext):
        self.context = context
        self._data_loader = None  # type: Optional[DummyDataloader]
        experiment_config = context.get_experiment_config()
        self.max_length = experiment_config["searcher"]["max_length"]
Example #30
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.scaler = context.wrap_scaler(GradScaler())
        super().__init__(context)
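With the scaler wrapped, train_batch follows the usual torch.cuda.amp pattern; a minimal sketch, assuming step_optimizer accepts a scaler argument in the installed Determined version and that the model, optimizer, and loss function are set up by the parent class:

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        data, label = batch
        with torch.cuda.amp.autocast():
            loss = self.loss_fn(self.model(data), label)

        # Scale the loss before backward, then step and update the scaler.
        self.context.backward(self.scaler.scale(loss))
        self.context.step_optimizer(self.optimizer, scaler=self.scaler)
        self.scaler.update()
        return {"loss": loss}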