Example #1
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 lr_scheduler_func: Optional[Callable] = None,
                 weight_decay: float = 0.0,
                 optimizer_config_list: List[Dict[str, Union[float, List[str],
                                                             str]]] = None,
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None,
                 **kwargs):
        stage_list = None

        if load_model_path is None:
            if optimizer_config_list is None:
                raise ValueError(
                    'Must provide ``optimizer_config_list`` when initializing a new multi-stage '
                    'model!')
            else:
                stage_list = list(
                    OrderedDict.fromkeys([
                        optimizer_config['stage']
                        for optimizer_config in optimizer_config_list
                    ]))

        super().__init__(stage_list=stage_list, **get_init_arguments())

        if load_model_path is None:
            # set stage if we have not already loaded it in and set it there
            self.hparams.stage = self.hparams.stage_list[0]
            self.set_stage(self.hparams.stage)
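The stage list above is derived from the optimizer configs while preserving their order of first appearance. A minimal standalone sketch of that deduplication, using a hypothetical ``optimizer_config_list`` of the same shape:

from collections import OrderedDict

# hypothetical optimizer_config_list with two configs sharing a stage
optimizer_config_list = [
    {'lr': 1e-3, 'optimizer': 'adam', 'parameter_prefix_list': ['user_embedding'], 'stage': 'matrix_factorization'},
    {'lr': 1e-2, 'optimizer': 'sgd', 'parameter_prefix_list': ['user_bias'], 'stage': 'matrix_factorization'},
    {'lr': 1e-4, 'optimizer': 'adam', 'parameter_prefix_list': ['metadata'], 'stage': 'metadata_only'},
]

# ``OrderedDict.fromkeys`` drops duplicate stages but keeps first-seen order
stage_list = list(OrderedDict.fromkeys(
    optimizer_config['stage'] for optimizer_config in optimizer_config_list
))
print(stage_list)  # ['matrix_factorization', 'metadata_only']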
Example #2
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 embedding_dim: int = 30,
                 sparse: bool = False,
                 lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau, patience=1, verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())
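The ``lr_scheduler_func`` default above is a ``functools.partial`` rather than a scheduler instance, because the scheduler cannot be constructed until an optimizer exists. A minimal standalone sketch of that deferred construction (the throwaway parameter and optimizer are only for illustration):

from functools import partial

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

lr_scheduler_func = partial(ReduceLROnPlateau, patience=1, verbose=True)

# a throwaway parameter and optimizer, just so the scheduler has something to wrap
dummy_parameter = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([dummy_parameter], lr=1e-3)

# calling the partial is equivalent to ReduceLROnPlateau(optimizer, patience=1, verbose=True)
scheduler = lr_scheduler_func(optimizer)
scheduler.step(0.5)  # ``ReduceLROnPlateau.step`` takes the monitored metric value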
Example #3
    def __init__(
            self,
            train: INTERACTIONS_LIKE_INPUT = None,
            val: INTERACTIONS_LIKE_INPUT = None,
            item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
            trained_model: MatrixFactorizationModel = None,
            metadata_layers_dims: Optional[List[int]] = None,
            combined_layers_dims: List[int] = [128, 64, 32],
            freeze_embeddings: bool = True,
            dropout_p: float = 0.0,
            lr: float = 1e-3,
            lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                            patience=1,
                                                            verbose=True),
            weight_decay: float = 0.0,
            optimizer: Union[str, Callable] = 'adam',
            loss: Union[str, Callable] = 'hinge',
            metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
            metadata_for_loss_weights: Optional[Dict[str, float]] = None,
            # y_range: Optional[Tuple[float, float]] = None,
            load_model_path: Optional[str] = None,
            map_location: Optional[str] = None):
        item_metadata_num_cols = None
        if load_model_path is None:
            if trained_model is None:
                raise ValueError(
                    'Must provide ``trained_model`` for ``HybridPretrainedModel``.'
                )

            if item_metadata is None:
                raise ValueError(
                    'Must provide item metadata for ``HybridPretrainedModel``.'
                )
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

        super().__init__(**get_init_arguments(),
                         item_metadata_num_cols=item_metadata_num_cols)
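The metadata handling above accepts a DataFrame, a NumPy array, or a tensor and normalizes all three to a float tensor before counting columns. A minimal standalone sketch with hypothetical metadata:

import numpy as np
import pandas as pd
import torch

# hypothetical item metadata: two items, three numeric feature columns
item_metadata = pd.DataFrame({'genre_action': [1, 0], 'genre_drama': [0, 1], 'year': [1995, 2001]})

if isinstance(item_metadata, pd.DataFrame):
    item_metadata = torch.from_numpy(item_metadata.to_numpy())
elif isinstance(item_metadata, np.ndarray):
    item_metadata = torch.from_numpy(item_metadata)

item_metadata = item_metadata.float()
item_metadata_num_cols = item_metadata.shape[1]
print(item_metadata.dtype, item_metadata_num_cols)  # torch.float32 3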
Example #4
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 user_embedding_dim: int = 60,
                 item_embedding_dim: int = 60,
                 user_dense_layers_dims: List[float] = [48, 32],
                 item_dense_layers_dims: List[float] = [48, 32],
                 embedding_dropout_p: float = 0.0,
                 dense_dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau, patience=1, verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())
Example #5
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = None,
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None,
                 **kwargs):
        if isinstance(train, Interactions) or isinstance(
                train, ExplicitInteractions):
            train = InteractionsDataLoader(interactions=train, shuffle=True)
        if isinstance(val, Interactions) or isinstance(val,
                                                       ExplicitInteractions):
            val = InteractionsDataLoader(interactions=val, shuffle=False)

        super().__init__()

        # save datasets as class-level attributes and NOT ``hparams`` so model checkpointing /
        # saving can complete faster
        self.train_loader = train
        self.val_loader = val

        # potential issue with PyTorch Lightning is that a function cannot be saved as a
        # hyperparameter, so we will sidestep this by setting it as a class-level attribute
        # https://github.com/PyTorchLightning/pytorch-lightning/issues/2444
        self.lr_scheduler_func = lr_scheduler_func
        self.loss = loss
        self.optimizer = optimizer
        self.bias_optimizer = kwargs.get('bias_optimizer')

        if load_model_path is not None:
            # we are loading in a previously-saved model, not creating a new one
            self._load_model_init_helper(load_model_path=load_model_path,
                                         map_location=map_location,
                                         **kwargs)
        else:
            if self.train_loader is None:
                raise TypeError(
                    '``train`` must be provided to all newly-instantiated models!'
                )
            elif self.val_loader is not None:
                assert self.train_loader.num_users == self.val_loader.num_users, (
                    'Both training and val ``num_users`` must equal: '
                    f'{self.train_loader.num_users} != {self.val_loader.num_users}.'
                )
                assert self.train_loader.num_items == self.val_loader.num_items, (
                    'Both training and val ``num_items`` must equal: '
                    f'{self.train_loader.num_items} != {self.val_loader.num_items}.'
                )

                if (hasattr(self.train_loader, 'num_negative_samples')
                        or hasattr(self.val_loader, 'num_negative_samples')):
                    num_negative_samples_error = (
                        'Training and val ``num_negative_samples`` property must both equal ``1``'
                        f' or both be greater than ``1``, not: {self.train_loader.num_negative_samples} and'
                        f' {self.val_loader.num_negative_samples}, respectively.')
                    if self.train_loader.num_negative_samples == 1:
                        assert self.val_loader.num_negative_samples == 1, num_negative_samples_error
                    elif self.train_loader.num_negative_samples > 1:
                        assert self.val_loader.num_negative_samples > 1, num_negative_samples_error
                    else:
                        raise ValueError(
                            '``self.train_loader.num_negative_samples`` must be greater than ``0``,'
                            f' not {self.train_loader.num_negative_samples}.')

            # saves all passed-in parameters
            init_args = get_init_arguments(
                exclude=['train', 'val', 'item_metadata', 'trained_model'],
                verbose=False,
            )

            self.save_hyperparameters(init_args, *kwargs.keys())

            self.hparams.num_users = self.train_loader.num_users
            self.hparams.num_items = self.train_loader.num_items
            self.hparams.num_epochs_completed = 0

            self._configure_loss()

            # check weight decay and sparsity
            if hasattr(self.hparams, 'sparse'):
                if self.hparams.sparse and self.hparams.weight_decay != 0:
                    warnings.warn(
                        textwrap.dedent(f'''
                            ``weight_decay`` value must be 0 when ``sparse`` is flagged, not
                            {self.hparams.weight_decay}. Setting to 0.
                            ''').replace('\n', ' ').strip())
                    self.hparams.weight_decay = 0.0

            # set up the actual model
            self._setup_model(**kwargs)
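The warning above is written as an indented triple-quoted string and then flattened onto a single line; a minimal standalone sketch of that ``textwrap`` pattern:

import textwrap
import warnings

weight_decay = 0.01

# dedent the triple-quoted message, then collapse the newlines into spaces
message = textwrap.dedent(f'''
    ``weight_decay`` value must be 0 when ``sparse`` is flagged, not
    {weight_decay}. Setting to 0.
    ''').replace('\n', ' ').strip()

warnings.warn(message)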
Example #6
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
                 embedding_dim: int = 30,
                 metadata_layers_dims: Optional[List[int]] = None,
                 combined_layers_dims: List[int] = [128, 64, 32],
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 metadata_only_stage_lr: float = 1e-3,
                 all_stage_lr: float = 1e-4,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 metadata_only_stage_optimizer: Union[str, Callable] = 'adam',
                 all_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        item_metadata_num_cols = None
        optimizer_config_list = None

        if load_model_path is None:
            if item_metadata is None:
                raise ValueError('Must provide item metadata for ``HybridPretrainedModel``.')
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

            if bias_optimizer is not None:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings...
                        'parameter_prefix_list': ['user_embedding', 'item_embedding'],
                        'stage': 'matrix_factorization',
                    },
                    {
                        'lr': lr if bias_lr == 'infer' else bias_lr,
                        'optimizer': optimizer if bias_optimizer == 'infer' else bias_optimizer,
                        # ... and optimize bias terms too
                        'parameter_prefix_list': ['user_bias', 'item_bias'],
                        'stage': 'matrix_factorization',
                    },
                ]
            else:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings and bias terms all together
                        'parameter_prefix_list': [
                            'user_embedding',
                            'item_embedding',
                            'user_bias',
                            'item_bias'],
                        'stage': 'matrix_factorization',
                    },
                ]

            optimizer_config_list = initial_optimizer_block + [
                {
                    'lr': metadata_only_stage_lr,
                    'optimizer': metadata_only_stage_optimizer,
                    # optimize metadata layers only
                    'parameter_prefix_list': ['metadata', 'combined', 'user_bias', 'item_bias'],
                    'stage': 'metadata_only',
                },
                {
                    'lr': all_stage_lr,
                    'optimizer': all_stage_optimizer,
                    # optimize everything
                    'parameter_prefix_list': ['user', 'item', 'metadata', 'combined'],
                    'stage': 'all',
                },
            ]

        super().__init__(optimizer_config_list=optimizer_config_list,
                         item_metadata_num_cols=item_metadata_num_cols,
                         **get_init_arguments())
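When ``bias_lr`` or ``bias_optimizer`` is the string ``'infer'``, the config above falls back to the main ``lr`` and ``optimizer``. A minimal standalone sketch of that resolution with hypothetical values:

lr = 1e-3
optimizer = 'adam'
bias_lr = 'infer'
bias_optimizer = 'sgd'

# 'infer' means "reuse the main setting"; anything else is taken as-is
resolved_bias_lr = lr if bias_lr == 'infer' else bias_lr
resolved_bias_optimizer = optimizer if bias_optimizer == 'infer' else bias_optimizer

print(resolved_bias_lr, resolved_bias_optimizer)  # 0.001 sgd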
Example #7
        def __init__(self, var_1, var_2=2468, **kwargs):
            super().__init__()

            self.actual = get_init_arguments(exclude=['var_4'], verbose=True)
Example #8
        def __init__(self, var_1, var_2=2468, **kwargs):
            super().__init__()

            self.actual = get_init_arguments(exclude=['var_2', 'var_3'])
Example #9
        def __init__(self, var_1, var_2=12345, **kwargs):
            super().__init__()

            self.actual = get_init_arguments()
Example #10
        def __init__(self, **kwargs):
            super().__init__()

            self.actual = get_init_arguments()
Example #11
        def __init__(self, var_1, var_2=54321):
            super().__init__()

            self.actual = get_init_arguments()
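Examples #7 through #11 exercise ``get_init_arguments`` against excluded names and ``**kwargs``. The utility itself is not shown on this page; the following is a hypothetical frame-inspection sketch (its name and behavior are assumptions, not the library's actual implementation) that would satisfy these call patterns for the named arguments:

import inspect

def get_init_arguments_sketch(exclude=None, verbose=False):
    # collect the calling ``__init__``'s named arguments via its stack frame
    exclude = set(exclude or [])
    caller_frame = inspect.stack()[1].frame
    arg_info = inspect.getargvalues(caller_frame)

    init_args = {
        name: arg_info.locals[name]
        for name in arg_info.args
        if name != 'self' and name not in exclude
    }
    if verbose and exclude:
        print(f'Excluding: {sorted(exclude)}')
    return init_args

class Demo:
    def __init__(self, var_1, var_2=2468, **kwargs):
        self.actual = get_init_arguments_sketch(exclude=['var_2'])

print(Demo('a', extra=1).actual)  # {'var_1': 'a'}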
Example #12
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_buckets: Iterable[int] = None,
                 embedding_dim: int = 30,
                 dropout_p: float = 0.0,
                 sparse: bool = False,
                 item_buckets_stage_lr: float = 1e-3,
                 no_buckets_stage_lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 item_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 no_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        optimizer_config_list = None
        num_item_buckets = None

        if load_model_path is None:
            # TODO: separate out optimizer and bias optimizer somehow
            optimizer_config_list = [
                {
                    'lr': item_buckets_stage_lr,
                    'optimizer': item_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_bucket_embed',
                        'item_bucket_bias',
                    ],
                    'stage': 'item_buckets',
                },
                {
                    'lr': no_buckets_stage_lr,
                    'optimizer': no_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_embed',
                        'item_bias',
                    ],
                    'stage': 'no_buckets',
                },
            ]

            if not isinstance(item_buckets, torch.Tensor):
                item_buckets = torch.tensor(item_buckets)

            # data quality checks for ``item_buckets``
            assert item_buckets.dim() == 1, (
                f'``item_buckets`` must be 1-dimensional, not {item_buckets.dim()}-dimensional!'
            )
            if len(item_buckets) != train.num_items:
                raise ValueError(
                    'Length of ``item_buckets`` must be equal to the number of items in the '
                    f'dataset: {len(item_buckets)} != {train.num_items}.'
                )
            if min(item_buckets) != 0:
                raise ValueError(f'``item_buckets`` IDs must start at 0, not {min(item_buckets)}!')

            num_item_buckets = item_buckets.max().item() + 1

        super().__init__(optimizer_config_list=optimizer_config_list,
                         num_item_buckets=num_item_buckets,
                         **get_init_arguments())
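The ``item_buckets`` checks above require a 1-dimensional, zero-indexed bucket assignment covering every item. A minimal standalone sketch with a hypothetical bucket tensor:

import torch

# hypothetical assignment of six items to three buckets
item_buckets = torch.tensor([0, 0, 1, 1, 2, 2])

assert item_buckets.dim() == 1, 'bucket assignments must be 1-dimensional'
assert int(item_buckets.min()) == 0, 'bucket IDs must start at 0'

# assuming contiguous bucket IDs, the count is simply max + 1
num_item_buckets = item_buckets.max().item() + 1
print(num_item_buckets)  # 3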