Example #1
    def __init__(self,
                 input_features: Dict[str, "InputFeature"] = None,
                 config: TransformerCombinerConfig = None,
                 **kwargs):
        super().__init__(input_features)
        self.name = "TransformerCombiner"
        logger.debug(f" {self.name}")

        self.reduce_output = config.reduce_output
        self.reduce_sequence = SequenceReducer(
            reduce_mode=config.reduce_output,
            max_sequence_length=len(self.input_features),
            encoding_size=config.hidden_size,
        )
        if self.reduce_output is None:
            self.supports_masking = True

        # sequence size for Transformer layer is number of input features
        self.sequence_size = len(self.input_features)

        logger.debug("  Projectors")
        self.projectors = ModuleList(
            # regardless of rank-2 or rank-3 input, torch.prod() calculates size
            # after flattening the encoder output tensor
            [
                Linear(
                    torch.prod(torch.Tensor([*input_features[inp].output_shape])).type(torch.int32),
                    config.hidden_size,
                )
                for inp in input_features
            ]
        )

        logger.debug("  TransformerStack")
        self.transformer_stack = TransformerStack(
            input_size=config.hidden_size,
            sequence_size=self.sequence_size,
            hidden_size=config.hidden_size,
            num_heads=config.num_heads,
            output_size=config.transformer_output_size,
            num_layers=config.num_layers,
            dropout=config.dropout,
        )

        if self.reduce_output is not None:
            logger.debug("  FCStack")
            self.fc_stack = FCStack(
                self.transformer_stack.output_shape[-1],
                layers=config.fc_layers,
                num_layers=config.num_fc_layers,
                default_output_size=config.output_size,
                default_use_bias=config.use_bias,
                default_weights_initializer=config.weights_initializer,
                default_bias_initializer=config.bias_initializer,
                default_norm=config.norm,
                default_norm_params=config.norm_params,
                default_activation=config.fc_activation,
                default_dropout=config.fc_dropout,
                fc_residual=config.fc_residual,
            )
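
A minimal sketch of the combine step these modules imply, written with plain torch calls instead of Ludwig's internal classes; the function name and signature below are assumptions for illustration only.

import torch

def combine_sketch(encoder_outputs, projectors, transformer_stack,
                   reduce_sequence, fc_stack, reduce_output):
    # Flatten each encoder output and project it to hidden_size.
    projected = [
        proj(out.flatten(start_dim=1))             # [batch, hidden_size]
        for out, proj in zip(encoder_outputs, projectors)
    ]
    # Stack the projected features into a pseudo-sequence for the transformer.
    hidden = torch.stack(projected, dim=1)         # [batch, num_features, hidden_size]
    hidden = transformer_stack(hidden)
    if reduce_output is not None:
        hidden = reduce_sequence(hidden)           # e.g. [batch, hidden_size]
        hidden = fc_stack(hidden)
    return hidden
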
Example #2
    def __init__(
        self, input_features: Dict[str, "InputFeature"] = None, config: TabTransformerCombinerConfig = None, **kwargs
    ):
        super().__init__(input_features)
        self.name = "TabTransformerCombiner"
        logger.debug(f"Initializing {self.name}")

        if config.reduce_output is None:
            raise ValueError("TabTransformer requires the `reduce_output` parameter")
        self.reduce_output = config.reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=config.reduce_output)
        self.supports_masking = True

        self.embed_input_feature_name = config.embed_input_feature_name
        if self.embed_input_feature_name:
            # names of input features that can be embedded, i.e., neither numerical nor binary
            vocab = [
                i_f
                for i_f in input_features
                if input_features[i_f].type not in {NUMERICAL, BINARY}
            ]
            if self.embed_input_feature_name == "add":
                self.embed_i_f_name_layer = Embed(vocab, config.hidden_size, force_embedding_size=True)
                projector_size = config.hidden_size
            elif isinstance(self.embed_input_feature_name, int):
                if self.embed_input_feature_name > config.hidden_size:
                    raise ValueError(
                        "TabTransformer parameter "
                        "`embed_input_feature_name` "
                        "specified integer value ({}) "
                        "needs to be smaller than "
                        "`hidden_size` ({}).".format(self.embed_input_feature_name, config.hidden_size)
                    )
                self.embed_i_f_name_layer = Embed(
                    vocab,
                    self.embed_input_feature_name,
                    force_embedding_size=True,
                )
                projector_size = config.hidden_size - self.embed_input_feature_name
            else:
                raise ValueError(
                    "TabTransformer parameter "
                    "`embed_input_feature_name` "
                    "should be either None, an integer or `add`, "
                    "the current value is "
                    "{}".format(self.embed_input_feature_name)
                )
        else:
            projector_size = config.hidden_size

        logger.debug("  Projectors")
        self.unembeddable_features = []
        self.embeddable_features = []
        for i_f in input_features:
            if input_features[i_f].type in {NUMERICAL, BINARY}:
                self.unembeddable_features.append(input_features[i_f].name)
            else:
                self.embeddable_features.append(input_features[i_f].name)

        self.projectors = ModuleList()
        for i_f in self.embeddable_features:
            flatten_size = self.get_flatten_size(input_features[i_f].output_shape)
            self.projectors.append(Linear(flatten_size[0], projector_size))

        # input to layer_norm are the encoder outputs for unembeddable features,
        # which are numerical or binary features.  These should be 2-dim
        # tensors.  Size should be concatenation of these tensors.
        concatenated_unembeddable_encoders_size = 0
        for i_f in self.unembeddable_features:
            concatenated_unembeddable_encoders_size += input_features[i_f].output_shape[0]

        self.layer_norm = torch.nn.LayerNorm(concatenated_unembeddable_encoders_size)

        logger.debug("  TransformerStack")
        self.transformer_stack = TransformerStack(
            input_size=config.hidden_size,
            sequence_size=len(self.embeddable_features),
            hidden_size=config.hidden_size,
            # TODO: can we just use projector_size here instead of hidden_size?
            num_heads=config.num_heads,
            fc_size=config.transformer_fc_size,
            num_layers=config.num_layers,
            dropout=config.dropout,
        )

        logger.debug("  FCStack")
        transformer_hidden_size = self.transformer_stack.layers[-1].output_shape[-1]

        # determine input size to fully connected layer based on reducer
        if config.reduce_output == "concat":
            num_embeddable_features = len(self.embeddable_features)
            fc_input_size = num_embeddable_features * transformer_hidden_size
        else:
            fc_input_size = transformer_hidden_size if len(self.embeddable_features) > 0 else 0
        self.fc_stack = FCStack(
            fc_input_size + concatenated_unembeddable_encoders_size,
            layers=config.fc_layers,
            num_layers=config.num_fc_layers,
            default_fc_size=config.fc_size,
            default_use_bias=config.use_bias,
            default_weights_initializer=config.weights_initializer,
            default_bias_initializer=config.bias_initializer,
            default_norm=config.norm,
            default_norm_params=config.norm_params,
            default_activation=config.fc_activation,
            default_dropout=config.fc_dropout,
            fc_residual=config.fc_residual,
        )

        # Create empty tensor of shape [1, 0] to use as hidden in case there are no category or numeric/binary features.
        self.register_buffer("empty_hidden", torch.empty([1, 0]))
        self.register_buffer("embeddable_features_indices", torch.arange(0, len(self.embeddable_features)))
Example #3
    def __init__(
            self,
            input_features=None,
            embed_input_feature_name=None,  # None or embedding size or "add"
            num_layers=1,
            hidden_size=256,
            num_heads=8,
            transformer_fc_size=256,
            dropout=0.1,
            fc_layers=None,
            num_fc_layers=0,
            fc_size=256,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            # weights_constraint=None,
            # bias_constraint=None,
            norm=None,
            norm_params=None,
            fc_activation='relu',
            fc_dropout=0,
            fc_residual=False,
            reduce_output='concat',
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        if reduce_output is None:
            raise ValueError("TabTransformer requires the `reduce_output` "
                             "parameter")
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.supports_masking = True
        self.layer_norm = LayerNormalization()

        self.embed_input_feature_name = embed_input_feature_name
        if self.embed_input_feature_name:
            vocab = [i_f for i_f in input_features
                     if i_f[TYPE] != NUMERICAL and i_f[TYPE] != BINARY]
            if self.embed_input_feature_name == 'add':
                self.embed_i_f_name_layer = Embed(vocab, hidden_size,
                                                  force_embedding_size=True)
                projector_size = hidden_size
            elif isinstance(self.embed_input_feature_name, int):
                if self.embed_input_feature_name > hidden_size:
                    raise ValueError(
                        "TabTransformer parameter "
                        "`embed_input_feature_name` "
                        "specified integer value ({}) "
                        "needs to be smaller than "
                        "`hidden_size` ({}).".format(
                            self.embed_input_feature_name, hidden_size
                        ))
                self.embed_i_f_name_layer = Embed(
                    vocab,
                    self.embed_input_feature_name,
                    force_embedding_size=True,
                )
                projector_size = hidden_size - self.embed_input_feature_name
            else:
                raise ValueError("TabTransformer parameter "
                                 "`embed_input_feature_name` "
                                 "should be either None, an integer or `add`, "
                                 "the current value is "
                                 "{}".format(self.embed_input_feature_name))
        else:
            projector_size = hidden_size

        logger.debug('  Projectors')
        self.projectors = [Dense(projector_size) for i_f in input_features
                           if i_f[TYPE] != NUMERICAL and i_f[TYPE] != BINARY]
        self.skip_features = [i_f[NAME] for i_f in input_features
                              if i_f[TYPE] == NUMERICAL or i_f[TYPE] == BINARY]

        logger.debug('  TransformerStack')
        self.transformer_stack = TransformerStack(
            hidden_size=hidden_size,
            num_heads=num_heads,
            fc_size=transformer_fc_size,
            num_layers=num_layers,
            dropout=dropout
        )

        logger.debug('  FCStack')
        self.fc_stack = FCStack(
            layers=fc_layers,
            num_layers=num_fc_layers,
            default_fc_size=fc_size,
            default_use_bias=use_bias,
            default_weights_initializer=weights_initializer,
            default_bias_initializer=bias_initializer,
            default_weights_regularizer=weights_regularizer,
            default_bias_regularizer=bias_regularizer,
            default_activity_regularizer=activity_regularizer,
            # default_weights_constraint=weights_constraint,
            # default_bias_constraint=bias_constraint,
            default_norm=norm,
            default_norm_params=norm_params,
            default_activation=fc_activation,
            default_dropout=fc_dropout,
            fc_residual=fc_residual,
        )
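
A small worked example of the two embed_input_feature_name modes configured above. The add-versus-concatenate behavior is inferred from how projector_size is computed; the tensors are placeholders for illustration only.

import torch

batch, hidden_size, embed_size = 4, 256, 64

# "add" mode: the name embedding has hidden_size dimensions and is added
# element-wise to the projected feature (projector_size == hidden_size).
proj_add = torch.randn(batch, hidden_size)
name_emb_add = torch.randn(batch, hidden_size)
combined_add = proj_add + name_emb_add                      # [4, 256]

# integer mode (e.g. 64): the projector outputs hidden_size - 64 and the
# 64-dim name embedding is concatenated back to reach hidden_size.
proj_cat = torch.randn(batch, hidden_size - embed_size)
name_emb_cat = torch.randn(batch, embed_size)
combined_cat = torch.cat([proj_cat, name_emb_cat], dim=-1)  # [4, 256]
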
Example #4
    def __init__(
            self,
            input_features=None,
            num_layers=1,
            hidden_size=256,
            num_heads=8,
            transformer_fc_size=256,
            dropout=0.1,
            fc_layers=None,
            num_fc_layers=0,
            fc_size=256,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            # weights_constraint=None,
            # bias_constraint=None,
            norm=None,
            norm_params=None,
            fc_activation='relu',
            fc_dropout=0,
            fc_residual=False,
            reduce_output='mean',
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        if self.reduce_output is None:
            self.supports_masking = True

        logger.debug('  Projectors')
        self.projectors = [Dense(hidden_size) for _ in input_features]

        logger.debug('  TransformerStack')
        self.transformer_stack = TransformerStack(
            hidden_size=hidden_size,
            num_heads=num_heads,
            fc_size=transformer_fc_size,
            num_layers=num_layers,
            dropout=dropout
        )

        if self.reduce_output is not None:
            logger.debug('  FCStack')
            self.fc_stack = FCStack(
                layers=fc_layers,
                num_layers=num_fc_layers,
                default_fc_size=fc_size,
                default_use_bias=use_bias,
                default_weights_initializer=weights_initializer,
                default_bias_initializer=bias_initializer,
                default_weights_regularizer=weights_regularizer,
                default_bias_regularizer=bias_regularizer,
                default_activity_regularizer=activity_regularizer,
                # default_weights_constraint=weights_constraint,
                # default_bias_constraint=bias_constraint,
                default_norm=norm,
                default_norm_params=norm_params,
                default_activation=fc_activation,
                default_dropout=fc_dropout,
                fc_residual=fc_residual,
            )
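
For context, a worked shape example comparing the default reduce_output='mean' with 'concat' on the transformer output, using plain torch and assuming 5 input features with hidden_size=256.

import torch

hidden = torch.randn(8, 5, 256)  # [batch, num_input_features, hidden_size]
reduced_mean = hidden.mean(dim=1)                      # [8, 256]
reduced_concat = hidden.reshape(hidden.shape[0], -1)   # [8, 1280]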