Example #1
    def __init__(self, feature, *args, **kwargs):
        super().__init__(*args, feature=feature, **kwargs)

        self.reduce_input = None
        self.reduce_dependencies = None
        self.dependencies = []

        self.fc_layers = None
        self.num_fc_layers = 0
        self.fc_size = 256
        self.use_bias = True
        self.weights_initializer = 'glorot_uniform'
        self.bias_initializer = 'zeros'
        self.weights_regularizer = None
        self.bias_regularizer = None
        self.activity_regularizer = None
        # self.weights_constraint=None
        # self.bias_constraint=None
        self.norm = None
        self.norm_params = None
        self.activation = 'relu'
        self.dropout = 0

        self.overwrite_defaults(feature)

        logger.debug(' output feature fully connected layers')
        logger.debug('  FCStack')
        self.fc_stack = FCStack(
            layers=self.fc_layers,
            num_layers=self.num_fc_layers,
            default_fc_size=self.fc_size,
            default_use_bias=self.use_bias,
            default_weights_initializer=self.weights_initializer,
            default_bias_initializer=self.bias_initializer,
            default_weights_regularizer=self.weights_regularizer,
            default_bias_regularizer=self.bias_regularizer,
            default_activity_regularizer=self.activity_regularizer,
            # default_weights_constraint=self.weights_constraint,
            # default_bias_constraint=self.bias_constraint,
            default_norm=self.norm,
            default_norm_params=self.norm_params,
            default_activation=self.activation,
            default_dropout=self.dropout,
        )

        # set up two sequence reducers, one for inputs and the other for dependencies
        self.reduce_sequence_input = SequenceReducer(
            reduce_mode=self.reduce_input
        )
        if self.dependencies:
            self.dependency_reducers = {}
            for dependency in self.dependencies:
                self.dependency_reducers[dependency] = SequenceReducer(
                    reduce_mode=self.reduce_dependencies
                )
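
The two reducers built here collapse a rank-3 [batch, sequence, hidden] tensor into a rank-2 [batch, hidden] tensor (or flatten it, for the concat mode). As a rough, self-contained sketch of what the supported reduce modes do to tensor shapes (a toy helper for illustration, not Ludwig's actual SequenceReducer):

import torch

def toy_reduce(hidden: torch.Tensor, mode: str = "sum") -> torch.Tensor:
    # hidden is assumed to have shape [batch, sequence, hidden_size]
    if mode == "sum":
        return hidden.sum(dim=1)                    # [batch, hidden_size]
    if mode == "mean":
        return hidden.mean(dim=1)                   # [batch, hidden_size]
    if mode == "last":
        return hidden[:, -1, :]                     # [batch, hidden_size]
    if mode == "concat":
        return hidden.reshape(hidden.shape[0], -1)  # [batch, sequence * hidden_size]
    return hidden                                   # no reduction

hidden = torch.randn(4, 10, 256)
assert toy_reduce(hidden, "sum").shape == (4, 256)
assert toy_reduce(hidden, "concat").shape == (4, 10 * 256)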
Example #2
    def __init__(
        self,
        hidden_size: int,
        vocab_size: int,
        max_sequence_length: int,
        cell_type: str,
        num_layers: int = 1,
        reduce_input="sum",
    ):
        super().__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.rnn_decoder = RNNDecoder(hidden_size,
                                      vocab_size,
                                      cell_type,
                                      num_layers=num_layers)
        self.max_sequence_length = max_sequence_length
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_input)
        self.num_layers = num_layers

        self.register_buffer("logits",
                             torch.zeros([max_sequence_length, vocab_size]))
        self.register_buffer(
            "decoder_input",
            torch.Tensor([strings_utils.SpecialSymbol.START.value]))
Example #3
    def __init__(
            self,
            pretrained_model_name_or_path='google/electra-small-discriminator',
            reduce_output='sum',
            trainable=False,
            num_tokens=None,
            **kwargs
    ):
        super(ELECTRAEncoder, self).__init__()
        try:
            from transformers import TFElectraModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFElectraModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
Example #4
    def __init__(
            self,
            pretrained_model_name_or_path,
            reduce_output='sum',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(AutoTransformerEncoder, self).__init__()
        try:
            from transformers import TFAutoModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFAutoModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        if not self.reduce_output == 'cls_pooled':
            self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
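
Note the special case above: when reduce_output is 'cls_pooled' no SequenceReducer is created, since the model's pooled [CLS] output is already a fixed-size vector; any other mode reduces the per-token hidden states. A toy illustration of the two paths, using random tensors in place of the real transformer outputs (assumed shapes only):

import torch

def toy_encode(sequence_hidden, pooled, reduce_output="sum"):
    # sequence_hidden: [batch, seq_len, hidden]; pooled: [batch, hidden]
    if reduce_output == "cls_pooled":
        return pooled                  # already [batch, hidden], nothing to reduce
    return sequence_hidden.sum(dim=1)  # toy stand-in for SequenceReducer(reduce_mode="sum")

seq_out, pooled_out = torch.randn(2, 12, 768), torch.randn(2, 768)
assert toy_encode(seq_out, pooled_out, "cls_pooled").shape == (2, 768)
assert toy_encode(seq_out, pooled_out, "sum").shape == (2, 768)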
Example #5
def test_multiple_dependencies(reduce_dependencies, hidden_shape,
                               dependent_hidden_shape,
                               dependent_hidden_shape2):
    # set up at least a single dependency
    hidden_layer = torch.randn(hidden_shape, dtype=torch.float32)
    other_hidden_layer = torch.randn(dependent_hidden_shape,
                                     dtype=torch.float32)
    other_dependencies = {
        "feature_name": other_hidden_layer,
    }

    # setup dummy output feature to be root of dependency list
    num_feature_defn = number_feature()
    num_feature_defn["loss"] = {"type": "mean_squared_error"}
    num_feature_defn["dependencies"] = ["feature_name"]
    if len(dependent_hidden_shape) > 2:
        num_feature_defn["reduce_dependencies"] = reduce_dependencies

    # Based on the specification, calculate the expected resulting hidden size
    # with one dependency
    if reduce_dependencies == "concat" and len(hidden_shape) == 2 and len(
            dependent_hidden_shape) == 3:
        expected_hidden_size = HIDDEN_SIZE + OTHER_HIDDEN_SIZE * SEQ_SIZE
    else:
        expected_hidden_size = HIDDEN_SIZE + OTHER_HIDDEN_SIZE

    # if multiple dependencies are specified, set up a second dependent feature
    if dependent_hidden_shape2:
        other_hidden_layer2 = torch.randn(dependent_hidden_shape2,
                                          dtype=torch.float32)
        other_dependencies["feature_name2"] = other_hidden_layer2
        num_feature_defn["dependencies"].append("feature_name2")
        if len(dependent_hidden_shape2) > 2:
            num_feature_defn["reduce_dependencies"] = reduce_dependencies

        # Based on the specification, calculate the marginal increase in the
        # resulting hidden size with two dependencies
        if reduce_dependencies == "concat" and len(hidden_shape) == 2 and len(
                dependent_hidden_shape2) == 3:
            expected_hidden_size += dependent_hidden_shape2[-1] * SEQ_SIZE
        else:
            expected_hidden_size += dependent_hidden_shape2[-1]

    # Set up dependency reducers.
    dependency_reducers = torch.nn.ModuleDict()
    for feature_name in other_dependencies.keys():
        dependency_reducers[feature_name] = SequenceReducer(
            reduce_mode=reduce_dependencies)

    # test dependency concatenation
    num_feature_defn["input_size"] = expected_hidden_size
    results = output_feature_utils.concat_dependencies(
        "num_feature", num_feature_defn["dependencies"], dependency_reducers,
        hidden_layer, other_dependencies)

    # confirm size of resulting concat_dependencies() call
    if len(hidden_shape) > 2:
        assert results.shape == (BATCH_SIZE, SEQ_SIZE, expected_hidden_size)
    else:
        assert results.shape == (BATCH_SIZE, expected_hidden_size)
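
For orientation, the concatenation being tested boils down to: reduce each rank-3 dependency tensor over its sequence dimension (or flatten it when the mode is concat), then concatenate the results onto the feature's own hidden state along the last axis. A simplified sketch of that idea, not the actual output_feature_utils.concat_dependencies implementation:

import torch

def toy_concat_dependencies(hidden, dependency_hiddens, reduce_mode="sum"):
    # hidden: [batch, hidden_size]; dependencies: [batch, hidden] or [batch, seq, hidden]
    pieces = [hidden]
    for dep in dependency_hiddens:
        if dep.dim() == 3:
            if reduce_mode == "concat":
                dep = dep.reshape(dep.shape[0], -1)  # flatten sequence into features
            else:
                dep = dep.sum(dim=1)                 # e.g. "sum" reduction
        pieces.append(dep)
    return torch.cat(pieces, dim=-1)

h = torch.randn(16, 128)
dep = torch.randn(16, 8, 64)
assert toy_concat_dependencies(h, [dep], "sum").shape == (16, 128 + 64)
assert toy_concat_dependencies(h, [dep], "concat").shape == (16, 128 + 8 * 64)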
Example #6
    def __init__(
            self,
            pretrained_model_name_or_path='jplu/tf-flaubert-base-uncased',
            reduce_output='sum',
            trainable=False,
            num_tokens=None,
            **kwargs
    ):
        super(FlauBERTEncoder, self).__init__()
        try:
            from transformers import TFFlaubertModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFFlaubertModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
Example #7
    def __init__(self, feature, *args, **kwargs):
        super().__init__(*args, feature=feature, **kwargs)

        self.reduce_input = None
        self.reduce_dependencies = None
        self.dependencies = []

        self.fc_layers = None
        self.num_fc_layers = 0
        self.fc_size = 256
        self.use_bias = True
        self.weights_initializer = "xavier_uniform"
        self.bias_initializer = "zeros"
        self.norm = None
        self.norm_params = None
        self.activation = "relu"
        self.dropout = 0
        self.input_size = None

        self.overwrite_defaults(feature)

        logger.debug(" output feature fully connected layers")
        logger.debug("  FCStack")
        self.fc_stack = FCStack(
            first_layer_input_size=self.input_size,
            layers=self.fc_layers,
            num_layers=self.num_fc_layers,
            default_fc_size=self.fc_size,
            default_use_bias=self.use_bias,
            default_weights_initializer=self.weights_initializer,
            default_bias_initializer=self.bias_initializer,
            default_norm=self.norm,
            default_norm_params=self.norm_params,
            default_activation=self.activation,
            default_dropout=self.dropout,
        )

        # set up two sequence reducers, one for inputs and the other for dependencies
        self.reduce_sequence_input = SequenceReducer(
            reduce_mode=self.reduce_input)
        if self.dependencies:
            self.dependency_reducers = torch.nn.ModuleDict()
            # todo: re-evaluate need for separate handling of `attention` reducer
            #       currently this code does not support `attention`
            for dependency in self.dependencies:
                self.dependency_reducers[dependency] = SequenceReducer(
                    reduce_mode=self.reduce_dependencies)
Example #8
    def __init__(self,
                 input_features: Dict[str, "InputFeature"] = None,
                 config: TransformerCombinerConfig = None,
                 **kwargs):
        super().__init__(input_features)
        self.name = "TransformerCombiner"
        logger.debug(f" {self.name}")

        self.reduce_output = config.reduce_output
        self.reduce_sequence = SequenceReducer(
            reduce_mode=config.reduce_output,
            max_sequence_length=len(self.input_features),
            encoding_size=config.hidden_size,
        )
        if self.reduce_output is None:
            self.supports_masking = True

        # sequence size for Transformer layer is number of input features
        self.sequence_size = len(self.input_features)

        logger.debug("  Projectors")
        self.projectors = ModuleList(
            # regardless of rank-2 or rank-3 input, torch.prod() calculates size
            # after flattening the encoder output tensor
            [
                Linear(
                    torch.prod(
                        torch.Tensor([*input_features[inp].output_shape
                                      ])).type(torch.int32),
                    config.hidden_size) for inp in input_features
            ])

        logger.debug("  TransformerStack")
        self.transformer_stack = TransformerStack(
            input_size=config.hidden_size,
            sequence_size=self.sequence_size,
            hidden_size=config.hidden_size,
            num_heads=config.num_heads,
            output_size=config.transformer_output_size,
            num_layers=config.num_layers,
            dropout=config.dropout,
        )

        if self.reduce_output is not None:
            logger.debug("  FCStack")
            self.fc_stack = FCStack(
                self.transformer_stack.output_shape[-1],
                layers=config.fc_layers,
                num_layers=config.num_fc_layers,
                default_output_size=config.output_size,
                default_use_bias=config.use_bias,
                default_weights_initializer=config.weights_initializer,
                default_bias_initializer=config.bias_initializer,
                default_norm=config.norm,
                default_norm_params=config.norm_params,
                default_activation=config.fc_activation,
                default_dropout=config.fc_dropout,
                fc_residual=config.fc_residual,
            )
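
Conceptually, each projector flattens one encoder output and maps it to hidden_size, and the combiner stacks those vectors into a pseudo-sequence whose length is the number of input features before running the transformer and, when a reducer is configured, the FCStack. A hedged sketch of that data flow with toy tensors (the names and shapes here are illustrative, not Ludwig's API):

import torch
from torch.nn import Linear

hidden_size = 32
encoder_outputs = {                       # toy encoder outputs with mixed ranks
    "text": torch.randn(4, 10, 64),       # [batch, seq, hidden]
    "number": torch.randn(4, 1),          # [batch, 1]
}
projectors = {
    name: Linear(int(torch.prod(torch.tensor(t.shape[1:]))), hidden_size)
    for name, t in encoder_outputs.items()
}
# flatten everything past the batch dimension, then project to hidden_size
projected = [projectors[name](t.reshape(t.shape[0], -1))
             for name, t in encoder_outputs.items()]
sequence = torch.stack(projected, dim=1)  # [batch, num_input_features, hidden_size]
assert sequence.shape == (4, 2, hidden_size)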
Example #9
def test_get_rnn_init_state_uses_hidden(num_layers):
    batch_size = 16
    sequence_length = 32
    state_size = 64
    combiner_outputs = {}
    combiner_outputs[HIDDEN] = torch.rand(
        [batch_size, sequence_length, state_size])

    # With sequence reduction.
    result = sequence_decoder_utils.get_rnn_init_state(
        combiner_outputs, SequenceReducer(reduce_mode="sum"), num_layers)
    assert list(result.size()) == [num_layers, batch_size, state_size]

    # Without sequence reduction.
    with pytest.raises(ValueError):
        sequence_decoder_utils.get_rnn_init_state(
            combiner_outputs, SequenceReducer(reduce_mode="none"), num_layers)
Example #10
def test_get_rnn_init_state_prefers_encoder_output_state(num_layers):
    batch_size = 16
    state_size = 64
    combiner_outputs = {}
    combiner_outputs[HIDDEN] = torch.rand([batch_size, state_size])
    combiner_outputs[ENCODER_OUTPUT_STATE] = torch.rand(
        [batch_size, state_size * 2])

    result = sequence_decoder_utils.get_rnn_init_state(
        combiner_outputs, SequenceReducer(reduce_mode="sum"), num_layers)

    assert list(result.size()) == [num_layers, batch_size, state_size * 2]
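
Both tests reduce to the same recipe: pick ENCODER_OUTPUT_STATE when it is available (otherwise reduce HIDDEN over the sequence dimension), then repeat the resulting [batch, state] tensor across num_layers to obtain the [num_layers, batch, state] shape that PyTorch RNNs expect as their initial hidden state. A small illustrative sketch, not the actual sequence_decoder_utils code:

import torch

def toy_rnn_init_state(combiner_outputs, num_layers, reduce_mode="sum"):
    if "encoder_output_state" in combiner_outputs:
        state = combiner_outputs["encoder_output_state"]   # already [batch, state]
    else:
        hidden = combiner_outputs["hidden"]                 # [batch, seq, state]
        if reduce_mode == "none":
            raise ValueError("an un-reduced sequence cannot seed the RNN state")
        state = hidden.sum(dim=1)                           # toy "sum" reduction
    return state.unsqueeze(0).repeat(num_layers, 1, 1)      # [num_layers, batch, state]

outputs = {"hidden": torch.rand(16, 32, 64)}
assert toy_rnn_init_state(outputs, num_layers=2).shape == (2, 16, 64)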
Example #11
    def __init__(
        self, input_features: Dict[str, "InputFeature"], config: SequenceConcatCombinerConfig = None, **kwargs
    ):
        super().__init__(input_features)
        self.name = "SequenceConcatCombiner"
        logger.debug(f" {self.name}")

        self.reduce_output = config.reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=config.reduce_output)
        if self.reduce_output is None:
            self.supports_masking = True
        self.main_sequence_feature = config.main_sequence_feature
Example #12
    def __init__(self,
                 reduce_output=None,
                 main_sequence_feature=None,
                 **kwargs):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        if self.reduce_output is None:
            self.supports_masking = True
        self.main_sequence_feature = main_sequence_feature
Example #13
    def __init__(self,
                 pretrained_model_name_or_path='transfo-xl-wt103',
                 reduce_output='sum',
                 trainable=True,
                 **kwargs):
        super().__init__()
        try:
            from transformers import TFTransfoXLModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]')
            sys.exit(-1)

        self.transformer = TFTransfoXLModel.from_pretrained(
            pretrained_model_name_or_path)
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
Example #14
    def __init__(
        self, input_features: Dict[str, "InputFeature"] = None, config: TabTransformerCombinerConfig = None, **kwargs
    ):
        super().__init__(input_features)
        self.name = "TabTransformerCombiner"
        logger.debug(f"Initializing {self.name}")

        if config.reduce_output is None:
            raise ValueError("TabTransformer requires the `reduce_output` parameter")
        self.reduce_output = config.reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=config.reduce_output)
        self.supports_masking = True

        self.embed_input_feature_name = config.embed_input_feature_name
        if self.embed_input_feature_name:
            vocab = [
                i_f
                for i_f in input_features
                if input_features[i_f].type != NUMERICAL and input_features[i_f].type != BINARY
            ]
            if self.embed_input_feature_name == "add":
                self.embed_i_f_name_layer = Embed(vocab, config.hidden_size, force_embedding_size=True)
                projector_size = config.hidden_size
            elif isinstance(self.embed_input_feature_name, int):
                if self.embed_input_feature_name > config.hidden_size:
                    raise ValueError(
                        "TabTransformer parameter "
                        "`embed_input_feature_name` "
                        "specified integer value ({}) "
                        "needs to be smaller than "
                        "`hidden_size` ({}).".format(self.embed_input_feature_name, config.hidden_size)
                    )
                self.embed_i_f_name_layer = Embed(
                    vocab,
                    self.embed_input_feature_name,
                    force_embedding_size=True,
                )
                projector_size = config.hidden_size - self.embed_input_feature_name
            else:
                raise ValueError(
                    "TabTransformer parameter "
                    "`embed_input_feature_name` "
                    "should be either None, an integer or `add`, "
                    "the current value is "
                    "{}".format(self.embed_input_feature_name)
                )
        else:
            projector_size = config.hidden_size

        logger.debug("  Projectors")
        self.unembeddable_features = []
        self.embeddable_features = []
        for i_f in input_features:
            if input_features[i_f].type in {NUMERICAL, BINARY}:
                self.unembeddable_features.append(input_features[i_f].name)
            else:
                self.embeddable_features.append(input_features[i_f].name)

        self.projectors = ModuleList()
        for i_f in self.embeddable_features:
            flatten_size = self.get_flatten_size(input_features[i_f].output_shape)
            self.projectors.append(Linear(flatten_size[0], projector_size))

        # Inputs to layer_norm are the encoder outputs of the unembeddable features,
        # i.e. numerical or binary features. These should be 2-dim tensors, and the
        # LayerNorm size is the sum of their output sizes.
        concatenated_unembeddable_encoders_size = 0
        for i_f in self.unembeddable_features:
            concatenated_unembeddable_encoders_size += input_features[i_f].output_shape[0]

        self.layer_norm = torch.nn.LayerNorm(concatenated_unembeddable_encoders_size)

        logger.debug("  TransformerStack")
        self.transformer_stack = TransformerStack(
            input_size=config.hidden_size,
            sequence_size=len(self.embeddable_features),
            hidden_size=config.hidden_size,
            # todo: can we just use projector_size? # hidden_size,
            num_heads=config.num_heads,
            fc_size=config.transformer_fc_size,
            num_layers=config.num_layers,
            dropout=config.dropout,
        )

        logger.debug("  FCStack")
        transformer_hidden_size = self.transformer_stack.layers[-1].output_shape[-1]

        # determine input size to fully connected layer based on reducer
        if config.reduce_output == "concat":
            num_embeddable_features = len(self.embeddable_features)
            fc_input_size = num_embeddable_features * transformer_hidden_size
        else:
            fc_input_size = transformer_hidden_size if len(self.embeddable_features) > 0 else 0
        self.fc_stack = FCStack(
            fc_input_size + concatenated_unembeddable_encoders_size,
            layers=config.fc_layers,
            num_layers=config.num_fc_layers,
            default_fc_size=config.fc_size,
            default_use_bias=config.use_bias,
            default_weights_initializer=config.weights_initializer,
            default_bias_initializer=config.bias_initializer,
            default_norm=config.norm,
            default_norm_params=config.norm_params,
            default_activation=config.fc_activation,
            default_dropout=config.fc_dropout,
            fc_residual=config.fc_residual,
        )

        # Create empty tensor of shape [1, 0] to use as hidden in case there are no category or numeric/binary features.
        self.register_buffer("empty_hidden", torch.empty([1, 0]))
        self.register_buffer("embeddable_features_indices", torch.arange(0, len(self.embeddable_features)))
Example #15
    def __init__(
            self,
            num_classes,
            cell_type='rnn',
            state_size=256,
            embedding_size=64,
            beam_width=1,
            num_layers=1,
            attention=None,
            tied_embeddings=None,
            is_timeseries=False,
            max_sequence_length=0,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            reduce_input='sum',
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.cell_type = cell_type
        self.state_size = state_size
        self.embedding_size = embedding_size
        self.beam_width = beam_width
        self.num_layers = num_layers
        self.attention = attention
        self.attention_mechanism = None
        self.tied_embeddings = tied_embeddings
        self.is_timeseries = is_timeseries
        self.num_classes = num_classes
        self.max_sequence_length = max_sequence_length
        self.state_size = state_size
        self.attention_mechanism = None

        self.reduce_input = reduce_input if reduce_input else 'sum'
        self.reduce_sequence = SequenceReducer(reduce_mode=self.reduce_input)

        if is_timeseries:
            self.vocab_size = 1
        else:
            self.vocab_size = self.num_classes

        self.GO_SYMBOL = self.vocab_size
        self.END_SYMBOL = 0

        logger.debug('  project input Dense')
        self.project = Dense(
            state_size,
            use_bias=use_bias,
            kernel_initializer=weights_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=weights_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer
        )

        logger.debug('  Embedding')
        self.decoder_embedding = Embedding(
            input_dim=self.num_classes + 1,  # account for GO_SYMBOL
            output_dim=embedding_size,
            embeddings_initializer=weights_initializer,
            embeddings_regularizer=weights_regularizer,
            activity_regularizer=activity_regularizer
        )
        logger.debug('  project output Dense')
        self.dense_layer = Dense(
            num_classes,
            use_bias=use_bias,
            kernel_initializer=weights_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=weights_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer
        )
        rnn_cell = get_from_registry(cell_type, rnn_layers_registry)
        rnn_cells = [rnn_cell(state_size) for _ in range(num_layers)]
        self.decoder_rnncell = StackedRNNCells(rnn_cells)
        logger.debug('  {}'.format(self.decoder_rnncell))

        # Sampler
        self.sampler = tfa.seq2seq.sampler.TrainingSampler()

        logger.debug('setting up attention for {}'.format(attention))
        if attention is not None:
            if attention == 'luong':
                self.attention_mechanism = LuongAttention(units=state_size)
            elif attention == 'bahdanau':
                self.attention_mechanism = BahdanauAttention(units=state_size)
            logger.debug('  {}'.format(self.attention_mechanism))
            self.decoder_rnncell = AttentionWrapper(
                self.decoder_rnncell,
                [self.attention_mechanism] * num_layers,
                attention_layer_size=[state_size] * num_layers
            )
            logger.debug('  {}'.format(self.decoder_rnncell))
Example #16
    def __init__(
        self,
        embedding_size: int = 10,
        embeddings_on_cpu: bool = False,
        should_softmax: bool = False,
        fc_layers: Optional[List] = None,
        num_fc_layers: int = 0,
        output_size: int = 10,
        use_bias: bool = True,
        weights_initializer: str = "xavier_uniform",
        bias_initializer: str = "zeros",
        norm: Optional[str] = None,
        norm_params: Dict = None,
        activation: str = "relu",
        dropout: float = 0,
        **kwargs,
    ):
        """
        :param embedding_size: it is the maximum embedding size, the actual
               size will be `min(vocabulary_size, embedding_size)`
               for `dense` representations and exactly `vocabulary_size`
               for the `sparse` encoding, where `vocabulary_size` is
               the number of different strings appearing in the training set
               in the column the feature is named after (plus 1 for
               `<UNK>`).
        :type embedding_size: Integer
        :param embeddings_on_cpu: by default embeddings matrices are stored
               on GPU memory if a GPU is used, as it allows
               for faster access, but in some cases the embedding matrix
               may be really big and this parameter forces the placement
               of the embedding matrix in regular memory and the CPU is used
               to resolve them, slightly slowing down the process
               as a result of data transfer between CPU and GPU memory.
        :param dropout: dropout rate applied before
               returning the encoder output.
        :type dropout: Float
        """
        super().__init__()
        logger.debug(f" {self.name}")

        self.should_softmax = should_softmax
        self.sum_sequence_reducer = SequenceReducer(reduce_mode="sum")

        self.h3_embed = H3Embed(
            embedding_size,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            weights_initializer=weights_initializer,
            bias_initializer=bias_initializer,
            reduce_output="None",
        )

        self.register_buffer(
            "aggregation_weights",
            torch.Tensor(
                get_initializer(weights_initializer)([H3_INPUT_SIZE, 1])))

        logger.debug("  FCStack")
        self.fc_stack = FCStack(
            first_layer_input_size=self.h3_embed.output_shape[0],
            layers=fc_layers,
            num_layers=num_fc_layers,
            default_output_size=output_size,
            default_use_bias=use_bias,
            default_weights_initializer=weights_initializer,
            default_bias_initializer=bias_initializer,
            default_norm=norm,
            default_norm_params=norm_params,
            default_activation=activation,
            default_dropout=dropout,
        )
Example #17
    def __init__(
        self,
        embedding_size: int = 10,
        embeddings_on_cpu: bool = False,
        fc_layers: Optional[List] = None,
        num_fc_layers: int = 0,
        output_size: int = 10,
        use_bias: bool = True,
        weights_initializer: str = "xavier_uniform",
        bias_initializer: str = "zeros",
        norm: str = None,
        norm_params: Dict = None,
        activation: str = "relu",
        dropout: float = 0,
        reduce_output: str = "sum",
        **kwargs,
    ):
        """
        :param embedding_size: it is the maximum embedding size, the actual
               size will be `min(vocabulary_size, embedding_size)`
               for `dense` representations and exactly `vocabulary_size`
               for the `sparse` encoding, where `vocabulary_size` is
               the number of different strings appearing in the training set
               in the column the feature is named after (plus 1 for
               `<UNK>`).
        :type embedding_size: Integer
        :param embeddings_on_cpu: by default embeddings matrices are stored
               on GPU memory if a GPU is used, as it allows
               for faster access, but in some cases the embedding matrix
               may be really big and this parameter forces the placement
               of the embedding matrix in regular memory and the CPU is used
               to resolve them, slightly slowing down the process
               as a result of data transfer between CPU and GPU memory.
        :param dropout: dropout rate applied before
               returning the encoder output.
        :type dropout: Float
        """
        super().__init__()
        logger.debug(f" {self.name}")

        self.embedding_size = embedding_size
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)

        logger.debug("  mode Embed")
        self.embed_mode = Embed(
            [str(i) for i in range(3)],
            embedding_size,
            representation="dense",
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
        )

        logger.debug("  edge Embed")
        self.embed_edge = Embed(
            [str(i) for i in range(7)],
            embedding_size,
            representation="dense",
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
        )

        logger.debug("  resolution Embed")
        self.embed_resolution = Embed(
            [str(i) for i in range(16)],
            embedding_size,
            representation="dense",
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
        )

        logger.debug("  base cell Embed")
        self.embed_base_cell = Embed(
            [str(i) for i in range(122)],
            embedding_size,
            representation="dense",
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
        )

        logger.debug("  cells Embed")
        self.embed_cells = EmbedSequence(
            [str(i) for i in range(8)],
            embedding_size,
            max_sequence_length=(H3_INPUT_SIZE - 4),
            representation="dense",
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
        )

        logger.debug("  FCStack")
        self.fc_stack = FCStack(
            first_layer_input_size=embedding_size,
            layers=fc_layers,
            num_layers=num_fc_layers,
            default_output_size=output_size,
            default_use_bias=use_bias,
            default_weights_initializer=weights_initializer,
            default_bias_initializer=bias_initializer,
            default_norm=norm,
            default_norm_params=norm_params,
            default_activation=activation,
            default_dropout=dropout,
        )
Example #18
    def __init__(self, feature: Dict[str, Any],
                 other_output_features: Dict[str, "OutputFeature"], *args,
                 **kwargs):
        """Defines defaults, overwrites them based on the feature dictionary, and sets up dependencies.

        Any output feature can depend on one or more other output features. The `other_output_features` input dictionary
        should contain entries for any dependent output features, which is accomplished by constructing output features
        in topologically sorted order. Attributes of any dependent output features are used to properly initialize
        this feature's sizes.
        """
        super().__init__(*args, feature=feature, **kwargs)

        self.reduce_input = None
        self.reduce_dependencies = None

        # List of feature names that this output feature is dependent on.
        self.dependencies = []

        self.fc_layers = None
        self.num_fc_layers = 0
        self.output_size = 256
        self.use_bias = True
        self.weights_initializer = "xavier_uniform"
        self.bias_initializer = "zeros"
        self.norm = None
        self.norm_params = None
        self.activation = "relu"
        self.dropout = 0
        self.input_size = None

        self.overwrite_defaults(feature)

        logger.debug(" output feature fully connected layers")
        logger.debug("  FCStack")

        self.input_size = get_input_size_with_dependencies(
            self.input_size, self.dependencies, other_output_features)
        feature["input_size"] = self.input_size  # needed for future overrides

        self.fc_stack = FCStack(
            first_layer_input_size=self.input_size,
            layers=self.fc_layers,
            num_layers=self.num_fc_layers,
            default_output_size=self.output_size,
            default_use_bias=self.use_bias,
            default_weights_initializer=self.weights_initializer,
            default_bias_initializer=self.bias_initializer,
            default_norm=self.norm,
            default_norm_params=self.norm_params,
            default_activation=self.activation,
            default_dropout=self.dropout,
        )
        self._calibration_module = self.create_calibration_module(feature)
        self._prediction_module = self.create_predict_module()

        # set up two sequence reducers, one for inputs and the other for dependencies
        self.reduce_sequence_input = SequenceReducer(
            reduce_mode=self.reduce_input)
        if self.dependencies:
            self.dependency_reducers = torch.nn.ModuleDict()
            # todo: re-evaluate need for separate handling of `attention` reducer
            #       currently this code does not support `attention`
            for dependency in self.dependencies:
                self.dependency_reducers[dependency] = SequenceReducer(
                    reduce_mode=self.reduce_dependencies)
Example #19
    def __init__(
            self,
            embedding_size=10,
            embeddings_on_cpu=False,
            fc_layers=None,
            num_fc_layers=0,
            fc_size=10,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            # weights_constraint=None,
            # bias_constraint=None,
            norm=None,
            norm_params=None,
            activation='relu',
            dropout=0,
            reduce_output='sum',
            **kwargs):
        """
            :param embedding_size: it is the maximum embedding size, the actual
                   size will be `min(vocabulary_size, embedding_size)`
                   for `dense` representations and exactly `vocabulary_size`
                   for the `sparse` encoding, where `vocabulary_size` is
                   the number of different strings appearing in the training set
                   in the column the feature is named after (plus 1 for `<UNK>`).
            :type embedding_size: Integer
            :param embeddings_on_cpu: by default embeddings matrices are stored
                   on GPU memory if a GPU is used, as it allows
                   for faster access, but in some cases the embedding matrix
                   may be really big and this parameter forces the placement
                   of the embedding matrix in regular memory and the CPU is used
                   to resolve them, slightly slowing down the process
                   as a result of data transfer between CPU and GPU memory.
            :param dropout: dropout rate applied before
                   returning the encoder output.
            :type dropout: Float
            :param initializer: the initializer to use. If `None`, the default
                   initializer of each variable is used (`glorot_uniform`
                   in most cases). Options are: `constant`, `identity`, `zeros`,
                    `ones`, `orthogonal`, `normal`, `uniform`,
                    `truncated_normal`, `variance_scaling`, `glorot_normal`,
                    `glorot_uniform`, `xavier_normal`, `xavier_uniform`,
                    `he_normal`, `he_uniform`, `lecun_normal`, `lecun_uniform`.
                    Alternatively it is possible to specify a dictionary with
                    a key `type` that identifies the type of initializer and
                    other keys for its parameters, e.g.
                    `{type: normal, mean: 0, stddev: 0}`.
                    To know the parameters of each initializer, please refer to
                    TensorFlow's documentation.
            :type initializer: str
            :param regularize: if `True` the embedding weights are added to
                   the set of weights that get regularized by a regularization
                   loss (if the `regularization_lambda` in `training`
                   is greater than 0).
            :type regularize: Boolean
        """
        super(H3Embed, self).__init__()
        logger.debug(' {}'.format(self.name))

        self.embedding_size = embedding_size
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)

        logger.debug('  mode Embed')
        self.embed_mode = Embed([str(i) for i in range(3)],
                                embedding_size,
                                representation='dense',
                                embeddings_trainable=True,
                                pretrained_embeddings=None,
                                force_embedding_size=True,
                                embeddings_on_cpu=embeddings_on_cpu,
                                dropout=dropout,
                                embedding_initializer=weights_initializer,
                                embedding_regularizer=weights_regularizer)

        logger.debug('  edge Embed')
        self.embed_edge = Embed([str(i) for i in range(7)],
                                embedding_size,
                                representation='dense',
                                embeddings_trainable=True,
                                pretrained_embeddings=None,
                                force_embedding_size=True,
                                embeddings_on_cpu=embeddings_on_cpu,
                                dropout=dropout,
                                embedding_initializer=weights_initializer,
                                embedding_regularizer=weights_regularizer)

        logger.debug('  resolution Embed')
        self.embed_resolution = Embed(
            [str(i) for i in range(16)],
            embedding_size,
            representation='dense',
            embeddings_trainable=True,
            pretrained_embeddings=None,
            force_embedding_size=True,
            embeddings_on_cpu=embeddings_on_cpu,
            dropout=dropout,
            embedding_initializer=weights_initializer,
            embedding_regularizer=weights_regularizer)

        logger.debug('  base cell Embed')
        self.embed_base_cell = Embed([str(i) for i in range(122)],
                                     embedding_size,
                                     representation='dense',
                                     embeddings_trainable=True,
                                     pretrained_embeddings=None,
                                     force_embedding_size=True,
                                     embeddings_on_cpu=embeddings_on_cpu,
                                     dropout=dropout,
                                     embedding_initializer=weights_initializer,
                                     embedding_regularizer=weights_regularizer)

        logger.debug('  cells Embed')
        self.embed_cells = Embed([str(i) for i in range(8)],
                                 embedding_size,
                                 representation='dense',
                                 embeddings_trainable=True,
                                 pretrained_embeddings=None,
                                 force_embedding_size=True,
                                 embeddings_on_cpu=embeddings_on_cpu,
                                 dropout=dropout,
                                 embedding_initializer=weights_initializer,
                                 embedding_regularizer=weights_regularizer)

        logger.debug('  FCStack')
        self.fc_stack = FCStack(
            layers=fc_layers,
            num_layers=num_fc_layers,
            default_fc_size=fc_size,
            default_use_bias=use_bias,
            default_weights_initializer=weights_initializer,
            default_bias_initializer=bias_initializer,
            default_weights_regularizer=weights_regularizer,
            default_bias_regularizer=bias_regularizer,
            default_activity_regularizer=activity_regularizer,
            # default_weights_constraint=weights_constraint,
            # default_bias_constraint=bias_constraint,
            default_norm=norm,
            default_norm_params=norm_params,
            default_activation=activation,
            default_dropout=dropout,
        )
Example #20
    def __init__(
            self,
            input_features=None,
            num_layers=1,
            hidden_size=256,
            num_heads=8,
            transformer_fc_size=256,
            dropout=0.1,
            fc_layers=None,
            num_fc_layers=0,
            fc_size=256,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            # weights_constraint=None,
            # bias_constraint=None,
            norm=None,
            norm_params=None,
            fc_activation='relu',
            fc_dropout=0,
            fc_residual=False,
            reduce_output='mean',
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        if self.reduce_output is None:
            self.supports_masking = True

        logger.debug('  Projectors')
        self.projectors = [Dense(hidden_size) for _ in input_features]

        logger.debug('  TransformerStack')
        self.transformer_stack = TransformerStack(
            hidden_size=hidden_size,
            num_heads=num_heads,
            fc_size=transformer_fc_size,
            num_layers=num_layers,
            dropout=dropout
        )

        if self.reduce_output is not None:
            logger.debug('  FCStack')
            self.fc_stack = FCStack(
                layers=fc_layers,
                num_layers=num_fc_layers,
                default_fc_size=fc_size,
                default_use_bias=use_bias,
                default_weights_initializer=weights_initializer,
                default_bias_initializer=bias_initializer,
                default_weights_regularizer=weights_regularizer,
                default_bias_regularizer=bias_regularizer,
                default_activity_regularizer=activity_regularizer,
                # default_weights_constraint=weights_constraint,
                # default_bias_constraint=bias_constraint,
                default_norm=norm,
                default_norm_params=norm_params,
                default_activation=fc_activation,
                default_dropout=fc_dropout,
                fc_residual=fc_residual,
            )
Example #21
    def __init__(
            self,
            input_features=None,
            embed_input_feature_name=None,  # None or embedding size or "add"
            num_layers=1,
            hidden_size=256,
            num_heads=8,
            transformer_fc_size=256,
            dropout=0.1,
            fc_layers=None,
            num_fc_layers=0,
            fc_size=256,
            use_bias=True,
            weights_initializer='glorot_uniform',
            bias_initializer='zeros',
            weights_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            # weights_constraint=None,
            # bias_constraint=None,
            norm=None,
            norm_params=None,
            fc_activation='relu',
            fc_dropout=0,
            fc_residual=False,
            reduce_output='concat',
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        if reduce_output is None:
            raise ValueError("TabTransformer requires the `reduce_output` "
                             "parameter")
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.supports_masking = True
        self.layer_norm = LayerNormalization()

        self.embed_input_feature_name = embed_input_feature_name
        if self.embed_input_feature_name:
            vocab = [i_f for i_f in input_features
                     if i_f[TYPE] != NUMERICAL and i_f[TYPE] != BINARY]
            if self.embed_input_feature_name == 'add':
                self.embed_i_f_name_layer = Embed(vocab, hidden_size,
                                                  force_embedding_size=True)
                projector_size = hidden_size
            elif isinstance(self.embed_input_feature_name, int):
                if self.embed_input_feature_name > hidden_size:
                    raise ValueError(
                        "TabTransformer parameter "
                        "`embed_input_feature_name` "
                        "specified integer value ({}) "
                        "needs to be smaller than "
                        "`hidden_size` ({}).".format(
                            self.embed_input_feature_name, hidden_size
                        ))
                self.embed_i_f_name_layer = Embed(
                    vocab,
                    self.embed_input_feature_name,
                    force_embedding_size=True,
                )
                projector_size = hidden_size - self.embed_input_feature_name
            else:
                raise ValueError("TabTransformer parameter "
                                 "`embed_input_feature_name` "
                                 "should be either None, an integer or `add`, "
                                 "the current value is "
                                 "{}".format(self.embed_input_feature_name))
        else:
            projector_size = hidden_size

        logger.debug('  Projectors')
        self.projectors = [Dense(projector_size) for i_f in input_features
                           if i_f[TYPE] != NUMERICAL and i_f[TYPE] != BINARY]
        self.skip_features = [i_f[NAME] for i_f in input_features
                              if i_f[TYPE] == NUMERICAL or i_f[TYPE] == BINARY]

        logger.debug('  TransformerStack')
        self.transformer_stack = TransformerStack(
            hidden_size=hidden_size,
            num_heads=num_heads,
            fc_size=transformer_fc_size,
            num_layers=num_layers,
            dropout=dropout
        )

        logger.debug('  FCStack')
        self.fc_stack = FCStack(
            layers=fc_layers,
            num_layers=num_fc_layers,
            default_fc_size=fc_size,
            default_use_bias=use_bias,
            default_weights_initializer=weights_initializer,
            default_bias_initializer=bias_initializer,
            default_weights_regularizer=weights_regularizer,
            default_bias_regularizer=bias_regularizer,
            default_activity_regularizer=activity_regularizer,
            # default_weights_constraint=weights_constraint,
            # default_bias_constraint=bias_constraint,
            default_norm=norm,
            default_norm_params=norm_params,
            default_activation=fc_activation,
            default_dropout=fc_dropout,
            fc_residual=fc_residual,
        )