Example #1
    def __init__(
            self,
            encoder_output_dim: int,  # 400+200gnn=600
            action_embedding_dim: int,  # 200
            input_attention: Attention,  # {"type": "dot_product"}
            past_attention: Attention,  # {"type": "dot_product"}
            activation: Activation = Activation.by_name('relu')(),
            predict_start_type_separately: bool = True,  # False
            num_start_types: int = None,
            add_action_bias: bool = True,  # True
            dropout: float = 0.0,  # 0.5
            num_layers: int = 1) -> None:  # 1

        super().__init__(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            num_start_types=num_start_types,
            activation=activation,
            predict_start_type_separately=predict_start_type_separately,
            add_action_bias=add_action_bias,
            dropout=dropout,
            num_layers=num_layers)

        self._past_attention = past_attention
        self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
Example #2
    def __init__(self, config: ParsingConfig):
        assert isinstance(config, ParsingConfig)
        super().__init__(config)
        self.config = config
        encoder_dim = config.decoder_config.output_dim

        if self.config.use_pos:
            self.pos_embedding = nn.Embedding(config.num_pos,
                                              config.pos_dim,
                                              padding_idx=0)
            encoder_dim += config.pos_dim

        self.head_arc_feedforward = FeedForward(encoder_dim, 1, config.arc_dim,
                                                Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(config.arc_dim,
                                                     config.arc_dim,
                                                     use_input_biases=True)

        self.head_tag_feedforward = FeedForward(encoder_dim, 1, config.tag_dim,
                                                Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(config.tag_dim,
                                                      config.tag_dim,
                                                      config.num_labels)
        self.dropout = InputVariationalDropout(config.dropout)
        self.use_mst_decoding_for_validation = config.use_mst_decoding_for_validation
Example #3
    def __init__(self,
                 sentence_encoder: Seq2VecEncoder,
                 doc_encoder: Seq2VecEncoder,
                 query_encoder: Seq2VecEncoder,
                 use_encoded: bool = False,
                 scorer: Optional[FeedForward] = None,
                 sentence_attention: Optional[Attention] = None,
                 document_attention: Optional[Attention] = None) -> None:

        super(Seq2VecSentenceScorer, self).__init__()

        self.sentence_encoder = sentence_encoder
        self.doc_encoder = doc_encoder
        self.query_encoder = query_encoder
        self.use_encoded = use_encoded
        self.sentence_attention = sentence_attention
        self.document_attention = document_attention
        # get the dimensions for the scorer and for sanity checking
        q_dim = self.query_encoder.get_output_dim()
        d_dim = self.doc_encoder.get_output_dim()

        input_dim = (q_dim + d_dim)
        if use_encoded:
            input_dim *= 2
        # set up the scorer
        if scorer is None:
            scorer = FeedForward(
                        input_dim=input_dim, num_layers=1,
                        hidden_dims=1, activations=Activation.by_name('linear')(), dropout=0.)
        self.query_transformer = FeedForward(
            input_dim=q_dim, num_layers=1, hidden_dims=q_dim, activations=Activation.by_name('tanh')(), dropout=0.2)
        self.scorer = scorer
        # assertions to ensure our shapes match our assumptions
        assert q_dim == d_dim
        assert self.scorer.get_output_dim() == 1
        assert self.scorer.get_input_dim() == input_dim
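A quick sketch of the scorer's input-dim bookkeeping above (illustrative values; q_dim == d_dim as the assertion requires):

q_dim = d_dim = 128
input_dim = q_dim + d_dim      # 256: concatenated query + document encodings
use_encoded = True
if use_encoded:
    input_dim *= 2             # 512: raw and encoded variants are concatenated
assert input_dim == 512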
Example #4
    def __init__(
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 past_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 dropout: float = 0.0,
                 num_layers: int = 1) -> None:
        super().__init__(encoder_output_dim=encoder_output_dim,
                         action_embedding_dim=action_embedding_dim,
                         input_attention=input_attention,
                         activation=activation,
                         add_action_bias=add_action_bias,
                         dropout=dropout,
                         num_layers=num_layers)

        self._past_attention = past_attention
        self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
        self._action2gate = FeedForward(201, 1, 1,
                                        Activation.by_name('sigmoid')())
        self._output_type_projection_layer = Linear(
            encoder_output_dim + encoder_output_dim, action_embedding_dim)
Example #5
    def test_rnn_sentence_extractor(self):
        # Hyperparameters
        batch_size = 3
        num_sents = 5
        input_hidden_size = 7
        hidden_size = 11

        # Setup a model
        gru = GRU(input_size=input_hidden_size,
                  hidden_size=hidden_size,
                  bidirectional=True,
                  batch_first=True)
        rnn = PytorchSeq2SeqWrapper(gru)
        feed_forward = FeedForward(input_dim=hidden_size * 2,
                                   num_layers=2,
                                   hidden_dims=[10, 1],
                                   activations=[Activation.by_name('tanh')(), Activation.by_name('linear')()])
        extractor = RNNSentenceExtractor(rnn, feed_forward)

        # Setup some dummy data
        sentence_encodings = torch.randn(batch_size, num_sents, input_hidden_size)
        mask = torch.ones(batch_size, num_sents)

        # Pass the data through and verify the size of the output
        extraction_scores = extractor(sentence_encodings, mask)
        assert extraction_scores.size() == (batch_size, num_sents)
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 dropout: float = 0.5,
                 input_dropout: float = 0.5,
                 head_tag_temperature: Optional[float] = None,
                 head_temperature: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Supertagger, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = \
            arc_feedforward or FeedForward(encoder_dim, 1,
                                           arc_representation_dim,
                                           Activation.by_name("elu")(),
                                           dropout=dropout)
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = \
            tag_feedforward or FeedForward(encoder_dim, 1,
                                           tag_representation_dim,
                                           Activation.by_name("elu")(),
                                           dropout=dropout)
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                             tag_representation_dim,
                                             num_labels)
        self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim", "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim", "arc feedforward output dim")
        self._input_dropout = Dropout(input_dropout)
        self._attachment_scores = CategoricalAccuracy()
        self._tagging_accuracy = CategoricalAccuracy()
        self.head_tag_temperature = head_tag_temperature
        self.head_temperature = head_temperature
        initializer(self)
Example #7
    def __init__(
            self,
            vocab: Vocabulary,
            input_unit: Seq2VecEncoder,
            text_field_embedder: TextFieldEmbedder,
            # embedding_projection_dim: int = None,
            classifier_feedforward: FeedForward = None,
            max_step: int = 12,
            n_memories: int = 3,
            self_attention: bool = False,
            memory_gate: bool = False,
            dropout: float = 0.15,
            loss_weights=None,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)

        self.text_field_embedder = text_field_embedder

        self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                              input_unit.get_input_dim())
        self.input_unit = input_unit
        self.mac = MACCell(
            text_field_embedder.get_output_dim(),  # input_unit.get_output_dim(),
            max_step=max_step,
            n_memories=n_memories,
            self_attention=self_attention,
            memory_gate=memory_gate,
            dropout=dropout,
            save_attns=False,
        )

        hidden_size = 2 * input_unit.get_output_dim()
        n_layers = 3
        self.classifier = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=n_layers,
            hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[dropout, dropout, 0.0])

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
            "fbeta": FBetaMeasure(average='micro')
        }

        weights = loss_weights and torch.FloatTensor(loss_weights)
        self.loss = nn.CrossEntropyLoss(weight=weights)

        initializer(self)
Example #8
    def __init__(
        self,
        input_dim: int,  # input embedding dimension
        num_layers: int = 6,
        num_heads: int = 8,
        feedforward_hidden_dim: int = None,
        feedforward_dropout: float = 0.1,
        attention_dim: int = None,
        value_dim: int = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        use_positional_embedding: bool = True,
    ):
        super(TransformerEncoder, self).__init__()

        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._attention_norm_layers: List[LayerNorm] = []
        self._feedforward_layers: List[FeedForward] = []
        self._feedforward_norm_layers: List[LayerNorm] = []

        hidden_dim = input_dim
        attention_dim = attention_dim or (hidden_dim // num_heads)
        value_dim = value_dim or (hidden_dim // num_heads)
        feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

        for i in range(num_layers):
            attention = MultiHeadSelfAttention(
                num_heads,
                hidden_dim,
                attention_dim * num_heads,
                value_dim * num_heads,
                attention_dropout=attention_dropout)
            self.add_module(f'attention_{i}', attention)
            self._attention_layers.append(attention)

            attention_norm = LayerNorm(hidden_dim)
            self.add_module(f'attention_norm_{i}', attention_norm)
            self._attention_norm_layers.append(attention_norm)

            feedforward = FeedForward(
                hidden_dim,
                num_layers=2,
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                dropout=feedforward_dropout)
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedforward_layers.append(feedfoward)

            feedforward_norm = LayerNorm(hidden_dim)
            self.add_module(f"feedforward_norm_{i}", feedforward_norm)
            self._feedforward_norm_layers.append(feedforward_norm)

        self._dropout = torch.nn.Dropout(residual_dropout)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self._use_positional_embedding = use_positional_embedding
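The per-head dimension arithmetic used in the constructor above, as a sketch (illustrative input_dim and num_heads):

hidden_dim = 512                          # input_dim
num_heads = 8
attention_dim = hidden_dim // num_heads   # 64 per head by default
value_dim = hidden_dim // num_heads       # 64 per head by default
# the attention projections are sized back up to per-head dim * num_heads
assert attention_dim * num_heads == hidden_dim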
Example #9
    def __init__(self, vocab: Vocabulary,
                 encoder_dim: int,
                 label_dim: int,
                 edge_dim: int,
                 dropout: float,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None) -> None:
        """
        Parameters
        ----------
        vocab : ``Vocabulary``, required
            A Vocabulary, required in order to compute sizes for input/output projections.
        encoder_dim : ``int``, required.
            The output dimension of the encoder.
        label_dim : ``int``, required.
            The dimension of the MLPs used for dependency tag prediction.
        edge_dim : ``int``, required.
            The dimension of the MLPs used for head arc prediction.
        dropout : ``float``, required.
            The variational dropout applied to the output of the encoder and MLP layers.
        tag_feedforward : ``FeedForward``, optional, (default = None).
            The feedforward network used to produce tag representations.
            By default, a 1 layer feedforward network with an elu activation is used.
        arc_feedforward : ``FeedForward``, optional, (default = None).
            The feedforward network used to produce arc representations.
            By default, a 1 layer feedforward network with an elu activation is used.
        """
        super(DMEdges, self).__init__(vocab)
        self._encoder_dim = encoder_dim

        self.head_arc_feedforward = arc_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                edge_dim,
                                                Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(edge_dim,
                                                     edge_dim,
                                                     use_input_biases=True)

        num_labels = vocab.get_vocab_size("head_tags")  # i.e., edge labels

        self.head_tag_feedforward = tag_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                label_dim,
                                                Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(label_dim,
                                                      label_dim,
                                                      num_labels)

        self._dropout = InputVariationalDropout(dropout)

        check_dimensions_match(label_dim, self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim", "tag feedforward output dim")
        check_dimensions_match(edge_dim, self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim", "arc feedforward output dim")
Example #10
def init_sig(vocab, d_embedding, embedding_dropout_p, sig_depth, logsig,
             all_code_types, feedforward_num_layers, feedforward_hidden_dims,
             feedforward_activations, feedforward_dropout, leadlag, add_time,
             t_max, t_scale, use_timestamps, split_paths):
    # Init feedforward params
    feedforward_hidden_dims = [feedforward_hidden_dims] * feedforward_num_layers
    feedforward_activations = ([Activation.by_name(feedforward_activations)()]
                               * feedforward_num_layers)
    feedforward_dropout = [feedforward_dropout] * feedforward_num_layers

    # Needed for final layer
    feedforward_num_layers += 1
    feedforward_hidden_dims.append(1)
    feedforward_activations.append(Activation.by_name('linear')())
    feedforward_dropout.append(0)

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size(),
                                embedding_dim=d_embedding)

    # Handle Augmentations
    augmentations = []
    if add_time:
        augmentations.append('add_time')
    if leadlag:
        augmentations.append('leadlag')
    d_embedding_updated = update_dims(augmentations, d_embedding)
    i_augmentations = init_augmentations(augmentations,
                                         use_timestamps=use_timestamps,
                                         t_max=t_max,
                                         t_scale=t_scale)

    # Embedder maps the input tokens to the appropriate embedding matrix
    word_embeddings: TextFieldEmbedder = BasicTextFieldEmbedder(
        {"tokens": token_embedding})

    # Encoder takes path of (N, L, C) and encodes into state vector
    # encoder = BagOfEmbeddingsEncoder(embedding_dim=d_embedding)
    encoder: Seq2VecEncoder = SignatureEncoder(input_dim=d_embedding_updated,
                                               depth=sig_depth,
                                               logsig=logsig)

    classifier_feedforward: FeedForward = FeedForward(
        input_dim=encoder.get_output_dim() * 3 if
        (all_code_types and split_paths) else encoder.get_output_dim(),
        num_layers=feedforward_num_layers,
        hidden_dims=feedforward_hidden_dims,
        activations=feedforward_activations,
        dropout=feedforward_dropout)

    model = BaseModel(vocab,
                      word_embeddings,
                      encoder,
                      classifier_feedforward,
                      augmentations=i_augmentations,
                      embedding_dropout_p=embedding_dropout_p)

    return model
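The list construction at the top of init_sig, as a sketch (illustrative hyperparameters): the per-layer lists are replicated from scalars, then a final scalar linear layer is appended.

feedforward_num_layers = 2
hidden_dims = [64] * feedforward_num_layers + [1]
activations = ['relu'] * feedforward_num_layers + ['linear']
dropout = [0.1] * feedforward_num_layers + [0]
assert len(hidden_dims) == len(activations) == len(dropout) == 3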
Example #11
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 word_encoder: Seq2SeqEncoder,
                 sentence_encoder: Seq2SeqEncoder,
                 classifier_feedforward: Union[FeedForward, Maxout],
                 attended_text_dropout: float = 0.0,
                 bce_pos_weight: int = 10,
                 use_positional_encoding: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EtdHAN, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.word_encoder = word_encoder
        self.word_level_attention = FeedForward(word_encoder.get_output_dim(), 
                                                2, 
                                                [word_encoder.get_output_dim(), 1],
                                                [Activation.by_name("tanh")(), Activation.by_name("linear")()],
                                                [True, False])

        self.sentence_encoder = sentence_encoder
        self.sentence_level_attention = FeedForward(sentence_encoder.get_output_dim(), 
                                                    2, 
                                                    [sentence_encoder.get_output_dim(), 1],
                                                    [Activation.by_name("tanh")(), Activation.by_name("linear")()],
                                                    [True, False])
            
        self.classifier_feedforward = classifier_feedforward
        self.use_positional_encoding = use_positional_encoding

        self._dropout = torch.nn.Dropout(attended_text_dropout)
        
        if text_field_embedder.get_output_dim() != word_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the word_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            word_encoder.get_input_dim()))

        self.metrics = {
#             "roc_auc_score": RocAucScore()            
            "hit_5": HitAtK(5),
            "hit_10": HitAtK(10),
            "precision_5": PrecisionAtK(5),
            "precision_10": PrecisionAtK(10)
#             "hit_100": HitAtK(100),
#             "macro_measure": MacroF1Measure(top_k=5,num_label=self.num_classes)
        }
        
        self.loss = torch.nn.BCEWithLogitsLoss(
            pos_weight=torch.ones(self.num_classes) * bce_pos_weight)

        initializer(self)
Example #12
 def from_params(cls, params: Params):
     input_dim = params.pop('input_dim')
     num_layers = params.pop('num_layers')
     hidden_dims = params.pop('hidden_dims')
     activations = params.pop('activations')
     if isinstance(activations, list):
         activations = [Activation.by_name(name)() for name in activations]
     else:
         activations = Activation.by_name(activations)()
     params.assert_empty(cls.__name__)
     return cls(input_dim=input_dim,
                num_layers=num_layers,
                hidden_dims=hidden_dims,
                activations=activations)
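A usage sketch for the classmethod above (assumes AllenNLP's Params and FeedForward; the values are illustrative):

from allennlp.common import Params
from allennlp.modules import FeedForward

params = Params({
    'input_dim': 300,
    'num_layers': 2,
    'hidden_dims': [200, 100],
    'activations': ['relu', 'linear'],
})
feedforward = FeedForward.from_params(params)
assert feedforward.get_output_dim() == 100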
Example #13
    def __init__(
        self,
        input_dim: int,
        num_heads: int = 8,
        attention_dim: Optional[int] = None,
        value_dim: Optional[int] = None,
        feedforward_hidden_dim: int = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        feedforward_dropout: float = 0.1,
        use_vanilla_wiring: bool = False,
    ):
        super(UTDecBlock, self).__init__()
        hidden_dim = input_dim
        attention_dim = attention_dim or (hidden_dim // num_heads)
        value_dim = value_dim or (hidden_dim // num_heads)
        feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

        self._masked_attention = MaskedMultiHeadSelfAttention(
            num_heads,
            hidden_dim,
            attention_dim * num_heads,
            value_dim * num_heads,
            attention_dropout=attention_dropout)
        self._masked_attention_norm = LayerNorm(hidden_dim)

        self._attention = MultiHeadAttention(
            num_heads,
            hidden_dim,
            hidden_dim,
            attention_dim * num_heads,
            value_dim * num_heads,
            attention_dropout=attention_dropout)
        self._dropout = torch.nn.Dropout(residual_dropout)
        self._attention_norm = LayerNorm(hidden_dim)

        # use feedforward net as transition function
        self._feedforward = FeedForward(
            hidden_dim,
            num_layers=2,
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            dropout=feedforward_dropout)
        self._feedforward_norm = LayerNorm(hidden_dim)

        self._use_vanilla_wiring = use_vanilla_wiring
Example #14
    def prepare_model(args, vocab):
        text_field_embedder = prepare_text_field_embedder(args, vocab)

        seq2seq_encoder = prepare_context_encoder(
            encoder_type=args.encoder_type,
            input_size=text_field_embedder.get_output_dim(),
            encoder_layer_num=args.encoder_layer,
            encoder_size=args.encoder_size,
            encoder_dropout=args.encoder_dropout)

        seq2vec_encoder = CnnEncoder(
            embedding_dim=seq2seq_encoder.get_output_dim(),
            num_filters=args.cnn_hidden,
            ngram_filter_sizes=args.cnn_window,
            conv_layer_activation=Activation.by_name('linear')())

        model = Seq2VecClassificationModel(
            vocab=vocab,
            text_field_embedder=text_field_embedder,
            seq2seq_encoder=seq2seq_encoder,
            seq2vec_encoder=seq2vec_encoder,
            dropout=args.classifier_dropout,
            classification_type=args.classification_type,
            pos_label=args.positive_label,
        )

        return model
Example #15
    def __init__(self,
                 embedding_dim: int,
                 num_filters: int,
                 ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
                 conv_layer_activation: Activation = Activation.by_name('relu')(),
                 output_dim: Optional[int] = None) -> None:
        super(CnnEncoder, self).__init__()
        self._embedding_dim = embedding_dim
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._activation = conv_layer_activation
        self._output_dim = output_dim

        self._convolution_layers = [Conv1d(in_channels=self._embedding_dim,
                                           out_channels=self._num_filters,
                                           kernel_size=ngram_size)
                                    for ngram_size in self._ngram_filter_sizes]
        for i, conv_layer in enumerate(self._convolution_layers):
            self.add_module('conv_layer_%d' % i, conv_layer)

        maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
        if self._output_dim:
            self.projection_layer = Linear(maxpool_output_dim, self._output_dim)
        else:
            self.projection_layer = None
            self._output_dim = maxpool_output_dim
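Output-dim arithmetic from the constructor above, as a sketch (illustrative values; assumes the standard CnnEncoder.get_output_dim accessor): one max-pooled vector of num_filters per filter size, concatenated, unless a projection layer overrides it.

encoder = CnnEncoder(embedding_dim=50, num_filters=100)  # default sizes (2, 3, 4, 5)
assert encoder.get_output_dim() == 100 * 4  # 400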
Example #16
    def __init__(self,
                 config,
                 num_labels: int,
                 num_pos: int,
                 use_pos: bool,
                 arc_representation_dim: int,
                 arc_feedforward: FeedForward = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.) -> None:
        super(DistanceDependencyParser, self).__init__(config)
        self.bert = BertModel(config)
        self.apply(self.init_bert_weights)

        encoder_dim = config.hidden_size

        self.arc_feedforward = arc_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                arc_representation_dim,
                                                Activation.by_name("linear")())

        self.arc_attention = DistanceAttention()

        self._dropout = InputVariationalDropout(dropout)

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        self._attachment_scores = UndirectedAttachmentScores()
Example #17
    def __init__(self,
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 output_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 mixture_feedforward: FeedForward = None,
                 dropout: float = 0.0,
                 num_layers: int = 1) -> None:
        super().__init__(encoder_output_dim=encoder_output_dim,
                         action_embedding_dim=action_embedding_dim,
                         input_attention=input_attention,
                         num_start_types=num_start_types,
                         activation=activation,
                         predict_start_type_separately=predict_start_type_separately,
                         add_action_bias=add_action_bias,
                         dropout=dropout,
                         num_layers=num_layers,
                         mixture_feedforward=mixture_feedforward)
        self._output_attention = output_attention

        # override
        self._input_projection_layer = Linear(encoder_output_dim + action_embedding_dim, encoder_output_dim)

        self._attend_output_projection_layer = Linear(encoder_output_dim*2, encoder_output_dim)

        self._first_attended_output = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
        torch.nn.init.normal_(self._first_attended_output)
Example #19
    def __init__(self,
                 embedding_dim,
                 num_filters,
                 ngram_filter_sizes=(2, 3, 4, 5),
                 conv_layer_activation=None,
                 output_dim=None):
        super(CnnEncoder, self).__init__()
        self._embedding_dim = embedding_dim
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._activation = conv_layer_activation or Activation.by_name(u'relu')()
        self._output_dim = output_dim

        self._convolution_layers = [Conv1d(in_channels=self._embedding_dim,
                                           out_channels=self._num_filters,
                                           kernel_size=ngram_size)
                                    for ngram_size in self._ngram_filter_sizes]
        for i, conv_layer in enumerate(self._convolution_layers):
            self.add_module(u'conv_layer_%d' % i, conv_layer)

        maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
        if self._output_dim:
            self.projection_layer = Linear(maxpool_output_dim, self._output_dim)
        else:
            self.projection_layer = None
            self._output_dim = maxpool_output_dim
Example #20
 def from_params(cls, params: Params):
     input_dim = params.pop_int('input_dim')
     num_layers = params.pop_int('num_layers')
     hidden_dims = params.pop('hidden_dims')
     activations = params.pop('activations')
     dropout = params.pop('dropout', 0.0)
     if isinstance(activations, list):
         activations = [Activation.by_name(name)() for name in activations]
     else:
         activations = Activation.by_name(activations)()
     params.assert_empty(cls.__name__)
     return cls(input_dim=input_dim,
                num_layers=num_layers,
                hidden_dims=hidden_dims,
                activations=activations,
                dropout=dropout)
Example #21
    def __init__(
            self,
            embedding_dim: int,
            num_filters: int,
            ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
            conv_layer_activation: Activation = None,
            output_dim: Optional[int] = None) -> None:
        super(ExplainableCnnEncoder, self).__init__()
        self._embedding_dim = embedding_dim
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._activation = conv_layer_activation or Activation.by_name(
            'relu')()
        self._output_dim = output_dim

        self._convolution_layers = [(Conv1d(in_channels=self._embedding_dim,
                                            out_channels=self._num_filters,
                                            kernel_size=ngram_size),
                                     MaxPool1dAll(kernel_size=None))
                                    for ngram_size in self._ngram_filter_sizes]
        for i, (conv_layer,
                maxpool_layer) in enumerate(self._convolution_layers):
            self.add_module('conv_layer_%d' % i, conv_layer)
            self.add_module('maxpool_layer_%d' % i, maxpool_layer)

        maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
        if self._output_dim:
            self.projection_layer = Linear(maxpool_output_dim,
                                           self._output_dim)
        else:
            self.projection_layer = None
            self._output_dim = maxpool_output_dim
Example #22
    def __init__(self,
                 vector_dim: int,
                 matrix_dim: int,
                 attention_dim: int,
                 values_dim: int,
                 num_heads: int = 1,
                 activation: Activation = None,
                 attention_dropout_prob: float = 0.0,
                 normalize=True) -> None:
        super().__init__(normalize)

        self._num_heads = num_heads
        self._attention_dim = attention_dim
        self._values_dim = values_dim
        self._output_dim = matrix_dim

        if attention_dim % num_heads != 0:
            raise ValueError(
                f"Key size ({attention_dim}) must be divisible by the number of "
                f"attention heads ({num_heads}).")

        self._combined_projection = nn.Linear(matrix_dim,
                                              attention_dim + values_dim)
        self._query_projection = nn.Linear(vector_dim, attention_dim)

        self._scale = (attention_dim // num_heads)**0.5
        # self._output_projection = Linear(values_dim, self._output_dim)
        self._attention_dropout = nn.Dropout(attention_dropout_prob)
        self._output_projection = nn.Linear(values_dim, self._output_dim)

        self._activation = activation or Activation.by_name('linear')()
        self.reset_parameters()
Example #23
 def __init__(self,
              vocab: Vocabulary,
              sentence_embedder: TextFieldEmbedder,
              action_embedding_dim: int,
              encoder: Seq2SeqEncoder,
              attention: Attention,
              decoder_beam_search: BeamSearch,
              max_decoding_steps: int,
              dropout: float = 0.0) -> None:
     super(NlvrDirectSemanticParser,
           self).__init__(vocab=vocab,
                          sentence_embedder=sentence_embedder,
                          action_embedding_dim=action_embedding_dim,
                          encoder=encoder,
                          dropout=dropout)
     self._decoder_trainer = MaximumMarginalLikelihood()
     self._decoder_step = BasicTransitionFunction(
         encoder_output_dim=self._encoder.get_output_dim(),
         action_embedding_dim=action_embedding_dim,
         input_attention=attention,
         activation=Activation.by_name('tanh')(),
         add_action_bias=False,
         dropout=dropout)
     self._decoder_beam_search = decoder_beam_search
     self._max_decoding_steps = max_decoding_steps
     self._action_padding_index = -1
Example #24
 def __init__(self, tensor_1_dim, tensor_2_dim, activation=None):
     super(BilinearSimilarity, self).__init__()
     self._weight_matrix = Parameter(
         torch.Tensor(tensor_1_dim, tensor_2_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name(u'linear')()
     self.reset_parameters()
Example #25
    def __init__(self,
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 mixture_feedforward: FeedForward = None,
                 dropout: float = 0.0) -> None:
        super().__init__(encoder_output_dim=encoder_output_dim,
                         action_embedding_dim=action_embedding_dim,
                         input_attention=input_attention,
                         num_start_types=num_start_types,
                         activation=activation,
                         predict_start_type_separately=predict_start_type_separately,
                         add_action_bias=add_action_bias,
                         dropout=dropout)
        self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
        self._mixture_feedforward = mixture_feedforward

        if mixture_feedforward is not None:
            check_dimensions_match(encoder_output_dim, mixture_feedforward.get_input_dim(),
                                   "hidden state embedding dim", "mixture feedforward input dim")
            check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                                   "mixture feedforward output dim", "dimension for scalar value")
Example #26
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 dropout: float = 0.1,
                 ff_dim: int = 100):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder

        assert self.embedder.get_output_dim() == self.encoder.get_input_dim()

        self.feedforward = FeedForward(
            encoder.get_output_dim(),
            1,
            hidden_dims=ff_dim,
            activations=Activation.by_name('relu')(),
            dropout=dropout)
        self.out = torch.nn.Linear(
            in_features=self.feedforward.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))

        self.f1 = FBetaMeasure(average='micro')
        self.accuracy = CategoricalAccuracy()
        self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')
Example #27
    def __init__(self,
                 matrix_1_dim: int,
                 matrix_2_dim: int,
                 activation: Activation = None,
                 use_input_biases: bool = False,
                 label_dim: int = 1) -> None:
        super(BilinearMatrixAttentionV2, self).__init__()

        if label_dim == 1:
            self._weight_matrix = torch.nn.Parameter(
                torch.Tensor(matrix_1_dim, matrix_2_dim))
        else:
            self._weight_matrix = torch.nn.Parameter(
                torch.Tensor(label_dim, matrix_1_dim, matrix_2_dim))

        if use_input_biases:
            self._weight_bias1 = torch.nn.Parameter(
                torch.Tensor(label_dim, matrix_1_dim))
            self._weight_bias2 = torch.nn.Parameter(
                torch.Tensor(label_dim, matrix_2_dim))

        self.use_input_biases = use_input_biases
        self._bias = torch.nn.Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name('linear')()
        self.reset_parameters()
Example #28
    def __init__(
        self,
        encoder_output_dim: int,
        action_embedding_dim: int,
        input_attention: Attention,
        activation: Activation = Activation.by_name("relu")(),
        add_action_bias: bool = True,
        mixture_feedforward: FeedForward = None,
        dropout: float = 0.0,
        num_layers: int = 1,
    ) -> None:
        super().__init__(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=input_attention,
            activation=activation,
            add_action_bias=add_action_bias,
            dropout=dropout,
            num_layers=num_layers,
        )
        self._mixture_feedforward = mixture_feedforward

        if mixture_feedforward is not None:
            check_dimensions_match(
                encoder_output_dim,
                mixture_feedforward.get_input_dim(),
                "hidden state embedding dim",
                "mixture feedforward input dim",
            )
            check_dimensions_match(
                mixture_feedforward.get_output_dim(),
                1,
                "mixture feedforward output dim",
                "dimension for scalar value",
            )
Example #29
 def __init__(self,
              vocab: Vocabulary,
              sentence_embedder: TextFieldEmbedder,
              action_embedding_dim: int,
              encoder: Seq2SeqEncoder,
              attention: Attention,
              decoder_beam_search: BeamSearch,
              max_decoding_steps: int,
              dropout: float = 0.0) -> None:
     super(NlvrDirectSemanticParser, self).__init__(vocab=vocab,
                                                    sentence_embedder=sentence_embedder,
                                                    action_embedding_dim=action_embedding_dim,
                                                    encoder=encoder,
                                                    dropout=dropout)
     self._decoder_trainer = MaximumMarginalLikelihood()
     self._decoder_step = BasicTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                  action_embedding_dim=action_embedding_dim,
                                                  input_attention=attention,
                                                  num_start_types=1,
                                                  activation=Activation.by_name('tanh')(),
                                                  predict_start_type_separately=False,
                                                  add_action_bias=False,
                                                  dropout=dropout)
     self._decoder_beam_search = decoder_beam_search
     self._max_decoding_steps = max_decoding_steps
     self._action_padding_index = -1
Example #30
 def __init__(self,
              encoder_output_dim: int,
              decoder_input_dim: int,
              action_embedding_dim: int,
              input_attention: Attention,
              sql_attention: Attention = None,
              sql_output_dim: int = 100,
              activation: Activation = Activation.by_name('relu')(),
              predict_start_type_separately: bool = True,
              num_start_types: int = None,
              add_action_bias: bool = True,
              copy_gate: FeedForward = None,
              dropout: float = 0.0,
              num_layers: int = 1) -> None:
     super().__init__(
         encoder_output_dim=encoder_output_dim,
         decoder_input_dim=decoder_input_dim,
         action_embedding_dim=action_embedding_dim,
         input_attention=input_attention,
         sql_attention=sql_attention,
         sql_output_dim=sql_output_dim,
         num_start_types=num_start_types,
         activation=activation,
         predict_start_type_separately=predict_start_type_separately,
         add_action_bias=add_action_bias,
         dropout=dropout,
         num_layers=num_layers)
     # control the copy gate
     self._copy_gate = copy_gate
Example #31
    def __init__(
            self,
            embedding_dim: int,
            num_filters: int,
            ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
            conv_layer_activation: Activation = None,
            output_dim: Optional[int] = None) -> None:
        super(CnnEncoder, self).__init__()
        self._embedding_dim = embedding_dim
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._activation = conv_layer_activation or Activation.by_name(
            'relu')()
        self._output_dim = output_dim

        self._convolution_layers = [
            Conv1d(in_channels=self._embedding_dim,
                   out_channels=self._num_filters,
                   kernel_size=ngram_size)
            for ngram_size in self._ngram_filter_sizes
        ]
        for i, conv_layer in enumerate(self._convolution_layers):
            self.add_module('conv_layer_%d' % i, conv_layer)

        self._output_dim = self._num_filters * len(self._ngram_filter_sizes)
Example #32
    def __init__(self,
                 in_params: int,
                 matrix_1_dim: int,
                 matrix_2_dim: int,
                 activation: Activation = None,
                 use_input_biases: bool = False,
                 label_dim: int = 1) -> None:
        super().__init__()

        self.in_params = in_params

        if use_input_biases:
            matrix_1_dim += 1
            matrix_2_dim += 1

        if label_dim == 1:
            self._weight_matrix = Parameter(
                torch.Tensor(in_params, matrix_1_dim, matrix_2_dim))
        else:
            self._weight_matrix = Parameter(
                torch.Tensor(in_params, label_dim, matrix_1_dim, matrix_2_dim))

        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name('linear')()
        self._use_input_biases = use_input_biases
        self.reset_parameters()
Example #33
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        final_feedforward: FeedForward,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:

        super().__init__(vocab, regularizer)

        # Model components
        self._embedder = text_field_embedder
        self._feed_forward = final_feedforward

        self._cnn_claim_encoder = CnnEncoder(
            embedding_dim=self._embedder.get_output_dim(), num_filters=100)
        self._cnn_evidence_encoder = CnnEncoder(
            embedding_dim=self._embedder.get_output_dim(), num_filters=100)

        self._static_feedforward_dimension = 300
        self._static_feedforward = FeedForward(
            input_dim=self._cnn_claim_encoder.get_output_dim() * 2,
            hidden_dims=self._static_feedforward_dimension,
            num_layers=1,
            activations=Activation.by_name('relu')())

        # For accuracy and loss for training/evaluation of model
        self._accuracy = CategoricalAccuracy()
        self._loss = nn.CrossEntropyLoss()

        # Initialize weights
        initializer(self)
Example #34
    def __init__(
        self,
        embedding_dim: int,
        num_filters: int,
        ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),
        conv_layer_activation: Activation = None,
        output_dim: Optional[int] = None,
    ) -> None:
        super().__init__()
        self._embedding_dim = embedding_dim
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._activation = conv_layer_activation or Activation.by_name(
            "relu")()

        self._convolution_layers = [
            Conv1d(
                in_channels=self._embedding_dim,
                out_channels=self._num_filters,
                kernel_size=ngram_size,
            ) for ngram_size in self._ngram_filter_sizes
        ]
        for i, conv_layer in enumerate(self._convolution_layers):
            self.add_module("conv_layer_%d" % i, conv_layer)

        maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
        if output_dim:
            self.projection_layer = Linear(maxpool_output_dim, output_dim)
            self._output_dim = output_dim
        else:
            self.projection_layer = None
            self._output_dim = maxpool_output_dim
Example #35
    def __init__(self,
                 vocab: Vocabulary,
                 sentence_embedder: TextFieldEmbedder,
                 action_embedding_dim: int,
                 encoder: Seq2SeqEncoder,
                 attention: Attention,
                 beam_size: int,
                 max_decoding_steps: int,
                 max_num_finished_states: int = None,
                 dropout: float = 0.0,
                 normalize_beam_score_by_length: bool = False,
                 checklist_cost_weight: float = 0.6,
                 dynamic_cost_weight: Dict[str, Union[int, float]] = None,
                 penalize_non_agenda_actions: bool = False,
                 initial_mml_model_file: str = None) -> None:
        super(NlvrCoverageSemanticParser, self).__init__(vocab=vocab,
                                                         sentence_embedder=sentence_embedder,
                                                         action_embedding_dim=action_embedding_dim,
                                                         encoder=encoder,
                                                         dropout=dropout)
        self._agenda_coverage = Average()
        self._decoder_trainer: DecoderTrainer[Callable[[CoverageState], torch.Tensor]] = \
                ExpectedRiskMinimization(beam_size=beam_size,
                                         normalize_by_length=normalize_beam_score_by_length,
                                         max_decoding_steps=max_decoding_steps,
                                         max_num_finished_states=max_num_finished_states)

        # Instantiating an empty NlvrWorld just to get the number of terminals.
        self._terminal_productions = set(NlvrWorld([]).terminal_productions.values())
        self._decoder_step = CoverageTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                        action_embedding_dim=action_embedding_dim,
                                                        input_attention=attention,
                                                        num_start_types=1,
                                                        activation=Activation.by_name('tanh')(),
                                                        predict_start_type_separately=False,
                                                        add_action_bias=False,
                                                        dropout=dropout)
        self._checklist_cost_weight = checklist_cost_weight
        self._dynamic_cost_wait_epochs = None
        self._dynamic_cost_rate = None
        if dynamic_cost_weight:
            self._dynamic_cost_wait_epochs = dynamic_cost_weight["wait_num_epochs"]
            self._dynamic_cost_rate = dynamic_cost_weight["rate"]
        self._penalize_non_agenda_actions = penalize_non_agenda_actions
        self._last_epoch_in_forward: int = None
        # TODO (pradeep): Checking whether file exists here to avoid raising an error when we've
        # copied a trained ERM model from a different machine and the original MML model that was
        # used to initialize it does not exist on the current machine. This may not be the best
        # solution for the problem.
        if initial_mml_model_file is not None:
            if os.path.isfile(initial_mml_model_file):
                archive = load_archive(initial_mml_model_file)
                self._initialize_weights_from_archive(archive)
            else:
                # A model file is passed, but it does not exist. This is expected to happen when
                # you're using a trained ERM model to decode. But it may also happen if the path to
                # the file is really just incorrect. So throwing a warning.
                logger.warning("MML model file for initializing weights is passed, but does not exist."
                               " This is fine if you're just decoding.")
Example #36
 def __init__(self,
              vector_dim: int,
              matrix_dim: int,
              activation: Activation = None,
              normalize: bool = True) -> None:
     super().__init__(normalize)
     self._weight_matrix = Parameter(torch.Tensor(vector_dim, matrix_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name('linear')()
     self.reset_parameters()
Example #37
 def from_params(cls, params: Params) -> 'LinearSimilarity':
     tensor_1_dim = params.pop_int("tensor_1_dim")
     tensor_2_dim = params.pop_int("tensor_2_dim")
     combination = params.pop("combination", "x,y")
     activation = Activation.by_name(params.pop("activation", "linear"))()
     params.assert_empty(cls.__name__)
     return cls(tensor_1_dim=tensor_1_dim,
                tensor_2_dim=tensor_2_dim,
                combination=combination,
                activation=activation)
Example #38
 def __init__(self,
              tensor_1_dim: int,
              tensor_2_dim: int,
              combination: str = 'x,y',
              activation: Activation = Activation.by_name('linear')()) -> None:
     super(LinearSimilarity, self).__init__()
     self._combination = combination
     combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
     self._weight_vector = Parameter(torch.Tensor(combined_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation
     self.reset_parameters()
Example #39
 def from_params(cls, params: Params) -> 'CnnEncoder':
     embedding_dim = params.pop_int('embedding_dim')
     output_dim = params.pop_int('output_dim', None)
     num_filters = params.pop_int('num_filters')
     conv_layer_activation = Activation.by_name(params.pop("conv_layer_activation", "relu"))()
     ngram_filter_sizes = tuple(params.pop('ngram_filter_sizes', [2, 3, 4, 5]))
     params.assert_empty(cls.__name__)
     return cls(embedding_dim=embedding_dim,
                num_filters=num_filters,
                ngram_filter_sizes=ngram_filter_sizes,
                conv_layer_activation=conv_layer_activation,
                output_dim=output_dim)
Example #40
    def __init__(self,
                 matrix_1_dim: int,
                 matrix_2_dim: int,
                 activation: Activation = None,
                 use_input_biases: bool = False) -> None:
        super().__init__()
        if use_input_biases:
            matrix_1_dim += 1
            matrix_2_dim += 1
        self._weight_matrix = Parameter(torch.Tensor(matrix_1_dim, matrix_2_dim))

        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name('linear')()
        self._use_input_biases = use_input_biases
        self.reset_parameters()
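A sketch of the forward pass this sets up: when use_input_biases is true, a constant-one feature is appended to each input so the bilinear form also captures per-input linear terms (the method name is an assumption):

    def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
        if self._use_input_biases:
            # The extra ones column is why the constructor grew each dim by 1.
            bias_1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1,))
            bias_2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1,))
            matrix_1 = torch.cat([matrix_1, bias_1], dim=-1)
            matrix_2 = torch.cat([matrix_2, bias_2], dim=-1)
        # (batch, rows_1, dim_1) @ (dim_1, dim_2) @ (batch, dim_2, rows_2)
        intermediate = torch.matmul(matrix_1, self._weight_matrix)
        scores = torch.matmul(intermediate, matrix_2.transpose(1, 2))
        return self._activation(scores + self._bias)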
Example #41
0
    def __init__(self,
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 dropout: float = 0.0,
                 num_layers: int = 1) -> None:
        super().__init__()
        self._input_attention = input_attention
        self._add_action_bias = add_action_bias
        self._activation = activation
        self._num_layers = num_layers

        self._predict_start_type_separately = predict_start_type_separately
        if predict_start_type_separately:
            self._start_type_predictor = Linear(encoder_output_dim, num_start_types)
            self._num_start_types = num_start_types
        else:
            self._start_type_predictor = None
            self._num_start_types = None

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        output_dim = encoder_output_dim
        input_dim = output_dim
        # Our decoder input will be the concatenation of the decoder hidden state and the previous
        # action embedding, and we'll project that down to the decoder's `input_dim`, which we
        # arbitrarily set to be the same as `output_dim`.
        self._input_projection_layer = Linear(output_dim + action_embedding_dim, input_dim)
        # Before making a prediction, we'll compute an attention over the input given our updated
        # hidden state. Then we concatenate those with the decoder state and project to
        # `action_embedding_dim` to make a prediction.
        self._output_projection_layer = Linear(output_dim + encoder_output_dim, action_embedding_dim)
        if self._num_layers > 1:
            self._decoder_cell = LSTM(input_dim, output_dim, self._num_layers)
        else:
            # We use an ``LSTMCell`` when there is just one layer, because it is slightly
            # faster: we only ever run the LSTM for one step at a time.
            self._decoder_cell = LSTMCell(input_dim, output_dim)

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
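Putting the constructor's comments together, a single decoding step presumably looks like the following sketch; the method name, the attention call signature, and util.weighted_sum follow AllenNLP conventions but are assumptions here:

    def _decoder_step(self,
                      previous_action_embedding: torch.Tensor,
                      hidden_state: torch.Tensor,
                      memory_cell: torch.Tensor,
                      encoder_outputs: torch.Tensor,
                      encoder_output_mask: torch.Tensor) -> torch.Tensor:
        # Project [hidden state; previous action embedding] down to the input size.
        decoder_input = self._activation(self._input_projection_layer(
            torch.cat([hidden_state, previous_action_embedding], dim=-1)))
        # One step of the single-layer LSTMCell.
        hidden_state, memory_cell = self._decoder_cell(decoder_input,
                                                       (hidden_state, memory_cell))
        # Attend over the encoder outputs with the updated hidden state.
        attention_weights = self._input_attention(hidden_state, encoder_outputs,
                                                  encoder_output_mask)
        attended_input = util.weighted_sum(encoder_outputs, attention_weights)
        # Concatenate and project into the action embedding space for scoring.
        return self._output_projection_layer(
            torch.cat([hidden_state, attended_input], dim=-1))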
Example #42
0
    def __init__(self,
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 dropout: float = 0.0) -> None:
        super().__init__(encoder_output_dim=encoder_output_dim,
                         action_embedding_dim=action_embedding_dim,
                         input_attention=input_attention,
                         num_start_types=num_start_types,
                         activation=activation,
                         predict_start_type_separately=predict_start_type_separately,
                         add_action_bias=add_action_bias,
                         dropout=dropout)
        # See the class docstring for a description of what this does.
        self._checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
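The class docstring it references is not shown here, but the usual coverage trick is to scale the checklist balance by this learned scalar before adding it into the action scores. A hedged standalone sketch, not the library's verbatim code:

import torch

def add_checklist_balance(linked_action_logits: torch.Tensor,
                          checklist_balance: torch.Tensor,
                          checklist_multiplier: torch.nn.Parameter) -> torch.Tensor:
    # The learned multiplier controls how strongly unfinished checklist items
    # push up the logits of the actions that would complete them.
    return linked_action_logits + checklist_multiplier * checklist_balance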
Example #43
0
    def __init__(self,
                 tensor_1_dim: int,
                 tensor_2_dim: int,
                 combination: str = 'x,y',
                 activation: Activation = None,
                 prior=None) -> None:
        super(LinearSimilarityVB, self).__init__()
        self._combination = combination
        combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])

        # Flag controlling whether we use the posterior mean or actually sample.
        self.posterior_mean = False

        # If no prior is specified, we create a default one ourselves.
        if prior is None:
            prior = Vil.Prior(0.5, np.log(0.1), np.log(0.5))
        self.prior = prior.get_standarized_Prior(combined_dim)

        # Means and rhos of the variational parameters.
        self.mu_weight = Parameter(torch.Tensor(combined_dim))
        self.rho_weight = Parameter(torch.Tensor(combined_dim))
        self.mu_bias = Parameter(torch.Tensor(1))
        self.rho_bias = Parameter(torch.Tensor(1))

        # The sampled weights.
        self.weight = torch.Tensor(combined_dim)
        self.bias = torch.Tensor(1)

        self._activation = activation or Activation.by_name('linear')()

        # Initialize the variational variables.
        self.reset_parameters()
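The mu/rho pairs are the standard Bayes-by-backprop parameterization, where a weight sample is mu + log(1 + exp(rho)) * epsilon. A sketch of the sampling step this class presumably performs (the method name is an assumption):

    def sample_weights(self) -> None:
        if self.posterior_mean:
            # Deterministic evaluation: use the posterior means directly.
            self.weight = self.mu_weight
            self.bias = self.mu_bias
            return
        # Reparameterization: sigma = softplus(rho) keeps the std-dev positive.
        sigma_weight = torch.log1p(torch.exp(self.rho_weight))
        sigma_bias = torch.log1p(torch.exp(self.rho_bias))
        self.weight = self.mu_weight + sigma_weight * torch.randn_like(self.mu_weight)
        self.bias = self.mu_bias + sigma_bias * torch.randn_like(self.mu_bias)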
Example #44
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim", "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim", "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(f"Found POS tags correspoding to the following punctuation : {punctuation_tag_indices}. "
                    "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
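A condensed sketch of how the modules built above combine into scores (not the library's verbatim forward pass; `parser` is an instantiated BiaffineDependencyParser and `head_indices` are gold or predicted heads):

import torch

def biaffine_scores(parser, encoded_text: torch.Tensor, head_indices: torch.Tensor):
    head_arc = parser._dropout(parser.head_arc_feedforward(encoded_text))
    child_arc = parser._dropout(parser.child_arc_feedforward(encoded_text))
    # (batch, sequence, sequence) arc scores from the biaffine attention.
    attended_arcs = parser.arc_attention(head_arc, child_arc)

    head_tag = parser._dropout(parser.head_tag_feedforward(encoded_text))
    child_tag = parser._dropout(parser.child_tag_feedforward(encoded_text))
    # Pair each token's child representation with its head's representation,
    # then score the dependency labels with the bilinear layer.
    batch_indices = torch.arange(encoded_text.size(0)).unsqueeze(1)
    selected_head_tag = head_tag[batch_indices, head_indices]
    head_tag_logits = parser.tag_bilinear(selected_head_tag, child_tag)
    return attended_arcs, head_tag_logits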
Example #45
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 edge_prediction_threshold: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(GraphParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(f"edge_prediction_threshold must be between "
                                     f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("labels")
        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                    tag_representation_dim,
                                                    label_dim=num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim", "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim", "arc feedforward output dim")

        self._unlabelled_f1 = F1Measure(positive_label=1)
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
        initializer(self)
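Where the tree parser above can fall back to MST decoding, this graph parser admits arbitrary edge sets: an arc is presumably predicted whenever its sigmoid probability clears edge_prediction_threshold, which is why the constructor validates that the threshold lies strictly between 0 and 1. A standalone sketch of that decoding step (the function name and tensor names are assumptions):

import torch

def decode_graph_edges(arc_logits: torch.Tensor,
                       label_logits: torch.Tensor,
                       threshold: float = 0.5):
    # An arc (i, j) is kept iff sigmoid(arc_logits[..., i, j]) > threshold;
    # its label is the argmax of the per-pair label scores.
    predicted_edges = (torch.sigmoid(arc_logits) > threshold).long()
    predicted_labels = label_logits.argmax(-1)
    return predicted_edges, predicted_labels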