Example #1
 def __init__(self, source_size, target_size):
     super(Discriminator, self).__init__()
     self._classifier = FeedForward(source_size, 1, target_size, Activation.by_name("elu")())
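Note (not part of the snippet above): across these examples FeedForward is constructed positionally as FeedForward(input_dim, num_layers, hidden_dims, activations). A minimal runnable sketch, assuming AllenNLP and PyTorch are installed and using arbitrary dimensions:

import torch
from allennlp.modules import FeedForward
from allennlp.nn import Activation

ff = FeedForward(10, 2, [20, 5], Activation.by_name("relu")())
print(ff.get_output_dim())    # 5 -- the last hidden dim
out = ff(torch.randn(3, 10))  # output shape: (3, 5)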
Example #2
 def test_init_checks_hidden_dim_consistency(self):
     with pytest.raises(ConfigurationError):
         FeedForward(2, 4, [5, 5], Activation.by_name('relu')())
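The test above passes num_layers=4 but only two hidden_dims, which is why a ConfigurationError is expected: when lists are given, FeedForward wants one hidden dim (and one activation) per layer. A consistent construction, as a sketch:

from allennlp.modules import FeedForward
from allennlp.nn import Activation

# Four layers, so four hidden dims when a list is supplied.
FeedForward(2, 4, [5, 5, 5, 5], Activation.by_name('relu')())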
Example #3
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("labels")
        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                    tag_representation_dim,
                                                    label_dim=num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self._unlabelled_f1 = F1Measure(positive_label=1)
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 encoder: Seq2SeqEncoder,
                 entity_encoder: Seq2VecEncoder,
                 decoder_beam_search: BeamSearch,
                 question_embedder: TextFieldEmbedder,
                 input_attention: Attention,
                 past_attention: Attention,
                 max_decoding_steps: int,
                 action_embedding_dim: int,
                 gnn: bool = True,
                 decoder_use_graph_entities: bool = True,
                 decoder_self_attend: bool = True,
                 gnn_timesteps: int = 2,
                 parse_sql_on_decoding: bool = True,
                 add_action_bias: bool = True,
                 use_neighbor_similarity_for_linking: bool = True,
                 dataset_path: str = 'dataset',
                 training_beam_size: int = None,
                 decoder_num_layers: int = 1,
                 dropout: float = 0.0,
                 rule_namespace: str = 'rule_labels',
                 scoring_dev_params: dict = None,
                 debug_parsing: bool = False) -> None:
        super().__init__(vocab)
        self.vocab = vocab
        self._encoder = encoder
        self._max_decoding_steps = max_decoding_steps
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._rule_namespace = rule_namespace
        self._question_embedder = question_embedder
        self._add_action_bias = add_action_bias
        self._scoring_dev_params = scoring_dev_params or {}
        self.parse_sql_on_decoding = parse_sql_on_decoding
        self._entity_encoder = TimeDistributed(entity_encoder)
        self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
        self._self_attend = decoder_self_attend
        self._decoder_use_graph_entities = decoder_use_graph_entities

        self._action_padding_index = -1  # the padding value used by IndexField

        self._exact_match = Average()
        self._sql_evaluator_match = Average()
        self._action_similarity = Average()
        self._acc_single = Average()
        self._acc_multi = Average()
        self._beam_hit = Average()

        self._action_embedding_dim = action_embedding_dim

        num_actions = vocab.get_vocab_size(self._rule_namespace)
        if self._add_action_bias:
            input_action_dim = action_embedding_dim + 1
        else:
            input_action_dim = action_embedding_dim
        self._action_embedder = Embedding(num_embeddings=num_actions,
                                          embedding_dim=input_action_dim)
        self._output_action_embedder = Embedding(
            num_embeddings=num_actions, embedding_dim=action_embedding_dim)

        encoder_output_dim = encoder.get_output_dim()
        if gnn:
            encoder_output_dim += action_embedding_dim

        self._first_action_embedding = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        self._first_attended_utterance = torch.nn.Parameter(
            torch.FloatTensor(encoder_output_dim))
        self._first_attended_output = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_utterance)
        torch.nn.init.normal_(self._first_attended_output)

        self._num_entity_types = 9
        self._embedding_dim = question_embedder.get_output_dim()

        self._entity_type_encoder_embedding = Embedding(
            self._num_entity_types, self._embedding_dim)
        self._entity_type_decoder_embedding = Embedding(
            self._num_entity_types, action_embedding_dim)

        self._linking_params = torch.nn.Linear(16, 1)
        torch.nn.init.uniform_(self._linking_params.weight, 0, 1)

        num_edge_types = 3
        self._gnn = GatedGraphConv(self._embedding_dim,
                                   gnn_timesteps,
                                   num_edge_types=num_edge_types,
                                   dropout=dropout)

        self._decoder_num_layers = decoder_num_layers

        self._beam_search = decoder_beam_search
        self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)

        if decoder_self_attend:
            self._transition_function = AttendPastSchemaItemsTransitionFunction(
                encoder_output_dim=encoder_output_dim,
                action_embedding_dim=action_embedding_dim,
                input_attention=input_attention,
                past_attention=past_attention,
                predict_start_type_separately=False,
                add_action_bias=self._add_action_bias,
                dropout=dropout,
                num_layers=self._decoder_num_layers)
        else:
            self._transition_function = LinkingTransitionFunction(
                encoder_output_dim=encoder_output_dim,
                action_embedding_dim=action_embedding_dim,
                input_attention=input_attention,
                predict_start_type_separately=False,
                add_action_bias=self._add_action_bias,
                dropout=dropout,
                num_layers=self._decoder_num_layers)

        self._ent2ent_ff = FeedForward(action_embedding_dim, 1,
                                       action_embedding_dim,
                                       Activation.by_name('relu')())

        self._neighbor_params = torch.nn.Linear(self._embedding_dim,
                                                self._embedding_dim)

        # TODO: Remove hard-coded dirs
        self._evaluate_func = partial(
            evaluate,
            db_dir=os.path.join(dataset_path, 'database'),
            table=os.path.join(dataset_path, 'tables.json'),
            check_valid=False)

        self.debug_parsing = debug_parsing
Example #5
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 arc_representation_dim: int,
                 tag_representation_dim: int,
                 r_lambda: float = 1e-2,
                 normalize: bool = False,
                 arc_feedforward: FeedForward = None,
                 tag_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 dep_tag_embedding: Embedding = None,
                 predicate_embedding: Embedding = None,
                 delta_type: str = "hinge_ce",
                 subtract_gold: float = 0.0,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 gumbel_t: float = 0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SRLGraphParserBase, self).__init__(vocab, regularizer)
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.r_lambda = r_lambda
        self.normalize = normalize
        self.as_base = False
        #   print ("predicates",self.vocab._index_to_token["predicates"])
        #   print ("tags",self.vocab._index_to_token["tags"])
        self.subtract_gold = subtract_gold
        self.delta_type = delta_type
        num_labels = self.vocab.get_vocab_size("tags")
        print("num_labels", num_labels)
        self.gumbel_t = gumbel_t
        node_dim = predicate_embedding.get_output_dim()
        encoder_dim = encoder.get_output_dim()
        self.arg_arc_feedforward = arc_feedforward or \
                                   FeedForward(encoder_dim, 1,
                                               arc_representation_dim,
                                               Activation.by_name("elu")())
        self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        self.arg_tag_feedforward = tag_feedforward or \
                                   FeedForward(encoder_dim, 1,
                                               tag_representation_dim,
                                               Activation.by_name("elu")())
        self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(
            tag_representation_dim,
            tag_representation_dim,
            label_dim=num_labels,
            use_input_biases=True)  #,activation=Activation.by_name("tanh")()

        self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                                Activation.by_name("elu")())
        self._pos_tag_embedding = pos_tag_embedding or None
        self._dep_tag_embedding = dep_tag_embedding or None
        self._pred_embedding = predicate_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        #   check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")

        self._labelled_f1 = IterativeLabeledF1Measure(
            negative_label=0,
            negative_pred=0,
            selected_metrics=["F", "p_F", "l_P", "l_R"])
        self._tag_loss = torch.nn.NLLLoss(reduction="none")  # ,ignore_index=-1
        self._sense_loss = torch.nn.NLLLoss(
            reduction="none")  # ,ignore_index=-1
        initializer(self)
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 pos_embed_dim: int = None,
                 lang_embed_dim: int = None,
                 use_lang_feedforward: bool = False,
                 lang_feedforward: FeedForward = None,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DependencyDecoder, self).__init__(vocab, regularizer)

        self.pos_tag_embedding = None
        if pos_embed_dim is not None:
            self.pos_tag_embedding = Embedding(
                self.vocab.get_vocab_size("upos"), pos_embed_dim)

        self.lang_embedding = None
        if lang_embed_dim is not None:
            self.lang_embedding = Embedding(self.vocab.get_vocab_size("langs"),
                                            lang_embed_dim)

        self.dropout = torch.nn.Dropout(p=dropout)

        self.encoder = encoder
        encoder_output_dim = encoder.get_output_dim()

        self.use_lang_feedforward = use_lang_feedforward
        if self.lang_embedding is not None and use_lang_feedforward:
            self.lang_feedforward = lang_feedforward or \
                                    FeedForward(self.output_dim, 1,
                                                self.output_dim,
                                                Activation.by_name("elu")())

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_output_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_output_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._dropout = InputVariationalDropout(dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder_output_dim]))

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 task: str,
                 encoder: Seq2SeqEncoder,
                 lang_embed_dim: int = None,
                 use_lang_feedforward: bool = False,
                 lang_feedforward: FeedForward = None,
                 label_smoothing: float = 0.0,
                 dropout: float = 0.0,
                 adaptive: bool = False,
                 features: List[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(TagDecoder, self).__init__(vocab, regularizer)

        self.lang_embedding = None
        if lang_embed_dim is not None:
            self.lang_embedding = Embedding(self.vocab.get_vocab_size("langs"), lang_embed_dim)

        self.dropout = torch.nn.Dropout(p=dropout)

        self.task = task
        self.encoder = encoder
        self.output_dim = encoder.get_output_dim()
        self.label_smoothing = label_smoothing
        self.num_classes = self.vocab.get_vocab_size(task)
        self.adaptive = adaptive
        self.features = features if features else []

        self.use_lang_feedforward = use_lang_feedforward
        if self.lang_embedding is not None and use_lang_feedforward:
            self.lang_feedforward = lang_feedforward or \
                                     FeedForward(self.output_dim, 1,
                                                 self.output_dim,
                                                 Activation.by_name("elu")())

        self.metrics = {
            "acc": CategoricalAccuracy(),
            # "acc3": CategoricalAccuracy(top_k=3)
        }

        if self.adaptive:
            # TODO
            adaptive_cutoffs = [round(self.num_classes / 15), 3 * round(self.num_classes / 15)]
            self.task_output = AdaptiveLogSoftmaxWithLoss(self.output_dim,
                                                          self.num_classes,
                                                          cutoffs=adaptive_cutoffs,
                                                          div_value=4.0)
        else:
            self.task_output = TimeDistributed(Linear(self.output_dim, self.num_classes))

        self.feature_outputs = torch.nn.ModuleDict()
        self.features_metrics = {}
        for feature in self.features:
            self.feature_outputs[feature] = TimeDistributed(Linear(self.output_dim,
                                                                   vocab.get_vocab_size(feature)))
            self.features_metrics[feature] = {
                "acc": CategoricalAccuracy(),
            }

        initializer(self)
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 mention_feedforward: FeedForward,
                 relation_feedforward: FeedForward,
                 feature_size: int,
                 spans_per_word: float,
                 span_emb_dim: int,
                 use_biaffine_rel: bool,
                 rel_prop: int = 0,
                 rel_prop_dropout_A: float = 0.0,
                 rel_prop_dropout_f: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 positive_label_weight: float = 1.0,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(RelationExtractor, self).__init__(vocab, regularizer)

        # Need to hack this for cases where there's no relation data. It breaks Ulme's code.
        self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)

        # Span candidate scorer.
        # TODO(dwadden) make sure I've got the input dim right on this one.
        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(
                torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
        self._mention_pruner = Pruner(feedforward_scorer)

        # Relation scorer.
        self._use_biaffine_rel = use_biaffine_rel
        if self._use_biaffine_rel:
            self._biaffine = torch.nn.Linear(span_emb_dim, span_emb_dim)
        else:
            self._relation_feedforward = relation_feedforward
            self._relation_scorer = torch.nn.Linear(
                relation_feedforward.get_output_dim(), self._n_labels)

        self._spans_per_word = spans_per_word

        # TODO(dwadden) Add code to compute relation F1.
        # self._candidate_recall = CandidateRecall()
        self._relation_metrics = RelationMetrics1()

        class_weights = torch.cat([
            torch.tensor([1.0]),
            positive_label_weight * torch.ones(self._n_labels)
        ])
        self._loss = torch.nn.CrossEntropyLoss(reduction="sum",
                                               ignore_index=-1,
                                               weight=class_weights)
        self.rel_prop = rel_prop

        # Relation Propagation
        self._A_network = FeedForward(input_dim=self._n_labels,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=lambda x: x,
                                      dropout=rel_prop_dropout_A)
        self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=torch.nn.Sigmoid(),
                                      dropout=rel_prop_dropout_f)

        initializer(self)
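Note that this example passes plain callables (lambda x: x and torch.nn.Sigmoid()) as activations rather than AllenNLP Activation objects; when FeedForward is built directly in code it simply applies the callable per layer, so standard torch activation modules work (the bare lambda form works on the older AllenNLP releases these snippets appear to target, while newer releases may reject it because activations are stored as modules; config-driven construction would still expect a registered Activation). A minimal sketch with a torch activation:

import torch
from allennlp.modules import FeedForward

scorer = FeedForward(input_dim=4, num_layers=1, hidden_dims=8,
                     activations=torch.nn.Sigmoid())
out = scorer(torch.randn(2, 4))  # shape: (2, 8)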
Example #9
def predict_only_lee():
    # load datasetreader
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory + "/log.log"))

    batch_size = 1
    epochs = 10
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased",
                                          do_lowercase=False)
    conll_reader = ConllCorefBertReader(
        max_span_width=max_span_width,
        token_indexers={"tokens": token_indexer})
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    processed_reader_dir = Path(directory + "processed/")

    train_ds = None
    test_ds = None
    if processed_reader_dir.is_dir():
        print("Loading indexed from checkpoints")
        train_path = Path(directory + "processed/train_d")
        if train_path.exists():
            train_ds = pickle.load(
                open(directory + "processed/conll/train_d", "rb"))
            val_ds = pickle.load(
                open(directory + "processed/conll/val_d", "rb"))
            test_ds = pickle.load(
                open(directory + "processed/conll/test_d", "rb"))
        else:
            print("checkpoints not found")
            train_ds, val_ds, test_ds = (
                reader.read(dataset_folder + fname) for fname in [
                    "train.english.v4_gold_conll", "dev.english.v4_gold_conll",
                    "test.english.v4_gold_conll"
                ])
            pickle.dump(train_ds, open(directory + "processed/train_d", "wb"))
            pickle.dump(val_ds, open(directory + "processed/val_d", "wb"))
            pickle.dump(test_ds, open(directory + "processed/test_d", "wb"))
            print("saved checkpoints")

    vocab = Vocabulary()
    iterator = BasicIterator(batch_size=batch_size)
    iterator.index_with(vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

    bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",
                                           top_layer_only=True,
                                           requires_grad=True)

    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                            allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()

    shared_layer = torch.nn.LSTM(BERT_DIM,
                                 HIDDEN_DIM,
                                 batch_first=True,
                                 bidirectional=True)

    seq2seq = PytorchSeq2SeqWrapper(shared_layer)
    #seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(BERT_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    mention_feedforward = FeedForward(input_dim=2336,
                                      num_layers=2,
                                      hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776,
                                         num_layers=2,
                                         hidden_dims=150,
                                         activations=torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=vocab,
                                 text_field_embedder=word_embedding,
                                 context_layer=seq2seq,
                                 mention_feedforward=mention_feedforward,
                                 antecedent_feedforward=antecedent_feedforward,
                                 feature_size=768,
                                 max_span_width=max_span_width,
                                 spans_per_word=0.4,
                                 max_antecedents=250,
                                 lexical_dropout=0.2)

    conll_test_iterator = iterator(test_ds, num_epochs=1, shuffle=False)
    USE_GPU = 1

    #serialization_dir=directory + "saved_models/multitask/"

    #TRAINED_MODEL_PATH = directory + "saved_models/multitask/conll/model_state_epoch_9.th"

    TRAINED_MODEL_PATH = directory + "saved_models/current_run_model_state/model_state_epoch_99.th"

    model1.load_state_dict(torch.load(TRAINED_MODEL_PATH))
    model1.eval()

    num_batches = len(test_ds)

    for i in range(20):
        batch = next(conll_test_iterator, None)
        output = model1.forward(**batch)

        #let us print out the predictions in the first document of this batch
        pairs = []
        for index, j in enumerate(output['predicted_antecedents'][0]):
            if j != -1:
                i1 = output['top_spans'][0][index]
                i2 = output['top_spans'][0][
                    output['antecedent_indices'][index][j]]
                d0 = output['document'][0]
                pairs.append([d0[i1[0]:i1[1] + 1], d0[i2[0]:i2[1] + 1]])

        #pairs
        #print(pairs)
        metrics = model1.get_metrics()
        print(metrics['coref_f1'])
Example #10
    def __init__(self,
                 options,
                 tag_dim,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParser, self).__init__(None, regularizer)

        self.device = options.device

        encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(tag_dim,
                          options.lstm_dims,
                          batch_first=True,
                          bidirectional=True))
        # encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(tag_dim, options.lstm_dims, batch_first=True))
        self.encoder = encoder
        # TODO: IMPORTANT
        num_labels = options.num_labels
        self.ablation = options.ablation
        # print(num_labels)
        tag_representation_dim = options.tag_representation_dim  # 100
        arc_representation_dim = options.arc_representation_dim  # 200

        encoder_dim = self.encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)


        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = tag_dim  #text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        self._pos_to_ignore = set()
        # tags = self.vocab.get_token_to_index_vocabulary("pos")
        # punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
        # self._pos_to_ignore = set(punctuation_tag_indices.values())
        # logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
        #             "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
Example #11
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 variational_autoencoder: FeedForward = None,
                 sentiment_classifier: FeedForward = None,
                 topic_dim: int = 20,
                 freeze_feature_extraction: bool = False,
                 classification_mode: bool = False,
                 pretrained_file: str = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(TopicRNN, self).__init__(vocab, regularizer)

        self.metrics = {
            'cross_entropy': Average(),
            'negative_kl_divergence': Average(),
            'stopword_loss': Average()
        }

        self.classification_mode = classification_mode
        if classification_mode:
            self.metrics['sentiment'] = CategoricalAccuracy()

        if pretrained_file:
            archive = load_archive(pretrained_file)
            pretrained_model = archive.model
            self._init_from_archive(pretrained_model)
        else:
            # Model parameter definition.
            #
            # Defaults reflect Dieng et al.'s decisions when training their semi-unsupervised
            # IMDB sentiment classifier.
            self.text_field_embedder = text_field_embedder
            self.vocab_size = self.vocab.get_vocab_size("tokens")
            self.text_encoder = text_encoder
            self.topic_dim = topic_dim
            self.vocabulary_projection_layer = TimeDistributed(
                Linear(text_encoder.get_output_dim(), self.vocab_size))

            # Parameter gamma from the paper; projects hidden states into binary logits for whether a
            # word is a stopword.
            self.stopword_projection_layer = TimeDistributed(
                Linear(text_encoder.get_output_dim(), 2))

            self.tokens_to_index = vocab.get_token_to_index_vocabulary()

            # This step should only ever be performed ONCE.
            # When running allennlp train, the vocabulary will be constructed before the model instantiation, but
            # we can't create the stopless namespace until we get here.
            # Check if there already exists a stopless namespace: if so refrain from altering it.
            if "stopless" not in vocab._token_to_index.keys():
                assert self.tokens_to_index[DEFAULT_PADDING_TOKEN] == 0 and \
                       self.tokens_to_index[DEFAULT_OOV_TOKEN] == 1
                for token, _ in self.tokens_to_index.items():
                    if token not in STOP_WORDS:
                        vocab.add_token_to_namespace(token, "stopless")

                # Since a vocabulary with the stopless namespace hasn't been saved, save one for convenience.
                vocab.save_to_files("vocabulary")

            # Compute stop indices in the normal vocab space to prevent stop words
            # from contributing to the topic additions.
            self.stop_indices = torch.LongTensor(
                [vocab.get_token_index(stop) for stop in STOP_WORDS])

            # Learnable topics.
            # TODO: How should these be initialized?
            self.beta = nn.Parameter(torch.rand(topic_dim, self.vocab_size))

            # mu: The mean of the variational distribution.
            self.mu_linear = nn.Linear(topic_dim, topic_dim)

            # sigma: The root standard deviation of the variational distribution.
            self.sigma_linear = nn.Linear(topic_dim, topic_dim)

            # noise: used when sampling.
            self.noise = MultivariateNormal(torch.zeros(topic_dim),
                                            torch.eye(topic_dim))

            stopless_dim = vocab.get_vocab_size("stopless")
            self.variational_autoencoder = variational_autoencoder or FeedForward(
                # Takes as input the word frequencies in the stopless dimension and projects
                # the word frequencies into a latent topic representation.
                #
                # Each latent representation will help tune the variational dist.'s parameters.
                stopless_dim,
                3,
                [500, 500, topic_dim],
                torch.nn.ReLU(),
            )

            # The shape for the feature vector for sentiment classification.
            # (RNN Hidden Size + Inference Network output dimension).
            sentiment_input_size = text_encoder.get_output_dim() + topic_dim
            self.sentiment_classifier = sentiment_classifier or FeedForward(
                # As done by the paper; a simple single layer with 50 hidden units
                # and sigmoid activation for sentiment classification.
                sentiment_input_size,
                2,
                [50, 2],
                torch.nn.Sigmoid(),
            )

        if freeze_feature_extraction:
            # Freeze the RNN and VAE pipeline so that only the classifier is trained.
            for name, param in self.named_parameters():
                if "sentiment_classifier" not in name:
                    param.requires_grad = False

        self.sentiment_criterion = nn.CrossEntropyLoss()

        self.num_samples = 50

        initializer(self)
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 base_model_archive: str,
                 refiner: Seq2SeqEncoder,
                 rep_dim: int,
                 encoder: Seq2SeqEncoder = None,
                 train_score: float = 10.0,
                 dropout: float = 0.3,
                 delta_type: str = "hinge_ce",
                 train_linear: float = 1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SRLGraphParserRefine, self).__init__(vocab, regularizer)
        self.train_score = train_score
        self.delta_type = delta_type
        self.train_linear = train_linear
        base_model: SRLGraphParserBase = load_archive(base_model_archive).model
        base_model.gumbel_t = refiner.gumbel_t
        base_model.subtract_gold = refiner.subtract_gold
        self.subtract_gold = refiner.subtract_gold
        base_model.as_base = True
        self.encoder = None if encoder is None else encoder
        assert self.encoder is not None, "have not implemented reuse for now"

        num_labels = self.vocab.get_vocab_size("tags")

        sense_dim = base_model._pred_embedding.get_output_dim()

        encoder_dim = (self.encoder.get_output_dim()
                       if self.encoder else base_model.encoder.get_output_dim())

        self.rep_dim = rep_dim
        self.predicte_rep_feedforward = FeedForward(
            encoder_dim, 1, rep_dim,
            Activation.by_name("elu")())
        self.argument_rep_feedforward = FeedForward(
            encoder_dim, 1, rep_dim,
            Activation.by_name("elu")())
        self.refiner = refiner
        self.refiner.initialize_network(n_tags=num_labels,
                                        sense_dim=sense_dim,
                                        rep_dim=rep_dim)
        self._dropout = InputVariationalDropout(dropout)

        #   check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")

        self._labelled_f1 = IterativeLabeledF1Measure(
            negative_label=0,
            negative_pred=0,
            selected_metrics=["F", "l_R", "p_F"])
        self._tag_loss = torch.nn.NLLLoss(reduction="none")  # ,ignore_index=-1
        self._sense_loss = torch.nn.NLLLoss(
            reduction="none")  # ,ignore_index=-1
        self.sparse_max = Sparsemax()
        initializer(self)
        self._pred_embedding = copy.deepcopy(
            base_model._pred_embedding
        )  #get a trainable copy of predicate sense embedding in any case
        for param in base_model.parameters():
            param.requires_grad = False
        for param in self._pred_embedding.parameters():
            assert param.requires_grad
        self.base_model = base_model
Example #13
    def __init__(self,
                 idiom_vector_path: str,
                 idiom_graph_path: str,
                 dropout: float,
                 vocab: Vocabulary,
                 content_embedder: TextFieldEmbedder,
                 neighbor_num: int = 7,
                 mode: List[str] = None) -> None:
        super().__init__(vocab)
        self.content_embedder = content_embedder

        idiom_list, idiom_vectors = [], []
        with open(idiom_vector_path) as fh:
            for line in fh:
                idiom_list.append(line.strip().split()[0])
                idiom_vectors.append(list(map(float,
                                              line.strip().split()[1:])))

        self.graph_embedder = GraphEmbedder(idiom_graph_path,
                                            neighbor_num=neighbor_num,
                                            drop_neighbor=False)

        embedding_dim = self.content_embedder.get_output_dim()
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=embedding_dim,
            # use the pretrained idiom vectors
            # weight=torch.FloatTensor(idiom_vectors)
        )

        self.dropout = nn.Dropout(dropout)
        self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)

        embedding_size = self.content_embedder.get_output_dim()

        self.neighbour_reasoner = StackedSelfAttentionEncoder(
            input_dim=embedding_size,
            hidden_dim=embedding_size,
            projection_dim=embedding_size,
            feedforward_hidden_dim=embedding_size,
            num_layers=1,
            num_attention_heads=2,
            use_positional_encoding=False)
        self.option_encoder = FirstVecEncoder(embedding_dim=embedding_size)

        self.option_reasoner = StackedSelfAttentionEncoder(
            input_dim=embedding_size,
            hidden_dim=embedding_size,
            projection_dim=embedding_size,
            feedforward_hidden_dim=embedding_size,
            num_layers=1,
            num_attention_heads=2,
            use_positional_encoding=False)

        if mode is None:
            mode = ['raw', 'ocn', 'nn']
        else:
            for item in mode:
                assert item in ['raw', 'ocn', 'nn'], f"{item} is invalid"
        self.mode = mode

        self.data_merger = FeedForward(
            input_dim=embedding_size * len(mode),
            num_layers=1,
            hidden_dims=embedding_size,
            activations=Activation.by_name('linear')(),
            dropout=0.1)

        self.loss = nn.CrossEntropyLoss()
        self.acc = CategoricalAccuracy()
Example #14
 def make_feedforward(input_dim):
     return FeedForward(input_dim=input_dim,
                        num_layers=feedforward_params["num_layers"],
                        hidden_dims=feedforward_params["hidden_dims"],
                        activations=torch.nn.ReLU(),
                        dropout=feedforward_params["dropout"])
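make_feedforward closes over a feedforward_params dict defined elsewhere in that file (not shown in the snippet). A hedged usage sketch with hypothetical values:

# Hypothetical values; in the source they come from the model configuration.
feedforward_params = {"num_layers": 2, "hidden_dims": 150, "dropout": 0.4}

mention_feedforward = make_feedforward(input_dim=1168)  # arbitrary example width
print(mention_feedforward.get_output_dim())             # -> 150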
Example #15
    torch.nn.GRU(2 * hid_dim, hid_dim, batch_first=True, bidirectional=True))
matcher_forward2 = BiMpmMatching(hid_dim, 20, is_forward=True)
matcher_backward2 = BiMpmMatching(hid_dim, 20, is_forward=False)

aggregator = PytorchSeq2VecWrapper(
    torch.nn.GRU(
        matcher_word.get_output_dim() + matcher_forward1.get_output_dim() +
        matcher_backward1.get_output_dim() +
        matcher_forward2.get_output_dim() + matcher_backward2.get_output_dim(),
        hid_dim,
        batch_first=True,
        bidirectional=True))

classifier = FeedForward(
    hid_dim * 2 * 2,
    3, [hid_dim * 2, hid_dim,
        vocab.get_vocab_size('labels')],
    Activation.by_name('leaky_relu')(),
    dropout=dropout)

model = BiMpm(vocab=vocab,
              text_field_embedder=embedder,
              matcher_word=matcher_word,
              encoder1=encoder1,
              matcher_forward1=matcher_forward1,
              matcher_backward1=matcher_backward1,
              encoder2=encoder2,
              matcher_forward2=matcher_forward2,
              matcher_backward2=matcher_backward2,
              aggregator=aggregator,
              classifier_feedforward=classifier,
              dropout=dropout)
Example #16
    def __init__(self, config: SpanProposalConfig, bert_dir: str = ""):
        super().__init__()

        self.config = config

        num_pos_labels = len(config.pos_tags)
        hidden_size = config.additional_layer_dim if config.additional_layer > 0 else config.pos_dim + config.bert_config.hidden_size

        self.bert = AutoModel.from_pretrained(
            pretrained_model_name_or_path=bert_dir, config=config.bert_config)

        if config.pos_dim > 0:
            self.pos_embedding = nn.Embedding(num_pos_labels, config.pos_dim)
            nn.init.xavier_uniform_(self.pos_embedding.weight)
            if (config.additional_layer
                    and config.additional_layer_type != "lstm"
                    and config.pos_dim + config.bert_config.hidden_size !=
                    hidden_size):
                self.fuse_layer = nn.Linear(
                    config.pos_dim + config.bert_config.hidden_size,
                    hidden_size)
                nn.init.xavier_uniform_(self.fuse_layer.weight)
                self.fuse_layer.bias.data.zero_()
            else:
                self.fuse_layer = None
        else:
            self.pos_embedding = None

        if config.additional_layer > 0:
            if config.additional_layer_type == "transformer":
                new_config = deepcopy(config.bert_config)
                new_config.hidden_size = hidden_size
                new_config.num_hidden_layers = config.additional_layer
                new_config.hidden_dropout_prob = new_config.attention_probs_dropout_prob = config.mrc_dropout
                # new_config.attention_probs_dropout_prob = config.biaf_dropout  # todo add to hparams and tune
                self.additional_encoder = BertEncoder(new_config)
                self.additional_encoder.apply(self._init_bert_weights)
            else:
                assert hidden_size % 2 == 0, "Bi-LSTM need an even hidden_size"
                self.additional_encoder = StackedBidirectionalLstmSeq2SeqEncoder(
                    input_size=config.pos_dim + config.bert_config.hidden_size,
                    hidden_size=hidden_size // 2,
                    num_layers=config.additional_layer,
                    recurrent_dropout_probability=config.mrc_dropout,
                    use_highway=True)

        else:
            self.additional_encoder = None

        self._dropout = InputVariationalDropout(config.mrc_dropout)

        self.subtree_start_feedforward = FeedForward(
            hidden_size, 1, config.arc_representation_dim,
            Activation.by_name("elu")())
        self.subtree_end_feedforward = deepcopy(self.subtree_start_feedforward)

        # todo: equivalent to self-attention?
        self.subtree_start_attention = BilinearMatrixAttention(
            config.arc_representation_dim,
            config.arc_representation_dim,
            use_input_biases=True)
        self.subtree_end_attention = deepcopy(self.subtree_start_attention)

        # init linear children
        for layer in self.children():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    layer.bias.data.zero_()
Example #17
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 encoder,
                 tag_representation_dim,
                 arc_representation_dim,
                 tag_feedforward=None,
                 arc_feedforward=None,
                 pos_tag_embedding=None,
                 use_mst_decoding_for_validation=True,
                 dropout=0.0,
                 input_dropout=0.0,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or\
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name(u"elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size(u"head_tags")

        self.head_tag_feedforward = tag_feedforward or\
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name(u"elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               u"text field embedding dim",
                               u"encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               u"tag representation dim",
                               u"tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               u"arc representation dim",
                               u"arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary(u"pos")
        punctuation_tag_indices = dict((tag, index)
                                       for tag, index in list(tags.items())
                                       if tag in POS_TO_IGNORE)
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            u"Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 lemmatize_helper: LemmatizeHelper,
                 task_config: TaskConfig,
                 morpho_vector_dim: int = 0,
                 gram_val_representation_dim: int = -1,
                 lemma_representation_dim: int = -1,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DependencyParser, self).__init__(vocab, regularizer)

        self.TopNCnt = 3

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.lemmatize_helper = lemmatize_helper
        self.task_config = task_config

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        assert self.task_config.params.get("use_pos_tag",
                                           False) == (self._pos_tag_embedding
                                                      is not None)

        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        if gram_val_representation_dim <= 0:
            self._gram_val_output = torch.nn.Linear(
                encoder_dim, self.vocab.get_vocab_size("grammar_value_tags"))
        else:
            self._gram_val_output = torch.nn.Sequential(
                Dropout(dropout),
                torch.nn.Linear(encoder_dim, gram_val_representation_dim),
                Dropout(dropout),
                torch.nn.Linear(
                    gram_val_representation_dim,
                    self.vocab.get_vocab_size("grammar_value_tags")))

        if lemma_representation_dim <= 0:
            self._lemma_output = torch.nn.Linear(encoder_dim,
                                                 len(lemmatize_helper))
        else:
            # Feed the grammar value prediction output into the lemmatizer input -- EXPERIMENTAL
            #actual_input_dim = encoder_dim
            actual_input_dim = encoder_dim + self.vocab.get_vocab_size(
                "grammar_value_tags")
            self._lemma_output = torch.nn.Sequential(
                Dropout(dropout),
                torch.nn.Linear(actual_input_dim, lemma_representation_dim),
                Dropout(dropout),
                torch.nn.Linear(lemma_representation_dim,
                                len(lemmatize_helper)))

        representation_dim = text_field_embedder.get_output_dim() + morpho_vector_dim
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info("HELLO FROM INIT")
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        self._gram_val_prediction_accuracy = CategoricalAccuracy()
        self._lemma_prediction_accuracy = CategoricalAccuracy()

        initializer(self)
Example #19
0
    def __init__(self,
        doc_definition,
        sent_definition,
        pretrained,
        num_workers,
        num_epochs,
        dropout_sent = 0.0,
        dropout_doc = 0.0,
        use_sent_objective = True,
        concat_sent_scores = True,
        dataset_class = DatasetBertTC,
        scorer_class = ScorerXray,
        grad_max_norm = 1.0,
        loss_reduction = 'sum',
        batch_size = 5,
        lr = 1e-5,
        lr_ratio = 1.0,
        attention_query_dim = 100,
        max_length = 50,
        max_sent_count = 50,
        linebreak_bound = True,
        keep_ws = False,
        project_sent = False,
        project_size = 200,
        optimizer_params = None,
        dataloader_params = None,
        hyperparams = None,
        dataset_params = None,


        ):

        super(ModelBertTC, self).__init__( \
            hyperparams = hyperparams,
            dataset_params = dataset_params,
            dataloader_params = dataloader_params,
            optimizer_params = optimizer_params,
            num_workers = num_workers,
            num_epochs = num_epochs,
            dataset_class = dataset_class,
            scorer_class = scorer_class
            )

        self.pretrained = pretrained
        self.use_sent_objective = use_sent_objective
        self.concat_sent_scores = concat_sent_scores
        self.grad_max_norm = grad_max_norm
        self.loss_reduction = loss_reduction

        self.doc_definition = doc_definition
        self.sent_definition = sent_definition
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.lr = lr
        self.lr_ratio = lr_ratio
        self.max_length = max_length
        self.max_sent_count = max_sent_count

        self.linebreak_bound = linebreak_bound
        self.keep_ws = keep_ws

        self.project_sent = project_sent
        self.project_size = project_size

        if self.concat_sent_scores:
            assert self.use_sent_objective
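            # Sentence-level scores can only be concatenated into the document
            # representation when the sentence objective is actually enabled.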

        self.bert = AutoModel.from_pretrained(self.pretrained)

        hidden_size = self.bert.config.hidden_size

        self.sent_attention = nn.ModuleDict(OrderedDict())
        self.doc_output_layers = nn.ModuleDict(OrderedDict())
        self.sent_ffnn = nn.ModuleDict(OrderedDict())
        self.sent_classifiers = nn.ModuleDict(OrderedDict())

        for k, label_set in doc_definition.items():


            self.sent_classifiers[k] = SentClassifiers( \
                                        input_dim = hidden_size,
                                        num_tags = 2,
                                        loss_reduction = self.loss_reduction,
                                        dropout = dropout_sent,
                                        sent_definition = sent_definition[k],
                                        )

            if self.concat_sent_scores:
                n = len(sent_definition[k]) * 2
            else:
                n = 0
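            # n = number of sentence-level label sets for task k times the 2
            # tag scores each produces; these scores are (presumably, in
            # forward()) concatenated onto the BERT hidden state.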

            if self.project_sent:
                self.sent_ffnn[k] = FeedForward( \
                        input_dim = hidden_size+n,
                        num_layers = 1,
                        hidden_dims = self.project_size,
                        activations = get_activation('tanh'),
                        dropout = 0)

                out_dim = self.project_size
            else:
                out_dim = hidden_size + n


            self.sent_attention[k] = Attention( \
                                    input_dim = out_dim,
                                    dropout = dropout_doc,
                                    use_ffnn = True,
                                    activation = 'tanh',
                                    query_dim = attention_query_dim)

            self.doc_output_layers[k] = nn.Linear(out_dim, len(label_set))
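            # For each document-level task k, this loop builds per-sentence
            # classifiers, an optional projection FFNN, an attention layer that
            # pools sentence vectors into a document vector, and a linear
            # output layer over that task's label set.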

        self.get_summary()
Example #20
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder_0: Seq2SeqEncoder,
                 encoder_1: Seq2SeqEncoder,
                 encoder_2: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 use_layer_normalization: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

        a = vocab.get_index_to_token_vocabulary(namespace='tokens')
        # glyph_config['idx2word'] = {k: v for k, v in a.items()}

        # self.glyph = GlyphEmbedding(glyph_config)
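        # Note: `a` above appears to be used only by the commented-out
        # GlyphEmbedding configuration and is otherwise unused here.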

        self.text_field_embedder = text_field_embedder

        self.encoder_0 = encoder_0
        self.encoder_1 = encoder_1
        self.encoder_2 = encoder_2

        encoder_dim = self.encoder_2.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    arc_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or \
                                        FeedForward(encoder_dim, 1,
                                                    tag_representation_dim,
                                                    Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        # self._dropout = Dropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, self.encoder_2.get_output_dim()]))

        self.use_layer_normalization = use_layer_normalization

        if use_layer_normalization:
            self.norm_input = torch.nn.LayerNorm(
                self.encoder_0.get_input_dim())
            self.norm_hidden = torch.nn.LayerNorm(
                self.encoder_0.get_output_dim())
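        # Assumption (forward() is not shown here): these LayerNorm modules are
        # presumably applied to the embedded input and to the hidden states
        # passed between the three stacked encoders.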

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        # check_dimensions_match(representation_dim, encoder.get_input_dim(),
        #                        "text field embedding dim", "encoder input dim")

        check_dimensions_match(tag_representation_dim,
                               self.head_tag_feedforward.get_output_dim(),
                               "tag representation dim",
                               "tag feedforward output dim")
        check_dimensions_match(arc_representation_dim,
                               self.head_arc_feedforward.get_output_dim(),
                               "arc representation dim",
                               "arc feedforward output dim")

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
Example #21
0
EMBEDDING_DIM = 10
HIDDEN_DIM = 10
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'), \
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})

# To classify each sentence, we need to convert the sequence of embeddings into a single vector.
# In AllenNLP, the model that handles this is referred to as a Seq2VecEncoder:
# a mapping from sequences to a single vector.
encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(EMBEDDING_DIM,
                  HIDDEN_DIM,
                  bidirectional=True,
                  batch_first=True))
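# Shape sketch (illustrative only): the wrapped bidirectional LSTM maps a
# (batch, num_tokens, EMBEDDING_DIM) tensor of embeddings to a single
# (batch, 2 * HIDDEN_DIM) vector per sentence.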
feedforward = FeedForward(input_dim=EMBEDDING_DIM,
                          num_layers=1,
                          hidden_dims=[256],
                          activations=[torch.nn.ReLU()])
integrator_encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
# model: parameters have to be modified according to the paper
model = BiattentiveClassificationNetwork(
    text_field_embedder=word_embeddings,
    encoder=encoder,
    vocab=vocab,
    embedding_dropout=0.0,
    pre_encode_feedforward=feedforward,
    integrator=integrator_encoder,
    integrator_dropout=0.0,
    output_layer=feedforward,
    elmo=None)  # not sure about the parameters
Example #22
0
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            encoder: Seq2SeqEncoder,
            arc_representation_dim: int,
            tag_representation_dim: int,
            rank: int,
            capsule_dim: int,
            iter_num: int,
            arc_feedforward: FeedForward = None,
            tag_feedforward: FeedForward = None,
            pos_tag_embedding: Embedding = None,
            #dep_tag_embedding: Embedding = None,
            predicate_embedding: Embedding = None,
            delta_type: str = "hinge_ce",
            subtract_gold: bool = False,
            dropout: float = 0.0,
            input_dropout: float = 0.0,
            edge_prediction_threshold: float = 0.5,
            gumbel_t: float = 1,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            double_loss: bool = True,
            base_average: bool = False,
            bilinear_matrix_capsule: bool = True,
            using_global: bool = False,
            passing_type: str = 'plain',
            global_node: bool = False,
            multi_both: bool = False) -> None:
        super(SRLGraphParserBase, self).__init__(vocab, regularizer)
        self.multi_both = multi_both
        self.capsule_dim = capsule_dim
        self.num_labels = num_labels = self.vocab.get_vocab_size("arc_types")
        # print("num_labels", num_labels)

        self.get_global_layer = Plain_Feedforward(
            (num_labels + 1) * capsule_dim, arc_representation_dim,
            Activation.by_name('relu')())
        self.bilinear_matrix_capsule_layer_for_global_node = BilinearMatrix(
            capsule_dim, capsule_dim)
        self.global_node = global_node

        self.using_global = using_global
        self.passing_type = passing_type

        self.iter_num = iter_num
        self.double_loss = double_loss
        self.base_average = base_average
        self.bilinear_matrix_capsule = bilinear_matrix_capsule

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.subtract_gold = subtract_gold
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    #   print ("predicates",self.vocab._index_to_token["predicates"])
    #   print ("arc_types",self.vocab._index_to_token["arc_types"])
        self.delta_type = delta_type

        self.gumbel_t = gumbel_t
        node_dim = predicate_embedding.get_output_dim()
        encoder_dim = encoder.get_output_dim()
        #self.arg_arc_feedforward = arc_feedforward or \
        #                           FeedForward(encoder_dim, 1,
        #                                       arc_representation_dim,
        #                                       Activation.by_name("elu")())
        #self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)

        #self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
        #arc_representation_dim,
        #label_dim=capsule_dim,
        #use_input_biases=True)

        self.arg_tag_feedforward = tag_feedforward or \
                                   FeedForward(encoder_dim, 1,
                                               tag_representation_dim,
                                               Activation.by_name("elu")())
        self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention_Lowrank(
            tag_representation_dim * 2,
            tag_representation_dim * 2,
            rank,
            label_dim=(num_labels + 1) * self.capsule_dim,
            use_input_biases=True)  #,activation=Activation.by_name("tanh")()
        if self.bilinear_matrix_capsule:
            self.bilinear_matrix_capsule_layer = BilinearMatrix(
                capsule_dim, capsule_dim)
        self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                                Activation.by_name("elu")())
        self._pos_tag_embedding = pos_tag_embedding or None
        #self._dep_tag_embedding = dep_tag_embedding or None
        self._pred_embedding = predicate_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        #   check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")

        self._labelled_f1 = IterativeLabeledF1Measure(
            negative_label=0,
            negative_pred=0,
            selected_metrics=["F", "l_F", "p_F", "u_F"])
        self._tag_loss = torch.nn.NLLLoss(reduction="none")  # ,ignore_index=-1
        self._sense_loss = torch.nn.NLLLoss(
            reduction="none")  # ,ignore_index=-1
        initializer(self)
Example #23
0
def _make_feedforward(input_dim, output_dim):
    return FeedForwardEncoder(
        FeedForward(
            input_dim=input_dim, num_layers=1, activations=torch.relu, hidden_dims=output_dim
        )
    )
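# Usage sketch (hypothetical dimensions, not from the original snippet): the
# returned FeedForwardEncoder applies the 1-layer FeedForward to every position
# of a sequence, e.g. mapping (batch, seq_len, 300) -> (batch, seq_len, 128)
# for _make_feedforward(input_dim=300, output_dim=128).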
Example #24
0
def train_only_swag():
    # load datasetreader
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory + "/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 100
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased",
                                          do_lowercase=False)
    swag_reader = SWAGDatasetReader(
        tokenizer=token_indexer.wordpiece_tokenizer,
        lazy=True,
        token_indexers=token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    swag_datasets = load_swag(swag_reader, directory)
    swag_vocab = Vocabulary()

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

    bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",
                                           top_layer_only=True,
                                           requires_grad=True)

    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                            allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()
    seq2vec = PytorchSeq2VecWrapper(
        torch.nn.LSTM(BERT_DIM,
                      HIDDEN_DIM,
                      batch_first=True,
                      bidirectional=True))
    mention_feedforward = FeedForward(input_dim=2336,
                                      num_layers=2,
                                      hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776,
                                         num_layers=2,
                                         hidden_dims=150,
                                         activations=torch.nn.ReLU())

    model = SWAGExampleModel(vocab=swag_vocab,
                             text_field_embedder=word_embedding,
                             phrase_encoder=seq2vec)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    USE_GPU = 1
    val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True)
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=swag_iterator,
        validation_iterator=swag_iterator,
        train_dataset=swag_datasets[0],
        validation_dataset=swag_datasets[1],
        validation_metric="+accuracy",
        cuda_device=0 if USE_GPU else -1,
        serialization_dir=directory +
        "saved_models/current_run_model_state_swag",
        num_epochs=epochs,
    )

    metrics = trainer.train()
    # save the model
    with open(directory + "saved_models/current_run_model_state", 'wb') as f:
        torch.save(model.state_dict(), f)
Example #25
0
    def __init__(self, \
            input_dim,
            query_dim = None,
            projection_dim = 100,
            doc_num_tags = None,
            use_ffnn = True,
            dropout_sent_classifier = 0.0,
            dropout_doc_classifier = 0.0,
            activation = 'tanh',
            loss_reduction = 'sum',
            use_sent_objective = False,
            concat_sent_scores = False,
            sent_definition = None,

            #use_ffnn = True,
            #ffnn_hidden_dim = 50,
            #ffnn_dropout = 0.0,
            #ffnn_activation_fn = 'tanh'
            ):

        super(HierarchicalClassifier, self).__init__()

        self.use_sent_objective = use_sent_objective
        self.concat_sent_scores = concat_sent_scores
        self.sent_definition = sent_definition
        #self.use_ffnn = use_ffnn

        if use_ffnn:
            assert query_dim is not None
            assert doc_num_tags is not None
        else:
            if query_dim is not None:
                logging.warning(f"Overriding query_dim to: {input_dim}")
            query_dim = input_dim

        self.loss_reduction = loss_reduction


        self.word_attention = Attention( \
                                input_dim = input_dim,
                                dropout = 0,
                                use_ffnn = use_ffnn,
                                query_dim = query_dim,
                                activation = activation,
                                )

        if self.use_sent_objective:
            self.sent_classifiers = SentClassifiers( \
                                input_dim = input_dim,
                                num_tags = 2,
                                loss_reduction = loss_reduction,
                                dropout = dropout_sent_classifier,
                                include_projection = True,
                                projection_dim = projection_dim,
                                sent_definition = sent_definition)

            input_dim += self.sent_classifiers.output_dim * int(
                self.concat_sent_scores)
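            # When concat_sent_scores is set, the per-sentence classifier
            # scores are concatenated onto the sentence representation, so the
            # document FFNN below receives the enlarged input_dim.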


        self.doc_ffnn = FeedForward( \
                input_dim = input_dim,
                num_layers = 1,
                hidden_dims = projection_dim,
                activations = get_activation('tanh'),
                dropout = dropout_doc_classifier)

        self.sent_attention = Attention( \
                                input_dim = projection_dim,
                                dropout = 0,
                                use_ffnn = use_ffnn,
                                query_dim = query_dim,
                                activation = activation,
                                )

        self.output_layer = nn.Linear(projection_dim, doc_num_tags)
Example #26
0
def multitask_learning():
    # load datasetreader
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory + "/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 10
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased",
                                          do_lowercase=False)
    conll_reader = ConllCorefBertReader(
        max_span_width=max_span_width,
        token_indexers={"tokens": token_indexer})
    swag_reader = SWAGDatasetReader(
        tokenizer=token_indexer.wordpiece_tokenizer,
        lazy=True,
        token_indexers=token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader,
                                                  directory)
    conll_vocab = Vocabulary()
    swag_vocab = Vocabulary()
    conll_iterator = BasicIterator(batch_size=batch_size)
    conll_iterator.index_with(conll_vocab)

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

    bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",
                                           top_layer_only=True,
                                           requires_grad=True)

    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                            allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()

    seq2seq = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(BERT_DIM,
                      HIDDEN_DIM,
                      batch_first=True,
                      bidirectional=True))
    seq2vec = PytorchSeq2VecWrapper(
        torch.nn.LSTM(BERT_DIM,
                      HIDDEN_DIM,
                      batch_first=True,
                      bidirectional=True))
    mention_feedforward = FeedForward(input_dim=2336,
                                      num_layers=2,
                                      hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776,
                                         num_layers=2,
                                         hidden_dims=150,
                                         activations=torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=conll_vocab,
                                 text_field_embedder=word_embedding,
                                 context_layer=seq2seq,
                                 mention_feedforward=mention_feedforward,
                                 antecedent_feedforward=antecedent_feedforward,
                                 feature_size=768,
                                 max_span_width=max_span_width,
                                 spans_per_word=0.4,
                                 max_antecedents=250,
                                 lexical_dropout=0.2)

    model2 = SWAGExampleModel(vocab=swag_vocab,
                              text_field_embedder=word_embedding,
                              phrase_encoder=seq2vec)
    optimizer1 = optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)

    swag_train_iterator = swag_iterator(swag_datasets[0],
                                        num_epochs=1,
                                        shuffle=True)
    conll_train_iterator = conll_iterator(conll_datasets[0],
                                          num_epochs=1,
                                          shuffle=True)
    swag_val_iterator = swag_iterator(swag_datasets[1],
                                      num_epochs=1,
                                      shuffle=True)
    conll_val_iterator = conll_iterator(conll_datasets[1],
                                        num_epochs=1,
                                        shuffle=True)
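    # task_infos bundles, per task, everything the MultiTaskTrainer below
    # consumes: the model, its optimizer, the iterator, the train/val datasets
    # and their sizes, the learning rate, and running loss/score entries.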
    task_infos = {"swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0, "iterator": swag_iterator, "train_data": swag_datasets[0], "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]), "num_val": len(swag_datasets[1]), "lr": lr, "score": {"accuracy":0.0}}, \
                    "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0, "val_data": conll_datasets[1], "train_data": conll_datasets[0], "optimizer": optimizer1, "num_train": len(conll_datasets[0]), "num_val": len(conll_datasets[1]),"lr": lr, "score": {"coref_prediction": 0.0, "coref_recall": 0.0, "coref_f1": 0.0,"mention_recall": 0.0}}}
    USE_GPU = 1
    trainer = MultiTaskTrainer(task_infos=task_infos,
                               num_epochs=epochs,
                               serialization_dir=directory +
                               "saved_models/multitask/")
    metrics = trainer.train()
Example #27
0
 def test_init_checks_activation_consistency(self):
     with pytest.raises(ConfigurationError):
         FeedForward(
             2, 4, 5,
             [Activation.by_name('relu')(),
              Activation.by_name('relu')()])
Example #28
0
def train_only_lee():
    # This is WORKING!
    # load datasetreader
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory + "/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 100
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased",
                                          do_lowercase=False)
    reader = ConllCorefBertReader(max_span_width=max_span_width,
                                  token_indexers={"tokens": token_indexer})

    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    processed_reader_dir = Path(directory + "processed/")

    train_ds = None
    if processed_reader_dir.is_dir():
        print("Loading indexed from checkpoints")
        train_path = Path(directory + "processed/train_d")
        if train_path.exists():
            train_ds = pickle.load(
                open(directory + "processed/conll/train_d", "rb"))
            val_ds = pickle.load(
                open(directory + "processed/conll/val_d", "rb"))
            test_ds = pickle.load(
                open(directory + "processed/conll/test_d", "rb"))
        else:
            print("checkpoints not found")
            train_ds, val_ds, test_ds = (
                reader.read(dataset_folder + fname) for fname in [
                    "train.english.v4_gold_conll", "dev.english.v4_gold_conll",
                    "test.english.v4_gold_conll"
                ])
            pickle.dump(train_ds, open(directory + "processed/train_d", "wb"))
            pickle.dump(val_ds, open(directory + "processed/val_d", "wb"))
            pickle.dump(test_ds, open(directory + "processed/test_d", "wb"))
            print("saved checkpoints")
    # restore checkpoint here

    #vocab = Vocabulary.from_instances(train_ds + val_ds)
    vocab = Vocabulary()
    iterator = BasicIterator(batch_size=batch_size)
    iterator.index_with(vocab)

    val_iterator = BasicIterator(batch_size=batch_size)
    val_iterator.index_with(vocab)
    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

    bert_embedder = PretrainedBertEmbedder(
        pretrained_model="bert-base-cased",
        top_layer_only=True,  # conserve memory
        requires_grad=True)
    # here, allow_unmatched_keys=True since we don't pass in offsets; we
    # allow for word embeddings of the BERT-tokenized text, not necessarily
    # the original tokens
    # see the documentation for offsets here for more info:
    # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/bert_token_embedder.py
    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                            allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()
    # at each batch, sample from the two, and load the LSTM
    shared_layer = torch.nn.LSTM(BERT_DIM,
                                 HIDDEN_DIM,
                                 batch_first=True,
                                 bidirectional=True)
    seq2seq = PytorchSeq2SeqWrapper(shared_layer)
    mention_feedforward = FeedForward(input_dim=2336,
                                      num_layers=2,
                                      hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776,
                                         num_layers=2,
                                         hidden_dims=150,
                                         activations=torch.nn.ReLU())

    model = CoreferenceResolver(vocab=vocab,
                                text_field_embedder=word_embedding,
                                context_layer=seq2seq,
                                mention_feedforward=mention_feedforward,
                                antecedent_feedforward=antecedent_feedforward,
                                feature_size=768,
                                max_span_width=max_span_width,
                                spans_per_word=0.4,
                                max_antecedents=250,
                                lexical_dropout=0.2)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # and then we can do the shared loss
    #
    # Get
    USE_GPU = 0
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        validation_iterator=val_iterator,
        train_dataset=train_ds,
        validation_dataset=val_ds,
        validation_metric="+coref_f1",
        cuda_device=0 if USE_GPU else -1,
        serialization_dir=directory + "saved_models/only_lee",
        num_epochs=epochs,
    )

    metrics = trainer.train()
    # save the model
    with open(directory + "saved_models/current_run_model_state", 'wb') as f:
        torch.save(model.state_dict(), f)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        use_mst_decoding_for_validation: bool = True,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim,
            Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim,
            Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                      tag_representation_dim,
                                                      num_labels)

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )

        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {
            tag: index
            for tag, index in tags.items() if tag in POS_TO_IGNORE
        }
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()
        initializer(self)
def train_valid_base_text_model(model_name):
    """Train and validate the base text model.

    :param model_name: the full model name to use
    :return: None
    """
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()}

    def tokenizer(x: str):
        return [
            w.text for w in SpacyWordSplitter(language='en_core_web_sm',
                                              pos_tags=False).split_words(x)
        ]

    reader = TextExpDataSetReader(token_indexers=token_indexer,
                                  tokenizer=tokenizer,
                                  add_numeric_data=False)
    train_instances = reader.read(train_data_file_path)
    validation_instances = reader.read(validation_data_file_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be 10 or 9, depending on the input,
    # and shuffle should be off so all the data of the same pair ends up in the same batch
    iterator = BasicIterator(
        batch_size=batch_size)  # , instances_per_epoch=10)
    #  sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                   'elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                  'elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    # TODO: check the output of this
    # elmo_embedder = Elmo(options_file, weight_file, num_output_representations=2)
    # word_embeddings = elmo_embedder
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    review_attention_layer = models.AttentionSoftMaxLayer(
        BilinearMatrixAttention(word_embeddings.get_output_dim(),
                                word_embeddings.get_output_dim()))
    seq_attention_layer = models.AttentionSoftMaxLayer(
        DotProductMatrixAttention())

    feed_forward = FeedForward(input_dim=batch_size,
                               num_layers=2,
                               hidden_dims=[batch_size, 1],
                               activations=ReLU(),
                               dropout=[0.2, 0.0])
    fc_review_rep = FeedForward(input_dim=124,
                                num_layers=1,
                                hidden_dims=[10],
                                activations=ReLU())

    criterion = nn.MSELoss()

    metrics_dict = {
        'mean_absolute_error': MeanAbsoluteError(),
    }

    model = models.BasicTextModel(
        word_embedding=word_embeddings,
        review_representation_layer=review_attention_layer,
        seq_representation_layer=seq_attention_layer,
        vocab=vocab,
        criterion=criterion,
        metrics_dict=metrics_dict,
        classifier_feedforward=feed_forward,
        fc_review_rep=fc_review_rep)

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 2

    run_log_directory = utils.set_folder(
        datetime.now().strftime(
            f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    if not os.path.exists(run_log_directory):
        os.makedirs(run_log_directory)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        print(f'{key}: {value}')