Example #1
    def _make_output(self, inputs, params):
        task = params.get('task', 'classification')
        task_type = params.get('task_type', 'multiclass')

        if task == 'classification':
            logits = tf.contrib.layers.fully_connected(inputs, self._num_classes, activation_fn=None, scope='logits')
            if task_type == 'multiclass':
                predictions = tf.cast(tf.argmax(logits, -1), tf.int32)
                output_score = tf.nn.softmax(logits, -1)
            elif task_type == 'multilabel':
                threshold = params.get('threshold', 0.5)
                output_score = tf.sigmoid(logits)
                predictions = tf.cast(tf.greater(output_score, threshold), tf.int32)
            elif task_type == 'topk':
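                # For the 'topk' task type the raw logits are returned as scores and every class
                # with a positive logit is predicted, matching the multilabel_categorical_crossentropy
                # loss used in _make_loss.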
                output_score = logits
                predictions = tf.cast(tf.greater(logits, 0), tf.int32)
            else:
                raise ConfigureError("Task type %s is not support for task %s. "
                                     "Only multiclass and multilabel is support for task %s" % (task_type, task, task))
        elif task == 'rank':
            logits = tf.contrib.layers.fully_connected(inputs, 1, activation_fn=None, scope='logits')
            predictions = logits
            output_score = logits
        else:
            raise ConfigureError(
                "Task %s is not support. Only task and classification tasks are supported" % task)

        output_dict = {'logits': logits, 'predictions': {'predictions': predictions, 'output_score': output_score}}
        output_score = tf.estimator.export.PredictOutput(output_score)
        output_predictions = tf.estimator.export.PredictOutput(predictions)
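        # Note: only the score output is registered in export_outputs for serving;
        # output_predictions is constructed but not exported here.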
        export_outputs = {"output_score": output_score}
        output_dict['export_outputs'] = export_outputs
        return output_dict
Example #2
    def load_from_files(self, directory):
        if not os.path.exists(directory):
            logger.warning("Vocabulary directory %s does not exist.", directory)
            return False
        namespaces_file = os.path.join(directory, NAMESPACE_PADDING_FILE)
        if not os.path.exists(namespaces_file):
            logger.warning("Vocabulary namespaces file %s does not exist", namespaces_file)
            return False

        vocab_filenames = [filename for filename in os.listdir(directory)
                            if filename.startswith(VOCAB_FILE[:6]) and filename.endswith(VOCAB_FILE[-4:])]
        if len(vocab_filenames) == 0:
            logger.warning("No vocabulary files found in directory %s.", directory)

        self._non_padded_namespaces = load_from_txt(namespaces_file)

        for vocab_filename in vocab_filenames:
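            # The namespace is whatever sits between the VOCAB_FILE prefix (first 6 chars)
            # and its 4-character extension in the filename.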
            namespace = vocab_filename[6:-4]
            vocab_namespace_file = os.path.join(directory, vocab_filename)
            self._namespace_to_path[namespace] = vocab_namespace_file
            vocab_namespace = load_from_txt(vocab_namespace_file)
            self._index_to_token[namespace] = dict((index, token) for index, token in enumerate(vocab_namespace))
            self._token_to_index[namespace] = dict((token, index) for index, token in enumerate(vocab_namespace))

        if self.valid():
            return True
        else:
            raise ConfigureError("Vocabulary valid error")
Example #3
    def _process(self, example):
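        # Build an Instance from a raw example dict: premise/hypothesis become TextFields,
        # and the label becomes a LabelField (or a MultiLabelField for list-valued labels).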
        #example['label'] = example['label'][0]
        fields: Dict[str, Field] = {}
        if 'premise' in example:
            tokenized_premise = self._tokenizer.tokenize(example['premise'])
            fields["premise"] = TextField(tokenized_premise,
                                          self._token_indexers,
                                          max_length=self._max_length)

        if 'hypothesis' in example:
            tokenized_hypothesis = self._tokenizer.tokenize(
                example['hypothesis'])
            fields["hypothesis"] = TextField(tokenized_hypothesis,
                                             self._token_indexers,
                                             max_length=self._max_length)
        if 'label' in example:
            if isinstance(example['label'], list):
                if self._num_label is None:
                    raise ConfigureError(
                        "the number of labels is not provided for multi-label classification."
                    )
                fields['label'] = MultiLabelField(example['label'],
                                                  num_label=self._num_label)
            else:
                fields['label'] = LabelField(example['label'])
        return Instance(fields)
Example #4
 def pop_choice(self, path, choice, default=None):
     value = self.pop(path, default)
     if value not in choice:
         raise ConfigureError(
             "value %s get by key %s is not in acceptable choices %s" %
             (value, path, str(choice)))
     return value
Example #5
    def init_from_params(cls, params, vocab):
        config_file = params.pop('config_file', None)
        if config_file is None:
            raise ConfigureError(
                "Please provide ELMo config file for ELMo embedding.")
        # weight_file = params.pop('weight_file', None)
        # if weight_file is None:
        #     logger.warning("The ELMo embedding is initialize randomly.")
        encoder_name = params.pop("encoder_name", "elmo")
        vocab_namespace = params.pop('namespace', 'elmo_characters')
        dropout_rate = params.pop_float('dropout_rate', 0.0)

        ckpt_to_initialize_from = params.pop('ckpt_to_initialize_from', None)
        weight_file = params.pop('weight_file', None)
        if ckpt_to_initialize_from is None and weight_file is None:
            logger.warning("The ELMo embedding is initialize randomly.")

        # tmp_dir = params.pop('tmp_dir', None)
        # if tmp_dir is None:
        #     if weight_file:
        #         tmp_dir = os.path.dirname(weight_file)
        #     else:
        #         tmp_dir = "./"

        params.assert_empty(cls.__name__)

        return cls(config_file=config_file,
                   ckpt_to_initialize_from=ckpt_to_initialize_from,
                   dropout_rate=dropout_rate,
                   encoder_name=encoder_name,
                   vocab_namespace=vocab_namespace,
                   weight_file=weight_file)
Example #6
    def init_from_params(cls, params, vocab):
        config_file = params.pop('config_file', None)
        if config_file is None:
            raise ConfigureError("Please provide bert config file for bert embedding.")
        old_vocab_file = params.pop('vocab_file', None)
        if old_vocab_file is None:
            logger.warning("The vocab file is not provided. We consider the embedding vocab is the same as the data "
                           "vocab acquiescently.")
        ckpt_to_initialize_from = params.pop('ckpt_to_initialize_from', None)
        if ckpt_to_initialize_from is None:
            logger.warning("The bert embedding is initialize randomly.")
        num_oov_buckets = params.pop_int("num_oov_buckets", 0)
        use_one_hot_embeddings = params.pop_bool("use_one_hot_embeddings", False)
        encoder_name = params.pop("encoder_name", "bert")
        vocab_namespace = params.pop("namespace", 'tokens')
        mask_namespace = params.pop("mask_namespace", None)
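        # Resolve the path and size of the data vocabulary for this namespace from the
        # project Vocabulary object; they are passed to the encoder together with the
        # original BERT vocab file.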
        new_vocab_file = vocab.get_vocab_path(vocab_namespace)
        new_vocab_size = vocab.get_vocab_size(vocab_namespace)
        projection_dim = params.pop_int("projection_dim", None)
        dropout_rate = params.pop_float("dropout_rate", 0.0)
        remove_bos_eos = params.pop_bool("remove_bos_eos", True)
        params.assert_empty(cls.__name__)

        return cls(config_file=config_file, ckpt_to_initialize_from=ckpt_to_initialize_from,
                   new_vocab_file=new_vocab_file, new_vocab_size=new_vocab_size, num_oov_buckets=num_oov_buckets,
                   old_vocab_file=old_vocab_file, vocab_namespace=vocab_namespace,
                   remove_bos_eos=remove_bos_eos,
                   mask_namespace=mask_namespace, projection_dim=projection_dim, dropout_rate=dropout_rate,
                   use_one_hot_embeddings=use_one_hot_embeddings, encoder_name=encoder_name)
Example #7
 def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str,
                                                                   int]]):
     if token.text is None:
         raise ConfigureError(
             'CharactersIndexer needs a tokenizer that retains text')
     for character in self._character_tokenizer.tokenize(token.text):
         # If `text_id` is set on the character token (e.g., if we're using byte encoding), we
         # will not be using the vocab for this character.
         if getattr(character, 'text_id', None) is None:
             counter[self._namespace][character.text] += 1
Example #8
    def _read(self, mode: str):
        filename = self.get_filename_by_mode(mode)
        if filename:
            file_path = os.path.join(self._data_path, filename)
            if file_path.lower().endswith("jsonl"):
                if self._field_mapping is None:
                    raise ConfigureError(
                        "field mapping is not provided for jsonl file.")
                with open(file_path, 'r') as json_file:
                    logger.info("Reading instances from jsonl dataset at: %s",
                                file_path)
                    for line in json_file:
                        fields = json.loads(line)
                        example = {}
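                        # field_mapping maps the canonical field names used by _process
                        # (e.g. premise/hypothesis/label) to the keys in the source JSON.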
                        for (field_tar,
                             field_src) in self._field_mapping.items():
                            example[field_tar] = fields[field_src]
                        yield self._process(example)

                        # example = {}
                        # example['premise'] = fields['answer']
                        # example['hypothesis'] = fields['question']
                        # example['label'] = fields['label']
                        # yield self._process(example)

            if file_path.lower().endswith("tsv"):
                if self._field_mapping is None:
                    raise ConfigureError(
                        "field mapping is not provided for tsv file.")
                with open(file_path, 'r') as csv_file:
                    logger.info("Reading instances from tsv dataset at: %s",
                                file_path)
                    for line in csv_file:
                        fields = line.strip().split("\t")
                        example = {}
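                        # For tsv files the mapped source field is a column index into the
                        # tab-separated line.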
                        for (field_tar,
                             field_src) in self._field_mapping.items():
                            example[field_tar] = fields[int(field_src)]
                        yield self._process(example)

        else:
            return None
Example #9
def takes_arg(obj, arg: str) -> bool:
    """
Checks whether the provided obj takes a certain arg.
If it's a class, we're really checking whether its constructor does.
If it's a function or method, we're checking the object itself.
Otherwise, we raise an error.
"""
    if inspect.isclass(obj):
        signature = inspect.signature(obj.__init__)
    elif inspect.ismethod(obj) or inspect.isfunction(obj):
        signature = inspect.signature(obj)
    else:
        raise ConfigureError(f"object {obj} is not callable")
    return arg in signature.parameters
Example #10
    def _make_loss(self, logits, labels, params):
        task = params.get('task', 'classification')
        task_type = params.get('task_type', 'multiclass')
        if task == 'classification':
            if task_type == 'multiclass':
                #loss = GHM_Loss().ghm_class_loss(logits=logits, targets=labels)
                loss = focal_loss(logits=logits, labels=labels)
                # loss = tf.reduce_mean(
                #     tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
            elif task_type == 'multilabel':
                loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
            elif task_type == 'topk':
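                # The 'topk' head in _make_output emits raw logits, so the corresponding
                # multilabel categorical cross-entropy is computed directly on logits here.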
                loss = multilabel_categorical_crossentropy(labels=labels, logits=logits)
            else:
                raise ConfigureError("Task type %s is not support for task %s. "
                                     "Only multiclass and multilabel is support for task %s" % (task_type, task, task))

        elif task == 'rank':
            loss = rank_hinge_loss(labels=labels, logits=logits, params=params)
        else:
            raise ConfigureError(
                "Task %s is not support. Only task and classification tasks are supported" % task)
        return loss
Example #11
def _read_pretrained_embeddings_text(pretrained_file, embedding_dim, vocab,
                                     vocab_namespace):
    vocab_tokens = vocab.get_vocab_tokens(vocab_namespace)
    vocab_size = vocab.get_vocab_size(vocab_namespace)
    embeddings = {}
    logger.info("Reading pretrained embeddings from: %s" % pretrained_file)
    with open(pretrained_file, 'r', encoding='utf-8') as embeddings_file:
        for line in tqdm.tqdm(embeddings_file):
            token = line.split(" ", 1)[0]
            if token in vocab_tokens:
                fields = line.rstrip().split(' ')
                if len(fields) - 1 != embedding_dim:
                    logger.warning(
                        "Found line with wrong number of dimensions (expected: %d; actual: %d): %s",
                        embedding_dim,
                        len(fields) - 1, line)
                    continue

                vector = np.asarray(fields[1:], dtype='float32')
                embeddings[token] = vector

    if not embeddings:
        raise ConfigureError(
            "The embedding_dim or vocabulary does not fit the pretrained embedding."
        )
    all_embeddings = np.asarray(list(embeddings.values()))
    embeddings_mean = float(np.mean(all_embeddings))
    embeddings_std = float(np.std(all_embeddings))
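    # Initialize the whole matrix from the empirical mean/std of the pretrained vectors,
    # then overwrite the rows of tokens that were found in the pretrained file.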
    embedding_matrix = np.random.normal(embeddings_mean, embeddings_std,
                                        (vocab_size, embedding_dim))
    embedding_matrix = embedding_matrix.astype(np.float32)
    num_tokens_found = 0
    index_to_tokens = vocab.get_vocab_index_to_token(vocab_namespace)
    for i in range(vocab_size):
        token = index_to_tokens[i]
        if token in embeddings:
            embedding_matrix[i] = embeddings[token]
            num_tokens_found += 1
        else:
            logger.debug(
                "Token %s was not found in the embedding file. Initialising randomly.",
                token)

    logger.info("Pretrained embeddings were found for %d out of %d tokens",
                num_tokens_found, vocab_size)
    return embedding_matrix
Example #12
def rank_hinge_loss(labels, logits, params):
    num_retrieval = params.get('num_retrieval', None)
    if num_retrieval is None:
        raise ConfigureError(
            "The parameter num_retrieval is not assigned or the dataset is not support rank loss."
        )
    margin = params.get('rank_loss_margin', 1.0)
    labels = tf.argmax(labels, axis=-1)
    labels = tf.reshape(labels, (-1, num_retrieval))
    logits = tf.reshape(logits, (-1, num_retrieval))
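    # Scores are grouped into blocks of num_retrieval candidates per query; y_pos/y_neg are
    # the mean scores of positive and negative candidates, and the hinge penalizes cases
    # where positives do not beat negatives by at least the margin.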
    label_mask = tf.cast(tf.sign(labels), tf.float32)
    label_count = tf.reduce_sum(label_mask, axis=-1)
    y_pos = tf.reduce_sum(label_mask * logits, axis=-1) / label_count
    y_neg = tf.reduce_sum(
        (1. - label_mask) * logits, axis=-1) / (num_retrieval - label_count)
    loss = tf.maximum(0., margin - y_pos + y_neg)
    loss = tf.reduce_mean(loss)
    return loss
Example #13
    def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary):
        # TODO(brendanr): Retain the token to index mappings in the vocabulary and remove this
        # pylint pragma. See:
        # https://github.com/allenai/allennlp/blob/master/allennlp/data/token_indexers/wordpiece_indexer.py#L113
        # pylint: disable=unused-argument

        texts = [token.text for token in tokens]
        texts = [ELMoCharacterMapper.bos_token
                 ] + texts + [ELMoCharacterMapper.eos_token]
        if any(text is None for text in texts):
            raise ConfigureError(
                'ELMoTokenCharactersIndexer needs a tokenizer '
                'that retains text')
        return {
            self._namespace: [
                np.array(ELMoCharacterMapper.convert_word_to_char_ids(text),
                         dtype=np.int64) for text in texts
            ]
        }
Example #14
    def init_from_params(cls, params, vocab):
        token_embedder_params = params.pop('encoders', None)

        if token_embedder_params is not None:
            token_embedders = [
                Encoder.init_from_params(subparams, vocab=vocab)
                for name, subparams in token_embedder_params.items()
            ]
            # if isinstance(token_embedder_params, Dict):
            #
            # else:
            #     token_embedders = [
            #         Encoder.init_from_params(subparams, vocab=vocab)
            #         for subparams in token_embedder_params
            #     ]
        else:
            raise ConfigureError("The parameters of embeddings is not provided.")

        params.assert_empty(cls.__name__)
        return cls(token_embedders)
Example #15
 def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary):
     indices = []
     for token in itertools.chain(self._start_tokens, tokens,
                                  self._end_tokens):
         token_indices = np.zeros(self._max_word_length, dtype=np.int64)
         if token.text is None:
             raise ConfigureError(
                 'TokenCharactersIndexer needs a tokenizer that retains text'
             )
         for character_idx, character in enumerate(
                 self._character_tokenizer.tokenize(token.text)):
             if character_idx >= self._max_word_length:
                 break
             else:
                 if getattr(character, 'text_id', None) is not None:
                     # `text_id` being set on the token means that we aren't using the vocab, we just
                     # use this id instead.
                     index = character.text_id
                 else:
                     index = vocabulary.get_token_index(
                         character.text, self._namespace)
                 token_indices[character_idx] = index
         indices.append(token_indices)
     return {self._namespace: indices}
Example #16
 def assert_empty(self, class_name):
     if self._params:
         raise ConfigureError(
             "Extra parameters are provided %s for class %s" %
             (str(self._params), class_name))
Example #17
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
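            # ELMo and BERT encoders add special tokens at the start/end of each sequence,
            # so strip them from the masks and shorten the sequence lengths by two.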
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            h_s, c1 = nn.lstm(premise_tokens,
                              self._hidden_dim,
                              seq_len=prem_seq_lengths,
                              name='premise')
            h_t, c2 = nn.lstm(hypothesis_tokens,
                              self._hidden_dim,
                              seq_len=hyp_seq_lengths,
                              name='hypothesis')

        lstm_m = MatchLSTMCell(self._hidden_dim, h_s, prem_mask)

        k_m, _ = tf.nn.dynamic_rnn(lstm_m,
                                   h_t,
                                   hyp_seq_lengths,
                                   dtype=tf.float32)

        k_valid = select(k_m, hyp_seq_lengths)
        output_dict = self._make_output(k_valid, params)

        if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
            if 'label/labels' not in features:
                raise ConfigureError(
                    "The input features should contain label with vocabulary namespace "
                    "labels int %s dataset." % mode)
            labels_embedding = features_embedding['label/labels']
            labels = features['label/labels']

            loss = self._make_loss(labels=labels_embedding,
                                   logits=output_dict['logits'],
                                   params=params)
            output_dict['loss'] = loss
            metrics = dict()
            metrics['accuracy'] = tf.metrics.accuracy(
                labels=labels, predictions=output_dict['predictions'])
            metrics['precision'] = tf.metrics.precision(
                labels=labels, predictions=output_dict['predictions'])
            metrics['recall'] = tf.metrics.recall(
                labels=labels, predictions=output_dict['predictions'])
            # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)
            output_dict['metrics'] = metrics
            # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
            #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
        return output_dict
Example #18
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)

            premise_outs, c1 = nn.bi_lstm(premise_tokens,
                                          self._hidden_dim,
                                          seq_len=prem_seq_lengths,
                                          name='premise')

            premise_bi = tf.concat(premise_outs, axis=2)

            premise_bi = premise_bi * prem_mask

            eps = 1e-11
            ### Mean pooling
            premise_sum = tf.reduce_sum(premise_bi, 1)
            premise_ave = tf.div(
                premise_sum,
                tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1) +
                eps)

            # MLP layer
            h_mlp = tf.contrib.layers.fully_connected(premise_ave,
                                                      self._hidden_dim,
                                                      scope='fc1')
            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            output_dict = self._make_output(h_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['map'] = tf.metrics.average_precision_at_k(
                    labels=tf.cast(labels, tf.int64),
                    predictions=output_dict['logits'],
                    k=2)
                metrics['precision_1'] = tf.metrics.precision_at_k(
                    labels=tf.cast(labels, tf.int64),
                    predictions=output_dict['logits'],
                    k=1,
                    class_id=1)

                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
Example #19
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            # prem_mask = tf.expand_dims(prem_mask, -1)
            # hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            with tf.variable_scope("Attend"):
                F_a_bar = self._feedForwardBlock(premise_tokens,
                                                 self._hidden_dim,
                                                 'F',
                                                 is_training=is_training)
                F_b_bar = self._feedForwardBlock(hypothesis_tokens,
                                                 self._hidden_dim,
                                                 'F',
                                                 isReuse=True,
                                                 is_training=is_training)

                # e_i,j = F'(a_hat, b_hat) = F(a_hat).T * F(b_hat) (1)
                #alignment_attention = Attention(self.hidden_size, self.hidden_size)
                #alpha = alignment_attention(F_b_bar, F_a_bar, keys_mask=self.query_mask)
                #beta = alignment_attention(F_a_bar, F_b_bar, keys_mask=self.doc_mask)
                alpha, beta = nn.bi_uni_attention(F_a_bar,
                                                  F_b_bar,
                                                  query_len=prem_seq_lengths,
                                                  key_len=hyp_seq_lengths)

            with tf.variable_scope("Compare"):
                a_beta = tf.concat([premise_tokens, alpha], axis=2)
                b_alpha = tf.concat([hypothesis_tokens, beta], axis=2)

                # v_1,i = G([a_bar_i, beta_i])
                # v_2,j = G([b_bar_j, alpha_j]) (3)
                v_1 = self._feedForwardBlock(a_beta,
                                             self._hidden_dim,
                                             'G',
                                             is_training=is_training)
                v_2 = self._feedForwardBlock(b_alpha,
                                             self._hidden_dim,
                                             'G',
                                             isReuse=True,
                                             is_training=is_training)

            with tf.variable_scope("Aggregate"):
                # v1 = \sum_{i=1}^l_a v_{1,i}
                # v2 = \sum_{j=1}^l_b v_{2,j} (4)
                v1_sum = tf.reduce_sum(v_1, axis=1)
                v2_sum = tf.reduce_sum(v_2, axis=1)

                # y_hat = H([v1, v2]) (5)
                v = tf.concat([v1_sum, v2_sum], axis=1)

                ff_outputs = self._feedForwardBlock(v,
                                                    self._hidden_dim,
                                                    'H',
                                                    is_training=is_training)

                output_dict = self._make_output(ff_outputs, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                #metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [tf.shape(hypothesis_tokens), tf.shape(premise_tokens),
                #                          tf.shape(alpha), tf.shape(beta)]
            return output_dict
Example #20
    def forward(self, features, labels, mode, params):
        eps = 1e-12
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            s = self._max_length
            d0 = premise_tokens.get_shape()[2]

            # zero padding to inputs for wide convolution

            def pad_for_wide_conv(x):
                return tf.pad(
                    x,
                    np.array([[0, 0], [0, 0],
                              [self._kernel_size - 1, self._kernel_size - 1],
                              [0, 0]]),
                    "CONSTANT",
                    name="pad_wide_conv")

            def cos_sim(v1, v2):
                norm1 = tf.sqrt(tf.reduce_sum(tf.square(v1), axis=1))
                norm2 = tf.sqrt(tf.reduce_sum(tf.square(v2), axis=1))
                dot_products = tf.reduce_sum(v1 * v2, axis=1, name="cos_sim")

                return dot_products / (norm1 * norm2 + eps)

            def make_attention_mat(x1, x2):
                # x1, x2 = [batch, height, width, 1] = [batch, d, s, 1]
                # x2 => [batch, height, 1, width]
                # [batch, width, wdith] = [batch, s, s]
                euclidean = tf.sqrt(
                    tf.reduce_sum(tf.square(x1 - tf.matrix_transpose(x2)),
                                  axis=1) + eps)
                return 1.0 / (1.0 + euclidean)

            def convolution(name_scope, x, d, reuse):
                with tf.name_scope(name_scope + "-conv"):
                    with tf.variable_scope("conv") as scope:
                        conv = tf.contrib.layers.conv2d(
                            inputs=x,
                            num_outputs=self._hidden_dim,
                            kernel_size=(d, self._kernel_size),
                            stride=1,
                            padding="VALID",
                            activation_fn=tf.nn.tanh,
                            weights_initializer=tf.contrib.layers.
                            xavier_initializer_conv2d(),
                            #weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg),
                            biases_initializer=tf.constant_initializer(1e-04),
                            reuse=reuse,
                            trainable=True,
                            scope=scope)
                        # Weight: [filter_height, filter_width, in_channels, out_channels]
                        # output: [batch, 1, input_width+filter_Width-1, out_channels] == [batch, 1, s+w-1, di]

                        # [batch, di, s+w-1, 1]
                        conv_trans = tf.transpose(conv, [0, 3, 2, 1],
                                                  name="conv_trans")
                        return conv_trans

            def w_pool(variable_scope, x, attention):
                # x: [batch, di, s+w-1, 1]
                # attention: [batch, s+w-1]
                with tf.variable_scope(variable_scope + "-w_pool"):
                    if self._model_type == "ABCNN2" or self._model_type == "ABCNN3":
                        pools = []
                        # [batch, s+w-1] => [batch, 1, s+w-1, 1]
                        attention = tf.transpose(
                            tf.expand_dims(tf.expand_dims(attention, -1), -1),
                            [0, 2, 1, 3])

                        for i in range(s):
                            # [batch, di, w, 1], [batch, 1, w, 1] => [batch, di, 1, 1]
                            pools.append(
                                tf.reduce_sum(
                                    x[:, :, i:i + self._kernel_size, :] *
                                    attention[:, :,
                                              i:i + self._kernel_size, :],
                                    axis=2,
                                    keep_dims=True))

                        # [batch, di, s, 1]
                        w_ap = tf.concat(pools, axis=2, name="w_ap")
                    else:
                        w_ap = tf.layers.average_pooling2d(
                            inputs=x,
                            # (pool_height, pool_width)
                            pool_size=(1, self._kernel_size),
                            strides=1,
                            padding="VALID",
                            name="w_ap")
                        # [batch, di, s, 1]

                    return w_ap

            def all_pool(variable_scope, x):
                with tf.variable_scope(variable_scope + "-all_pool"):
                    if variable_scope.startswith("input"):
                        pool_width = s
                        d = d0
                    else:
                        pool_width = s + self._kernel_size - 1
                        d = self._hidden_dim

                    all_ap = tf.layers.average_pooling2d(
                        inputs=x,
                        # (pool_height, pool_width)
                        pool_size=(1, pool_width),
                        strides=1,
                        padding="VALID",
                        name="all_ap")
                    # [batch, di, 1, 1]

                    # [batch, di]
                    all_ap_reshaped = tf.reshape(all_ap, [-1, d])
                    # all_ap_reshaped = tf.squeeze(all_ap, [2, 3])

                    return all_ap_reshaped

            def CNN_layer(variable_scope, x1, x2, d):
                # x1, x2 = [batch, d, s, 1]
                with tf.variable_scope(variable_scope):
                    if self._model_type == "ABCNN1" or self._model_type == "ABCNN3":
                        with tf.name_scope("att_mat"):
                            aW = tf.get_variable(
                                name="aW",
                                shape=(s, d),
                                initializer=tf.contrib.layers.
                                xavier_initializer(),
                                #regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg)
                            )

                            # [batch, s, s]
                            att_mat = make_attention_mat(x1, x2)

                            # [batch, s, s] * [s,d] => [batch, s, d]
                            # matrix transpose => [batch, d, s]
                            # expand dims => [batch, d, s, 1]
                            x1_a = tf.expand_dims(
                                tf.matrix_transpose(
                                    tf.einsum("ijk,kl->ijl", att_mat, aW)), -1)
                            x2_a = tf.expand_dims(
                                tf.matrix_transpose(
                                    tf.einsum("ijk,kl->ijl",
                                              tf.matrix_transpose(att_mat),
                                              aW)), -1)

                            # [batch, d, s, 2]
                            x1 = tf.concat([x1, x1_a], axis=3)
                            x2 = tf.concat([x2, x2_a], axis=3)

                    left_conv = convolution(name_scope="left",
                                            x=pad_for_wide_conv(x1),
                                            d=d,
                                            reuse=False)
                    right_conv = convolution(name_scope="right",
                                             x=pad_for_wide_conv(x2),
                                             d=d,
                                             reuse=True)

                    left_attention, right_attention = None, None

                    if self._model_type == "ABCNN2" or self._model_type == "ABCNN3":
                        # [batch, s+w-1, s+w-1]
                        att_mat = make_attention_mat(left_conv, right_conv)
                        # [batch, s+w-1], [batch, s+w-1]
                        left_attention, right_attention = tf.reduce_sum(
                            att_mat, axis=2), tf.reduce_sum(att_mat, axis=1)

                    left_wp = w_pool(variable_scope="left",
                                     x=left_conv,
                                     attention=left_attention)
                    left_ap = all_pool(variable_scope="left", x=left_conv)
                    right_wp = w_pool(variable_scope="right",
                                      x=right_conv,
                                      attention=right_attention)
                    right_ap = all_pool(variable_scope="right", x=right_conv)

                    return left_wp, left_ap, right_wp, right_ap

            x1_expanded = tf.expand_dims(
                tf.transpose(premise_tokens, [0, 2, 1]), -1)
            x2_expanded = tf.expand_dims(
                tf.transpose(hypothesis_tokens, [0, 2, 1]), -1)

            LO_0 = all_pool(variable_scope="input-left", x=x1_expanded)
            RO_0 = all_pool(variable_scope="input-right", x=x2_expanded)

            LI_1, LO_1, RI_1, RO_1 = CNN_layer(variable_scope="CNN-1",
                                               x1=x1_expanded,
                                               x2=x2_expanded,
                                               d=d0)
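            # The cosine similarities of the all-pooled left/right representations from the
            # input layer and each CNN layer are the features fed to the output classifier.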
            sims = [cos_sim(LO_0, RO_0), cos_sim(LO_1, RO_1)]

            #if self._num_layers > 1:
            for i in range(1, self._num_layers):
                _, LO_2, _, RO_2 = CNN_layer(variable_scope="CNN-2",
                                             x1=LI_1,
                                             x2=RI_1,
                                             d=self._hidden_dim)
                # self.test = LO_2
                # self.test2 = RO_2
                sims.append(cos_sim(LO_2, RO_2))

            with tf.variable_scope("output-layer"):
                output_features = tf.concat([tf.stack(sims, axis=1)],
                                            axis=1,
                                            name="output_features")

                output_dict = self._make_output(output_features, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['auc'] = tf.metrics.auc(
                    labels=labels, predictions=output_dict['predictions'])
                output_dict['metrics'] = metrics
            return output_dict
Example #21
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            #########Word Embedding####################
            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get(
                    'hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters',
                            None) is not None or isinstance(
                                self._embedding_mapping.get_encoder('tokens'),
                                Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_ins = []
            hypothesis_ins = []

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get(
                    'hypothesis/elmo_characters', None)

            premise_ins.append(premise_tokens)
            hypothesis_ins.append(hypothesis_tokens)

            premise_chars = features_embedding.get('premise/chars', None)
            hypothesis_chars = features_embedding.get('hypothesis/chars', None)

            if premise_chars is not None and hypothesis_chars is not None:
                with tf.variable_scope("conv") as scope:
                    conv_pre = nn.multi_conv1d_max(
                        premise_chars,
                        self._char_filter_size,
                        self._char_filter_channel_dims,
                        "VALID",
                        is_training,
                        self._dropout_rate,
                        scope='conv')
                    scope.reuse_variables()
                    conv_hyp = nn.multi_conv1d_max(
                        hypothesis_chars,
                        self._char_filter_size,
                        self._char_filter_channel_dims,
                        "VALID",
                        is_training,
                        self._dropout_rate,
                        scope='conv')
                    # conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size])
                    # conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size])

                    premise_ins.append(conv_pre)
                    hypothesis_ins.append(conv_hyp)

            premise_pos = features_embedding.get('premise/pos_tags', None)
            hypothesis_pos = features_embedding.get('hypothesis/pos_tags',
                                                    None)

            if premise_pos is not None and hypothesis_pos is not None:
                premise_ins.append(premise_pos)
                hypothesis_ins.append(hypothesis_pos)

            premise_exact_match = features.get('premise/exact_match_labels',
                                               None)
            hypothesis_exact_match = features.get(
                'hypothesis/exact_match_labels', None)

            if premise_exact_match is not None and hypothesis_exact_match is not None:
                premise_ins.append(
                    tf.expand_dims(tf.cast(premise_exact_match, tf.float32),
                                   -1))
                hypothesis_ins.append(
                    tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32),
                                   -1))

            premise_in = tf.concat(premise_ins, axis=2)
            hypothesis_in = tf.concat(hypothesis_ins, axis=2)

            premise_in = nn.highway_network(premise_in,
                                            2,
                                            output_size=self._hidden_dim,
                                            dropout_rate=self._dropout_rate,
                                            is_trainging=is_training,
                                            scope="premise_highway")
            hypothesis_in = nn.highway_network(hypothesis_in,
                                               2,
                                               output_size=self._hidden_dim,
                                               dropout_rate=self._dropout_rate,
                                               is_trainging=is_training,
                                               scope="hypothesis_highway")

            ########Attention Stack-GRU################
            def gru_network(input, input_len, name="gru_network"):
                with tf.variable_scope(name):
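                    # Stack GRU layers, concatenating each layer's output onto its input
                    # so later layers see all earlier representations.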
                    gru_input = input
                    for i in range(self._num_rnn_layer):
                        with tf.variable_scope("layer_%s" % i):
                            seq, c1 = nn.gru(gru_input,
                                             self._hidden_dim,
                                             seq_len=input_len,
                                             initializer=self._initializer)
                            gru_input = tf.concat([gru_input, seq], axis=2)
                return gru_input

            premise_gru = gru_network(premise_in,
                                      prem_seq_lengths,
                                      name='premise_gru_network')
            hypothesis_gru = gru_network(hypothesis_in,
                                         hyp_seq_lengths,
                                         name='hypothesis_gru_network')

            premise_gru = premise_gru * prem_mask
            hypothesis_gru = hypothesis_gru * hyp_mask
            #########
            premise_att = nn.attention_pool(premise_gru,
                                            self._hidden_dim,
                                            seq_len=prem_seq_lengths,
                                            initializer=self._initializer,
                                            name='premise_attention_pool')
            hypothesis_att = nn.attention_pool(
                hypothesis_gru,
                self._hidden_dim,
                seq_len=hyp_seq_lengths,
                initializer=self._initializer,
                name='hypothesis_attention_pool')

            ############Dynamic Re-read Mechanism################

            def dynamic_reread(h_seq_a,
                               h_a,
                               h_b,
                               h_a_len,
                               name="dymanic_reread"):
                with tf.variable_scope(name):
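                    # Re-read mechanism: repeatedly attend over the token sequence h_seq_a,
                    # conditioned on the other sentence's summary h_b, and update the sentence
                    # vector h_a_pre with a GRU cell at each step.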
                    h_a_pre = h_a
                    # h_a_pre = nn.highway_layer(h_a, self._hidden_dim, initializer=self._initializer,
                    #                            scope="h_a_pre_highway")
                    # h_seq_a = nn.highway_layer(h_seq_a, self._hidden_dim, initializer=self._initializer,
                    #                            scope="h_seq_a_highway")
                    # h_b = nn.highway_layer(h_b, self._hidden_dim, initializer=self._initializer,
                    #                        scope="h_b_highway")
                    #####
                    w_d = tf.get_variable(
                        "w_d_weights",
                        (h_seq_a.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    u_d = tf.get_variable(
                        "u_d_weights",
                        (h_a_pre.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    m_d = tf.get_variable(
                        "m_d_weights",
                        (h_b.shape[-1].value, h_a_pre.shape[-1].value),
                        initializer=self._initializer)
                    omega_d = tf.get_variable("omega_d_weights",
                                              (h_a_pre.shape[-1].value, 1),
                                              initializer=self._initializer)
                    ##########
                    m_d_h_b = tf.tensordot(h_b, m_d, axes=[-1, 0])
                    h_seq_a_w_d = tf.tensordot(h_seq_a, w_d, axes=[-1, 0])

                    if h_a_len is not None:
                        mask = tf.expand_dims(tf.sequence_mask(
                            h_a_len, tf.shape(h_seq_a)[1], dtype=tf.float32),
                                              axis=2)
                    else:
                        mask = None
                    gru_cell = tf.nn.rnn_cell.GRUCell(
                        h_a_pre.shape[-1].value,
                        kernel_initializer=self._initializer)

                    for i in range(self._reread_length):
                        u_d_h_a_pre = tf.tensordot(h_a_pre, u_d, axes=[-1, 0])
                        m_a = tf.nn.tanh(
                            h_seq_a_w_d +
                            tf.expand_dims(m_d_h_b + u_d_h_a_pre, 1))
                        m_a = tf.tensordot(m_a, omega_d, axes=[-1, 0])
                        if mask is not None:
                            m_a = m_a + (1. - mask) * tf.float32.min
                        alpha = tf.nn.softmax(self._beta * m_a, axis=1)
                        alpha = tf.reduce_sum(alpha * h_seq_a, axis=1)
                        gru_output, gru_state = gru_cell(alpha, h_a_pre)
                        h_a_pre = gru_state
                    return gru_output

            premise_v = dynamic_reread(premise_gru,
                                       premise_att,
                                       hypothesis_att,
                                       prem_seq_lengths,
                                       name='premise_dynamic_reread')
            hypothesis_v = dynamic_reread(hypothesis_gru,
                                          hypothesis_att,
                                          premise_att,
                                          hyp_seq_lengths,
                                          name='hypothesis_dynamic_reread')

            ########label prediction##############

            h = tf.concat([
                premise_att, hypothesis_att, hypothesis_att * premise_att,
                hypothesis_att - premise_att
            ],
                          axis=-1)
            v = tf.concat([
                premise_v, hypothesis_v, hypothesis_v * premise_v,
                hypothesis_v - premise_v
            ],
                          axis=-1)

            # h MLP layer
            h_mlp = tf.layers.dense(h,
                                    self._hidden_dim,
                                    activation=tf.nn.relu,
                                    kernel_initializer=self._initializer,
                                    name='h_fc1')
            # Dropout applied to classifier
            h_drop = tf.layers.dropout(h_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            h_logits = tf.layers.dense(h_drop,
                                       self._num_classes,
                                       activation=None,
                                       kernel_initializer=self._initializer,
                                       name='h_logits')

            p_h = tf.nn.softmax(h_logits)

            # # MLP layer
            v_mlp = tf.layers.dense(v,
                                    self._hidden_dim,
                                    activation=tf.nn.relu,
                                    kernel_initializer=self._initializer,
                                    name='v_fc1')
            # Dropout applied to classifier
            v_drop = tf.layers.dropout(v_mlp,
                                       self._dropout_rate,
                                       training=is_training)
            # Get prediction
            v_logits = tf.layers.dense(v_drop,
                                       self._num_classes,
                                       activation=None,
                                       kernel_initializer=self._initializer,
                                       name='v_logits')

            p_v = tf.nn.softmax(v_logits)
            ####
            alpha_h = tf.layers.dense(h,
                                      1,
                                      activation=tf.nn.sigmoid,
                                      kernel_initializer=self._initializer,
                                      bias_initializer=tf.zeros_initializer())
            alpha_v = tf.layers.dense(v,
                                      1,
                                      activation=tf.nn.sigmoid,
                                      kernel_initializer=self._initializer,
                                      bias_initializer=tf.zeros_initializer())
            # # h MLP layer
            fuse_mlp = tf.layers.dense(alpha_h * h + alpha_v * v,
                                       self._hidden_dim,
                                       activation=tf.nn.relu,
                                       kernel_initializer=self._initializer,
                                       name='fuse_fc1')
            # Dropout applied to classifier
            fuse_drop = tf.layers.dropout(fuse_mlp,
                                          self._dropout_rate,
                                          training=is_training)
            # Get prediction
            output_dict = self._make_output(fuse_drop, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                h_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=labels_embedding, logits=h_logits))
                v_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=labels_embedding, logits=v_logits))
                fuse_loss = self._make_loss(labels=labels_embedding,
                                            logits=output_dict['logits'],
                                            params=params)

                output_dict['loss'] = v_loss + h_loss + fuse_loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
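The dynamic re-read mechanism above recomputes an attention distribution over one sentence at every iteration, conditioned on the pooled vector of the other sentence and on the previous GRU state. The following is a minimal NumPy sketch of a single re-read step under assumed shapes; the beta temperature, the weight names and the omitted GRU update are placeholders, not the exact nn helpers used in the example.

import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def reread_step(h_seq_a, h_a_pre, h_b, w_d, u_d, m_d, omega_d, mask, beta=1.0):
    # h_seq_a: [batch, len, d]  encoded sequence being re-read
    # h_a_pre: [batch, k]       previous re-read state
    # h_b:     [batch, m]       pooled vector of the other sentence
    # mask:    [batch, len, 1]  1.0 for real tokens, 0.0 for padding
    m_a = np.tanh(h_seq_a @ w_d + (h_b @ m_d + h_a_pre @ u_d)[:, None, :])
    scores = m_a @ omega_d                                    # [batch, len, 1]
    scores = scores + (1.0 - mask) * np.finfo(np.float32).min
    alpha = softmax(beta * scores, axis=1)
    context = (alpha * h_seq_a).sum(axis=1)                   # [batch, d]
    # a full implementation would feed `context` into a GRU cell whose
    # state is h_a_pre and loop for reread_length iterations
    return context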
Example #22
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

            # 2.Input Encoder
            # 2.1 Highway Encoder
            query_emb = premise_tokens
            doc_emb = hypothesis_tokens
            query_len = prem_seq_lengths
            doc_len = hyp_seq_lengths
            query_mask = prem_mask
            doc_mask = hyp_mask
            project_dim = premise_tokens.shape[-1].value
            query_length = tf.shape(premise_tokens)[1]
            doc_length = tf.shape(hypothesis_tokens)[1]

            query_output = nn.highway_network(query_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training,
                                              scope="query_highway")
            doc_output = nn.highway_network(doc_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training,
                                            scope="doc_highway")

            # # 2.2 Co-Attention
            M = tf.Variable(tf.random_normal([project_dim, project_dim], stddev=0.1))
            tmp = tf.einsum("ijk,kl->ijl", query_output, M)
            S = tf.matmul(tmp, doc_output, transpose_b=True)  # [batch, q, d]
            S_mask = tf.matmul(query_mask, doc_mask, transpose_b=True)
            S_mean = S * S_mask #
            S_align_max = S + (1. - S_mask) * tf.float32.min

            # 2.2.1 Extractive Pooling
            # Max Pooling
            query_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=2, keepdims=True), axis=1)
            query_maxpooling = tf.reduce_sum(query_score * query_output, axis=1) # [batch, r]

            doc_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=1, keepdims=True), axis=2)
            doc_maxpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1) # [batch, r]

            # Mean Pooling
            query_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=2, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(doc_len, tf.float32)+self._eps, -1), -1)), axis=1)
            query_meanpooling = tf.reduce_sum(query_score * query_output, axis=1)  # [batch, r]
            doc_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=1, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(query_len, tf.float32)+self._eps, -1), -1)), axis=2)
            doc_meanpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1)  # [batch, r]

            # 2.2.2 Alignment Pooling
            query_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=2), doc_output)  # [batch, q, r]
            doc_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=1), query_output, transpose_a=True)  # [batch, d, r]

            # 2.2.3 Intra Attention
            query_selfattn = nn.self_attention(query_output, query_len)
            doc_selfattn = nn.self_attention(doc_output, doc_len)

            # 2.3 Multi-Cast Attention
            query_maxpooling = tf.tile(tf.expand_dims(query_maxpooling, axis=1), [1, query_length, 1])
            query_meanpooling = tf.tile(tf.expand_dims(query_meanpooling, axis=1), [1, query_length, 1])
            doc_maxpooling = tf.tile(tf.expand_dims(doc_maxpooling, axis=1), [1, doc_length, 1])
            doc_meanpooling = tf.tile(tf.expand_dims(doc_meanpooling, axis=1), [1, doc_length, 1])

            query_max_fc, query_max_fm, query_max_fs = self.cast_attention(query_maxpooling, query_emb, self.nn_fc, name="query_max_pooling")
            query_mean_fc, query_mean_fm, query_mean_fs = self.cast_attention(query_meanpooling, query_emb, self.nn_fc, name="query_mean_pooling")
            query_align_fcm, query_align_fm, query_align_fs = self.cast_attention(query_alignment, query_emb, self.nn_fc, name="query_align_pooling")
            query_selfattn_fc, query_selfattn_fm, query_selfattn_fs = self.cast_attention(query_selfattn, query_emb, self.nn_fc, name="query_self_pooling")

            doc_max_fc, doc_max_fm, doc_max_fs = self.cast_attention(doc_maxpooling, doc_emb, self.nn_fc, name="doc_max_pooling")
            doc_mean_fc, doc_mean_fm, doc_mean_fs = self.cast_attention(doc_meanpooling, doc_emb, self.nn_fc, name="doc_mean_pooling")
            doc_align_fcm, doc_align_fm, doc_align_fs = self.cast_attention(doc_alignment, doc_emb, self.nn_fc, name="doc_align_pooling")
            doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs = self.cast_attention(doc_selfattn, doc_emb, self.nn_fc, name="doc_self_pooling")

            query_cast = tf.concat(
                [query_max_fc, query_max_fm, query_max_fs, query_mean_fc, query_mean_fm, query_mean_fs, query_align_fcm,
                 query_align_fm, query_align_fs, query_selfattn_fc, query_selfattn_fm, query_selfattn_fs, query_output],
                axis=2)
            doc_cast = tf.concat(
                [doc_max_fc, doc_max_fm, doc_max_fs, doc_mean_fc, doc_mean_fm, doc_mean_fs, doc_align_fcm,
                 doc_align_fm, doc_align_fs, doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs, doc_output], axis=2)

            # query_cast = tf.concat(
            #     [
            #      query_output],
            #     axis=2)
            # doc_cast = tf.concat(
            #     [doc_output], axis=2)

            query_cast = tf.layers.dropout(query_cast, self._dropout_rate, training=is_training)
            doc_cast = tf.layers.dropout(doc_cast, self._dropout_rate, training=is_training)

            query_hidden, _ = nn.bi_lstm(query_cast, self._hidden_dim, name="query_lstm")
            doc_hidden, _ = nn.bi_lstm(doc_cast, self._hidden_dim, name="doc_lstm")

            query_hidden = tf.concat(query_hidden, axis=2)
            doc_hidden = tf.concat(doc_hidden, axis=2)
            query_hidden = tf.layers.dropout(query_hidden, self._dropout_rate, training=is_training)
            doc_hidden = tf.layers.dropout(doc_hidden, self._dropout_rate, training=is_training)

            #query_hidden_max = query_hidden + (1. - query_mask) * tf.float32.min
            #doc_hidden_max = doc_hidden + (1. - doc_mask) * tf.float32.min
            query_hidden_mean = query_hidden * query_mask
            doc_hidden_mean = doc_hidden * doc_mask

            query_sum = tf.reduce_sum(query_hidden_mean, axis=1)
            query_mean = tf.div(query_sum, tf.expand_dims(tf.cast(query_len, tf.float32), -1) + self._eps)

            query_max = tf.reduce_max(query_hidden_mean, axis=1)
            query_final = tf.concat([query_mean, query_max], axis=1)

            doc_sum = tf.reduce_sum(doc_hidden_mean, axis=1)
            doc_mean = tf.div(doc_sum, tf.expand_dims(tf.cast(doc_len, tf.float32), -1) + self._eps)

            doc_max = tf.reduce_max(doc_hidden_mean, axis=1)
            doc_final = tf.concat([doc_mean, doc_max], axis=1)

            final = tf.concat([query_final, doc_final, query_final * doc_final, query_final - doc_final], axis=1)
            #yout = nn.highway_network(final, 2, dropout_rate=self._drop_rate, is_trainging=is_training)
            # MLP layer
            yout = tf.contrib.layers.fully_connected(final, self._hidden_dim, scope='fc1')
            # Dropout applied to classifier

            output_dict = self._make_output(yout, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = []
                # debug_ops = [query_mean_fs]#[query_maxpooling, query_max_fc] [query_max_fm, query_max_fs],[query_mean_fc, query_mean_fm] , ,
                # for op in debug_ops:
                #     output_dict['debugs'].append(tf.shape(op))
                # output_dict['debugs'].append(query_length)
            return output_dict
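In Example #22 the co-attention matrix S scores every query/document token pair through a learned bilinear form, and the extractive poolings collapse it back into per-sequence vectors: max pooling softmaxes the row-wise maxima, mean pooling softmaxes the length-normalized row sums. A rough NumPy sketch of the masked max-pooling branch for the query side is given below; the shapes and the padding convention are assumptions rather than the exact nn helpers.

import numpy as np

def masked_softmax(x, mask, axis):
    x = np.where(mask > 0, x, np.finfo(np.float32).min)
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x) * (mask > 0)
    return e / (e.sum(axis=axis, keepdims=True) + 1e-13)

def query_extractive_max_pool(query, doc, M, q_mask, d_mask):
    # query: [b, q, d], doc: [b, t, d], M: [d, d]
    # q_mask: [b, q, 1], d_mask: [b, t, 1] with 1 for real tokens
    S = np.einsum('bqd,de,bte->bqt', query, M, doc)           # similarity matrix
    S_mask = q_mask * np.transpose(d_mask, (0, 2, 1))         # [b, q, t]
    S = np.where(S_mask > 0, S, np.finfo(np.float32).min)
    q_score = masked_softmax(S.max(axis=2, keepdims=True), q_mask, axis=1)
    return (q_score * query).sum(axis=1)                      # [b, d]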
Example #23
0
    def forward(self, features, labels, mode, params):
        outputs = dict()
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        for (feature_key, feature) in features.items():
            if '/' not in feature_key:
                continue
            feature_namespace = feature_key.split("/")[1].strip()
            if feature_namespace == self._vocab_namespace:
                with tf.variable_scope("embedding/" + self._vocab_namespace,
                                       reuse=tf.AUTO_REUSE):
                    if self._weight is None:
                        if not self._trainable:
                            logger.warning(
                                "No pretrained embedding is assigned. The embedding should be trainable."
                            )
                        logger.debug("loading random embedding.")
                        if self._padding_zero:
                            word_embeddings = tf.get_variable(
                                "embedding_weight",
                                shape=(self._num_embeddings - 1,
                                       self._embedding_dim),
                                initializer=initializers.xavier_initializer(),
                                trainable=self._trainable)
                            pad_embeddings = tf.constant(np.zeros(
                                [1, self._embedding_dim]),
                                                         dtype=tf.float32)
                            self._embeddings = tf.concat(
                                [pad_embeddings, word_embeddings], axis=0)
                        else:
                            self._embeddings = tf.get_variable(
                                "embedding_weight",
                                shape=(self._num_embeddings,
                                       self._embedding_dim),
                                initializer=initializers.xavier_initializer(),
                                trainable=self._trainable)
                    else:
                        if self._weight.shape != (self._num_embeddings,
                                                  self._embedding_dim):
                            raise ConfigureError(
                                "The parameter of embedding with shape (%s, %s), "
                                "but the pretrained embedding with shape %s." %
                                (self._num_embeddings, self._embedding_dim,
                                 self._weight.shape))
                        logger.debug(
                            "loading pretrained embedding with trainable %s." %
                            self._trainable)
                        if self._padding_zero:
                            word_embeddings = tf.get_variable(
                                "embedding_weight",
                                initializer=self._weight[1:, :],
                                trainable=self._trainable)
                            pad_embeddings = tf.constant(np.zeros(
                                [1, self._embedding_dim]),
                                                         dtype=tf.float32)
                            self._embeddings = tf.concat(
                                [pad_embeddings, word_embeddings], axis=0)
                        else:
                            self._embeddings = tf.get_variable(
                                "embedding_weight",
                                initializer=self._weight,
                                trainable=self._trainable)
                            # tf.Variable(self._weight, trainable=self._trainable, name='embedding_weight')
                    emb = tf.nn.embedding_lookup(self._embeddings, feature)

                    dropout_rate = params.get('dropout_rate')
                    if dropout_rate is None:
                        dropout_rate = self._dropout_rate
                    emb_drop = tf.layers.dropout(emb,
                                                 dropout_rate,
                                                 training=is_training)
                    if self._projection_dim:
                        emb_drop = tf.layers.dense(
                            emb_drop,
                            self._projection_dim,
                            use_bias=False,
                            kernel_initializer=initializers.xavier_initializer(
                            ))
                    outputs[feature_key] = emb_drop
        return outputs
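Example #23 pins the padding embedding at index 0 to all zeros by training a table of size num_embeddings - 1 and concatenating a constant zero row in front of it. The sketch below reproduces that table construction with NumPy only (ignoring trainability); all names are illustrative.

import numpy as np

def build_embedding_table(num_embeddings, embedding_dim, pretrained=None,
                          padding_zero=True, seed=0):
    rng = np.random.default_rng(seed)
    if pretrained is not None:
        table = np.asarray(pretrained, dtype=np.float32)
    else:
        table = rng.normal(0.0, 0.05,
                           size=(num_embeddings, embedding_dim)).astype(np.float32)
    if padding_zero:
        # row 0 is reserved for padding and kept at zero
        table = np.concatenate([np.zeros((1, embedding_dim), np.float32),
                                table[1:]], axis=0)
    return table

ids = np.array([[3, 1, 0, 0]])            # 0 is the padding id
emb = build_embedding_table(10, 4)[ids]   # embedding lookup, shape [1, 4, 4]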
Example #24
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain hypothesis with vocabulary namespace tokens "
                    "or elmo_characters.")

            premise_tokens = features_embedding.get('premise/tokens', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens',
                                                       None)

            hidden_size = premise_tokens.shape[-1].value

            with tf.variable_scope("pooler"):
                # We "pool" the model by simply taking the hidden state corresponding
                # to the first token. We assume that this has been pre-trained
                premise_first_token_tensor = tf.squeeze(premise_tokens[:,
                                                                       0:1, :],
                                                        axis=1)
                hypothesis_first_token_tensor = tf.squeeze(
                    hypothesis_tokens[:, 0:1, :], axis=1)

                dense_input = tf.concat([
                    premise_first_token_tensor, hypothesis_first_token_tensor,
                    premise_first_token_tensor - hypothesis_first_token_tensor,
                    premise_first_token_tensor * hypothesis_first_token_tensor
                ],
                                        axis=-1)
                output_layer = tf.layers.dense(
                    dense_input,
                    hidden_size,
                    activation=tf.tanh,
                    kernel_initializer=create_initializer(
                        self._initializer_range))

            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            output_dict = self._make_output(output_layer, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions)

                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          v_1_ave, v_2_ave, h_mlp, logits]
            return output_dict
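Example #24 pools each encoder output by taking only the first-token hidden state (the usual "[CLS]"-style pooling) and then builds the standard interaction features [p, h, p - h, p * h] before a tanh projection. A small NumPy sketch of that feature construction, with placeholder dimensions:

import numpy as np

def pooled_pair_features(premise_seq, hypothesis_seq):
    # premise_seq, hypothesis_seq: [batch, seq_len, hidden]
    p = premise_seq[:, 0, :]      # first-token vector of the premise
    h = hypothesis_seq[:, 0, :]   # first-token vector of the hypothesis
    return np.concatenate([p, h, p - h, p * h], axis=-1)

x = pooled_pair_features(np.ones((2, 5, 8)), np.zeros((2, 7, 8)))
assert x.shape == (2, 32)   # 4 * hidden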
Example #25
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(
            features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters',
                                                  None)

            if premise_tokens_ids is None:
                raise ConfigureError(
                    "The input features should contain premise with vocabulary namespace tokens "
                    "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            if features.get(
                    'premise/elmo_characters', None) is not None or isinstance(
                        self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2

            #prem_mask = tf.expand_dims(prem_mask, -1)
            prem_mask = tf.cast(prem_mask, tf.bool)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get(
                    'premise/elmo_characters', None)

            with tf.variable_scope('san_fb1'):
                x_fw1 = query_encode_san(premise_tokens, prem_mask,
                                         'forward')  # bs, ql, vec
                x_bw1 = query_encode_san(premise_tokens, prem_mask,
                                         'backward')  # bs, ql, vec
                x_fusion = fusion_gate(premise_tokens, prem_mask, x_fw1,
                                       x_bw1)  # bs, ql, vec
            with tf.variable_scope('san_md'):
                x_code = query_encode_md(x_fusion, prem_mask)  # bs, vec

                pre_logits = tf.nn.relu(
                    linear(x_code,
                           self._hidden_dim,
                           True,
                           scope='pre_logits_linear',
                           is_train=True))  # bs, vec
                logits = linear(pre_logits,
                                self._num_classes,
                                False,
                                scope='get_output',
                                is_train=True)  # bs, cn

            output_dict = self._make_output(logits, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError(
                        "The input features should contain label with vocabulary namespace "
                        "labels int %s dataset." % mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding,
                                       logits=output_dict['logits'],
                                       params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(
                    labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(
                    labels=labels, predictions=output_dict['predictions'])
                #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
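Example #25 casts the length mask to a boolean tensor because the directional self-attention helpers expect a boolean mask, and padded positions are excluded by pushing their scores toward the float32 minimum before the softmax, the same trick the other examples spell as (1. - mask) * tf.float32.min. A tiny NumPy illustration of that masking pattern:

import numpy as np

def masked_softmax(scores, bool_mask):
    # scores: [batch, len]; bool_mask: [batch, len], True for real tokens
    neg_inf = np.finfo(np.float32).min
    scores = np.where(bool_mask, scores, neg_inf)   # padded positions get ~-inf
    scores = scores - scores.max(axis=-1, keepdims=True)
    weights = np.exp(scores) * bool_mask
    return weights / weights.sum(axis=-1, keepdims=True)

w = masked_softmax(np.array([[1.0, 2.0, 3.0]]),
                   np.array([[True, True, False]]))
# w[0, 2] == 0 and the remaining weights sum to 1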
Example #26
0
    def get_vocab_index_to_token(self, namespace='tokens'):
        if namespace not in self._token_to_index:
            raise ConfigureError("namespace %s not in vocabulary." % namespace)
        return self._index_to_token[namespace]
Example #27
0
    def __init__(self,
                 data_reader=None,
                 train_input_fn=None,
                 valid_input_fn=None,
                 test_input_fn=None,
                 serving_feature_spec=None,
                 model=None,
                 hparams=HParams(),
                 run_config: RunConfig = RunConfig()):
        if data_reader is not None and train_input_fn is None:
            self._train_input_fn, self._valid_input_fn, self._test_input_fn = self.make_input_fns(
                data_reader)
            self._serving_feature_spec = data_reader.get_raw_serving_input_receiver_features(
                DataSplit.EVAL)
        else:
            self._train_input_fn = train_input_fn
            self._valid_input_fn = valid_input_fn
            self._test_input_fn = test_input_fn
            self._serving_feature_spec = serving_feature_spec
        if self._train_input_fn is None:
            raise ConfigureError("The train dataset is not provided.")

        if data_reader:
            hparams.add_hparam("num_retrieval",
                               data_reader.get_num_retrieval())

        if model is None:
            raise ConfigureError("Please provide model for training.")
        self._model_fn = model.make_estimator_model_fn()

        if hparams.per_process_gpu_memory_fraction is not None and 0 < hparams.per_process_gpu_memory_fraction <= 1:
            session_config = tf.ConfigProto(log_device_placement=True,
                                            allow_soft_placement=True)
            session_config.gpu_options.per_process_gpu_memory_fraction = hparams.per_process_gpu_memory_fraction
            run_config = run_config.replace(session_config=session_config)

        self._estimator = tf.estimator.Estimator(
            model_fn=self._model_fn,
            config=run_config,
            params=hparams,
            warm_start_from=model.get_warm_start_setting())

        train_hooks = []
        if tf_version[1] >= 10 and tf_version[1] <= 13:
            early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
                self._estimator,
                metric_name='loss',
                max_steps_without_decrease=hparams.
                early_stopping_max_steps_without_decrease,
                min_steps=hparams.early_stopping_min_steps)
            train_hooks.append(early_stopping)

        exporters = None
        if self._serving_feature_spec:
            serving_input_receiver_fn = (
                tf.estimator.export.build_raw_serving_input_receiver_fn(
                    self._serving_feature_spec))
            exporters = []
            if tf_version[1] >= 9:
                best_exporter = tf.estimator.BestExporter(
                    name="best_exporter",
                    serving_input_receiver_fn=serving_input_receiver_fn,
                    exports_to_keep=5)
                exporters.append(best_exporter)
            latest_export = tf.estimator.LatestExporter(
                name='latest_exporter',
                serving_input_receiver_fn=serving_input_receiver_fn,
                exports_to_keep=5)
            exporters.append(latest_export)

        self._train_spec = tf.estimator.TrainSpec(
            input_fn=self._train_input_fn,
            max_steps=hparams.train_steps,
            hooks=train_hooks)
        if self._valid_input_fn:
            self._valid_spec = tf.estimator.EvalSpec(
                input_fn=self._valid_input_fn,
                steps=hparams.eval_steps,
                exporters=exporters,
                throttle_secs=hparams.throttle_secs)
        #self._estimator.evaluate(self._valid_input_fn, steps=hparams.eval_steps, name=DataSplit.TEST)
        tf.estimator.train_and_evaluate(self._estimator, self._train_spec,
                                        self._valid_spec)
Example #28
0
    def forward(self, features, labels, mode, params):
        global_step = tf.train.get_or_create_global_step()
        dropout_keep_rate = tf.train.exponential_decay(self._keep_prob, global_step,
                                                       self._dropout_decay_step, self._dropout_decay_rate,
                                                       staircase=False, name='dropout_keep_rate')
        tf.summary.scalar('dropout_keep_rate', dropout_keep_rate)

        params.add_hparam('dropout_rate', 1 - dropout_keep_rate)
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2
            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_ins = []
            hypothesis_ins = []

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

            premise_ins.append(premise_tokens)
            hypothesis_ins.append(hypothesis_tokens)

            premise_chars = features_embedding.get('premise/chars', None)
            hypothesis_chars = features_embedding.get('hypothesis/chars', None)

            if premise_chars is not None and hypothesis_chars is not None:

                with tf.variable_scope("conv") as scope:
                    conv_pre = nn.multi_conv1d_max(premise_chars, self._char_filter_size, self._char_filter_channel_dims,
                                                   "VALID", is_training, dropout_keep_rate, scope='conv')
                    scope.reuse_variables()
                    conv_hyp = nn.multi_conv1d_max(hypothesis_chars, self._char_filter_size, self._char_filter_channel_dims,
                                                   "VALID", is_training, dropout_keep_rate, scope='conv')
                    #conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size])
                    #conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size])

                    premise_ins.append(conv_pre)
                    hypothesis_ins.append(conv_hyp)

            premise_pos = features_embedding.get('premise/pos_tags', None)
            hypothesis_pos = features_embedding.get('hypothesis/pos_tags', None)

            if premise_pos is not None and hypothesis_pos is not None:
                premise_ins.append(premise_pos)
                hypothesis_ins.append(hypothesis_pos)

            premise_exact_match = features.get('premise/exact_match_labels', None)
            hypothesis_exact_match = features.get('hypothesis/exact_match_labels', None)

            if premise_exact_match is not None and hypothesis_exact_match is not None:
                premise_ins.append(tf.expand_dims(tf.cast(premise_exact_match, tf.float32), -1))
                hypothesis_ins.append(tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32), -1))

            premise_in = tf.concat(premise_ins, axis=2)
            hypothesis_in = tf.concat(hypothesis_ins, axis=2)

            with tf.variable_scope("highway") as scope:
                premise_in = nn.highway_network(premise_in, self._highway_num_layers)
                scope.reuse_variables()
                hypothesis_in = nn.highway_network(hypothesis_in, self._highway_num_layers)

            with tf.variable_scope("prepro") as scope:
                pre = premise_in
                hyp = hypothesis_in
                for i in range(self._num_self_att_enc_layers):
                    with tf.variable_scope("attention_encoder_%s" % i, reuse=False):
                        pre_att = nn.self_attention(pre, prem_seq_lengths, func='tri_linear',
                                                    scope="premise_self_attention")
                        p = nn.fuse_gate(pre, pre_att, scope="premise_fuse_gate")
                        hyp_att = nn.self_attention(hyp, hyp_seq_lengths, func='tri_linear',
                                                    scope="hypothesis_self_attention")
                        h = nn.fuse_gate(hyp, hyp_att, scope="hypothesis_fuse_gate")

                        pre = p
                        hyp = h
                        nn.variable_summaries(p, "p_self_enc_summary_layer_{}".format(i))
                        nn.variable_summaries(h, "h_self_enc_summary_layer_{}".format(i))

            with tf.variable_scope("main") as scope:
                pre = p
                hyp = h

                with tf.variable_scope("interaction"):
                    pre_length = tf.shape(pre)[1]
                    hyp_length = tf.shape(hyp)[1]
                    pre_new = tf.tile(tf.expand_dims(pre, 2), [1, 1, hyp_length, 1])
                    hyp_new = tf.tile(tf.expand_dims(hyp, 1), [1, pre_length, 1, 1])
                    bi_att_mx = pre_new * hyp_new

                    # mask = tf.expand_dims(tf.sequence_mask(query_len, tf.shape(query)[1], dtype=tf.float32),
                    #                       axis=2) * \
                    #        tf.expand_dims(tf.sequence_mask(key_len, tf.shape(key)[1], dtype=tf.float32), axis=1)
                    bi_att_mx = tf.layers.dropout(bi_att_mx, 1-dropout_keep_rate, training=is_training)

                with tf.variable_scope("dense_net"):
                    dim = bi_att_mx.get_shape().as_list()[-1]
                    act = tf.nn.relu if self._first_scale_down_layer_relu else None
                    fm = tf.contrib.layers.convolution2d(bi_att_mx,
                                                         int(dim * self._dense_net_first_scale_down_ratio),
                                                         self._first_scale_down_kernel, padding="SAME",
                                                         activation_fn=act)

                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="first_dense_net_block")
                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='second_transition_layer')
                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="second_dense_net_block")
                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='third_transition_layer')
                    fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers,
                                            self._dense_net_kernel_size, scope="third_dense_net_block")

                    fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate,
                                                       scope='fourth_transition_layer')

                    shape_list = list(fm.get_shape())
                    #print(shape_list)
                    premise_final = tf.reshape(fm, [-1, shape_list[1] * shape_list[2] * shape_list[3]])

            output_dict = self._make_output(premise_final, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                #######l2 loss#################
                if self._l2_loss:
                    if self._sigmoid_growing_l2loss:
                        weights_added = tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if
                                                  tensor.name.endswith("weights:0") or tensor.name.endswith('kernel:0') or tensor.name.endswith('filter:0')])
                        full_l2_step = tf.constant(self._weight_l2loss_step_full_reg, dtype=tf.int32, shape=[],
                                                   name='full_l2reg_step')
                        full_l2_ratio = tf.constant(self._l2_regularization_ratio, dtype=tf.float32, shape=[],
                                                    name='l2_regularization_ratio')
                        gs_flt = tf.cast(global_step, tf.float32)
                        half_l2_step_flt = tf.cast(full_l2_step / 2, tf.float32)

                        # (self.global_step - full_l2_step / 2)
                        # tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32)
                        # l2loss_ratio = tf.sigmoid( tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32)) * full_l2_ratio
                        l2loss_ratio = tf.sigmoid(((gs_flt - half_l2_step_flt) * 8) / half_l2_step_flt) * full_l2_ratio
                        tf.summary.scalar('l2loss_ratio', l2loss_ratio)
                        l2loss = weights_added * l2loss_ratio
                    else:
                        l2loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if
                                           tensor.name.endswith("weights:0") or tensor.name.endswith(
                                               'kernel:0')]) * tf.constant(self._l2_regularization_ratio,
                                                                           dtype='float', shape=[],
                                                                           name='l2_regularization_ratio')
                    tf.summary.scalar('l2loss', l2loss)
                ######diff loss###############################
                diffs = []
                for i in range(self._num_self_att_enc_layers):
                    for tensor in tf.trainable_variables():
                        #print(tensor.name)
                        if tensor.name == "diin/prepro/attention_encoder_{}/premise_self_attention/similar_mat/similar_func/arg/kernel:0".format(
                                i):
                            l_lg = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_self_attention/similar_mat/similar_func/arg/kernel:0".format(
                                i):
                            r_lg = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_1/kernel:0".format(i):
                            l_fg_lhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_1/kernel:0".format(
                                i):
                            r_fg_lhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_1/kernel:0".format(i):
                            l_fg_rhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_1/kernel:0".format(
                                i):
                            r_fg_rhs_1 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_2/kernel:0".format(i):
                            l_fg_lhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_2/kernel:0".format(
                                i):
                            r_fg_lhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_2/kernel:0".format(i):
                            l_fg_rhs_2 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_2/kernel:0".format(
                                i):
                            r_fg_rhs_2 = tensor

                        if tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_3/kernel:0".format(
                                i):
                            l_fg_lhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_3/kernel:0".format(
                                i):
                            r_fg_lhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_3/kernel:0".format(
                                i):
                            l_fg_rhs_3 = tensor
                        elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_3/kernel:0".format(
                                i):
                            r_fg_rhs_3 = tensor

                    diffs += [l_lg - r_lg, l_fg_lhs_1 - r_fg_lhs_1, l_fg_rhs_1 - r_fg_rhs_1, l_fg_lhs_2 - r_fg_lhs_2,
                              l_fg_rhs_2 - r_fg_rhs_2]
                    diffs += [l_fg_lhs_3 - r_fg_lhs_3, l_fg_rhs_3 - r_fg_rhs_3]
                diff_loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in diffs]) * tf.constant(
                    self._diff_penalty_loss_ratio, dtype='float', shape=[], name='diff_penalty_loss_ratio')
                tf.summary.scalar('diff_loss', diff_loss)
                ###############################
                output_dict['loss'] = loss + l2loss + diff_loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])

                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
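The sigmoid-growing L2 regularization in Example #28 ramps the weight-decay ratio from near zero to the configured ratio, centered at half of weight_l2loss_step_full_reg steps. The schedule can be written as a plain function; the sketch below mirrors the formula in the code, with placeholder values for the step count and ratio.

import math

def l2loss_ratio(global_step, full_l2_step, full_l2_ratio):
    # sigmoid ramp centered at full_l2_step / 2, reaching ~full_l2_ratio at full_l2_step
    half = full_l2_step / 2.0
    return full_l2_ratio / (1.0 + math.exp(-((global_step - half) * 8.0) / half))

# ~0 at step 0, half the ratio at the midpoint, ~full ratio at full_l2_step
print([round(l2loss_ratio(s, 10000, 9e-5), 7) for s in (0, 5000, 10000)])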
Example #29
0
    def __init__(self,
                 data_reader=None,
                 eval_input_fn=None,
                 num_classes=None,
                 vocab=None,
                 export_dir=None,
                 output_file=None,
                 hparams=HParams()):
        if data_reader is not None and eval_input_fn is None:
            self._eval_input_fn = data_reader.make_estimator_input_fn(
                DataSplit.EVAL, force_repeat=False)
            vocab = data_reader.get_vocab()
        else:
            self._eval_input_fn = eval_input_fn
        if num_classes is None:
            num_classes = vocab.get_vocab_size(namespace='labels')
        task = hparams.get('task', 'classification')
        task_type = hparams.get('task_type', 'multiclass')

        labels = list(range(num_classes))

        dataset = self._eval_input_fn()
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()

        self.saved_model_loader = loader_impl.SavedModelLoader(export_dir)

        mode = DataSplit.PREDICT
        signature_def = get_signature_def_for_mode(self.saved_model_loader,
                                                   mode)

        input_map = generate_input_map(signature_def, next_element)
        output_tensor_names = [
            value.name for value in signature_def.outputs.values()
        ]
        try:
            tags = model_fn.EXPORT_TAG_MAP[mode]
        except AttributeError as e:
            tags = ['serve']
        saver, output_tensors = self.saved_model_loader.load_graph(
            tf.get_default_graph(),
            tags,
            input_map=input_map,
            return_elements=output_tensor_names)
        output_map = dict(zip(output_tensor_names, output_tensors))
        outputs = {
            key: output_map[value.name]
            for (key, value) in signature_def.outputs.items()
        }

        # predict_fn = tf.contrib.predictor.from_saved_model(export_dir)

        #####tsv write######
        tsv_file = open(output_file, 'w')
        # wb = Workbook(write_only=True)
        # ws = wb.create_sheet('examples')
        # ws.append(['question', 'answer', 'true_label', 'predict', 'score'])

        y_true = []
        y_pred = []
        total_num = 0
        # accuracy = 0
        # confusion_matrix = [[0 for j in range(num_classes)] for i in range(num_classes)]
        if hparams.per_process_gpu_memory_fraction is not None and 0 < hparams.per_process_gpu_memory_fraction <= 1:
            session_config = tf.ConfigProto(log_device_placement=True,
                                            allow_soft_placement=True)
            session_config.gpu_options.per_process_gpu_memory_fraction = hparams.per_process_gpu_memory_fraction
        else:
            session_config = tf.ConfigProto()

        with tf.Session(config=session_config) as sess:
            self.saved_model_loader.restore_variables(sess, saver)
            self.saved_model_loader.run_init_ops(sess, tags)
            sess.run(iterator.initializer)
            while True:
                try:
                    outputs['inputs'] = next_element
                    output_vals = sess.run(outputs)

                    data_batch = output_vals['inputs']
                    if 'premise/tokens' in data_batch.keys(
                    ) and 'hypothesis/tokens' in data_batch.keys():
                        premise_tokens_val, hypothesis_tokens_val, true_label_val = \
                            data_batch['premise/tokens'], data_batch['hypothesis/tokens'], data_batch['label/labels']
                    else:
                        true_label_val = data_batch['label/labels']
                        premise_tokens_val = [
                            [] for i in range(len(true_label_val))
                        ]
                        hypothesis_tokens_val = [
                            [] for i in range(len(true_label_val))
                        ]
                    # probs = output_vals['output_score']
                    probs = output_vals['output']
                    num_batch = probs.shape[0]
                    total_num += num_batch
                    print("processing %s/%s" % (num_batch, total_num))
                    #######################
                    # print(probs)
                    if task_type == 'multiclass':
                        predictions_val = np.argmax(probs, axis=1)
                    elif task_type == 'multilabel':
                        threshold = hparams.get('threshold', 0.5)
                        predictions_val = (probs > threshold).astype(
                            dtype=np.int32)
                    elif task_type == 'topk':
                        predictions_val = (probs > 0).astype(dtype=np.int32)
                    else:
                        raise ConfigureError(
                            "Task type %s is not support for task %s. "
                            "Only multiclass and multilabel is support for task %s"
                            % (task_type, task, task))
                    # predictions = (probs > 0.5).astype(np.int32)
                    # print(predictions)
                    y_true.append(true_label_val)
                    y_pred.append(predictions_val)
                    # print(predictions)
                    # for i in range(probs.shape[0]):
                    #     predictions = (probs > 0.5).astype(np.int32)
                    #     predict = predictions[i]
                    #     label = true_label_val[i]
                    #     if predict == label:
                    #         accuracy += 1
                    #     confusion_matrix[label][predict] += 1
                    ################
                    # Convert the batch outputs to per-example label lists once per
                    # batch; these values do not depend on the per-example loop below.
                    if task_type == 'multilabel' or task_type == 'topk':
                        predictions = [[] for _ in range(num_batch)]
                        for row, col in np.argwhere(predictions_val == 1):
                            predictions[row].append(col)
                        true_labels = [[] for _ in range(num_batch)]
                        for row, col in np.argwhere(true_label_val == 1):
                            true_labels[row].append(col)
                    else:
                        predictions = predictions_val
                        true_labels = true_label_val
                    for i in range(num_batch):
                        premise_str = " ".join(vocab.convert_indexes_to_tokens(
                            premise_tokens_val[i], 'tokens'))
                        hypothesis_str = " ".join(vocab.convert_indexes_to_tokens(
                            hypothesis_tokens_val[i], 'tokens'))

                        true_label = true_labels[i]
                        predict = predictions[i]
                        prob = probs[i]
                        if task_type == 'multiclass':
                            tsv_str = "\t".join([
                                premise_str, hypothesis_str,
                                vocab.get_index_token(true_label,
                                                      namespace='labels'),
                                vocab.get_index_token(predict,
                                                      namespace='labels'),
                                str(prob)
                            ])
                        elif task_type == 'multilabel' or task_type == 'topk':
                            tsv_str = "\t".join([
                                premise_str, hypothesis_str, " ".join([
                                    vocab.get_index_token(l,
                                                          namespace='labels')
                                    for l in true_label
                                ]), " ".join([
                                    vocab.get_index_token(p,
                                                          namespace='labels')
                                    for p in predict
                                ]),
                                str(prob)
                            ])
                        else:
                            raise ConfigureError(
                                "Task type %s is not supported for task %s. "
                                "Only multiclass, multilabel and topk are supported for task %s."
                                % (task_type, task, task))

                        # tsv_str = "\t".join([premise_str, hypothesis_str, str(true_label), str(predict), str(prob),
                        #            json.dumps(output_vals['query_embedding'][i].tolist()), json.dumps(output_vals['title_embedding'][i].tolist()),
                        #            json.dumps(output_vals['query_lstm_1'][i].tolist()), json.dumps(output_vals['title_lstm_1'][i].tolist()),
                        #            json.dumps(output_vals['query_attention'][i].tolist()), json.dumps(output_vals['title_attention'][i].tolist()),
                        #            json.dumps(output_vals['query_lstm_2'][i].tolist()), json.dumps(output_vals['title_lstm_2'][i].tolist()),
                        #            json.dumps(output_vals['fc1'][i].tolist()), json.dumps(output_vals['fc2'][i].tolist())
                        #                      ])
                        tsv_file.write(tsv_str + "\n")

                    # print("process %s/%s correct/total instances with accuracy %s." % (accuracy, total_num, accuracy/float(total_num)))
                except tf.errors.OutOfRangeError:
                    logger.info("processed all the evaluation data")
                    break

            # logger.warning(e)
            y_true = np.concatenate(y_true, axis=0)
            y_pred = np.concatenate(y_pred, axis=0)
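            # sklearn averaging mode: 'binary' reports the positive class only for
            # two-class problems, 'micro' aggregates counts over all classes otherwise.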
            avg_param = 'micro'
            if num_classes == 2:
                avg_param = 'binary'
            accuracy = metrics.accuracy_score(y_true,
                                              y_pred)  # accuracy/total_num
            precise, recall, f1score, support = metrics.precision_recall_fscore_support(
                y_true, y_pred, labels=labels, average=avg_param)
            if task_type == 'multiclass':
                confusion_matrix = metrics.confusion_matrix(y_true,
                                                            y_pred,
                                                            labels=labels)
                print("metrics:")
                confmx_str = "label \ predict "
                for i in range(num_classes):
                    confmx_str += "| %s | " % vocab.get_index_token(
                        i, namespace='labels')
                confmx_str += "\n"
                for i in range(num_classes):
                    confmx_str += "| %s | " % vocab.get_index_token(
                        i, namespace='labels')
                    for j in range(num_classes):
                        confmx_str += "| %s | " % confusion_matrix[i][j]
                    confmx_str += "\n"

                print(confmx_str)

            elif task_type == 'multilabel' or task_type == 'topk':
                confusion_matrix = metrics.multilabel_confusion_matrix(
                    y_true, y_pred)
                print("metrics:")
                for k in range(num_classes):
                    print("confusion matrix for label %s" %
                          vocab.get_index_token(k, namespace='labels'))
                    confmx_str = "label \ predict "
                    for i in range(2):
                        confmx_str += "| %s | " % i
                    confmx_str += "\n"
                    for i in range(2):
                        confmx_str += "| %s | " % i
                        for j in range(2):
                            confmx_str += "| %s | " % confusion_matrix[k][i][j]
                        confmx_str += "\n"

                    print(confmx_str)

            else:
                raise ConfigureError(
                    "Task type %s is not supported for task %s. "
                    "Only multiclass, multilabel and topk are supported for task %s." %
                    (task_type, task, task))
            # confusion_matrix[1][1]/(confusion_matrix[0][1]+confusion_matrix[1][1])
            # recall = confusion_matrix[1][1]/(confusion_matrix[1][0]+confusion_matrix[1][1])
            # f1score = (precise+recall)/2
            print("micro total accuracy precise recall f1-score")
            print(
                "accuracy: %.2f, precise: %.2f, recall: %.2f, f1-score: %.2f" %
                (accuracy, precise, recall, f1score))

            precisions, recalls, fbeta_scores, supports = metrics.precision_recall_fscore_support(
                y_true, y_pred, labels=labels)
            print("accuracy precise recall f1-score for each class")
            print(
                '======================================================================================'
            )
            for lab_idx, (precision, recall, fbeta_score,
                          support) in enumerate(
                              zip(precisions, recalls, fbeta_scores,
                                  supports)):
                print(
                    "label:%s\tprecision:%.2f\trecall:%.2f\tf1-score:%.2f\tsupport:%.2f"
                    % (vocab.get_index_token(lab_idx, namespace='labels'),
                       precision, recall, fbeta_score, support))
            # legend = ["label \ predict "]
            # for i in range(num_classes):
            #     legend.append(str(i))
            # ws.append(legend)
            # for i in range(num_classes):
            #     row = [str(i)]
            #     for j in range(num_classes):
            #         row.append(str(confusion_matrix[i][j]))
            #     ws.append(row)
            # ws.append([])
            # ws.append([])
            # ws.append(['accuracy', 'precise', 'recall', 'f1-score'])
            # ws.append([str(accuracy), str(precise), str(recall), str(f1score)])
            # if output_file:
            #     if not output_file.endswith(".xlsx"):
            #         output_file += '.xlsx'
            #     wb.save(output_file)
            tsv_file.close()
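
A minimal, self-contained sketch of the metric computation used above, run on toy multilabel data. The arrays and label list are illustrative only, and metrics.multilabel_confusion_matrix assumes scikit-learn >= 0.21.

import numpy as np
from sklearn import metrics

# Toy multilabel predictions: rows are examples, columns are classes.
y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
y_pred = np.array([[1, 0, 0],
                   [0, 1, 0],
                   [1, 0, 0]])
labels = [0, 1, 2]

# Subset accuracy for multilabel input, plus micro-averaged precision/recall/f1.
accuracy = metrics.accuracy_score(y_true, y_pred)
precision, recall, f1, _ = metrics.precision_recall_fscore_support(
    y_true, y_pred, labels=labels, average='micro')
print("accuracy: %.2f, precision: %.2f, recall: %.2f, f1-score: %.2f"
      % (accuracy, precision, recall, f1))

# One 2x2 confusion matrix per label, matching the per-label report above.
for k, cm in enumerate(metrics.multilabel_confusion_matrix(y_true, y_pred)):
    print("confusion matrix for label %d:\n%s" % (k, cm))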
Example #30
0
    def forward(self, features, labels, mode, params):
        features_embedding = self._embedding_mapping.forward(features, labels, mode, params)
        with tf.variable_scope(self._model_name):
            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            premise_tokens_ids = features.get('premise/tokens', None)
            if premise_tokens_ids is None:
                premise_tokens_ids = features.get('premise/elmo_characters', None)
            hypothesis_tokens_ids = features.get('hypothesis/tokens', None)
            if hypothesis_tokens_ids is None:
                hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None)

            if premise_tokens_ids is None:
                raise ConfigureError("The input features should contain premise with vocabulary namespace tokens "
                                     "or elmo_characters.")
            if hypothesis_tokens_ids is None:
                raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens "
                                     "or elmo_characters.")

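            # Build sequence lengths and padding masks; for ELMo/BERT inputs the two
            # special boundary positions are dropped from the mask and the lengths
            # are reduced accordingly.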
            prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids)
            hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids)
            if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths)
                prem_seq_lengths -= 2
            if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert):
                hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths)
                hyp_seq_lengths -= 2

            prem_mask = tf.expand_dims(prem_mask, -1)
            hyp_mask = tf.expand_dims(hyp_mask, -1)

            premise_tokens = features_embedding.get('premise/tokens', None)
            if premise_tokens is None:
                premise_tokens = features_embedding.get('premise/elmo_characters', None)
            hypothesis_tokens = features_embedding.get('hypothesis/tokens', None)
            if hypothesis_tokens is None:
                hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None)

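            # Local model: build an exact-match interaction from the raw token ids,
            # convolve it, and project it down to a single local-match score.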
            lm_xor = keras.layers.Lambda(self._xor_match)([premise_tokens_ids, hypothesis_tokens_ids])
            lm_conv = keras.layers.Conv1D(
                self._lm_filters,
                premise_tokens_ids.shape[1].value,
                padding='valid',
                activation=self._activation_func
            )(lm_xor)

            lm_conv = keras.layers.Dropout(self._dropout_rate)(
                lm_conv, training=is_training)
            lm_feat = keras.layers.Reshape((lm_conv.shape[2].value, ))(lm_conv)
            for hidden_size in self._lm_hidden_sizes:
                lm_feat = keras.layers.Dense(
                    hidden_size,
                    activation=self._activation_func
                )(lm_feat)
            lm_drop = keras.layers.Dropout(self._dropout_rate)(
                lm_feat, training=is_training)
            lm_score = keras.layers.Dense(1)(lm_drop)

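            # Distributed model: encode premise and hypothesis embeddings with
            # convolutions, combine them by element-wise product, and project the
            # result to a single distributed-match score.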
            dm_q_conv = keras.layers.Conv1D(
                self._dm_filters,
                self._dm_kernel_size,
                padding='same',
                activation=self._activation_func
            )(premise_tokens)
            dm_q_conv = keras.layers.Dropout(self._dropout_rate)(
                dm_q_conv, training=is_training)
            dm_q_mp = keras.layers.MaxPooling1D(
                pool_size=premise_tokens_ids.shape[1].value)(dm_q_conv)
            dm_q_rep = keras.layers.Reshape((dm_q_mp.shape[2].value, ))(dm_q_mp)
            dm_q_rep = keras.layers.Dense(self._dm_q_hidden_size)(
                dm_q_rep)
            dm_q_rep = keras.layers.Lambda(lambda x: tf.expand_dims(x, 1))(
                dm_q_rep)

            dm_d_conv1 = keras.layers.Conv1D(
                self._dm_filters,
                self._dm_kernel_size,
                padding='same',
                activation=self._activation_func
            )(hypothesis_tokens)
            dm_d_conv1 = keras.layers.Dropout(self._dropout_rate)(
                dm_d_conv1, training=is_training)
            dm_d_mp = keras.layers.MaxPooling1D(
                pool_size=self._dm_d_mpool)(dm_d_conv1)
            dm_d_conv2 = keras.layers.Conv1D(
                self._dm_filters, 1,
                padding='same',
                activation=self._activation_func
            )(dm_d_mp)
            dm_d_conv2 = keras.layers.Dropout(self._dropout_rate)(
                dm_d_conv2, training=is_training)

            h_dot = dm_q_rep * dm_d_conv2 #keras.layers.Lambda(self._hadamard_dot)([dm_q_rep, dm_d_conv2])
            dm_feat = keras.layers.Reshape((h_dot.shape[1].value*h_dot.shape[2].value, ))(h_dot)
            for hidden_size in self._dm_hidden_sizes:
                dm_feat = keras.layers.Dense(hidden_size)(dm_feat)
            dm_feat_drop = keras.layers.Dropout(self._dropout_rate)(
                dm_feat, training=is_training)
            dm_score = keras.layers.Dense(1)(dm_feat_drop)

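            # The representation fed to the output layer is the sum of the local and
            # distributed scores.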
            add = keras.layers.Add()([lm_score, dm_score])

            # Get prediction
            output_dict = self._make_output(add, params)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if 'label/labels' not in features:
                    raise ConfigureError("The input features should contain label with vocabulary namespace "
                                         "labels int %s dataset."%mode)
                labels_embedding = features_embedding['label/labels']
                labels = features['label/labels']

                loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params)
                output_dict['loss'] = loss
                metrics = dict()
                metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions'])
                metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions'])
                metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions'])
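                # Each tf.metrics.* call returns a (value, update_op) pair that the
                # estimator accumulates across evaluation batches.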
                # metrics['map'] = tf.metrics.average_precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=2)
                # metrics['precision_1'] = tf.metrics.precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'],
                #                                                    k=1, class_id=1)

                    #tf.metrics.auc(labels=labels, predictions=predictions)
                output_dict['metrics'] = metrics
                # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi,
                #                          premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits]
            return output_dict
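
The `_xor_match` helper used by the Lambda layer above is not shown in this example. The following is a minimal sketch under the assumption that it builds a binary exact-match interaction matrix between premise and hypothesis token ids, as in DUET-style local matching; the function body is illustrative, not the original code.

import tensorflow as tf

def _xor_match(x):
    """Hypothetical exact-match interaction (assumed behaviour, not the original code)."""
    premise_ids, hypothesis_ids = x  # shapes: (batch, prem_len), (batch, hyp_len)
    premise_expanded = tf.expand_dims(premise_ids, 2)        # (batch, prem_len, 1)
    hypothesis_expanded = tf.expand_dims(hypothesis_ids, 1)  # (batch, 1, hyp_len)
    # 1.0 where the premise token id equals the hypothesis token id, else 0.0,
    # giving a (batch, prem_len, hyp_len) matrix for the Conv1D in the local model.
    return tf.cast(tf.equal(premise_expanded, hypothesis_expanded), tf.float32)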