Example #1
        def model_fn(features, labels, mode, params):
            # Read inputs
            dropout = args.dropout
            training = (mode == tf.estimator.ModeKeys.TRAIN)

            input_ids = features["input_ids"]
            mask = features["mask"]
            label_ids = None
            if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
                label_ids = features["label_ids"]
            if args.embedding == 'plain':
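                # 'plain' assumes input_ids already holds dense numeric feature
                # values, so they are used directly after a float cast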
                embeddings = tf.cast(input_ids, tf.float32)
            elif args.embedding == 'word2id':
                # Look up trainable embeddings by word id
                with Path(args.vocab_words).open(encoding='utf-8') as f:
                    vocab_length = len(f.readlines())
                embeddings = embedding(input_ids, vocab_length, args)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)

            elif args.embedding == 'bert':
                from my_model.embeddings.embedding import get_bert_embedding
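                # Contextual token embeddings from a pretrained BERT encoder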
                segment_ids = features["segment_ids"]
                embeddings = get_bert_embedding(args.bert_config_file,
                                                training,
                                                input_ids,
                                                mask,
                                                segment_ids,
                                                use_one_hot_embeddings=False)

            else:
                # Default: pretrained GloVe embeddings, kept frozen
                glove = np.load(args.glove)['embeddings']  # np.array
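                # Append one all-zero row as the vector for out-of-vocabulary ids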
                variable = np.vstack([glove, [[0.] * args.dim]])
                variable = tf.Variable(variable,
                                       dtype=tf.float32,
                                       trainable=False)
                embeddings = tf.nn.embedding_lookup(variable, input_ids)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)

            (total_loss, logits,
             predicts) = create_model(embeddings,
                                      label_ids,
                                      mask,
                                      mode,
                                      training,
                                      self.num_labels,
                                      use_one_hot_embeddings=False)
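            # create_model builds the task head on top of the embeddings and
            # returns the total loss, per-token logits, and predicted ids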
            tvars = tf.trainable_variables()
            if args.init_checkpoint:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, args.init_checkpoint)
                tf.train.init_from_checkpoint(args.init_checkpoint,
                                              assignment_map)
                self.logging.debug("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    self.logging.debug("  name = %s, shape = %s%s", var.name,
                                       var.shape, init_string)
            if mode == tf.estimator.ModeKeys.TRAIN:
                warmup_steps = args.warmup_steps
                step = tf.to_float(tf.train.get_global_step())
                if args.learning_rate_decay == 'sqrt':
                    lr_warmup = args.learning_rate_peak * tf.minimum(
                        1.0, step / warmup_steps)
                    lr_decay = args.learning_rate_peak * tf.minimum(
                        1.0, tf.sqrt(warmup_steps / step))
                    lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
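                    # lr rises linearly to the peak during warmup, then decays
                    # proportionally to 1/sqrt(step)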
                elif args.learning_rate_decay == 'exp':
                    lr = tf.train.exponential_decay(
                        args.learning_rate_peak,
                        global_step=step,
                        decay_steps=args.decay_steps,
                        decay_rate=args.decay_rate)
                elif args.learning_rate_decay == 'bert':
                    num_train_steps = int(self.len_train_examples /
                                          args.batch_size * args.epochs)
                    # Warm up over the first 10% of training steps
                    num_warmup_steps = int(num_train_steps * 0.1)
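                    # In the BERT reference implementation, create_optimizer builds
                    # AdamWeightDecay with linear warmup and linear decay to zero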
                    train_op = optimization.create_optimizer(
                        total_loss,
                        args.learning_rate,
                        num_train_steps,
                        num_warmup_steps,
                        use_tpu=False)
                    output_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        loss=total_loss,
                        train_op=train_op)
                    return output_spec
                else:
                    self.logging.info(
                        'learning rate decay strategy not supported')
                    sys.exit()
                train_op = tf.train.AdamOptimizer(lr).minimize(
                    total_loss,
                    global_step=tf.train.get_or_create_global_step())
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op)

            elif mode == tf.estimator.ModeKeys.EVAL:
                # Metrics: use the attention mask so padding positions are ignored
                weights = mask
                pred_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
                metrics = {
                    'acc': tf.metrics.accuracy(label_ids, pred_ids, weights),
                }
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metrics)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predicts)
            return output_spec
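
As a side note, both examples implement the same 'sqrt' learning-rate schedule: linear warmup to a peak, then 1/sqrt(step) decay. A minimal standalone sketch (plain NumPy; the function name and defaults are illustrative, not from the original code):

    import numpy as np

    def sqrt_schedule(step, peak=1e-3, warmup_steps=1000.0):
        """Linear warmup to `peak`, then decay proportional to 1/sqrt(step)."""
        step = max(float(step), 1.0)  # guard against division by zero at step 0
        lr_warmup = peak * min(1.0, step / warmup_steps)
        lr_decay = peak * min(1.0, np.sqrt(warmup_steps / step))
        return lr_warmup if step < warmup_steps else lr_decay

    assert sqrt_schedule(500) == 5e-4    # halfway through warmup: half the peak
    assert sqrt_schedule(4000) == 5e-4   # after warmup: peak / sqrt(4)
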
Example #2
        def model_fn(features, labels, mode, params):
            # For serving, features are a bit different
            if isinstance(features, dict):
                features = features['words'], features['nwords']

            # Read vocabs and inputs
            dropout = args.dropout
            words, nwords = features
            training = (mode == tf.estimator.ModeKeys.TRAIN)
            vocab_words = tf.contrib.lookup.index_table_from_file(
                args.vocab_words,
                num_oov_buckets=args.num_oov_buckets)
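            # Count the non-'O' tags; num_tags reserves one extra slot for 'O'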
            with Path(args.vocab_tags).open() as f:
                indices = [
                    idx for idx, tag in enumerate(f) if tag.strip() != 'O'
                ]
                num_tags = len(indices) + 1

            word_ids = vocab_words.lookup(words)
            if args.embedding == 'word2id':
                # Look up trainable embeddings by word id
                with Path(args.vocab_words).open(encoding='utf-8') as f:
                    vocab_length = len(f.readlines())
                embeddings = embedding(word_ids, vocab_length, args)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)

            else:
                # Default: pretrained GloVe embeddings, kept frozen
                glove = np.load(args.glove)['embeddings']  # np.array
                variable = np.vstack([glove, [[0.] * args.dim]])
                variable = tf.Variable(variable,
                                       dtype=tf.float32,
                                       trainable=False)
                embeddings = tf.nn.embedding_lookup(variable, word_ids)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)
            # LSTM
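            # Fused LSTM cells are time-major: transpose to [time, batch, depth]
            # here and back to batch-major after the bidirectional pass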
            t = tf.transpose(embeddings, perm=[1, 0, 2])
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(args.lstm_size)
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(args.lstm_size)
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            output_fw, _ = lstm_cell_fw(t,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            output_bw, _ = lstm_cell_bw(t,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.transpose(output, perm=[1, 0, 2])
            output = tf.layers.dropout(output, rate=dropout, training=training)

            # CRF
            logits = tf.layers.dense(output, num_tags)
            crf_params = tf.get_variable("crf", [num_tags, num_tags],
                                         dtype=tf.float32)
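            # Viterbi decoding: the best-scoring tag sequence under the unary
            # logits plus the learned transition matrix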
            pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

            if mode == tf.estimator.ModeKeys.PREDICT:
                # Predictions
                reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
                    args.vocab_tags)
                pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
                predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
                return tf.estimator.EstimatorSpec(mode,
                                                  predictions=predictions)
            else:
                # Loss
                vocab_tags = tf.contrib.lookup.index_table_from_file(
                    args.vocab_tags)
                tags = vocab_tags.lookup(labels)
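                # crf_log_likelihood scores each sequence under the linear-chain
                # CRF; the loss is the mean negative log-likelihood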
                log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                    logits, tags, nwords, crf_params)
                loss = tf.reduce_mean(-log_likelihood)

                # Metrics
                weights = tf.sequence_mask(nwords)
                metrics = {
                    'acc': tf.metrics.accuracy(tags, pred_ids, weights),
                    'precision': precision(tags, pred_ids, num_tags, indices, weights),
                    'recall': recall(tags, pred_ids, num_tags, indices, weights),
                    'f1': f1(tags, pred_ids, num_tags, indices, weights),
                }
                for metric_name, op in metrics.items():
                    tf.summary.scalar(metric_name, op[1])

                if mode == tf.estimator.ModeKeys.EVAL:
                    return tf.estimator.EstimatorSpec(mode,
                                                      loss=loss,
                                                      eval_metric_ops=metrics)

                elif mode == tf.estimator.ModeKeys.TRAIN:
                    warmup_steps = args.warmup_steps
                    step = tf.to_float(tf.train.get_global_step())
                    if args.learning_rate_decay == 'sqrt':
                        lr_warmup = args.learning_rate_peak * tf.minimum(
                            1.0, step / warmup_steps)
                        lr_decay = args.learning_rate_peak * tf.minimum(
                            1.0, tf.sqrt(warmup_steps / step))
                        lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
                    elif args.learning_rate_decay == 'exp':
                        lr = tf.train.exponential_decay(
                            args.learning_rate_peak,
                            global_step=step,
                            decay_steps=args.decay_steps,
                            decay_rate=args.decay_rate)
                    else:
                        self.logging.info(
                            'learning rate decay strategy not supported')
                        sys.exit()
                    train_op = tf.train.AdamOptimizer(lr).minimize(
                        loss, global_step=tf.train.get_or_create_global_step())
                    return tf.estimator.EstimatorSpec(mode,
                                                      loss=loss,
                                                      train_op=train_op)
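
For completeness, a hedged sketch of how a model_fn like these plugs into the TF 1.x Estimator API (the input_fn, tensors, and model_dir below are hypothetical stand-ins, not part of the original code):

    import tensorflow as tf

    def toy_input_fn():
        # One sentence shaped like Example #2's features: (words, nwords), labels
        words = tf.constant([['John', 'lives', 'here']])
        nwords = tf.constant([3], dtype=tf.int32)
        labels = tf.constant([['B-PER', 'O', 'O']])
        return tf.data.Dataset.from_tensors(((words, nwords), labels)).repeat(8)

    estimator = tf.estimator.Estimator(model_fn, model_dir='results/model')
    estimator.train(toy_input_fn)                  # runs the TRAIN branch
    preds = list(estimator.predict(toy_input_fn))  # runs the PREDICT branch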