Example #1
def main(argv):
    del argv  # unused

    embeddings_path = FLAGS.embeddings_path

    preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)

    nltk.download("punkt")
    train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
    dataset = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=train_preprocess_fn)

    # TODO: Move embedding *into* Keras model.
    model_tf = tf_gru_attention_multiclass.TFRNNModel(dataset.labels())
    model = preprocessor.add_embedding_to_model(model_tf,
                                                base_model.TOKENS_FEATURE_KEY)

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval()

    serving_input_fn = serving_input.create_serving_input_fn(
        word_to_idx=preprocessor._word_to_idx,
        unknown_token=preprocessor._unknown_token,
        text_feature_name=base_model.TOKENS_FEATURE_KEY,
        example_key_name=base_model.EXAMPLE_KEY)
    trainer.export(serving_input_fn, base_model.EXAMPLE_KEY)
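These entry points rely on absl-style flags: main(argv) receives leftover
command-line arguments (deleted as unused) and the real configuration comes
from FLAGS. A minimal sketch of the wiring the examples assume, with flag
names taken from the code above and purely illustrative defaults:

from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string("embeddings_path", None,
                    "Path to the pre-trained word embeddings.")
flags.DEFINE_string("train_path", None, "Path to the training TFRecord file.")
flags.DEFINE_string("validate_path", None,
                    "Path to the validation TFRecord file.")
flags.DEFINE_integer("batch_size", 64, "Batch size for training.")
flags.DEFINE_integer("train_steps", 10000, "Total number of training steps.")
flags.DEFINE_integer("eval_period", 500, "Steps between evaluation runs.")
flags.DEFINE_integer("eval_steps", 100, "Number of steps per evaluation.")

if __name__ == "__main__":
    app.run(main)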
Example #2
def main(argv):
    del argv  # unused

    embeddings_path = FLAGS.embeddings_path
    is_binary_embedding = FLAGS.is_binary_embedding
    text_feature_name = FLAGS.text_feature_name

    preprocessor = text_preprocessor.TextPreprocessor(embeddings_path,
                                                      is_binary_embedding)

    nltk.download("punkt")
    train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
    dataset = tfrecord_input.TFRecordInput(
        train_path=FLAGS.train_path,
        validate_path=FLAGS.validate_path,
        text_feature=text_feature_name,
        labels=LABELS,
        train_preprocess_fn=train_preprocess_fn,
        batch_size=FLAGS.batch_size)

    # TODO: Move embedding *into* Keras model.
    model = preprocessor.add_embedding_to_model(
        keras_cnn.KerasCNNModel(set(LABELS.keys())), text_feature_name)

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval(FLAGS.train_steps, FLAGS.eval_period,
                            FLAGS.eval_steps)
Example #3
def main(argv):
    del argv  # unused

    dataset = tfrecord_input.TFRecordInput()
    model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())

    trainer = model_trainer.ModelTrainer(dataset,
                                         model,
                                         warm_start_from=FLAGS.warm_start_from)
    trainer.train_with_eval()

    keys = [("label", "probabilities")]
    predictions = list(trainer.predict_on_dev(predict_keys=keys))

    valid_path_csv = FLAGS.validate_path.replace("..tfrecord", ".csv")
    df = pd.read_csv(valid_path_csv)
    labels = df["label"].values
    community = os.path.basename(FLAGS.validate_path).split("..")[0]

    assert len(labels) == len(predictions), \
      "Labels and predictions must have the same length."

    d = {
        "label": labels,
        "prediction": [p[keys[0]][1] for p in predictions],
        "community": [community for p in predictions],
    }

    df = pd.DataFrame(data=d)
    df.to_csv(path_or_buf=FLAGS.tmp_results_path,
              mode='a+',
              index=False,
              header=False)
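Because the rows are appended without a header, any consumer has to supply
the column names itself. A hypothetical downstream step (not part of the
source) that reads the accumulated results back and scores each community
with scikit-learn:

import pandas as pd
from sklearn.metrics import roc_auc_score

# Column order matches the dict written above: label, prediction, community.
results = pd.read_csv(FLAGS.tmp_results_path,
                      names=["label", "prediction", "community"])
for community, group in results.groupby("community"):
    print("%s: ROC AUC = %.3f"
          % (community, roc_auc_score(group["label"], group["prediction"])))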
Example #4
def main(argv):
  del argv  # unused

  embeddings_path = FLAGS.embeddings_path
  text_feature_name = FLAGS.text_feature_name
  key_name = FLAGS.key_name

  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)

  nltk.download("punkt")
  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInput(
      train_path=FLAGS.train_path,
      validate_path=FLAGS.validate_path,
      text_feature=text_feature_name,
      labels=LABELS,
      train_preprocess_fn=train_preprocess_fn,
      batch_size=FLAGS.batch_size)

  # TODO: Move embedding *into* Keras model.
  model_keras = keras_gru_attention.KerasRNNModel(
      set(LABELS.keys()), preprocessor._embedding_size)
  model = preprocessor.add_embedding_to_model(
      model_keras, text_feature_name)

  trainer = model_trainer.ModelTrainer(dataset, model)
  trainer.train_with_eval(FLAGS.train_steps, FLAGS.eval_period,
                          FLAGS.eval_steps)

  serving_input_fn = serving_input.create_serving_input_fn(
      word_to_idx=preprocessor._word_to_idx,
      unknown_token=preprocessor._unknown_token,
      text_feature_name=text_feature_name,
      key_name=key_name)
  trainer.export(serving_input_fn)
Example #5
def main(argv):
    del argv  # unused

    embeddings_path = FLAGS.embeddings_path
    is_binary_embedding = FLAGS.is_binary_embedding
    text_feature_name = FLAGS.text_feature_name
    key_name = FLAGS.key_name

    preprocessor = text_preprocessor.TextPreprocessor(embeddings_path,
                                                      is_binary_embedding)

    nltk.download("punkt")
    train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
    dataset = tfrecord_input.TFRecordInput(
        train_path=FLAGS.train_path,
        validate_path=FLAGS.validate_path,
        text_feature=text_feature_name,
        labels=LABELS,
        train_preprocess_fn=train_preprocess_fn,
        batch_size=FLAGS.batch_size,
        max_seq_len=5000)

    model_tf = tf_word_label_embedding.TFWordLabelEmbeddingModel(
        text_feature_name, "frac_neg")
    model = preprocessor.add_embedding_to_model(model_tf, text_feature_name)

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval(FLAGS.train_steps, FLAGS.eval_period,
                            FLAGS.eval_steps)
Example #6
def main(argv):
    del argv  # unused

    dataset = tfrecord_input.TFRecordInput()
    model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval()

    serving_input_fn = serving_input.create_text_serving_input_fn(
        text_feature_name=base_model.TEXT_FEATURE_KEY,
        example_key_name=base_model.EXAMPLE_KEY)
    trainer.export(serving_input_fn, base_model.EXAMPLE_KEY)
Example #7
def main(argv):
  del argv  # unused

  preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  nltk.download("punkt")
  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=train_preprocess_fn)

  # TODO: Move embedding *into* Keras model.
  model = preprocessor.add_embedding_to_model(
      keras_cnn.KerasCNNModel(dataset.labels()), base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(dataset, model)
  trainer.train_with_eval()
Example #8
def main(argv):
    del argv  # unused

    preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

    nltk.download('punkt')
    train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
    dataset = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=train_preprocess_fn, max_seq_len=5000)

    model_tf = tf_word_label_embedding.TFWordLabelEmbeddingModel(
        dataset.labels())
    model = preprocessor.add_embedding_to_model(model_tf,
                                                base_model.TOKENS_FEATURE_KEY)

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval()
Example #9
def main(argv):
    del argv  # unused

    embeddings_path = FLAGS.embeddings_path

    preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)

    nltk.download("punkt")
    train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
    dataset = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=train_preprocess_fn)

    # TODO: Move embedding *into* Keras model.
    model_tf = tf_gru_attention.TFRNNModel(dataset.labels())
    model = preprocessor.add_embedding_to_model(model_tf,
                                                base_model.TOKENS_FEATURE_KEY)

    trainer = model_trainer.ModelTrainer(dataset,
                                         model,
                                         warm_start_from=FLAGS.warm_start_from)
    trainer.train_with_eval()

    key = ('label', 'logistic')
    predictions = list(trainer.predict_on_dev(predict_keys=[key]))

    valid_path_csv = FLAGS.validate_path.replace("..tfrecord", ".csv")
    df = pd.read_csv(valid_path_csv)
    labels = df['label'].values

    community = os.path.basename(FLAGS.validate_path).split("..")[0]

    assert len(labels) == len(predictions), \
      "Labels and predictions must have the same length."

    d = {
        "label": labels,
        "prediction": [p[key][0] for p in predictions],
        "community": [community for p in predictions],
    }

    df = pd.DataFrame(data=d)
    df.to_csv(path_or_buf=FLAGS.tmp_results_path,
              mode='a+',
              index=False,
              header=False)
Example #10
def main(argv):
    del argv  # unused

    dataset = tfrecord_simple.TFSimpleRecordInput(
        train_path=FLAGS.train_path,
        validate_path=FLAGS.validate_path,
        text_feature=FLAGS.text_feature_name,
        labels=LABELS,
        batch_size=FLAGS.batch_size)

    model = tf_hub_classifier.TFHubClassifierModel(FLAGS.text_feature_name,
                                                   set(LABELS.keys()))

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval(FLAGS.train_steps, FLAGS.eval_period,
                            FLAGS.eval_steps)

    serving_input_fn = create_serving_input_fn(
        text_feature_name=FLAGS.text_feature_name, key_name=FLAGS.key_name)
    trainer.export(serving_input_fn)
Example #11
def main(argv):
    del argv  # unused

    module = hub.Module(FLAGS.model_spec)
    with tf.Session() as sess:
        spm_path = sess.run(module(signature='spm_path'))

    dataset = TFRecordWithSentencePiece(spm_path)
    model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval()

    # Placeholders for the components of a tf.SparseTensor of token ids.
    values = tf.placeholder(tf.int64, shape=[None], name='values')
    indices = tf.placeholder(tf.int64, shape=[None, 2], name='indices')
    dense_shape = tf.placeholder(tf.int64, shape=[None], name='dense_shape')
    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        {
            'values': values,
            'indices': indices,
            'dense_shape': dense_shape
        })
    trainer.export(serving_input_fn, None)
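The three placeholders above are the components of a tf.SparseTensor of
token ids. A sketch of how a client might pack a batch of SentencePiece id
sequences into that form; batch_ids is illustrative, and real ids would come
from a SentencePiece processor loaded from spm_path:

import numpy as np

batch_ids = [[13, 7, 42], [5, 9]]  # two tokenized sentences (made-up ids)

# Flatten all ids, record each id's (row, column) position, and note the
# dense shape [batch_size, max_sequence_length].
values = np.array([t for seq in batch_ids for t in seq], dtype=np.int64)
indices = np.array([[row, col]
                    for row, seq in enumerate(batch_ids)
                    for col in range(len(seq))], dtype=np.int64)
dense_shape = np.array(
    [len(batch_ids), max(len(seq) for seq in batch_ids)], dtype=np.int64)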
Example #12
def main(argv):
    del argv  # unused

    embeddings_path = FLAGS.embeddings_path
    text_feature_name = FLAGS.text_feature_name

    preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)
    nltk.download("punkt")
    tokenize_op = preprocessor.tokenize_tensor_op(nltk.word_tokenize)

    dataset = tfrecord_input.TFRecordInput(train_path=FLAGS.train_path,
                                           validate_path=FLAGS.validate_path,
                                           text_feature=text_feature_name,
                                           labels=LABELS,
                                           feature_preprocessor=tokenize_op,
                                           batch_size=FLAGS.batch_size)

    model = preprocessor.add_embedding_to_model(
        tf_gru_attention.TFRNNModel(text_feature_name, set(LABELS.keys())),
        text_feature_name)

    trainer = model_trainer.ModelTrainer(dataset, model)
    trainer.train_with_eval(FLAGS.train_steps, FLAGS.eval_period,
                            FLAGS.eval_steps)