def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)

  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))

  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)

  for c in task_context:
    parser.AddEvaluation(c,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)


    parser.AddSaver(FLAGS.slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=c,
                                        corpus_name=FLAGS.output)

    run_parser(sess, parser, sink, sink_documents)
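The run_parser helper called above is not included in this snippet. The later examples on this page inline the same evaluation loop, so a minimal sketch consistent with them (a reconstruction that reuses the snippet's names, not the original helper) could look like:

def run_parser(sess, parser, sink, sink_documents):
  """Drains the evaluation queues and writes parsed documents to the sink."""
  num_epochs = None
  num_documents = 0
  while True:
    # Fetch one evaluation batch: epoch counter, token metrics, and documents.
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      num_documents += len(tf_documents)
      # Write the parsed documents to the configured output corpus.
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    # Stop once the reader rolls over into a new epoch.
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)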
Example #2
def Eval(sess, parser, task_context):
    parser.AddEvaluation(task_context,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)

    parser.AddSaver(FLAGS.slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=FLAGS.output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
        tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
            parser.evaluation['epochs'],
            parser.evaluation['eval_metrics'],
            parser.evaluation['documents'],
        ])

        if len(tf_documents):
            logging.info('Processed %d documents', len(tf_documents))
            num_documents += len(tf_documents)
            sess.run(sink, feed_dict={sink_documents: tf_documents})

        num_tokens += tf_eval_metrics[0]
        num_correct += tf_eval_metrics[1]
        if num_epochs is None:
            num_epochs = tf_eval_epochs
        elif num_epochs < tf_eval_epochs:
            break

    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
        eval_metric = 100.0 * num_correct / num_tokens
        logging.info('num correct tokens: %d', num_correct)
        logging.info('total tokens: %d', num_tokens)
        logging.info(
            'Seconds elapsed in evaluation: %.2f, '
            'eval metric: %.2f%%',
            time.time() - t, eval_metric)
    return num_documents
Example #3
def Eval(sess, parser, task_context):
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)

  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=FLAGS.task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])

    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})

    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break

  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)

  return num_documents
Example #4
    def _parse_impl(self):
        with tf.variable_scope(self._pg.variable_scope):
            tf_eval_epochs, tf_eval_metrics, tf_documents = self._sess.run([
                self._parser.evaluation['epochs'],
                self._parser.evaluation['eval_metrics'],
                self._parser.evaluation['documents'],
            ])

            sink_documents = tf.placeholder(tf.string)
            sink = gen_parser_ops.document_sink(sink_documents,
                                                task_context=self.task_context,
                                                corpus_name='stdout-conll')

            self._sess.run(sink, feed_dict={sink_documents: tf_documents})

            sys.stdout.write('\n')
            sys.stdout.flush()
Example #5
    def _eval(self):

        with tf.variable_scope(self._variable_scope):
            sink_documents = tf.placeholder(tf.string)
            sink = gen_parser_ops.document_sink(
                sink_documents,
                task_context=self._task_context,
                corpus_name=self._output)

            t = time.time()
            num_epochs = None
            num_tokens = 0
            num_correct = 0
            num_documents = 0
            while True:
                tf_eval_epochs, tf_eval_metrics, tf_documents = self._sess.run(
                    [
                        self._parser.evaluation['epochs'],
                        self._parser.evaluation['eval_metrics'],
                        self._parser.evaluation['documents'],
                    ])

                if len(tf_documents):
                    logging.info('Processed %d documents', len(tf_documents))
                    num_documents += len(tf_documents)
                    self._sess.run(sink,
                                   feed_dict={sink_documents: tf_documents})

                num_tokens += tf_eval_metrics[0]
                num_correct += tf_eval_metrics[1]
                if num_epochs is None:
                    num_epochs = tf_eval_epochs
                elif num_epochs < tf_eval_epochs:
                    break

            logging.info('Total processed documents: %d', num_documents)
            if num_tokens > 0:
                eval_metric = 100.0 * num_correct / num_tokens
                logging.info('num correct tokens: %d', num_correct)
                logging.info('total tokens: %d', num_tokens)
                logging.info(
                    'Seconds elapsed in evaluation: %.2f, '
                    'eval metric: %.2f%%',
                    time.time() - t, eval_metric)
Example #6
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(source,
                                                task_context=OutputPath(
                                                    'context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example #7
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    if not gfile.IsDirectory(OutputPath('')):
        gfile.MakeDirs(OutputPath(''))

    # Rewrite context.
    RewriteContext()

    # Creates necessary term maps.
    if FLAGS.compute_lexicon:
        logging.info('Computing lexicon...')
        with tf.Session(FLAGS.tf_master) as sess:
            gen_parser_ops.lexicon_builder(
                task_context=OutputPath('context'),
                corpus_name=FLAGS.training_corpus).run()
    with tf.Session(FLAGS.tf_master) as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                        arg_prefix=FLAGS.arg_prefix))

    # Well formed and projectivize.
    if FLAGS.projectivize_training_set:
        logging.info('Preprocessing...')
        with tf.Session(FLAGS.tf_master) as sess:
            source, last = gen_parser_ops.document_source(
                task_context=OutputPath('context'),
                batch_size=FLAGS.batch_size,
                corpus_name=FLAGS.training_corpus)
            sink = gen_parser_ops.document_sink(
                task_context=OutputPath('context'),
                corpus_name='projectivized-training-corpus',
                documents=gen_parser_ops.projectivize_filter(
                    gen_parser_ops.well_formed_filter(
                        source, task_context=OutputPath('context')),
                    task_context=OutputPath('context')))
            while True:
                tf_last, _ = sess.run([last, sink])
                if tf_last:
                    break

    logging.info('Training...')
    with tf.Session(FLAGS.tf_master) as sess:
        Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example #8
tagger = structured_graph_builder.StructuredGraphBuilder(
    num_actions, feature_sizes, domain_sizes, embedding_dims,
    hidden_layer_sizes, gate_gradients=True, arg_prefix=tagger_arg_prefix,
    beam_size=beam_size, max_steps=max_steps)
tagger.AddEvaluation(task_context,
                     batch_size,
                     corpus_name=input_style,
                     evaluation_max_steps=max_steps)

tagger.AddSaver(slim_model)
sess.run(tagger.inits.values())
tagger.saver.restore(sess, tagger_model_path)

sink_documents = tf.placeholder(tf.string)
sink = gen_parser_ops.document_sink(sink_documents,
                                    task_context=task_context,
                                    corpus_name='stdout-conll')


def stdin_handler(signum, frame):
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        tagger.evaluation['epochs'],
        tagger.evaluation['eval_metrics'],
        tagger.evaluation['documents'],
    ])

    sys.stdout.write('\n## result start\n')
    sys.stdout.flush()

    if len(tf_documents):
        sess.run(sink, feed_dict={sink_documents: tf_documents})
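The snippet defines stdin_handler with the (signum, frame) signature of a Python signal handler but never shows how it is registered. A purely hypothetical wiring (the signal number and the wait loop are assumptions, not part of the original snippet) might be:

import signal

# Hypothetical registration: re-run the tagger and emit results whenever the
# chosen signal arrives. The signal actually used by the original script is
# not shown on this page.
signal.signal(signal.SIGALRM, stdin_handler)
while True:
    signal.pause()  # Block until the next signal wakes the handler.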
Example #9
def Eval(sess):
    """Builds and evaluates a network."""
    task_context = FLAGS.task_context
    if FLAGS.resource_dir:
        task_context = RewriteContext(task_context)
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=FLAGS.arg_prefix))

    t = time.time()
    hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
    LOGGING.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)
    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(num_actions,
                                            feature_sizes,
                                            domain_sizes,
                                            embedding_dims,
                                            hidden_layer_sizes,
                                            gate_gradients=True,
                                            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            gate_gradients=True,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)
    parser.AddEvaluation(task_context,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)

    parser.AddSaver(FLAGS.slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=FLAGS.output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
        tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
            parser.evaluation['epochs'],
            parser.evaluation['eval_metrics'],
            parser.evaluation['documents'],
        ])

        if len(tf_documents):
            LOGGING.info('Processed %d documents', len(tf_documents))
            num_documents += len(tf_documents)
            sess.run(sink, feed_dict={sink_documents: tf_documents})

        num_tokens += tf_eval_metrics[0]
        num_correct += tf_eval_metrics[1]
        if num_epochs is None:
            num_epochs = tf_eval_epochs
        elif num_epochs < tf_eval_epochs:
            break

    LOGGING.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
        eval_metric = 100.0 * num_correct / num_tokens
        LOGGING.info('num correct tokens: %d', num_correct)
        LOGGING.info('total tokens: %d', num_tokens)
        LOGGING.info(
            'Seconds elapsed in evaluation: %.2f, '
            'eval metric: %.2f%%',
            time.time() - t, eval_metric)
Example #10
def main(unused_argv):
    logging.set_verbosity(logging.INFO)

    model_dir = FLAGS.model_dir
    task_context = "%s/context.pbtxt" % model_dir

    common_params = {
        "task_context": task_context,
        "beam_size": 8,
        "max_steps": 1000,
        "graph_builder": "structured",
        "batch_size": 1024,
        "slim_model": True,
    }

    model = {
        "brain_parser": {
            "arg_prefix": "brain_parser",
            "hidden_layer_sizes": "512,512",
            # input is taken from input tensor, not from corpus
            "input": None,
            "model_path": "%s/parser-params" % model_dir,
        },
    }

    for prefix in ["brain_parser"]:
        model[prefix].update(common_params)
        feature_sizes, domain_sizes, embedding_dims, num_actions = GetFeatureSize(
            task_context, prefix)
        model[prefix].update({
            'feature_sizes': feature_sizes,
            'domain_sizes': domain_sizes,
            'embedding_dims': embedding_dims,
            'num_actions': num_actions
        })

    with tf.Session() as sess:
        if FLAGS.export_path is not None:
            text_input = tf.placeholder(tf.string, [None])
        else:
            text_input = tf.constant(["parsey is the greatest"], tf.string)

        # corpus_name must be specified and valid because it indirectly informs
        # the document format ("english-text" vs "conll-sentence") used to parse
        # the input text.
        document_source = gen_parser_ops.document_source(
            text=text_input,
            task_context=task_context,
            corpus_name="stdin-conll",
            batch_size=common_params['batch_size'],
            documents_from_input=True)

        for prefix in ["brain_parser"]:
            with tf.variable_scope(prefix):
                if True or prefix == "brain_tagger":
                    #source = document_source.documents if prefix == "brain_tagger" else model["brain_tagger"]["documents"]
                    source = document_source.documents
                    model[prefix]["documents"] = Build(sess, source,
                                                       model[prefix])

        if FLAGS.export_path is None:
            sink = gen_parser_ops.document_sink(
                model["brain_parser"]["documents"],
                task_context=task_context,
                corpus_name="stdout-conll")
            sess.run(sink)
        else:
            assets = []
            for model_file in os.listdir(model_dir):
                path = os.path.join(model_dir, model_file)
                if not os.path.isdir(path):
                    assets.append(tf.constant(path))
            ExportModel(sess, FLAGS.export_path, text_input,
                        model["brain_parser"]["documents"], assets)
Example #11
def Eval(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds and evaluates a network.

  Args:
    sess: tensorflow session to use
    num_actions: number of possible golden actions
    feature_sizes: size of each feature vector
    domain_sizes: number of possible feature ids in each feature vector
    embedding_dims: embedding dimension for each feature group
  """
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = FLAGS.task_context
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)

  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=FLAGS.task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])

    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})

    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break

  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
Example #12
def _perform_action(action=None):
    arg_prefix = action
    task_context = task_context_path

    if action == "brain_tagger":
        hidden_layer_sizes = [64]
        model_path = tagger_params_path
        output = 'output-to-file'
        input = 'input-from-file'
    elif action == "brain_parser":
        hidden_layer_sizes = [512, 512]
        model_path = parser_params_path
        output = 'output-to-file-conll'
        input = 'input-from-file-conll'
    else:
        raise Exception("Do not recognize action %s" % action)

    with tf.Session() as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=task_context,
                                        arg_prefix=arg_prefix))

    beam_size = 8
    max_steps = 1000
    batch_size = 1024
    slim_model = True

    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=arg_prefix,
        beam_size=beam_size,
        max_steps=max_steps)

    parser.AddEvaluation(task_context,
                         batch_size,
                         corpus_name=input,
                         evaluation_max_steps=max_steps)

    with tf.Session() as sess:

        parser.AddSaver(slim_model)
        sess.run(parser.inits.values())
        parser.saver.restore(sess, model_path)

        sink_documents = tf.placeholder(tf.string)
        sink = gen_parser_ops.document_sink(sink_documents,
                                            task_context=task_context,
                                            corpus_name=output)
        t = time.time()
        num_epochs = None
        num_tokens = 0
        num_correct = 0
        num_documents = 0
        while True:
            tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
                parser.evaluation['epochs'],
                parser.evaluation['eval_metrics'],
                parser.evaluation['documents'],
            ])

            if len(tf_documents):
                logging.info('Processed %d documents', len(tf_documents))
                num_documents += len(tf_documents)
                sess.run(sink, feed_dict={sink_documents: tf_documents})

            num_tokens += tf_eval_metrics[0]
            num_correct += tf_eval_metrics[1]
            if num_epochs is None:
                num_epochs = tf_eval_epochs
            elif num_epochs < tf_eval_epochs:
                break

        logging.info('Total processed documents: %d', num_documents)
        if num_tokens > 0:
            eval_metric = 100.0 * num_correct / num_tokens
            logging.info('num correct tokens: %d', num_correct)
            logging.info('total tokens: %d', num_tokens)
            logging.info(
                'Seconds elapsed in evaluation: %.2f, '
                'eval metric: %.2f%%',
                time.time() - t, eval_metric)
feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
    gen_parser_ops.feature_size(task_context=task_context, arg_prefix=tokenizer_arg_prefix))
hidden_layer_sizes = map(int, tokenizer_hidden_layer_sizes.split(','))
tokenizer = structured_graph_builder.StructuredGraphBuilder(
    num_actions, feature_sizes, domain_sizes, embedding_dims,
    hidden_layer_sizes, gate_gradients=True, arg_prefix=tokenizer_arg_prefix,
    beam_size=beam_size, max_steps=max_steps)
tokenizer.AddEvaluation(task_context, batch_size, corpus_name='stdin-untoken',
                        evaluation_max_steps=max_steps)

tokenizer.AddSaver(slim_model)
sess.run(tokenizer.inits.values())
tokenizer.saver.restore(sess, tokenizer_model_path)

sink_documents = tf.placeholder(tf.string)
sink = gen_parser_ops.document_sink(sink_documents, task_context=task_context,
                                    corpus_name='stdin-untoken')


def stdin_handler(signum, frame):
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        tokenizer.evaluation['epochs'],
        tokenizer.evaluation['eval_metrics'],
        tokenizer.evaluation['documents'],
    ])

    sys.stdout.write('\n## result start\n')
    sys.stdout.flush()

    if len(tf_documents):
        sess.run(sink, feed_dict={sink_documents: tf_documents})