Example #1
    def testParsingReaderOp(self):
        # Runs the reader over the test input for two epochs.
        num_steps_a = 0
        num_actions = 0
        num_word_ids = 0
        num_tag_ids = 0
        num_label_ids = 0
        batch_size = 10
        with self.test_session() as sess:
            (words, tags,
             labels), epochs, gold_actions = (gen_parser_ops.gold_parse_reader(
                 self._task_context,
                 3,
                 batch_size,
                 corpus_name='training-corpus'))
            while True:
                tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = (
                    sess.run([gold_actions, epochs, words, tags, labels]))
                num_steps_a += 1
                num_actions = max(num_actions, max(tf_gold_actions) + 1)
                num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
                num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
                num_label_ids = max(num_label_ids,
                                    self.GetMaxId(tf_labels) + 1)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Runs the reader again, this time with a lot of added graph nodes.
        num_steps_b = 0
        with self.test_session() as sess:
            num_features = [6, 6, 4]
            num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
            embedding_sizes = [8, 8, 8]
            hidden_layer_sizes = [32, 32]
            # Here we aim to test iteration of the reader op in a complex
            # network, not the GraphBuilder itself.
            parser = graph_builder.GreedyParser(num_actions, num_features,
                                                num_feature_ids,
                                                embedding_sizes,
                                                hidden_layer_sizes)
            parser.AddTraining(self._task_context,
                               batch_size,
                               corpus_name='training-corpus')
            sess.run(list(parser.inits.values()))
            while True:
                tf_epochs, tf_cost, _ = sess.run([
                    parser.training['epochs'], parser.training['cost'],
                    parser.training['train_op']
                ])
                num_steps_b += 1
                self.assertGreaterEqual(tf_cost, 0)
                self.assertIn(tf_epochs, [0, 1, 2])
                if tf_epochs > 1:
                    break

        # Assert that the two runs made the exact same number of steps.
        logging.info('Number of steps in the two runs: %d, %d', num_steps_a,
                     num_steps_b)
        self.assertEqual(num_steps_a, num_steps_b)
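
The test relies on a GetMaxId helper that this snippet does not show. A minimal sketch of what it could look like, assuming the reader emits batches of serialized SparseFeatures protos and that sparse_pb2 is importable from the syntaxnet package (both the proto layout and the helper body are assumptions here):

    def GetMaxId(self, sparse_features):
        # Scan every serialized SparseFeatures proto in the batch and return
        # the largest feature id it contains (assumed proto layout).
        max_id = 0
        for batch in sparse_features:
            for serialized in batch:
                proto = sparse_pb2.SparseFeatures()
                proto.ParseFromString(serialized)
                for feature_id in proto.id:
                    max_id = max(max_id, feature_id)
        return max_id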
Example #2
 def MakeBuilder(self, use_averaging=True, **kw_args):
     # Set the seed and gate_gradients to ensure reproducibility.
     return graph_builder.GreedyParser(self._num_actions,
                                       self._num_features,
                                       self._num_feature_ids,
                                       embedding_sizes=[8, 8, 8],
                                       hidden_layer_sizes=[32, 32],
                                       seed=42,
                                       gate_gradients=True,
                                       use_averaging=use_averaging,
                                       **kw_args)
Example #3
def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)

  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))

  t = time.time()
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)

  for c in task_context:
    parser.AddEvaluation(c,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)


    parser.AddSaver(FLAGS.slim_model)
    sess.run(list(parser.inits.values()))
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=c,
                                        corpus_name=FLAGS.output)

    run_parser(sess, parser, sink, sink_documents)
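
The run_parser helper called above is not part of the snippet. A plausible sketch, mirroring the evaluation loop that Example #8 spells out in full (treat the exact tensor names as assumptions):

def run_parser(sess, parser, sink, sink_documents):
    # Pull annotated documents from the evaluation pipeline until the epoch
    # counter advances, feeding each non-empty batch to the document sink.
    num_epochs = None
    while True:
        tf_epochs, tf_documents = sess.run(
            [parser.evaluation['epochs'], parser.evaluation['documents']])
        if len(tf_documents):
            sess.run(sink, feed_dict={sink_documents: tf_documents})
        if num_epochs is None:
            num_epochs = tf_epochs
        elif num_epochs < tf_epochs:
            break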
Example #4
    def __init__(self, processconfig):
        """Builds and evaluates a network."""
        self._sess = tf.Session()
        self._pg = processconfig
        # File where syntaxnet output will be written.
        self.stdout_file_path = os.path.join(
            os.path.dirname(self._pg.custom_file), 'stdout.tmp')
        self.task_context = self._pg.task_context
        if self._pg.resource_dir:
            self.task_context = RewriteContext(self.task_context,
                                               self._pg.resource_dir)

        # Create (or truncate) the custom tmp file.
        with open(self._pg.custom_file, 'w') as f:
            pass
        self.fdescr_ = open(self._pg.custom_file, 'r')
        self.fdescr_.close()

        with tf.variable_scope(self._pg.variable_scope):
            feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
                gen_parser_ops.feature_size(task_context=self.task_context,
                                            arg_prefix=self._pg.arg_prefix))

            if self._pg.graph_builder_ == 'greedy':
                self._parser = graph_builder.GreedyParser(
                    num_actions,
                    feature_sizes,
                    domain_sizes,
                    embedding_dims,
                    self._pg.hidden_layer_sizes,
                    gate_gradients=True,
                    arg_prefix=self._pg.arg_prefix)
            else:
                self._parser = structured_graph_builder.StructuredGraphBuilder(
                    num_actions,
                    feature_sizes,
                    domain_sizes,
                    embedding_dims,
                    self._pg.hidden_layer_sizes,
                    gate_gradients=True,
                    arg_prefix=self._pg.arg_prefix,
                    beam_size=self._pg.beam_size,
                    max_steps=self._pg.max_steps)
            self._parser.AddEvaluation(self.task_context,
                                       self._pg.batch_size,
                                       corpus_name=self._pg.input_,
                                       evaluation_max_steps=self._pg.max_steps)
            self._parser.AddSaver(self._pg.slim_model)
            self._sess.run(list(self._parser.inits.values()))
            self._parser.saver.restore(self._sess, self._pg.model_path)
Example #5
 def __init__(self,
              task_context,
              arg_prefix,
              hidden_layer_sizes,
              model_dir,
              model_path,
              in_corpus_name,
              out_corpus_name,
              batch_size,
              max_steps,
              use_slim_model=True):
     self.model_dir = model_dir
     self.task_context, self.in_name = self.RewriteContext(
         task_context, in_corpus_name)
     self.arg_prefix = arg_prefix
     self.graph = tf.Graph()
     self.in_corpus_name = in_corpus_name
     self.out_corpus_name = out_corpus_name
     with self.graph.as_default():
         self.sess = tf.Session()
         feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess.run(
             gen_parser_ops.feature_size(task_context=self.task_context,
                                         arg_prefix=self.arg_prefix))
     self.feature_sizes = feature_sizes
     self.domain_sizes = domain_sizes
     self.embedding_dims = embedding_dims
     self.num_actions = num_actions
     self.hidden_layer_sizes = list(map(int, hidden_layer_sizes.split(',')))
     self.batch_size = batch_size
     self.max_steps = max_steps
     self.use_slim_model = use_slim_model
     with self.graph.as_default():
         self.parser = graph_builder.GreedyParser(
             self.num_actions,
             self.feature_sizes,
             self.domain_sizes,
             self.embedding_dims,
             self.hidden_layer_sizes,
             gate_gradients=True,
             arg_prefix=self.arg_prefix)
         self.parser.AddEvaluation(self.task_context,
                                   self.batch_size,
                                   corpus_name=self.in_corpus_name,
                                   evaluation_max_steps=self.max_steps)
         self.parser.AddSaver(self.use_slim_model)
         self.sess.run(list(self.parser.inits.values()))
         self.parser.saver.restore(self.sess,
                                   os.path.join(self.model_dir, model_path))
         self.parser.AddEvaluation(self.task_context,
                                   self.batch_size,
                                   corpus_name=self.in_corpus_name,
                                   evaluation_max_steps=self.max_steps)
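
A hypothetical instantiation of the wrapper above (the enclosing class name ParserEval and all paths are placeholders invented for illustration; the snippet does not show them):

# Hypothetical usage; the snippet omits the enclosing class name and paths.
annotator = ParserEval(
    task_context='models/parsey_mcparseface/context.pbtxt',
    arg_prefix='brain_parser',
    hidden_layer_sizes='512,512',
    model_dir='models/parsey_mcparseface',
    model_path='parser-params',
    in_corpus_name='stdin-conll',
    out_corpus_name='stdout-conll',
    batch_size=1024,
    max_steps=1000)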
Example #6
def EvalForever(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds and evaluates a network.

  Args:
    sess: tensorflow session to use
    num_actions: number of possible golden actions
    feature_sizes: size of each feature vector
    domain_sizes: number of possible feature ids in each feature vector
    embedding_dims: embedding dimension for each feature group
  """
  t = time.time()
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = FLAGS.task_context
  while True:
    if not Eval(sess, parser, task_context):
      break
Example #7
def EvalForever(sess):
    """Builds and evaluates a network."""
    task_context = FLAGS.task_context
    if FLAGS.resource_dir:
        task_context = RewriteContext(task_context)
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=FLAGS.arg_prefix))

    t = time.time()
    hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
    logging.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)
    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(num_actions,
                                            feature_sizes,
                                            domain_sizes,
                                            embedding_dims,
                                            hidden_layer_sizes,
                                            gate_gradients=True,
                                            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            gate_gradients=True,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)
    while True:
        if not Eval(sess, parser, task_context):
            break
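
Examples #3, #4, #7, and #8 all call RewriteContext without defining it. A sketch of the single-context variant, which rebases every relative resource path in the task spec onto FLAGS.resource_dir and returns the path of a temporary rewritten context file; the proto and helper names follow the syntaxnet package, and the details should be treated as assumptions (the variant used in Example #3 evidently returns a list of contexts instead):

def RewriteContext(task_context):
    # Parse the text-format TaskSpec, rebase relative resource paths onto
    # FLAGS.resource_dir, and write the result to a temporary file.
    context = task_spec_pb2.TaskSpec()
    with gfile.FastGFile(task_context) as fin:
        text_format.Merge(fin.read(), context)
    for resource in context.input:
        for part in resource.part:
            if part.file_pattern != '-':
                part.file_pattern = os.path.join(FLAGS.resource_dir,
                                                 part.file_pattern)
    with tempfile.NamedTemporaryFile(delete=False) as fout:
        fout.write(str(context))
        return fout.name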
Example #8
def Eval(sess):
    """Builds and evaluates a network."""
    task_context = FLAGS.task_context
    if FLAGS.resource_dir:
        task_context = RewriteContext(task_context)
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=FLAGS.arg_prefix))

    t = time.time()
    hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
    logging.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)
    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(num_actions,
                                            feature_sizes,
                                            domain_sizes,
                                            embedding_dims,
                                            hidden_layer_sizes,
                                            gate_gradients=True,
                                            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            gate_gradients=True,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)
    parser.AddEvaluation(task_context,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)

    parser.AddSaver(FLAGS.slim_model)
    sess.run(list(parser.inits.values()))
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=FLAGS.output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
        tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
            parser.evaluation['epochs'],
            parser.evaluation['eval_metrics'],
            parser.evaluation['documents'],
        ])

        if len(tf_documents):
            logging.info('Processed %d documents', len(tf_documents))
            num_documents += len(tf_documents)
            sess.run(sink, feed_dict={sink_documents: tf_documents})

        num_tokens += tf_eval_metrics[0]
        num_correct += tf_eval_metrics[1]
        if num_epochs is None:
            num_epochs = tf_eval_epochs
        elif num_epochs < tf_eval_epochs:
            break

    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
        eval_metric = 100.0 * num_correct / num_tokens
        logging.info('num correct tokens: %d', num_correct)
        logging.info('total tokens: %d', num_tokens)
        logging.info(
            'Seconds elapsed in evaluation: %.2f, '
            'eval metric: %.2f%%',
            time.time() - t, eval_metric)
Example #9
def Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
    """Builds and trains the network.

  Args:
    sess: tensorflow session to use.
    num_actions: number of possible golden actions.
    feature_sizes: size of each feature vector.
    domain_sizes: number of possible feature ids in each feature vector.
    embedding_dims: embedding dimension to use for each feature group.
  """
    t = time.time()
    hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
    logging.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)

    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            seed=int(FLAGS.seed),
            gate_gradients=True,
            averaging_decay=FLAGS.averaging_decay,
            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            seed=int(FLAGS.seed),
            gate_gradients=True,
            averaging_decay=FLAGS.averaging_decay,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)

    task_context = OutputPath('context')
    if FLAGS.word_embeddings is not None:
        parser.AddPretrainedEmbeddings(0, FLAGS.word_embeddings, task_context)

    corpus_name = ('projectivized-training-corpus' if
                   FLAGS.projectivize_training_set else FLAGS.training_corpus)
    parser.AddTraining(task_context,
                       FLAGS.batch_size,
                       learning_rate=FLAGS.learning_rate,
                       momentum=FLAGS.momentum,
                       decay_steps=FLAGS.decay_steps,
                       corpus_name=corpus_name)
    parser.AddEvaluation(task_context,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.tuning_corpus)
    parser.AddSaver(FLAGS.slim_model)

    # Save graph.
    if FLAGS.output_path:
        with gfile.FastGFile(OutputPath('graph'), 'w') as f:
            f.write(sess.graph_def.SerializeToString())

    logging.info('Initializing...')
    num_epochs = 0
    cost_sum = 0.0
    num_steps = 0
    best_eval_metric = 0.0
    sess.run(list(parser.inits.values()))

    if FLAGS.pretrained_params is not None:
        logging.info('Loading pretrained params from %s',
                     FLAGS.pretrained_params)
        feed_dict = {'save/Const:0': FLAGS.pretrained_params}
        targets = []
        for node in sess.graph_def.node:
            if (node.name.startswith('save/Assign') and node.input[0]
                    in FLAGS.pretrained_params_names.split(',')):
                logging.info('Loading %s with op %s', node.input[0], node.name)
                targets.append(node.name)
        sess.run(targets, feed_dict=feed_dict)

    logging.info('Training...')
    while num_epochs < FLAGS.num_epochs:
        tf_epochs, tf_cost, _ = sess.run([
            parser.training['epochs'], parser.training['cost'],
            parser.training['train_op']
        ])
        num_epochs = tf_epochs
        num_steps += 1
        cost_sum += tf_cost
        if num_steps % FLAGS.report_every == 0:
            logging.info(
                'Epochs: %d, num steps: %d, '
                'seconds elapsed: %.2f, avg cost: %.2f', num_epochs,
                num_steps,
                time.time() - t, cost_sum / FLAGS.report_every)
            cost_sum = 0.0
        if num_steps % FLAGS.checkpoint_every == 0:
            best_eval_metric = Eval(sess, parser, num_steps, best_eval_metric)
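
For context, a sketch of how a parser_trainer-style main() might obtain the network dimensions and invoke Train; the feature_size call mirrors the evaluation examples above, while the flag setup and session handling are assumptions:

def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    # Measure feature and domain sizes in a throwaway session, as the
    # evaluation examples above do.
    with tf.Session() as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                        arg_prefix=FLAGS.arg_prefix))
    # Build and train the network in a fresh session.
    with tf.Session() as sess:
        Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)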