Example #1
0
 def __init__(self, filepath, batch_size=32,
              projectivize=False, morph_to_pos=False,
              record_format='conll-sentence'):
   """Builds a document-source pipeline reading the corpus at `filepath`.

   Creates a private graph/session pair and a SyntaxNet document_source op
   that reads `filepath`, always drops malformed documents, and optionally
   projectivizes parse trees.

   Args:
     filepath: file pattern of the input corpus.
     batch_size: number of documents to read per batch.
     projectivize: if True, apply projectivize_filter to each batch.
     morph_to_pos: if True, add the parameters that fold morphology
       information into the POS field (join_category_to_pos,
       add_pos_as_attribute, serialize_morph_to_pos).
     record_format: input record format; defaults to 'conll-sentence',
       which was previously hard-coded (backward compatible).
   """
   self._graph = tf.Graph()
   self._session = tf.Session(graph=self._graph)
   # The task context is a text-format proto; interpolate the record
   # format and file pattern into the input spec.
   task_context_str = """
          input {
            name: 'documents'
            record_format: '%s'
            Part {
             file_pattern: '%s'
            }
          }""" % (record_format, filepath)
   if morph_to_pos:
     task_context_str += """
          Parameter {
            name: "join_category_to_pos"
            value: "true"
          }
          Parameter {
            name: "add_pos_as_attribute"
            value: "true"
          }
          Parameter {
            name: "serialize_morph_to_pos"
            value: "true"
          }
          """
   with self._graph.as_default():
     self._source, self._is_last = gen_parser_ops.document_source(
         task_context_str=task_context_str, batch_size=batch_size)
     # Malformed documents are always filtered; projectivization is opt-in.
     self._source = gen_parser_ops.well_formed_filter(self._source)
     if projectivize:
       self._source = gen_parser_ops.projectivize_filter(self._source)
Example #2
0
def main(unused_argv):
  """End-to-end driver: build lexicon, preprocess the corpus, then train."""
  logging.set_verbosity(logging.INFO)

  # Make sure the output directory exists before anything is written there.
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      lexicon = gen_parser_ops.lexicon_builder(
          task_context=OutputPath('context'),
          corpus_name=FLAGS.training_corpus)
      lexicon.run()

  # Query the network geometry implied by the feature spec.
  with tf.Session(FLAGS.tf_master) as sess:
    sizes = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))
    feature_sizes, domain_sizes, embedding_dims, num_actions = sizes

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      docs, is_last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      well_formed = gen_parser_ops.well_formed_filter(
          docs, task_context=OutputPath('context'))
      projectivized = gen_parser_ops.projectivize_filter(
          well_formed, task_context=OutputPath('context'))
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=projectivized)
      # Drain the pipeline until the source reports its final batch.
      reached_end = False
      while not reached_end:
        reached_end, _ = sess.run([is_last, sink])

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example #3
0
def main(unused_argv):
    """Prepares the lexicon and training corpus, then trains the parser."""
    logging.set_verbosity(logging.INFO)
    if not gfile.IsDirectory(OutputPath('')):
        gfile.MakeDirs(OutputPath(''))

    # Rewrite context.
    RewriteContext()

    # Creates necessary term maps.
    if FLAGS.compute_lexicon:
        logging.info('Computing lexicon...')
        with tf.Session(FLAGS.tf_master) as lexicon_session:
            builder_op = gen_parser_ops.lexicon_builder(
                task_context=OutputPath('context'),
                corpus_name=FLAGS.training_corpus)
            builder_op.run()
    with tf.Session(FLAGS.tf_master) as sizing_session:
        (feature_sizes, domain_sizes,
         embedding_dims, num_actions) = sizing_session.run(
             gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                         arg_prefix=FLAGS.arg_prefix))

    # Well formed and projectivize.
    if FLAGS.projectivize_training_set:
        logging.info('Preprocessing...')
        with tf.Session(FLAGS.tf_master) as prep_session:
            corpus, last_batch = gen_parser_ops.document_source(
                task_context=OutputPath('context'),
                batch_size=FLAGS.batch_size,
                corpus_name=FLAGS.training_corpus)
            filtered = gen_parser_ops.well_formed_filter(
                corpus, task_context=OutputPath('context'))
            sink = gen_parser_ops.document_sink(
                task_context=OutputPath('context'),
                corpus_name='projectivized-training-corpus',
                documents=gen_parser_ops.projectivize_filter(
                    filtered, task_context=OutputPath('context')))
            while True:
                is_done, _ = prep_session.run([last_batch, sink])
                if is_done:
                    break

    logging.info('Training...')
    with tf.Session(FLAGS.tf_master) as train_session:
        Train(train_session, num_actions, feature_sizes, domain_sizes,
              embedding_dims)
 def __init__(self,
              filepath,
              record_format,
              batch_size=32,
              check_well_formed=False,
              projectivize=False,
              morph_to_pos=False):
     """Wires up a document-source pipeline over the corpus at `filepath`.

     A private graph and session are created; the document_source op is
     configured through a text-format task context, with optional
     well-formedness filtering and projectivization of the stream.
     """
     self._graph = tf.Graph()
     self._session = tf.Session(graph=self._graph)
     # Base task context: a text proto naming the corpus and its format.
     task_context_str = """
       input {
         name: 'documents'
         record_format: '%s'
         Part {
          file_pattern: '%s'
         }
       }""" % (record_format, filepath)
     # Extra parameters that fold morphology information into POS.
     morph_params = """
       Parameter {
         name: "join_category_to_pos"
         value: "true"
       }
       Parameter {
         name: "add_pos_as_attribute"
         value: "true"
       }
       Parameter {
         name: "serialize_morph_to_pos"
         value: "true"
       }
       """
     if morph_to_pos:
         task_context_str = task_context_str + morph_params
     with self._graph.as_default():
         stream, is_last = gen_parser_ops.document_source(
             task_context_str=task_context_str, batch_size=batch_size)
         if check_well_formed:
             stream = gen_parser_ops.well_formed_filter(stream)
         if projectivize:
             stream = gen_parser_ops.projectivize_filter(stream)
         self._source = stream
         self._is_last = is_last