def testReadFromTextFile(self):
  reader = sentence_io.ConllSentenceReader(self.filepath, self.batch_size)
  self.assertParseable(reader, self.batch_size, False)
  self.assertParseable(reader, self.batch_size, False)
  self.assertParseable(reader, 14, True)
  self.assertParseable(reader, 0, True)
  self.assertParseable(reader, 0, True)
def testReadAndProjectivize(self):
  reader = sentence_io.ConllSentenceReader(
      self.filepath, self.batch_size, projectivize=True)
  self.assertParseable(reader, self.batch_size, False)
  self.assertParseable(reader, self.batch_size, False)
  self.assertParseable(reader, 14, True)
  self.assertParseable(reader, 0, True)
  self.assertParseable(reader, 0, True)
def testReadFirstSentence(self):
  reader = sentence_io.ConllSentenceReader(self.filepath, 1)
  sentences, last = reader.read()
  self.assertEqual(1, len(sentences))
  pb = sentence_pb2.Sentence()
  pb.ParseFromString(sentences[0])
  self.assertFalse(last)
  self.assertEqual(
      u'I knew I could do it properly if given the right kind of support .',
      pb.text)
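# Note: the assertParseable helper used by the tests above is not part of this
# excerpt. A minimal sketch of what it is assumed to do, given that
# reader.read() returns a (serialized_sentences, is_last_batch) pair as in
# testReadFirstSentence:
def assertParseable(self, reader, expected_num, expected_last):
  sentences, last = reader.read()
  self.assertEqual(expected_num, len(sentences))
  self.assertEqual(expected_last, last)
  for serialized in sentences:
    # Every record should deserialize into a valid Sentence proto.
    sentence_pb2.Sentence().ParseFromString(serialized)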
def main(unused_argv):
  # Validate that we have a parser saved model passed to this script.
  if FLAGS.parser_saved_model is None:
    tf.logging.fatal('A parser saved model must be provided.')

  # Parse the flags containing lists, using regular expressions.
  # This matches and extracts key=value pairs.
  component_beam_sizes = re.findall(r'([^=,]+)=(\d+)',
                                    FLAGS.inference_beam_size)
  tf.logging.info('Found beam size dict %s' % component_beam_sizes)

  # This matches strings separated by a comma. Does not return any empty
  # strings.
  components_to_locally_normalize = re.findall(r'[^,]+',
                                               FLAGS.locally_normalize)
  tf.logging.info('Found local normalization dict %s' %
                  components_to_locally_normalize)

  # Create a session config with the requested number of threads.
  session_config = tf.ConfigProto(
      log_device_placement=False,
      intra_op_parallelism_threads=FLAGS.threads,
      inter_op_parallelism_threads=FLAGS.threads)

  # Get the segmented input data for the parser, either by running the
  # segmenter ourselves or by simply reading it from the CoNLL file.
  if FLAGS.segmenter_saved_model is None:
    # If no segmenter was provided, we must use the data from the CoNLL file.
    input_file = FLAGS.input_file
    parser_input = sentence_io.ConllSentenceReader(input_file).corpus()
    use_gold_segmentation = True
  else:
    # If a segmenter was provided, use it.
    segmenter_input = get_segmenter_corpus(FLAGS.input_file, FLAGS.text_format)
    parser_input = run_segmenter(segmenter_input, FLAGS.segmenter_saved_model,
                                 session_config, FLAGS.max_batch_size,
                                 FLAGS.timeline_output_file)
    use_gold_segmentation = False

  # Now that we have parser input data, parse.
  processed = run_parser(parser_input, FLAGS.parser_saved_model,
                         session_config, component_beam_sizes,
                         components_to_locally_normalize,
                         FLAGS.max_batch_size, FLAGS.timeline_output_file)

  if FLAGS.output_file:
    print_output(FLAGS.output_file, FLAGS.text_format, use_gold_segmentation,
                 processed)
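# For reference, a quick illustration of how the two regexes above split the
# list-valued flags (the flag values shown are hypothetical examples, not
# defaults):
#
#   re.findall(r'([^=,]+)=(\d+)', 'parser=8,tagger=4')
#     => [('parser', '8'), ('tagger', '4')]
#   re.findall(r'[^,]+', 'parser,tagger')
#     => ['parser', 'tagger']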
def get_segmenter_corpus(input_data_path, use_text_format):
  """Reads in a character corpus for segmenting."""
  # Read in the documents.
  tf.logging.info('Reading documents...')
  if use_text_format:
    char_corpus = sentence_io.FormatSentenceReader(
        input_data_path, 'untokenized-text').corpus()
  else:
    input_corpus = sentence_io.ConllSentenceReader(input_data_path).corpus()
    with tf.Session(graph=tf.Graph()) as tmp_session:
      char_input = gen_parser_ops.char_token_generator(input_corpus)
      char_corpus = tmp_session.run(char_input)
    check.Eq(len(input_corpus), len(char_corpus))
  return char_corpus
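# Hypothetical usage of get_segmenter_corpus (the path below is an example,
# not a default): read an untokenized text file as a character corpus for the
# segmenter.
#
#   char_corpus = get_segmenter_corpus('/tmp/input.txt', use_text_format=True)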
def main(unused_argv):
  logging.set_verbosity(logging.INFO)

  if not gfile.IsDirectory(FLAGS.resource_path):
    gfile.MakeDirs(FLAGS.resource_path)

  # Constructs lexical resources for SyntaxNet in the given resource path, from
  # the training data.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    lexicon.build_lexicon(FLAGS.resource_path, FLAGS.training_corpus_path)

  # Construct the "lookahead" ComponentSpec. This is a simple right-to-left RNN
  # sequence model, which encodes the context to the right of each token. It
  # has no loss except for the downstream components.
  char2word = spec_builder.ComponentSpecBuilder('char_lstm')
  char2word.set_network_unit(
      name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256')
  char2word.set_transition_system(name='char-shift-only', left_to_right='true')
  char2word.add_fixed_feature(
      name='chars', fml='char-input.text-char', embedding_dim=16)
  char2word.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master)

  lookahead = spec_builder.ComponentSpecBuilder('lookahead')
  lookahead.set_network_unit(
      name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256')
  lookahead.set_transition_system(name='shift-only', left_to_right='false')
  lookahead.add_link(
      source=char2word, fml='input.last-char-focus', embedding_dim=32)
  lookahead.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master)

  # Construct the ComponentSpec for tagging. This is a simple left-to-right RNN
  # sequence tagger.
  tagger = spec_builder.ComponentSpecBuilder('tagger')
  tagger.set_network_unit(
      name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256')
  tagger.set_transition_system(name='tagger')
  tagger.add_token_link(source=lookahead, fml='input.focus', embedding_dim=32)
  tagger.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master)

  # Construct the ComponentSpec for parsing.
  parser = spec_builder.ComponentSpecBuilder('parser')
  parser.set_network_unit(
      name='FeedForwardNetwork',
      hidden_layer_sizes='256',
      layer_norm_hidden='True')
  parser.set_transition_system(name='arc-standard')
  parser.add_token_link(source=lookahead, fml='input.focus', embedding_dim=32)
  parser.add_token_link(
      source=tagger,
      fml='input.focus stack.focus stack(1).focus',
      embedding_dim=32)

  # Recurrent connection for the arc-standard parser. For both tokens on the
  # stack, we connect to the last time step to either SHIFT or REDUCE that
  # token. This allows the parser to build up compositional representations of
  # phrases.
  parser.add_link(
      source=parser,  # recurrent connection
      name='rnn-stack',  # unique identifier
      fml='stack.focus stack(1).focus',  # look for both stack tokens
      source_translator='shift-reduce-step',  # maps token indices -> step
      embedding_dim=32)  # project down to 32 dims

  parser.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master)

  master_spec = spec_pb2.MasterSpec()
  master_spec.component.extend(
      [char2word.spec, lookahead.spec, tagger.spec, parser.spec])
  logging.info('Constructed master spec: %s', str(master_spec))

  hyperparam_config = spec_pb2.GridPoint()
  hyperparam_config.decay_steps = 128000
  hyperparam_config.learning_rate = 0.001
  hyperparam_config.learning_method = 'adam'
  hyperparam_config.adam_beta1 = 0.9
  hyperparam_config.adam_beta2 = 0.9
  hyperparam_config.adam_eps = 0.0001
  hyperparam_config.gradient_clip_norm = 1
  hyperparam_config.self_norm_alpha = 1.0
  hyperparam_config.use_moving_average = True
  hyperparam_config.dropout_rate = 0.7
  hyperparam_config.seed = 1

  # Build the TensorFlow graph.
  graph = tf.Graph()
  with graph.as_default():
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    component_targets = spec_builder.default_targets_from_spec(master_spec)
    trainers = [
        builder.add_training_from_config(target)
        for target in component_targets
    ]
    assert len(trainers) == 2
    annotator = builder.add_annotation()
    builder.add_saver()

  # Read in serialized protos from training data.
  training_set = sentence_io.ConllSentenceReader(
      FLAGS.training_corpus_path,
      projectivize=FLAGS.projectivize_training_set).corpus()
  dev_set = sentence_io.ConllSentenceReader(
      FLAGS.dev_corpus_path, projectivize=False).corpus()

  # Ready to train!
  logging.info('Training on %d sentences.', len(training_set))
  logging.info('Tuning on %d sentences.', len(dev_set))

  pretrain_steps = [100, 0]
  tagger_steps = 1000
  train_steps = [tagger_steps, 8 * tagger_steps]

  tf.logging.info('Creating TensorFlow checkpoint dir...')
  gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename))
  summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir)

  with tf.Session(FLAGS.tf_master, graph=graph) as sess:
    # Make sure to re-initialize all underlying state.
    sess.run(tf.global_variables_initializer())
    trainer_lib.run_training(sess, trainers, annotator,
                             evaluation.parser_summaries, pretrain_steps,
                             train_steps, training_set, dev_set, dev_set,
                             FLAGS.batch_size, summary_writer,
                             FLAGS.report_every, builder.saver,
                             FLAGS.checkpoint_filename)
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  check.NotNone(FLAGS.model_dir, '--model_dir is required')
  check.Ne(FLAGS.pretrain_steps is None, FLAGS.pretrain_epochs is None,
           'Exactly one of --pretrain_steps or --pretrain_epochs is required')
  check.Ne(FLAGS.train_steps is None, FLAGS.train_epochs is None,
           'Exactly one of --train_steps or --train_epochs is required')

  config_path = os.path.join(FLAGS.model_dir, 'config.txt')
  master_path = os.path.join(FLAGS.model_dir, 'master.pbtxt')
  hyperparameters_path = os.path.join(FLAGS.model_dir, 'hyperparameters.pbtxt')
  targets_path = os.path.join(FLAGS.model_dir, 'targets.pbtxt')
  checkpoint_path = os.path.join(FLAGS.model_dir, 'checkpoints/best')
  tensorboard_dir = os.path.join(FLAGS.model_dir, 'tensorboard')

  with tf.gfile.FastGFile(config_path) as config_file:
    config = collections.defaultdict(bool,
                                     ast.literal_eval(config_file.read()))
  train_corpus_path = config['train_corpus_path']
  tune_corpus_path = config['tune_corpus_path']
  projectivize_train_corpus = config['projectivize_train_corpus']

  master = _read_text_proto(master_path, spec_pb2.MasterSpec)
  hyperparameters = _read_text_proto(hyperparameters_path, spec_pb2.GridPoint)
  targets = spec_builder.default_targets_from_spec(master)
  if tf.gfile.Exists(targets_path):
    targets = _read_text_proto(targets_path, spec_pb2.TrainingGridSpec).target

  # Build the TensorFlow graph.
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(hyperparameters.seed)
    builder = graph_builder.MasterBuilder(master, hyperparameters)
    trainers = [
        builder.add_training_from_config(target) for target in targets
    ]
    annotator = builder.add_annotation()
    builder.add_saver()

  # Read in serialized protos from training data.
  train_corpus = sentence_io.ConllSentenceReader(
      train_corpus_path, projectivize=projectivize_train_corpus).corpus()
  tune_corpus = sentence_io.ConllSentenceReader(
      tune_corpus_path, projectivize=False).corpus()
  gold_tune_corpus = tune_corpus

  # Convert to char-based corpora, if requested.
  if config['convert_to_char_corpora']:
    # NB: Do not convert the |gold_tune_corpus|, which should remain word-based
    # for segmentation evaluation purposes.
    train_corpus = _convert_to_char_corpus(train_corpus)
    tune_corpus = _convert_to_char_corpus(tune_corpus)

  pretrain_steps = _get_steps(FLAGS.pretrain_steps, FLAGS.pretrain_epochs,
                              len(train_corpus))
  train_steps = _get_steps(FLAGS.train_steps, FLAGS.train_epochs,
                           len(train_corpus))
  check.Eq(len(targets), len(pretrain_steps),
           'Length mismatch between training targets and --pretrain_steps')
  check.Eq(len(targets), len(train_steps),
           'Length mismatch between training targets and --train_steps')

  # Ready to train!
  tf.logging.info('Training on %d sentences.', len(train_corpus))
  tf.logging.info('Tuning on %d sentences.', len(tune_corpus))

  tf.logging.info('Creating TensorFlow checkpoint dir...')
  summary_writer = trainer_lib.get_summary_writer(tensorboard_dir)

  checkpoint_dir = os.path.dirname(checkpoint_path)
  if tf.gfile.IsDirectory(checkpoint_dir):
    tf.gfile.DeleteRecursively(checkpoint_dir)
  elif tf.gfile.Exists(checkpoint_dir):
    tf.gfile.Remove(checkpoint_dir)
  tf.gfile.MakeDirs(checkpoint_dir)

  with tf.Session(FLAGS.tf_master, graph=graph) as sess:
    # Make sure to re-initialize all underlying state.
    sess.run(tf.global_variables_initializer())
    trainer_lib.run_training(sess, trainers, annotator,
                             evaluation.parser_summaries, pretrain_steps,
                             train_steps, train_corpus, tune_corpus,
                             gold_tune_corpus, FLAGS.batch_size,
                             summary_writer, FLAGS.report_every,
                             builder.saver, checkpoint_path)

  tf.logging.info('Best checkpoint written to:\n%s', checkpoint_path)
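# The _get_steps helper called above is not shown in this excerpt. A plausible
# sketch, assuming the --*_steps flags hold comma-separated per-target step
# counts, the --*_epochs flags hold comma-separated per-target epoch counts,
# and one epoch corresponds to one pass over the training corpus:
#
#   def _get_steps(steps_flag, epochs_flag, corpus_length):
#     if steps_flag:
#       return [int(steps) for steps in steps_flag.split(',')]
#     return [corpus_length * int(epochs) for epochs in epochs_flag.split(',')]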
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Parse the flags containing lists, using regular expressions.
  # This matches and extracts key=value pairs.
  component_beam_sizes = re.findall(r'([^=,]+)=(\d+)',
                                    FLAGS.inference_beam_size)
  # This matches strings separated by a comma. Does not return any empty
  # strings.
  components_to_locally_normalize = re.findall(r'[^,]+',
                                               FLAGS.locally_normalize)

  # Reads master spec.
  master_spec = spec_pb2.MasterSpec()
  with gfile.FastGFile(FLAGS.master_spec) as fin:
    text_format.Parse(fin.read(), master_spec)

  # Rewrite resource locations.
  if FLAGS.resource_dir:
    for component in master_spec.component:
      for resource in component.resource:
        for part in resource.part:
          part.file_pattern = os.path.join(FLAGS.resource_dir,
                                           part.file_pattern)

  if FLAGS.complete_master_spec:
    spec_builder.complete_master_spec(master_spec, None, FLAGS.resource_dir)

  # Graph building.
  tf.logging.info('Building the graph')
  g = tf.Graph()
  with g.as_default(), tf.device('/device:CPU:0'):
    hyperparam_config = spec_pb2.GridPoint()
    hyperparam_config.use_moving_average = True
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    annotator = builder.add_annotation()
    builder.add_saver()

  tf.logging.info('Reading documents...')
  input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()

  session_config = tf.ConfigProto(
      log_device_placement=False,
      intra_op_parallelism_threads=FLAGS.threads,
      inter_op_parallelism_threads=FLAGS.threads)

  with tf.Session(graph=g, config=session_config) as sess:
    tf.logging.info('Initializing variables...')
    sess.run(tf.global_variables_initializer())
    tf.logging.info('Loading from checkpoint...')
    sess.run('save/restore_all', {'save/Const:0': FLAGS.checkpoint_file})

    tf.logging.info('Processing sentences...')

    processed = []
    start_time = time.time()
    run_metadata = tf.RunMetadata()
    for start in range(0, len(input_corpus), FLAGS.max_batch_size):
      end = min(start + FLAGS.max_batch_size, len(input_corpus))
      feed_dict = {annotator['input_batch']: input_corpus[start:end]}
      for comp, beam_size in component_beam_sizes:
        feed_dict['%s/InferenceBeamSize:0' % comp] = beam_size
      for comp in components_to_locally_normalize:
        feed_dict['%s/LocallyNormalize:0' % comp] = True
      if FLAGS.timeline_output_file and end == len(input_corpus):
        serialized_annotations = sess.run(
            annotator['annotations'],
            feed_dict=feed_dict,
            options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
            run_metadata=run_metadata)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with open(FLAGS.timeline_output_file, 'w') as trace_file:
          trace_file.write(trace.generate_chrome_trace_format())
      else:
        serialized_annotations = sess.run(
            annotator['annotations'], feed_dict=feed_dict)
      processed.extend(serialized_annotations)

    tf.logging.info('Processed %d documents in %.2f seconds.',
                    len(input_corpus), time.time() - start_time)
    pos, uas, las = evaluation.calculate_parse_metrics(input_corpus, processed)
    if FLAGS.log_file:
      with gfile.GFile(FLAGS.log_file, 'w') as f:
        f.write('%s\t%f\t%f\t%f\n' % (FLAGS.language_name, pos, uas, las))

    if FLAGS.output_file:
      with gfile.GFile(FLAGS.output_file, 'w') as f:
        for serialized_sentence in processed:
          sentence = sentence_pb2.Sentence()
          sentence.ParseFromString(serialized_sentence)
          f.write(text_format.MessageToString(sentence) + '\n\n')
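# For reference, the metrics line written to --log_file above is tab-separated:
# language name, POS accuracy, UAS, LAS. A hypothetical example line:
#
#   'en\t0.950000\t0.910000\t0.890000\n'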
def main(unused_argv):
  # Parse the flags containing lists, using regular expressions.
  # This matches and extracts key=value pairs.
  component_beam_sizes = re.findall(r'([^=,]+)=(\d+)',
                                    FLAGS.inference_beam_size)
  # This matches strings separated by a comma. Does not return any empty
  # strings.
  components_to_locally_normalize = re.findall(r'[^,]+',
                                               FLAGS.locally_normalize)

  ## SEGMENTATION ##

  if not FLAGS.use_gold_segmentation:
    # Reads master spec.
    master_spec = spec_pb2.MasterSpec()
    with gfile.FastGFile(FLAGS.segmenter_master_spec) as fin:
      text_format.Parse(fin.read(), master_spec)

    if FLAGS.complete_master_spec:
      spec_builder.complete_master_spec(master_spec, None,
                                        FLAGS.segmenter_resource_dir)

    # Graph building.
    tf.logging.info('Building the graph')
    g = tf.Graph()
    with g.as_default(), tf.device('/device:CPU:0'):
      hyperparam_config = spec_pb2.GridPoint()
      hyperparam_config.use_moving_average = True
      builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
      annotator = builder.add_annotation()
      builder.add_saver()

    tf.logging.info('Reading documents...')
    input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()
    with tf.Session(graph=tf.Graph()) as tmp_session:
      char_input = gen_parser_ops.char_token_generator(input_corpus)
      char_corpus = tmp_session.run(char_input)
    check.Eq(len(input_corpus), len(char_corpus))

    session_config = tf.ConfigProto(
        log_device_placement=False,
        intra_op_parallelism_threads=FLAGS.threads,
        inter_op_parallelism_threads=FLAGS.threads)

    with tf.Session(graph=g, config=session_config) as sess:
      tf.logging.info('Initializing variables...')
      sess.run(tf.global_variables_initializer())
      tf.logging.info('Loading from checkpoint...')
      sess.run('save/restore_all',
               {'save/Const:0': FLAGS.segmenter_checkpoint_file})

      tf.logging.info('Processing sentences...')

      processed = []
      start_time = time.time()
      run_metadata = tf.RunMetadata()
      for start in range(0, len(char_corpus), FLAGS.max_batch_size):
        end = min(start + FLAGS.max_batch_size, len(char_corpus))
        feed_dict = {annotator['input_batch']: char_corpus[start:end]}
        if FLAGS.timeline_output_file and end == len(char_corpus):
          serialized_annotations = sess.run(
              annotator['annotations'],
              feed_dict=feed_dict,
              options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
              run_metadata=run_metadata)
          trace = timeline.Timeline(step_stats=run_metadata.step_stats)
          with open(FLAGS.timeline_output_file, 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format())
        else:
          serialized_annotations = sess.run(
              annotator['annotations'], feed_dict=feed_dict)
        processed.extend(serialized_annotations)

      tf.logging.info('Processed %d documents in %.2f seconds.',
                      len(char_corpus), time.time() - start_time)
      input_corpus = processed
  else:
    input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()

  ## PARSING ##

  # Reads master spec.
  master_spec = spec_pb2.MasterSpec()
  with gfile.FastGFile(FLAGS.parser_master_spec) as fin:
    text_format.Parse(fin.read(), master_spec)

  if FLAGS.complete_master_spec:
    spec_builder.complete_master_spec(master_spec, None,
                                      FLAGS.parser_resource_dir)

  # Graph building.
  tf.logging.info('Building the graph')
  g = tf.Graph()
  with g.as_default(), tf.device('/device:CPU:0'):
    hyperparam_config = spec_pb2.GridPoint()
    hyperparam_config.use_moving_average = True
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    annotator = builder.add_annotation()
    builder.add_saver()

  tf.logging.info('Reading documents...')

  session_config = tf.ConfigProto(
      log_device_placement=False,
      intra_op_parallelism_threads=FLAGS.threads,
      inter_op_parallelism_threads=FLAGS.threads)

  with tf.Session(graph=g, config=session_config) as sess:
    tf.logging.info('Initializing variables...')
    sess.run(tf.global_variables_initializer())
    tf.logging.info('Loading from checkpoint...')
    sess.run('save/restore_all', {'save/Const:0': FLAGS.parser_checkpoint_file})

    tf.logging.info('Processing sentences...')

    processed = []
    start_time = time.time()
    run_metadata = tf.RunMetadata()
    for start in range(0, len(input_corpus), FLAGS.max_batch_size):
      end = min(start + FLAGS.max_batch_size, len(input_corpus))
      feed_dict = {annotator['input_batch']: input_corpus[start:end]}
      for comp, beam_size in component_beam_sizes:
        feed_dict['%s/InferenceBeamSize:0' % comp] = beam_size
      for comp in components_to_locally_normalize:
        feed_dict['%s/LocallyNormalize:0' % comp] = True
      if FLAGS.timeline_output_file and end == len(input_corpus):
        serialized_annotations = sess.run(
            annotator['annotations'],
            feed_dict=feed_dict,
            options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
            run_metadata=run_metadata)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with open(FLAGS.timeline_output_file, 'w') as trace_file:
          trace_file.write(trace.generate_chrome_trace_format())
      else:
        serialized_annotations = sess.run(
            annotator['annotations'], feed_dict=feed_dict)
      processed.extend(serialized_annotations)

    tf.logging.info('Processed %d documents in %.2f seconds.',
                    len(input_corpus), time.time() - start_time)

  if FLAGS.output_file:
    with gfile.GFile(FLAGS.output_file, 'w') as f:
      for serialized_sentence in processed:
        sentence = sentence_pb2.Sentence()
        sentence.ParseFromString(serialized_sentence)
        f.write('#' + sentence.text.encode('utf-8') + '\n')
        for i, token in enumerate(sentence.token):
          head = token.head + 1
          f.write('%s\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_\n' %
                  (i + 1, token.word.encode('utf-8'), head,
                   token.label.encode('utf-8')))
        f.write('\n\n')
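# For reference, each token line written above follows the 10-column CoNLL
# layout (ID and FORM in the first two columns, HEAD and DEPREL in columns 7
# and 8), with the remaining columns left as '_'. A hypothetical output line:
#
#   '1\tJohn\t_\t_\t_\t_\t2\tnsubj\t_\t_\n'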