def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  # RewriteContext returns a sequence of contexts here; add one evaluation
  # per context.
  for c in task_context:
    parser.AddEvaluation(c, FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=c,
                                      corpus_name=FLAGS.output)
  run_parser(sess, parser, sink, sink_documents)

def Eval(sess, parser, task_context):
  parser.AddEvaluation(task_context, FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
  return num_documents

def Eval(sess, parser, task_context):
  parser.AddEvaluation(task_context, FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=FLAGS.task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
  return num_documents

def _parse_impl(self):
  with tf.variable_scope(self._pg.variable_scope):
    tf_eval_epochs, tf_eval_metrics, tf_documents = self._sess.run([
        self._parser.evaluation['epochs'],
        self._parser.evaluation['eval_metrics'],
        self._parser.evaluation['documents'],
    ])
    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=self.task_context,
                                        corpus_name='stdout-conll')
    self._sess.run(sink, feed_dict={sink_documents: tf_documents})
    sys.stdout.write('\n')
    sys.stdout.flush()

def _eval(self):
  with tf.variable_scope(self._variable_scope):
    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(
        sink_documents,
        task_context=self._task_context,
        corpus_name=self._output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
      tf_eval_epochs, tf_eval_metrics, tf_documents = self._sess.run([
          self._parser.evaluation['epochs'],
          self._parser.evaluation['eval_metrics'],
          self._parser.evaluation['documents'],
      ])
      if len(tf_documents):
        logging.info('Processed %d documents', len(tf_documents))
        num_documents += len(tf_documents)
        self._sess.run(sink, feed_dict={sink_documents: tf_documents})
      num_tokens += tf_eval_metrics[0]
      num_correct += tf_eval_metrics[1]
      if num_epochs is None:
        num_epochs = tf_eval_epochs
      elif num_epochs < tf_eval_epochs:
        break
    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
      eval_metric = 100.0 * num_correct / num_tokens
      logging.info('num correct tokens: %d', num_correct)
      logging.info('total tokens: %d', num_tokens)
      logging.info('Seconds elapsed in evaluation: %.2f, '
                   'eval metric: %.2f%%', time.time() - t, eval_metric)

def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(
                  source, task_context=OutputPath('context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)

# NOTE: this snippet begins mid-call; the leading positional arguments of the
# StructuredGraphBuilder call below are restored by analogy with the tokenizer
# snippet further down and are an assumption.
tagger = structured_graph_builder.StructuredGraphBuilder(
    num_actions,
    feature_sizes,
    domain_sizes,
    embedding_dims,
    hidden_layer_sizes,
    gate_gradients=True,
    arg_prefix=tagger_arg_prefix,
    beam_size=beam_size,
    max_steps=max_steps)
tagger.AddEvaluation(task_context, batch_size,
                     corpus_name=input_style,
                     evaluation_max_steps=max_steps)
tagger.AddSaver(slim_model)
sess.run(tagger.inits.values())
tagger.saver.restore(sess, tagger_model_path)

sink_documents = tf.placeholder(tf.string)
sink = gen_parser_ops.document_sink(sink_documents,
                                    task_context=task_context,
                                    corpus_name='stdout-conll')


def stdin_handler(signum, frame):
  tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
      tagger.evaluation['epochs'],
      tagger.evaluation['eval_metrics'],
      tagger.evaluation['documents'],
  ])
  sys.stdout.write('\n## result start\n')
  sys.stdout.flush()
  if len(tf_documents):
    sess.run(sink, feed_dict={sink_documents: tf_documents})

def Eval(sess): """Builds and evaluates a network.""" task_context = FLAGS.task_context if FLAGS.resource_dir: task_context = RewriteContext(task_context) feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run( gen_parser_ops.feature_size(task_context=task_context, arg_prefix=FLAGS.arg_prefix)) t = time.time() hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(',')) LOGGING.info( 'Building training network with parameters: feature_sizes: %s ' 'domain_sizes: %s', feature_sizes, domain_sizes) if FLAGS.graph_builder == 'greedy': parser = graph_builder.GreedyParser(num_actions, feature_sizes, domain_sizes, embedding_dims, hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix) else: parser = structured_graph_builder.StructuredGraphBuilder( num_actions, feature_sizes, domain_sizes, embedding_dims, hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix, beam_size=FLAGS.beam_size, max_steps=FLAGS.max_steps) parser.AddEvaluation(task_context, FLAGS.batch_size, corpus_name=FLAGS.input, evaluation_max_steps=FLAGS.max_steps) parser.AddSaver(FLAGS.slim_model) sess.run(parser.inits.values()) parser.saver.restore(sess, FLAGS.model_path) sink_documents = tf.placeholder(tf.string) sink = gen_parser_ops.document_sink(sink_documents, task_context=task_context, corpus_name=FLAGS.output) t = time.time() num_epochs = None num_tokens = 0 num_correct = 0 num_documents = 0 while True: tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([ parser.evaluation['epochs'], parser.evaluation['eval_metrics'], parser.evaluation['documents'], ]) if len(tf_documents): LOGGING.info('Processed %d documents', len(tf_documents)) num_documents += len(tf_documents) sess.run(sink, feed_dict={sink_documents: tf_documents}) num_tokens += tf_eval_metrics[0] num_correct += tf_eval_metrics[1] if num_epochs is None: num_epochs = tf_eval_epochs elif num_epochs < tf_eval_epochs: break LOGGING.info('Total processed documents: %d', num_documents) if num_tokens > 0: eval_metric = 100.0 * num_correct / num_tokens LOGGING.info('num correct tokens: %d', num_correct) LOGGING.info('total tokens: %d', num_tokens) LOGGING.info( 'Seconds elapsed in evaluation: %.2f, ' 'eval metric: %.2f%%', time.time() - t, eval_metric)
def main(unused_argv):
  logging.set_verbosity(logging.INFO)

  model_dir = FLAGS.model_dir
  task_context = "%s/context.pbtxt" % model_dir

  common_params = {
      "task_context": task_context,
      "beam_size": 8,
      "max_steps": 1000,
      "graph_builder": "structured",
      "batch_size": 1024,
      "slim_model": True,
  }

  model = {
      "brain_parser": {
          "arg_prefix": "brain_parser",
          "hidden_layer_sizes": "512,512",
          # input is taken from input tensor, not from corpus
          "input": None,
          "model_path": "%s/parser-params" % model_dir,
      },
  }

  for prefix in ["brain_parser"]:
    model[prefix].update(common_params)
    feature_sizes, domain_sizes, embedding_dims, num_actions = GetFeatureSize(
        task_context, prefix)
    model[prefix].update({
        'feature_sizes': feature_sizes,
        'domain_sizes': domain_sizes,
        'embedding_dims': embedding_dims,
        'num_actions': num_actions
    })

  with tf.Session() as sess:
    if FLAGS.export_path is not None:
      text_input = tf.placeholder(tf.string, [None])
    else:
      text_input = tf.constant(["parsey is the greatest"], tf.string)

    # corpus_name must be specified and valid because it indirectly informs
    # the document format ("english-text" vs "conll-sentence") used to parse
    # the input text
    document_source = gen_parser_ops.document_source(
        text=text_input,
        task_context=task_context,
        corpus_name="stdin-conll",
        batch_size=common_params['batch_size'],
        documents_from_input=True)

    for prefix in ["brain_parser"]:
      with tf.variable_scope(prefix):
        if True or prefix == "brain_tagger":
          # source = document_source.documents if prefix == "brain_tagger"
          #          else model["brain_tagger"]["documents"]
          source = document_source.documents
          model[prefix]["documents"] = Build(sess, source, model[prefix])

    if FLAGS.export_path is None:
      sink = gen_parser_ops.document_sink(
          model["brain_parser"]["documents"],
          task_context=task_context,
          corpus_name="stdout-conll")
      sess.run(sink)
    else:
      assets = []
      for model_file in os.listdir(model_dir):
        path = os.path.join(model_dir, model_file)
        if not os.path.isdir(path):
          assets.append(tf.constant(path))

      ExportModel(sess, FLAGS.export_path, text_input,
                  model["brain_parser"]["documents"], assets)

def Eval(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds and evaluates a network.

  Args:
    sess: tensorflow session to use
    num_actions: number of possible golden actions
    feature_sizes: size of each feature vector
    domain_sizes: number of possible feature ids in each feature vector
    embedding_dims: embedding dimension for each feature group
  """
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = FLAGS.task_context
  parser.AddEvaluation(task_context, FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=FLAGS.task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)

def _perform_action(action=None):
  arg_prefix = action
  task_context = task_context_path

  if action == "brain_tagger":
    hidden_layer_sizes = [64]
    model_path = tagger_params_path
    output = 'output-to-file'
    input = 'input-from-file'
  elif action == "brain_parser":
    hidden_layer_sizes = [512, 512]
    model_path = parser_params_path
    output = 'output-to-file-conll'
    input = 'input-from-file-conll'
  else:
    raise Exception("Do not recognize action %s" % action)

  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=arg_prefix))

  beam_size = 8
  max_steps = 1000
  batch_size = 1024
  slim_model = True

  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions,
      feature_sizes,
      domain_sizes,
      embedding_dims,
      hidden_layer_sizes,
      gate_gradients=True,
      arg_prefix=arg_prefix,
      beam_size=beam_size,
      max_steps=max_steps)
  parser.AddEvaluation(task_context, batch_size,
                       corpus_name=input,
                       evaluation_max_steps=max_steps)

  with tf.Session() as sess:
    parser.AddSaver(slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
      tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
          parser.evaluation['epochs'],
          parser.evaluation['eval_metrics'],
          parser.evaluation['documents'],
      ])
      if len(tf_documents):
        logging.info('Processed %d documents', len(tf_documents))
        num_documents += len(tf_documents)
        sess.run(sink, feed_dict={sink_documents: tf_documents})
      num_tokens += tf_eval_metrics[0]
      num_correct += tf_eval_metrics[1]
      if num_epochs is None:
        num_epochs = tf_eval_epochs
      elif num_epochs < tf_eval_epochs:
        break
    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
      eval_metric = 100.0 * num_correct / num_tokens
      logging.info('num correct tokens: %d', num_correct)
      logging.info('total tokens: %d', num_tokens)
      logging.info('Seconds elapsed in evaluation: %.2f, '
                   'eval metric: %.2f%%', time.time() - t, eval_metric)

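# Hedged usage sketch (not in the source): _perform_action is presumably run
# once per stage, tagging first and then parsing, in the spirit of the standard
# SyntaxNet demo pipeline. Whether the parser stage actually consumes the
# tagger's output depends on how the context file wires 'output-to-file' into
# 'input-from-file-conll'.
_perform_action("brain_tagger")
_perform_action("brain_parser")
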
feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
    gen_parser_ops.feature_size(task_context=task_context,
                                arg_prefix=tokenizer_arg_prefix))

hidden_layer_sizes = map(int, tokenizer_hidden_layer_sizes.split(','))
tokenizer = structured_graph_builder.StructuredGraphBuilder(
    num_actions,
    feature_sizes,
    domain_sizes,
    embedding_dims,
    hidden_layer_sizes,
    gate_gradients=True,
    arg_prefix=tokenizer_arg_prefix,
    beam_size=beam_size,
    max_steps=max_steps)
tokenizer.AddEvaluation(task_context, batch_size,
                        corpus_name='stdin-untoken',
                        evaluation_max_steps=max_steps)
tokenizer.AddSaver(slim_model)
sess.run(tokenizer.inits.values())
tokenizer.saver.restore(sess, tokenizer_model_path)

sink_documents = tf.placeholder(tf.string)
sink = gen_parser_ops.document_sink(sink_documents,
                                    task_context=task_context,
                                    corpus_name='stdin-untoken')


def stdin_handler(signum, frame):
  tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
      tokenizer.evaluation['epochs'],
      tokenizer.evaluation['eval_metrics'],
      tokenizer.evaluation['documents'],
  ])
  sys.stdout.write('\n## result start\n')
  sys.stdout.flush()
  if len(tf_documents):
    sess.run(sink, feed_dict={sink_documents: tf_documents})

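# Hedged sketch (assumption, not shown above): stdin_handler is defined but
# never registered in these excerpts. One plausible wiring installs it as a
# signal handler and keeps the process alive; the choice of SIGALRM here is
# purely illustrative.
import signal
import time

signal.signal(signal.SIGALRM, stdin_handler)  # run one parse pass per signal
while True:
  time.sleep(1)  # idle until the next signal arrives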