# Shared imports for the snippets below.
import tensorflow as tf

from syntaxnet import sentence_pb2
from syntaxnet.ops import gen_parser_ops


def annotate_text(self, text):
    # Wrap the raw text in a Sentence proto with a single placeholder token.
    sentence = sentence_pb2.Sentence(
        text=text,
        token=[sentence_pb2.Token(word=text, start=-1, end=-1)])

    # preprocess
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = self.segmenter_model(preprocessed)

    annotations, traces = self.parser_model(segmented[0])
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0])
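# Usage sketch (hypothetical): annotate_text above is written as an instance
# method, so it assumes a wrapper object exposing the two loaded models as
# attributes; the wrapper name and its construction here are illustrative.
class TextAnnotator(object):
    def __init__(self, segmenter_model, parser_model):
        # Callables wrapping the restored segmenter and parser graphs
        # (model loading itself is not shown in these snippets).
        self.segmenter_model = segmenter_model
        self.parser_model = parser_model

    annotate_text = annotate_text  # bind the function defined above as a method

# annotator = TextAnnotator(segmenter_model, parser_model)
# parsed = annotator.annotate_text(u"John is eating pizza")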
def syntaxnet_sentence(tokens):
    pb_tokens = []
    last_start = 0
    for token in tokens:
        token_bytes = token.encode("utf8")
        # start/end are inclusive byte offsets into the joined text.
        pb_tokens.append(sentence_pb2.Token(
            word=token_bytes,
            start=last_start,
            end=last_start + len(token_bytes) - 1))
        # Advance past this token plus the single separating space.
        last_start = last_start + len(token_bytes) + 1

    annotations, traces = parser_model(sentence_pb2.Sentence(
        text=u" ".join(tokens).encode("utf8"),
        token=pb_tokens).SerializeToString())
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0])
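# Usage sketch (hypothetical call): parser_model is assumed to be an
# already-loaded callable wrapping the restored parser graph. Each Token in
# the returned Sentence carries the parse: `head` is the index of the head
# token (-1 for the root) and `label` is the dependency relation.
def print_parse(words):
    parsed = syntaxnet_sentence(words)
    for i, tok in enumerate(parsed.token):
        head = parsed.token[tok.head].word if tok.head >= 0 else "ROOT"
        print("%d\t%s\t<-%s-\t%s" % (i, tok.word, tok.label, head))

# print_parse([u"John", u"is", u"eating", u"pizza"])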
def syntaxnet_tokenize(text):
    sentence = sentence_pb2.Sentence(
        text=text,
        token=[sentence_pb2.Token(word=text, start=-1, end=-1)])

    # preprocess
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = segmenter_model(preprocessed)

    tokens = []
    for t in sentence_pb2.Sentence.FromString(segmented[0]).token:
        tokens.append(t.word)
    return tokens
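# Sketch of composing the two helpers above into a full pipeline: segment the
# raw text with the segmenter, then parse the resulting tokens.
# syntaxnet_tokenize returns utf-8 bytes, while syntaxnet_sentence re-encodes
# unicode tokens, so the intermediate decode is needed.
def tokenize_then_parse(text):
    token_bytes = syntaxnet_tokenize(text)
    words = [t.decode("utf8") for t in token_bytes]
    return syntaxnet_sentence(words)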
def annotate_text(text):
    """Segment and parse input text using syntaxnet models."""
    sentence = sentence_pb2.Sentence(
        text=text,
        token=[sentence_pb2.Token(word=text, start=-1, end=-1)])

    # preprocess
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = SEGMENTER_MODEL(preprocessed)

    annotations, traces = PARSER_MODEL(segmented[0])
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0]), traces[0]
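# Usage sketch (hypothetical): SEGMENTER_MODEL and PARSER_MODEL are assumed
# to be module-level callables initialized elsewhere; loading is not shown.
# Unlike the method variant above, this annotate_text also returns the
# parser trace.
def annotate_and_show(text):
    parsed, trace = annotate_text(text)
    print([t.word for t in parsed.token])
    # In an IPython/Jupyter notebook one could additionally render the trace:
    #   from IPython.display import HTML
    #   from dragnn.python import visualization
    #   HTML(visualization.trace_html(trace))
    return parsed, trace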
def inference(sess, graph, builder, annotator, text, enable_tracing=False):
    tokens = [
        sentence_pb2.Token(word=word, start=-1, end=-1) for word in text.split()
    ]
    sentence = sentence_pb2.Sentence()
    sentence.token.extend(tokens)

    if enable_tracing:
        annotations, traces = sess.run(
            [annotator['annotations'], annotator['traces']],
            feed_dict={annotator['input_batch']: [sentence.SerializeToString()]})
        # HTML(visualization.trace_html(traces[0]))
    else:
        annotations = sess.run(
            annotator['annotations'],
            feed_dict={annotator['input_batch']: [sentence.SerializeToString()]})

    parsed_sentence = sentence_pb2.Sentence.FromString(annotations[0])
    # HTML(render_parse_tree_graphviz.parse_tree_graph(parsed_sentence))
    return parsed_sentence
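# Usage sketch (hypothetical setup): the DRAGNN graph construction and
# checkpoint restore that produce `sess`, `graph`, `builder` and the
# `annotator` dict (with 'annotations', 'traces' and 'input_batch' keys,
# matching the fetches/feeds above) are not shown. The commented-out
# HTML(...) calls only make sense inside an IPython/Jupyter notebook.
def parse_batch(sess, graph, builder, annotator, texts):
    # Run plain (untraced) inference over several whitespace-split inputs.
    return [inference(sess, graph, builder, annotator, t) for t in texts]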