# Imports assumed by the functions below; the TensorFlow module paths follow
# the TF 1.x SavedModel layout and TensorFlow Serving's generated protos,
# while inputs and text_utils are local project modules.
import re

import tensorflow as tf
from tensorflow.contrib.saved_model.python.saved_model import reader
from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import loader
from tensorflow_serving.apis import classification_pb2

import inputs
import text_utils


def RunModel(saved_model_dir, signature_def_key, tag, text, ngrams_list=None):
    saved_model = reader.read_saved_model(saved_model_dir)
    # Find the MetaGraphDef that carries the requested tag.
    meta_graph = None
    for meta_graph_def in saved_model.meta_graphs:
        if tag in meta_graph_def.meta_info_def.tags:
            meta_graph = meta_graph_def
            break
    if meta_graph is None:
        raise ValueError("Cannot find saved_model with tag " + tag)
    signature_def = signature_def_utils.get_signature_def_by_key(
        meta_graph, signature_def_key)
    # Build a serialized tf.Example from the tokenized text (plus optional
    # n-grams) to feed the model's input tensor.
    text = text_utils.TokenizeText(text)
    ngrams = None
    if ngrams_list is not None:
        ngrams_list = text_utils.ParseNgramsOpts(ngrams_list)
        ngrams = text_utils.GenerateNgrams(text, ngrams_list)
    example = inputs.BuildTextExample(text, ngrams=ngrams)
    example = example.SerializeToString()
    inputs_feed_dict = {
        signature_def.inputs["inputs"].name: [example],
    }
    # Map the signature key to the output tensor it exposes.
    if signature_def_key == "proba":
        output_key = "scores"
    elif signature_def_key == "embedding":
        output_key = "outputs"
    else:
        raise ValueError("Unrecognised signature_def %s" % signature_def_key)
    output_tensor = signature_def.outputs[output_key].name
    with tf.Session() as sess:
        loader.load(sess, [tag], saved_model_dir)
        outputs = sess.run(output_tensor, feed_dict=inputs_feed_dict)
    return outputs
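# Usage sketch (hypothetical export path; assumes the model was exported
# with the "serve" tag and a "proba" classification signature, matching the
# branches handled above):
#
#   probs = RunModel("/tmp/fasttext_model/1", "proba", "serve",
#                    "this is a test sentence")
#   print(probs)  # per-class probability scores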
def Request(text):
    # Build a ClassificationRequest for TensorFlow Serving from raw text.
    # FLAGS.signature_def is assumed to be defined by the surrounding script
    # (e.g. via tf.app.flags).
    example = inputs.BuildTextExample(text_utils.TokenizeText(text))
    request = classification_pb2.ClassificationRequest()
    request.model_spec.name = 'default'
    request.model_spec.signature_name = FLAGS.signature_def
    request.input.example_list.examples.extend([example])
    return request
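# Sketch of sending the request to a TensorFlow Serving instance over gRPC.
# The host/port are hypothetical, and the stub module path assumes the
# standard tensorflow_serving generated APIs:
#
#   import grpc
#   from tensorflow_serving.apis import prediction_service_pb2_grpc
#
#   channel = grpc.insecure_channel("localhost:9000")
#   stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
#   response = stub.Classify(Request("some text to classify"), 10.0)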
def ParseText_and_label_Input(textfile, labelsmap, stopwords_file, ngrams):
    """Parse data from input files; textfile is the training set.

    Each line of textfile is one record: the text and a 0-offset label
    index separated by a space. labelsmap holds one label name per line,
    and stopwords_file one stop word per line.
    """
    # Build the index -> label-name mapping.
    idx_to_textlabel = {}
    with open(labelsmap, encoding='utf-8') as f:
        lines = f.readlines()
        for idx, line in enumerate(lines):
            idx_to_textlabel[idx] = line.strip()
    examples = []
    with open(stopwords_file, encoding="utf-8") as sf:
        lines = sf.readlines()
        stop_words = set([line.strip() for line in lines])
        stop_words.add(' ')
        stop_words.add('\n')
        stop_words.add('')
    with open(textfile, encoding='utf-8') as f1:
        for record in f1:
            text, label = record.strip().split(' ')
            text = text.strip()
            label = idx_to_textlabel[int(label.strip())]
            words = text_utils.TokenizeText(text, stop_words)
            examples.append({
                "text": words,
                "label": label,
            })
            if ngrams:
                examples[-1]["ngrams"] = text_utils.GenerateNgrams(
                    words, ngrams)
    return examples
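# Usage sketch (hypothetical file names): train.txt holds one
# "<text> <label-index>" record per line, labels.txt one label name per
# line, stopwords.txt one stop word per line; [2, 3] asks for bigrams and
# trigrams, assuming GenerateNgrams takes a list of n values:
#
#   examples = ParseText_and_label_Input("train.txt", "labels.txt",
#                                        "stopwords.txt", [2, 3])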
def Request(text, ngrams):
    # N-gram-aware variant of Request; note it shadows the definition above
    # if both are kept in the same module. Always queries the 'proba'
    # signature.
    text = text_utils.TokenizeText(text)
    ngram_tokens = None
    if ngrams is not None:
        ngrams_list = text_utils.ParseNgramsOpts(ngrams)
        ngram_tokens = text_utils.GenerateNgrams(text, ngrams_list)
    example = inputs.BuildTextExample(text, ngrams=ngram_tokens)
    request = classification_pb2.ClassificationRequest()
    request.model_spec.name = 'default'
    request.model_spec.signature_name = 'proba'
    request.input.example_list.examples.extend([example])
    return request
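# Usage sketch: the ngrams argument is assumed to be an option string such
# as "2,3" that text_utils.ParseNgramsOpts turns into a list of n values:
#
#   request = Request("some text to classify", "2,3")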
def ParseTextInput(textfile, labelsfile):
    """Parse input from two text files: text and labels.

    Labels are specified 0-offset, one per line.
    """
    examples = []
    with open(textfile) as f1, open(labelsfile) as f2:
        for text, label in zip(f1, f2):
            words = text_utils.TokenizeText(text)
            examples.append({
                "text": words,
                "label": label.strip(),  # drop the trailing newline
            })
    return examples
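# Usage sketch (hypothetical file names): sentences.txt holds one text per
# line and labels.txt the matching 0-offset label per line:
#
#   examples = ParseTextInput("sentences.txt", "labels.txt")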
def ParseFacebookInput(inputfile):
    """Parse input in the format used by Facebook fastText.

    Labels are formatted as __label__1, where label values start at 0.
    """
    examples = []
    with open(inputfile) as f:
        for line in f:
            words = line.split(' ', 1)
            # The label is the first field with the __label__ prefix removed.
            match = re.match(r'__label__(.+)', words[0])
            label = match.group(1) if match else None
            words = text_utils.TokenizeText(words[1])
            examples.append({
                "text": words,
                "label": label,
            })
    return examples
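# Usage sketch (hypothetical file name) for a fastText-format corpus where
# each line looks like "__label__2 some example text":
#
#   examples = ParseFacebookInput("train.ft.txt")
#   print(examples[0]["label"], examples[0]["text"])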