def load(self, task_name=None):
    if self.name == 'tf':
        prefer_eager = self.params.get('prefer_eager', False)
        from eight_mile.tf.layers import set_tf_eager_mode, set_tf_log_level, set_tf_eager_debug
        set_tf_eager_mode(prefer_eager)
        set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
        set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))

    base_pkg_name = 'baseline.{}'.format(self.name)
    # Backends may not be downloaded to the cache, they must exist locally
    mod = import_user_module(base_pkg_name)
    import_user_module('baseline.{}.optz'.format(self.name))
    import_user_module('baseline.{}.embeddings'.format(self.name))
    import_user_module('mead.{}.exporters'.format(self.name))
    if task_name is not None:
        try:
            import_user_module(f'{base_pkg_name}.{task_name}')
        except:
            logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
    self.transition_mask = mod.transition_mask
def main():
    parser = argparse.ArgumentParser(description='Run a language model over text')
    parser.add_argument('--model', help='A language model', required=True, type=str)
    parser.add_argument('--text', help='raw value', type=str)
    parser.add_argument('--device', help='device')
    parser.add_argument('--backend', help='backend', choices={'tf', 'pytorch'}, default='tf')
    parser.add_argument('--prefer_eager', help="If running in TensorFlow, should we prefer eager mode", type=str2bool)
    args = parser.parse_known_args()[0]

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        with open(args.text, 'r') as f:
            for line in f:
                text = line.strip().split()
                texts += [text]
    else:
        texts = args.text.split()

    print(texts)
    m = bl.LanguageModelService.load(args.model, device=args.device)
    print(m.predict(texts))
def main():
    parser = argparse.ArgumentParser(description='Classify text with a model')
    parser.add_argument('--model',
                        help='The path to either the .zip file created by training or to the client bundle '
                             'created by exporting',
                        required=True, type=str)
    parser.add_argument('--text',
                        help='The text to classify as a string, or a path to a file with each line as an example',
                        type=str)
    parser.add_argument('--backend', help='backend', choices={'tf', 'pytorch', 'onnx'}, default='tf')
    parser.add_argument('--remote', help='(optional) remote endpoint, normally localhost:8500', type=str)
    parser.add_argument('--name', help='(optional) service name, as the server may serve multiple models', type=str)
    parser.add_argument('--device', help='device')
    parser.add_argument('--preproc', help='(optional) where to perform preprocessing', choices={'client', 'server'}, default='client')
    parser.add_argument('--batchsz', help='batch size when --text is a file', default=100, type=int)
    parser.add_argument('--model_type', type=str, default='default')
    parser.add_argument('--modules', default=[], nargs="+")
    parser.add_argument('--prefer_eager', help="If running in TensorFlow, should we prefer eager mode", type=str2bool, default=False)
    parser.add_argument('--scores', '-s', action="store_true")
    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        with open(args.text, 'r') as f:
            for line in f:
                text = line.strip().split()
                texts += [text]
    else:
        texts = [args.text.split()]

    batched = [texts[i:i + args.batchsz] for i in range(0, len(texts), args.batchsz)]

    m = bl.ClassifierService.load(args.model, backend=args.backend, remote=args.remote,
                                  name=args.name, preproc=args.preproc,
                                  device=args.device, model_type=args.model_type)
    for texts in batched:
        for text, output in zip(texts, m.predict(texts)):
            if args.scores:
                # Print all (label, score) pairs for the example
                print("{}, {}".format(" ".join(text), output))
            else:
                # Print only the top label
                print("{}, {}".format(" ".join(text), output[0][0]))
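# A minimal programmatic sketch of the same flow as the classify script above, for use
# from Python rather than the CLI. The model path and sentences are placeholders; the
# assumption that predict() returns a list of (label, score) pairs per example, best
# first, is the same one the output[0][0] indexing above relies on.
import baseline as bl


def top_labels(model_path, sentences, backend='tf'):
    """Return the best label for each whitespace-tokenized sentence."""
    svc = bl.ClassifierService.load(model_path, backend=backend, preproc='client')
    tokenized = [s.split() for s in sentences]
    return [preds[0][0] for preds in svc.predict(tokenized)]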
def main():
    parser = argparse.ArgumentParser(description='Encoder-Decoder execution')
    parser.add_argument('--model', help='An encoder-decoder model', required=True, type=str)
    parser.add_argument('--text', help='raw value or a file', type=str)
    parser.add_argument('--backend', help='backend', default='tf')
    parser.add_argument('--remote', help='(optional) remote endpoint', type=str)  # localhost:8500
    parser.add_argument('--name', help='(optional) signature name', type=str)
    parser.add_argument('--target', help='A file to write decoded output (or print to screen)')
    parser.add_argument('--tsv', help='print tab separated', type=bl.str2bool, default=False)
    parser.add_argument('--batchsz', help='Size of a batch to pass at once', default=32, type=int)
    parser.add_argument('--device', help='device')
    parser.add_argument('--alpha', type=float, help='If set, use in the GNMT length penalty.')
    parser.add_argument('--beam', type=int, default=30, help='The size of beam to use.')
    parser.add_argument('--prefer_eager', help="If running in TensorFlow, should we prefer eager mode", type=str2bool)
    args = parser.parse_known_args()[0]

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    batches = []
    if os.path.exists(args.text) and os.path.isfile(args.text):
        with open(args.text, 'r') as f:
            batch = []
            for line in f:
                text = line.strip().split()
                if len(batch) == args.batchsz:
                    batches.append(batch)
                    batch = []
                batch.append(text)
            if len(batch) > 0:
                batches.append(batch)
    else:
        batch = [args.text.split()]
        batches.append(batch)

    m = bl.EncoderDecoderService.load(args.model, backend=args.backend, beam=args.beam,
                                      remote=args.remote, name=args.name, device=args.device)

    f = open(args.target, 'w') if args.target is not None else None
    for texts in batches:
        decoded = m.predict(texts, alpha=args.alpha, beam=args.beam)
        for src, dst in zip(texts, decoded):
            src_str = ' '.join(src)
            dst_str = ' '.join(dst)
            if args.tsv:
                line = src_str + '\t' + dst_str
            else:
                line = dst_str
            print(line, file=f, flush=True)

    if f is not None:
        f.close()
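# The scripts in this file all chunk their inputs with the same slice pattern
# (items[i:i + batchsz]); a small generator form of that pattern, shown only for
# illustration, with hypothetical names:
def batch(items, batchsz):
    """Yield consecutive slices of at most batchsz items."""
    for i in range(0, len(items), batchsz):
        yield items[i:i + batchsz]

# list(batch([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]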
import argparse
import baseline
from eight_mile.utils import get_version
from eight_mile.confusion import ConfusionMatrix
import baseline.tf.embeddings
import eight_mile.tf.layers as L
from eight_mile.tf.layers import SET_TRAIN_FLAG, set_tf_log_level, set_tf_eager_mode
from eight_mile.tf.optz import EagerOptimizer
set_tf_eager_mode(True)
import tensorflow as tf
from tensorflow.compat.v1 import count_nonzero
import logging
import numpy as np
import time


def get_logging_level(ll):
    ll = ll.lower()
    if ll == 'debug':
        return logging.DEBUG
    if ll == 'info':
        return logging.INFO
    return logging.WARNING


#tf.config.gpu.set_per_process_memory_growth(True)
NUM_PREFETCH = 2
SHUF_BUF_SZ = 5000
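# A minimal sketch (an assumption, not part of the original script) of how the
# constants above are typically wired into a tf.data input pipeline for eager
# training; the function name and arguments are illustrative only.
def to_dataset(x, y, batchsz, train=False):
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    if train:
        # Shuffle with a bounded buffer so large datasets stream from disk
        ds = ds.shuffle(SHUF_BUF_SZ)
    # Overlap preprocessing of the next batches with training on the current one
    return ds.batch(batchsz).prefetch(NUM_PREFETCH)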
def main():
    parser = argparse.ArgumentParser(description='Tag text with a model')
    parser.add_argument('--model', help='A tagger model with extended features', required=True, type=str)
    parser.add_argument('--text', help='raw value', type=str)
    parser.add_argument('--conll', help='is file type conll?', type=str2bool, default=False)
    parser.add_argument('--features',
                        help='(optional) features in the format feature_name:index (column # in conll) or '
                             'just feature names (assumed sequential)',
                        default=[], nargs='+')
    parser.add_argument('--backend', help='backend', default='tf')
    parser.add_argument('--device', help='device')
    parser.add_argument('--remote', help='(optional) remote endpoint', type=str)  # localhost:8500
    parser.add_argument('--name', help='(optional) signature name', type=str)
    parser.add_argument('--preproc', help='(optional) where to perform preprocessing', choices={'client', 'server'}, default='client')
    parser.add_argument('--export_mapping',
                        help='mapping between features and the fields in the grpc/REST '
                             'request, e.g. token:word ner:ner. This should match the '
                             '`exporter_field` definition in the mead config',
                        default=[], nargs='+')
    parser.add_argument('--prefer_eager', help="If running in TensorFlow, should we prefer eager mode", type=str2bool)
    parser.add_argument('--modules', default=[], nargs="+")
    parser.add_argument('--batchsz', default=64, help="How many examples to run through the model at once", type=int)
    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

    def create_export_mapping(feature_map_strings):
        feature_map_strings = [x.strip() for x in feature_map_strings if x.strip()]
        if not feature_map_strings:
            return {}
        return {x[0]: x[1] for x in [y.split(':') for y in feature_map_strings]}

    def feature_index_mapping(features):
        if not features:
            return {}
        elif ':' in features[0]:
            return {feature.split(':')[0]: int(feature.split(':')[1]) for feature in features}
        else:
            return {feature: index for index, feature in enumerate(features)}

    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        if args.conll:
            feature_indices = feature_index_mapping(args.features)
            for sentence in read_conll(args.text):
                if feature_indices:
                    texts.append([{k: line[v] for k, v in feature_indices.items()} for line in sentence])
                else:
                    texts.append([line[0] for line in sentence])
        else:
            with open(args.text, 'r') as f:
                for line in f:
                    text = line.strip().split()
                    texts += [text]
    else:
        texts = [args.text.split()]

    m = bl.TaggerService.load(args.model, backend=args.backend, remote=args.remote,
                              name=args.name, preproc=args.preproc, device=args.device)

    batched = [texts[i:i + args.batchsz] for i in range(0, len(texts), args.batchsz)]

    for texts in batched:
        for sen in m.predict(texts, export_mapping=create_export_mapping(args.export_mapping)):
            for word_tag in sen:
                print("{} {}".format(word_tag['text'], word_tag['label']))
            print()
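# A standalone copy of the feature_index_mapping helper above, with examples of how the
# --features flag maps onto CoNLL columns (the feature names here are illustrative):
def feature_index_mapping(features):
    if not features:
        return {}
    if ':' in features[0]:
        # Explicit column indices, e.g. 'word:0'
        return {f.split(':')[0]: int(f.split(':')[1]) for f in features}
    # Bare names are assumed to be sequential columns
    return {feature: index for index, feature in enumerate(features)}

# feature_index_mapping(['word:0', 'pos:1'])  -> {'word': 0, 'pos': 1}
# feature_index_mapping(['word', 'pos'])      -> {'word': 0, 'pos': 1}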
def main():
    parser = argparse.ArgumentParser(description='Classify pairs of text (NLI) with a model')
    parser.add_argument('--model',
                        help='The path to either the .zip file created by training or to the client bundle '
                             'created by exporting',
                        required=True, type=str)
    parser.add_argument('--config', type=str, required=True)
    parser.add_argument('--text1', type=str)
    parser.add_argument('--text2', type=str)
    parser.add_argument('--file', type=str)
    parser.add_argument('--backend', help='backend', choices={'tf', 'pytorch'}, default='pytorch')
    parser.add_argument('--device', help='device')
    parser.add_argument('--batchsz', help='batch size when --file is given', default=100, type=int)
    parser.add_argument('--modules', default=[], nargs="+")
    # Needed below when --backend is tf
    parser.add_argument('--prefer_eager', help="If running in TensorFlow, should we prefer eager mode", type=str2bool, default=False)
    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

    if os.path.exists(args.file) and os.path.isfile(args.file):
        df = pd.read_csv(args.file)
        text_1 = [x.strip().split() for x in df['hypothesis']]
        text_2 = [x.strip().split() for x in df['premise']]
    else:
        text_1 = [args.text1.split()]
        text_2 = [args.text2.split()]

    text_1_batched = [text_1[i:i + args.batchsz] for i in range(0, len(text_1), args.batchsz)]
    text_2_batched = [text_2[i:i + args.batchsz] for i in range(0, len(text_2), args.batchsz)]

    config = read_config_file_or_json(args.config)
    loader_config = config['loader']
    model_type = config['model']['model_type']
    model = NLIService.load(args.model, model_type=model_type, backend=args.backend,
                            device=args.device, **loader_config)

    for text_1_batch, text_2_batch in zip(text_1_batched, text_2_batched):
        output_batch = model.predict(text_1_batch, text_2_batch)
        for text_1, text_2, output in zip(text_1_batch, text_2_batch, output_batch):
            print("text1: {}, text2: {}, prediction: {}".format(" ".join(text_1), " ".join(text_2), output[0][0]))
            print('=' * 30)
import baseline as bl
from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_mode
set_tf_eager_mode(False)
import baseline.tf.embeddings
import baseline.tf.classify
import time
import tensorflow as tf
import numpy as np
import os
import argparse
import logging

log = logging.getLogger('baseline.timing')


def get_logging_level(ll):
    ll = ll.lower()
    if ll == 'debug':
        return logging.DEBUG
    if ll == 'info':
        return logging.INFO
    return logging.WARNING


parser = argparse.ArgumentParser(description='Train a Baseline model with TensorFlow Estimator API')
parser.add_argument('--checkpoint_dir', help='Directory for model checkpoints', default='./checkpoints', type=str)
parser.add_argument('--model_type', help='What type of model to build', type=str, default='default')
parser.add_argument('--poolsz', help='How many hidden units for pooling', type=int, default=100)
parser.add_argument('--stacksz', help='How many hidden units for stacking', type=int, nargs='+')
parser.add_argument('--text', help='raw value', type=str)
parser.add_argument('--backend', help='backend', default='tf')
parser.add_argument('--remote', help='(optional) remote endpoint', type=str)  # localhost:8500
parser.add_argument('--name', help='(optional) signature name', type=str)