Example #1
    def load(self, task_name=None):
        if self.name == 'tf':
            prefer_eager = self.params.get('prefer_eager', False)
            from eight_mile.tf.layers import set_tf_eager_mode, set_tf_log_level, set_tf_eager_debug
            set_tf_eager_mode(prefer_eager)
            set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
            set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))

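        # Import the chosen backend package (e.g. baseline.tf) plus its optimizer, embeddings, and exporter modules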
        base_pkg_name = 'baseline.{}'.format(self.name)
        # Backend packages are not downloaded to the cache; they must exist locally
        mod = import_user_module(base_pkg_name)
        import_user_module('baseline.{}.optz'.format(self.name))
        import_user_module('baseline.{}.embeddings'.format(self.name))
        import_user_module('mead.{}.exporters'.format(self.name))
        if task_name is not None:
            try:
                import_user_module(f'{base_pkg_name}.{task_name}')
            except ImportError:
                logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
        self.transition_mask = mod.transition_mask
Example #2
def main():
    parser = argparse.ArgumentParser(description='Run a language model over text')
    parser.add_argument('--model',
                        help='A classifier model',
                        required=True,
                        type=str)
    parser.add_argument('--text', help='raw value', type=str)
    parser.add_argument('--device', help='device')
    parser.add_argument('--backend',
                        help='backend',
                        choices={'tf', 'pytorch'},
                        default='tf')
    parser.add_argument(
        '--prefer_eager',
        help="If running in TensorFlow, should we prefer eager mode",
        type=str2bool)

    args = parser.parse_known_args()[0]

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

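    # --text is either a path to a file with one whitespace-tokenized example per line, or a raw string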
    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        with open(args.text, 'r') as f:
            for line in f:
                text = line.strip().split()
                texts += [text]

    else:
        texts = args.text.split()

    print(texts)

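    # Load the language model service and run prediction over the tokens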
    m = bl.LanguageModelService.load(args.model, device=args.device)
    print(m.predict(texts))
Example #3
def main():
    parser = argparse.ArgumentParser(description='Classify text with a model')
    parser.add_argument(
        '--model',
        help=
        'The path to either the .zip file created by training or to the client bundle '
        'created by exporting',
        required=True,
        type=str)
    parser.add_argument(
        '--text',
        help=
        'The text to classify as a string, or a path to a file with each line as an example',
        type=str)
    parser.add_argument('--backend',
                        help='backend',
                        choices={'tf', 'pytorch', 'onnx'},
                        default='tf')
    parser.add_argument(
        '--remote',
        help='(optional) remote endpoint, normally localhost:8500',
        type=str)  # localhost:8500
    parser.add_argument(
        '--name',
        help='(optional) service name, as the server may serve multiple models',
        type=str)
    parser.add_argument('--device', help='device')
    parser.add_argument('--preproc',
                        help='(optional) where to perform preprocessing',
                        choices={'client', 'server'},
                        default='client')
    parser.add_argument('--batchsz',
                        help='batch size when --text is a file',
                        default=100,
                        type=int)
    parser.add_argument('--model_type', type=str, default='default')
    parser.add_argument('--modules', default=[], nargs="+")
    parser.add_argument(
        '--prefer_eager',
        help="If running in TensorFlow, should we prefer eager mode",
        type=str2bool,
        default=False)
    parser.add_argument('--scores', '-s', action="store_true")
    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

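    # --text is either a path to a file with one example per line, or a raw string; both paths yield a list of token lists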
    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        with open(args.text, 'r') as f:
            for line in f:
                text = line.strip().split()
                texts += [text]

    else:
        texts = [args.text.split()]
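    # Chunk the examples into batches of at most --batchsz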
    batched = [
        texts[i:i + args.batchsz] for i in range(0, len(texts), args.batchsz)
    ]

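    # Load the classifier (local bundle or remote endpoint); print the top prediction, or the full output when --scores is set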
    m = bl.ClassifierService.load(args.model,
                                  backend=args.backend,
                                  remote=args.remote,
                                  name=args.name,
                                  preproc=args.preproc,
                                  device=args.device,
                                  model_type=args.model_type)
    for texts in batched:
        for text, output in zip(texts, m.predict(texts)):
            if args.scores:
                print("{}, {}".format(" ".join(text), output))
            else:
                print("{}, {}".format(" ".join(text), output[0][0]))
Example #4
def main():
    parser = argparse.ArgumentParser(description='Encoder-Decoder execution')
    parser.add_argument('--model',
                        help='An encoder-decoder model',
                        required=True,
                        type=str)
    parser.add_argument('--text', help='raw value or a file', type=str)
    parser.add_argument('--backend', help='backend', default='tf')
    parser.add_argument('--remote',
                        help='(optional) remote endpoint',
                        type=str)  # localhost:8500
    parser.add_argument('--name', help='(optional) signature name', type=str)
    parser.add_argument(
        '--target', help='A file to write decoded output to (otherwise print to screen)')
    parser.add_argument('--tsv',
                        help='print tab separated',
                        type=bl.str2bool,
                        default=False)
    parser.add_argument('--batchsz',
                        help='Size of a batch to pass at once',
                        default=32,
                        type=int)
    parser.add_argument('--device', help='device')
    parser.add_argument('--alpha',
                        type=float,
                        help='If set, used in the GNMT length penalty.')
    parser.add_argument('--beam',
                        type=int,
                        default=30,
                        help='The size of beam to use.')
    parser.add_argument(
        '--prefer_eager',
        help="If running in TensorFlow, should we prefer eager mode",
        type=str2bool)

    args = parser.parse_known_args()[0]

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

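    # Read the input and group it into batches of at most --batchsz examples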
    batches = []
    if os.path.exists(args.text) and os.path.isfile(args.text):
        with open(args.text, 'r') as f:
            batch = []
            for line in f:
                text = line.strip().split()
                if len(batch) == args.batchsz:
                    batches.append(batch)
                    batch = []
                batch.append(text)

            if len(batch) > 0:
                batches.append(batch)

    else:
        batch = [args.text.split()]
        batches.append(batch)

    m = bl.EncoderDecoderService.load(args.model,
                                      backend=args.backend,
                                      beam=args.beam,
                                      remote=args.remote,
                                      name=args.name,
                                      device=args.device)

    f = open(args.target, 'w') if args.target is not None else None

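    # Decode each batch with beam search; write 'source<TAB>target' when --tsv, otherwise just the decoded target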
    for texts in batches:
        decoded = m.predict(texts, alpha=args.alpha, beam=args.beam)
        for src, dst in zip(texts, decoded):
            src_str = ' '.join(src)
            dst_str = ' '.join(dst)
            if args.tsv:
                line = src_str + '\t' + dst_str
            else:
                line = dst_str

            print(line, file=f, flush=True)

    if f is not None:
        f.close()
Example #5
import argparse
import baseline
from eight_mile.utils import get_version
from eight_mile.confusion import ConfusionMatrix
import baseline.tf.embeddings
import eight_mile.tf.layers as L
from eight_mile.tf.layers import SET_TRAIN_FLAG, set_tf_log_level, set_tf_eager_mode
from eight_mile.tf.optz import EagerOptimizer
set_tf_eager_mode(True)
import tensorflow as tf
from tensorflow.compat.v1 import count_nonzero
import logging
import numpy as np
import time


def get_logging_level(ll):
    ll = ll.lower()
    if ll == 'debug':
        return logging.DEBUG
    if ll == 'info':
        return logging.INFO
    return logging.WARNING


#tf.config.gpu.set_per_process_memory_growth(True)

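# Input pipeline constants: prefetch depth and shuffle buffer size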
NUM_PREFETCH = 2
SHUF_BUF_SZ = 5000

Example #6
def main():
    parser = argparse.ArgumentParser(description='Tag text with a model')
    parser.add_argument('--model',
                        help='A tagger model with extended features',
                        required=True,
                        type=str)
    parser.add_argument('--text', help='raw value', type=str)
    parser.add_argument('--conll',
                        help='Is the input file in CoNLL format?',
                        type=str2bool,
                        default=False)
    parser.add_argument(
        '--features',
        help=
        '(optional) features in the format feature_name:index (column # in conll) or '
        'just feature names (assumed sequential)',
        default=[],
        nargs='+')
    parser.add_argument('--backend', help='backend', default='tf')
    parser.add_argument('--device', help='device')
    parser.add_argument('--remote',
                        help='(optional) remote endpoint',
                        type=str)  # localhost:8500
    parser.add_argument('--name', help='(optional) signature name', type=str)
    parser.add_argument('--preproc',
                        help='(optional) where to perform preprocessing',
                        choices={'client', 'server'},
                        default='client')
    parser.add_argument(
        '--export_mapping',
        help='mapping between features and the fields in the gRPC/REST '
        'request, e.g. token:word ner:ner. This should match the '
        '`exporter_field` definition in the mead config',
        default=[],
        nargs='+')
    parser.add_argument(
        '--prefer_eager',
        help="If running in TensorFlow, should we prefer eager mode",
        type=str2bool)
    parser.add_argument('--modules', default=[], nargs="+")
    parser.add_argument(
        '--batchsz',
        default=64,
        help="How many examples to run through the model at once",
        type=int)

    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

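    # Parse 'feature:field' strings into a dict mapping feature names to request field names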
    def create_export_mapping(feature_map_strings):
        feature_map_strings = [
            x.strip() for x in feature_map_strings if x.strip()
        ]
        if not feature_map_strings:
            return {}
        else:
            return {
                x[0]: x[1]
                for x in [y.split(':') for y in feature_map_strings]
            }

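    # Map feature names to CoNLL column indices, either explicit 'name:index' pairs or sequentially by position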
    def feature_index_mapping(features):
        if not features:
            return {}
        elif ':' in features[0]:
            return {
                feature.split(':')[0]: int(feature.split(':')[1])
                for feature in features
            }
        else:
            return {feature: index for index, feature in enumerate(features)}

    if os.path.exists(args.text) and os.path.isfile(args.text):
        texts = []
        if args.conll:
            feature_indices = feature_index_mapping(args.features)
            for sentence in read_conll(args.text):
                if feature_indices:
                    texts.append(
                        [{k: line[v]
                          for k, v in feature_indices.items()}
                         for line in sentence])
                else:
                    texts.append([line[0] for line in sentence])
        else:
            with open(args.text, 'r') as f:
                for line in f:
                    text = line.strip().split()
                    texts += [text]
    else:
        texts = [args.text.split()]

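    # Load the tagger and print one 'token label' pair per line, with a blank line between sentences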
    m = bl.TaggerService.load(args.model,
                              backend=args.backend,
                              remote=args.remote,
                              name=args.name,
                              preproc=args.preproc,
                              device=args.device)

    batched = [
        texts[i:i + args.batchsz] for i in range(0, len(texts), args.batchsz)
    ]

    for texts in batched:
        for sen in m.predict(texts,
                             export_mapping=create_export_mapping(
                                 args.export_mapping)):
            for word_tag in sen:
                print("{} {}".format(word_tag['text'], word_tag['label']))
            print()
Example #7
def main():
    parser = argparse.ArgumentParser(description='Classify text with a model')
    parser.add_argument(
        '--model',
        help=
        'The path to either the .zip file created by training or to the client bundle '
        'created by exporting',
        required=True,
        type=str)
    parser.add_argument('--config', type=str, required=True)
    parser.add_argument('--text1', type=str)
    parser.add_argument('--text2', type=str)
    parser.add_argument('--file', type=str)
    parser.add_argument('--backend',
                        help='backend',
                        choices={'tf', 'pytorch'},
                        default='pytorch')
    parser.add_argument('--device', help='device')
    parser.add_argument('--batchsz',
                        help='batch size when --file is given',
                        default=100,
                        type=int)
    parser.add_argument('--modules', default=[], nargs="+")
    parser.add_argument(
        '--prefer_eager',
        help="If running in TensorFlow, should we prefer eager mode",
        type=str2bool,
        default=False)
    args = parser.parse_args()

    if args.backend == 'tf':
        from eight_mile.tf.layers import set_tf_eager_mode
        set_tf_eager_mode(args.prefer_eager)

    for mod_name in args.modules:
        bl.import_user_module(mod_name)

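    # --file is expected to be a CSV with 'hypothesis' and 'premise' columns; otherwise classify the --text1/--text2 pair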
    if os.path.exists(args.file) and os.path.isfile(args.file):
        df = pd.read_csv(args.file)
        text_1 = [x.strip().split() for x in df['hypothesis']]
        text_2 = [x.strip().split() for x in df['premise']]
    else:
        text_1 = [args.text1.split()]
        text_2 = [args.text2.split()]

    text_1_batched = [
        text_1[i:i + args.batchsz] for i in range(0, len(text_1), args.batchsz)
    ]
    text_2_batched = [
        text_2[i:i + args.batchsz] for i in range(0, len(text_2), args.batchsz)
    ]

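    # Pull loader settings and model_type from the mead config, then load the NLI model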
    config = read_config_file_or_json(args.config)
    loader_config = config['loader']
    model_type = config['model']['model_type']
    model = NLIService.load(args.model,
                            model_type=model_type,
                            backend=args.backend,
                            device=args.device,
                            **loader_config)
    for text_1_batch, text_2_batch in zip(text_1_batched, text_2_batched):
        output_batch = model.predict(text_1_batch, text_2_batch)
        for text_1, text_2, output in zip(text_1_batch, text_2_batch,
                                          output_batch):
            print("text1: {}, text2: {}, prediction: {}".format(
                " ".join(text_1), " ".join(text_2), output[0][0]))
        print('=' * 30)
Example #8
import baseline as bl

from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_mode
set_tf_eager_mode(False)
import baseline.tf.embeddings
import baseline.tf.classify
import time
import tensorflow as tf
import numpy as np
import os
import argparse
import logging
log = logging.getLogger('baseline.timing')

def get_logging_level(ll):
    ll = ll.lower()
    if ll == 'debug':
        return logging.DEBUG
    if ll == 'info':
        return logging.INFO
    return logging.WARNING

parser = argparse.ArgumentParser(description='Train a Baseline model with TensorFlow Estimator API')
parser.add_argument('--checkpoint_dir', help='Directory for model checkpoints', default='./checkpoints', type=str)
parser.add_argument('--model_type', help='What type of model to build', type=str, default='default')
parser.add_argument('--poolsz', help='How many hidden units for pooling', type=int, default=100)
parser.add_argument('--stacksz', help='How many hidden units for stacking', type=int, nargs='+')
parser.add_argument('--text', help='raw value', type=str)
parser.add_argument('--backend', help='backend', default='tf')
parser.add_argument('--remote', help='(optional) remote endpoint', type=str) # localhost:8500
parser.add_argument('--name', help='(optional) signature name', type=str)