Python revlut примеры, baseline.utils.revlut Python примеры использования

Пример #1

0

Показать файл

Файл: services.py Проект: wenshuoliu/baseline

 def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
     """Initialize the service and build reversed lookup tables.

     All four arguments are forwarded unchanged to the parent initializer.
     Afterwards ``label_vocab`` is set to ``revlut(self.get_labels())`` and
     ``rev_vocab`` maps every key of ``self.vocabs`` to ``revlut`` of its
     vocabulary.

     NOTE(review): ``revlut`` presumably inverts a token->index table into
     index->token -- confirm against ``baseline.utils``.
     """
     super().__init__(vocabs, vectorizers, model, preproc)
     # The model always returns indices (no need for `return_labels`)
     self.label_vocab = revlut(self.get_labels())
     reversed_vocabs = {}
     for name, vocab in self.vocabs.items():
         reversed_vocabs[name] = revlut(vocab)
     self.rev_vocab = reversed_vocabs

Пример #2

0

Показать файл

Файл: services.py Проект: wenshuoliu/baseline

 def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
     """Initialize the service, honouring the model's ``return_labels`` flag.

     All four arguments are forwarded unchanged to the parent initializer.
     ``return_labels`` is copied from ``self.model`` when the model defines
     it and is ``False`` otherwise.  ``label_vocab`` is only created when
     ``return_labels`` is falsy; ``rev_vocab`` is always created.

     NOTE(review): ``revlut`` presumably inverts a token->index table into
     index->token -- confirm against ``baseline.utils``.
     """
     super().__init__(vocabs, vectorizers, model, preproc)
     # Models without the attribute keep the default tagger behavior.
     model_sets_flag = hasattr(self.model, 'return_labels')
     self.return_labels = self.model.return_labels if model_sets_flag else False
     if not self.return_labels:
         self.label_vocab = revlut(self.get_labels())
     self.rev_vocab = {name: revlut(vocab) for name, vocab in self.vocabs.items()}

Пример #3

0

Показать файл

 def __init__(self, model, span_type, verbose):
     """Store the model, span type and verbosity, and build ``idx2label``.

     :param model: object exposing a ``labels`` attribute
     :param span_type: stored as ``self.span_type``; logged when verbose
     :param verbose: when truthy, the span type is logged at INFO level

     NOTE(review): ``revlut`` presumably inverts label->index into
     index->label -- confirm against ``baseline.utils``.
     """
     self.model, self.span_type, self.verbose = model, span_type, verbose
     self.idx2label = revlut(model.labels)
     if verbose:
         logger.info('Setting span type %s', span_type)

Пример #4

0

Показать файл

 def __init__(self, model, span_type, verbose):
     """Store the model, span type and verbosity, and build ``idx2label``.

     :param model: object exposing a ``labels`` attribute
     :param span_type: stored as ``self.span_type``; printed when verbose
     :param verbose: when truthy, the span type is printed to stdout

     NOTE(review): ``revlut`` presumably inverts label->index into
     index->label -- confirm against ``baseline.utils``.
     """
     self.model, self.span_type, self.verbose = model, span_type, verbose
     self.idx2label = revlut(model.labels)
     if verbose:
         message = 'Setting span type {}'.format(span_type)
         print(message)

Пример #5

0

Показать файл

Файл: train.py Проект: dpressel/baseline

 def __init__(self, model, span_type, verbose):
     """Store the model, span type and verbosity, and build ``idx2label``.

     :param model: object exposing a ``labels`` attribute
     :param span_type: stored as ``self.span_type``; logged when verbose
     :param verbose: when truthy, the span type is logged at INFO level

     NOTE(review): ``revlut`` presumably inverts label->index into
     index->label -- confirm against ``baseline.utils``.
     """
     self.model, self.span_type, self.verbose = model, span_type, verbose
     self.idx2label = revlut(model.labels)
     if verbose:
         logger.info('Setting span type %s', span_type)

Пример #6

0

Показать файл

Файл: tasks.py Проект: dpressel/baseline

    def train(self, checkpoint=None):
        """Register reverse lookup tables (and an optional example hook), then train.

        ``config_params['train']['tgt_rlut']`` is always set to the reversed
        target table.  When ``config_params['num_valid_to_show']`` is positive,
        a callable wrapping ``preproc['show_ex']`` is also stored under
        ``config_params['train']['after_train_fn']``.

        :param checkpoint: forwarded unchanged to the parent ``train``
        :return: whatever the parent ``train`` returns
        """
        num_ex = self.config_params['num_valid_to_show']
        src_rlut = revlut(self.feat2src[self.primary_key])
        tgt_rlut = revlut(self.feat2tgt)

        if num_ex > 0:
            logger.info('Showing examples')
            preproc = self.config_params.get('preproc', {})
            show_ex_fn = preproc['show_ex']

            def after_train(model):
                # Attributes and preproc['mxlen'] are read at call time, not here.
                # NOTE(review): meaning of the positional `False, 0` is defined by
                # the show_ex callable -- confirm against its signature.
                return show_ex_fn(model,
                                  self.valid_data, src_rlut, tgt_rlut,
                                  self.feat2tgt,
                                  preproc['mxlen'], False, 0,
                                  num_ex, reverse=False)

            self.config_params['train']['after_train_fn'] = after_train

        self.config_params['train']['tgt_rlut'] = tgt_rlut
        return super(EncoderDecoderTask, self).train(checkpoint)

Пример #7

0

Показать файл

 def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
     """Initialize the tagger service, honouring the model's ``return_labels`` flag.

     All four arguments are forwarded unchanged to the parent initializer.
     ``return_labels`` is copied from ``self.model`` when the model defines
     it and is ``False`` otherwise.  ``label_vocab`` is only created when
     ``return_labels`` is falsy.
     """
     super(TaggerService, self).__init__(vocabs, vectorizers, model, preproc)
     # Models without the attribute keep the default tagger behavior.
     model_sets_flag = hasattr(self.model, 'return_labels')
     self.return_labels = self.model.return_labels if model_sets_flag else False
     if self.return_labels:
         # The model already yields labels; no reverse label table is built.
         return
     self.label_vocab = revlut(self.get_labels())

Пример #8

0

Показать файл

Файл: services.py Проект: dpressel/baseline

 def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
     """Initialize the tagger service, honouring the model's ``return_labels`` flag.

     All four arguments are forwarded unchanged to the parent initializer.
     ``return_labels`` is copied from ``self.model`` when the model defines
     it and is ``False`` otherwise.  ``label_vocab`` is only created when
     ``return_labels`` is falsy.
     """
     super(TaggerService, self).__init__(vocabs, vectorizers, model, preproc)
     # Models without the attribute keep the default tagger behavior.
     model_sets_flag = hasattr(self.model, 'return_labels')
     self.return_labels = self.model.return_labels if model_sets_flag else False
     if self.return_labels:
         # The model already yields labels; no reverse label table is built.
         return
     self.label_vocab = revlut(self.get_labels())

Пример #9

0

Показать файл

    def train(self, checkpoint=None):
        """Register reverse lookup tables (and an optional example hook), then train.

        ``config_params['train']['tgt_rlut']`` is always set to the reversed
        target table.  When ``config_params['num_valid_to_show']`` is positive,
        a callable wrapping ``preproc['show_ex']`` is also stored under
        ``config_params['train']['after_train_fn']``.

        :param checkpoint: forwarded unchanged to the parent ``train``
        :return: whatever the parent ``train`` returns
        """
        num_ex = self.config_params['num_valid_to_show']
        src_rlut = revlut(self.feat2src[self.primary_key])
        tgt_rlut = revlut(self.feat2tgt)

        if num_ex > 0:
            logger.info('Showing examples')
            preproc = self.config_params.get('preproc', {})
            show_ex_fn = preproc['show_ex']

            def after_train(model):
                # Attributes and preproc['mxlen'] are read at call time, not here.
                # NOTE(review): meaning of the positional `False, 0` is defined by
                # the show_ex callable -- confirm against its signature.
                return show_ex_fn(model,
                                  self.valid_data, src_rlut, tgt_rlut,
                                  self.feat2tgt,
                                  preproc['mxlen'], False, 0,
                                  num_ex, reverse=False)

            self.config_params['train']['after_train_fn'] = after_train

        self.config_params['train']['tgt_rlut'] = tgt_rlut
        return super(EncoderDecoderTask, self).train(checkpoint)

Пример #10

0

Показать файл

Файл: train.py Проект: shubhampachori12110095/baseline

 def __init__(self, model, **kwargs):
     """Set up the PyTorch tagger trainer.

     :param model: tagger model exposing ``labels`` and ``to_gpu()``
     :param kwargs: recognised keys read here are ``nogpu`` (default
         ``False``) and ``clip`` (default ``5``, coerced to float); the full
         dict is also forwarded to ``pytorch_prepare_optimizer``.
     """
     super(TaggerTrainerPyTorch, self).__init__()
     self.gpu = not kwargs.get('nogpu', False)
     self.model = model
     self.idx2label = revlut(self.model.labels)
     # NOTE(review): presumably a gradient-clipping threshold -- confirm at use site.
     self.clip = float(kwargs.get('clip', 5))
     prepared = pytorch_prepare_optimizer(self.model, **kwargs)
     self.optimizer, self.scheduler = prepared
     if self.gpu:
         # The optimizer was prepared above, before the model is moved.
         self.model = model.to_gpu()

Пример #11

0

Показать файл

Файл: train.py Проект: switchfootsid/baseline

    def __init__(self, model, **kwargs):
        """Set up the DyNet tagger trainer.

        :param model: tagger model exposing ``labels``
        :param kwargs: keys read here are ``span_type`` (default ``'iob'``),
            ``nogpu`` (default ``False``) and ``autobatchsz`` (default
            ``None``); the full dict is also forwarded to ``optimizer``.
        """
        super(TaggerTrainerDyNet, self).__init__()

        # Options taken from the keyword arguments.
        self.span_type = kwargs.get('span_type', 'iob')
        self.gpu = not kwargs.get('nogpu', False)
        self.autobatchsz = kwargs.get('autobatchsz')

        # Model and label tables.
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.labels = model.labels

        self.optimizer = optimizer(model, **kwargs)

Пример #12

0

Показать файл

 def __init__(self, model, **kwargs):
     """Set up the PyTorch tagger trainer and announce the span type.

     :param model: tagger model exposing ``labels`` and ``to_gpu()``
     :param kwargs: keys read here are ``nogpu`` (default ``False``),
         ``span_type`` (default ``'iob'``) and ``clip`` (default ``5``,
         coerced to float); the full dict is also forwarded to
         ``pytorch_prepare_optimizer``.
     """
     super(TaggerTrainerPyTorch, self).__init__()
     self.gpu = not kwargs.get('nogpu', False)
     # By default support IOB1/IOB2
     self.span_type = kwargs.get('span_type', 'iob')
     message = 'Setting span type {}'.format(self.span_type)
     print(message)
     self.model = model
     self.idx2label = revlut(self.model.labels)
     # NOTE(review): presumably a gradient-clipping threshold -- confirm at use site.
     self.clip = float(kwargs.get('clip', 5))
     prepared = pytorch_prepare_optimizer(self.model, **kwargs)
     self.optimizer, self.scheduler = prepared
     if self.gpu:
         # The optimizer was prepared above, before the model is moved.
         self.model = model.to_gpu()

Пример #13

0

Показать файл

Файл: train.py Проект: dpressel/baseline

    def __init__(self, model, **kwargs):
        """Set up the DyNet tagger trainer.

        :param model: tagger model exposing ``labels``
        :param kwargs: keys read here are ``span_type`` (default ``'iob'``),
            ``nogpu`` (default ``False``), ``autobatchsz`` (default ``None``)
            and ``nsteps`` (default ``six.MAXSIZE``); the full dict is also
            forwarded to ``OptimizerManager``.
        """
        super(TaggerTrainerDyNet, self).__init__()

        # Options taken from the keyword arguments.
        self.span_type = kwargs.get('span_type', 'iob')
        self.gpu = not kwargs.get('nogpu', False)
        self.autobatchsz = kwargs.get('autobatchsz')
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

        # Model and label tables.
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.labels = model.labels

        self.optimizer = OptimizerManager(model, **kwargs)

Пример #14

0

Показать файл

Файл: train.py Проект: bjayakumar/mead-baseline

    def __init__(self, model, **kwargs):
        """Set up the DyNet tagger trainer and log the span type.

        :param model: tagger model exposing ``labels``
        :param kwargs: keys read here are ``span_type`` (default ``'iob'``),
            ``nogpu`` (default ``False``), ``autobatchsz`` (default ``None``),
            ``nsteps`` (default ``six.MAXSIZE``) and ``verbose`` (default
            ``False``); the full dict is also forwarded to
            ``OptimizerManager``.
        """
        super(TaggerTrainerDyNet, self).__init__()

        self.span_type = kwargs.get('span_type', 'iob')
        # Logged unconditionally; `verbose` does not gate this message.
        logger.info('Setting span type %s', self.span_type)

        # Remaining options taken from the keyword arguments.
        self.gpu = not kwargs.get('nogpu', False)
        self.autobatchsz = kwargs.get('autobatchsz')
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        self.verbose = kwargs.get('verbose', False)

        # Model and label tables.
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.labels = model.labels

        self.optimizer = OptimizerManager(model, **kwargs)

Пример #15

0

Показать файл

Файл: train.py Проект: byfaith/baseline

    def __init__(self, model, **kwargs):
        """Set up the PyTorch tagger trainer and log the span type.

        :param model: tagger model exposing ``labels`` and ``to_gpu()``
        :param kwargs: keys read here are ``nogpu`` (default ``False``),
            ``span_type`` (default ``'iob'``), ``verbose`` (default
            ``False``), ``clip`` (default ``5``, coerced to float) and
            ``nsteps`` (default ``six.MAXSIZE``); the full dict is also
            forwarded to ``OptimizerManager``.
        """
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not kwargs.get('nogpu', False)
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        # Logged unconditionally; `verbose` does not gate this message.
        logger.info('Setting span type %s', self.span_type)

        self.model = model
        self.idx2label = revlut(self.model.labels)
        # NOTE(review): presumably a gradient-clipping threshold -- confirm at use site.
        self.clip = float(kwargs.get('clip', 5))
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            # The optimizer manager was built above, before the model is moved.
            self.model = model.to_gpu()

Пример #16

0

Показать файл

Файл: train.py Проект: dpressel/baseline

    def __init__(self, model, **kwargs):
        """Set up the PyTorch tagger trainer; log the span type when verbose.

        :param model: tagger model exposing ``labels`` and ``to_gpu()``
        :param kwargs: keys read here are ``nogpu`` (default ``False``),
            ``span_type`` (default ``'iob'``), ``verbose`` (default
            ``False``), ``clip`` (default ``5``, coerced to float) and
            ``nsteps`` (default ``six.MAXSIZE``); the full dict is also
            forwarded to ``OptimizerManager``.
        """
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not kwargs.get('nogpu', False)
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        if self.verbose:
            logger.info('Setting span type %s', self.span_type)

        self.model = model
        self.idx2label = revlut(self.model.labels)
        # NOTE(review): presumably a gradient-clipping threshold -- confirm at use site.
        self.clip = float(kwargs.get('clip', 5))
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            # The optimizer manager was built above, before the model is moved.
            self.model = model.to_gpu()

Пример #17

0

Показать файл

    def predict_text(self,
                     tokens,
                     mxlen,
                     maxw,
                     zero_alloc=np.zeros,
                     word_trans_fn=lowercase):
        """
        Tag one tokenized sentence and pair every token with its predicted label.

        Tokens are converted to word indices and per-character indices, passed to
        ``predict`` as a batch of size one, and the predicted indices are mapped
        back through the reversed label table.

        This method is not aware of any input features other than words and characters (and lengths).  If you
        wish to use other features and have a custom model that is aware of those, use `predict` directly.

        :param tokens: sequence of tokens; only the first ``mxlen`` are used
        :param mxlen: width of the word-index buffer (maximum tokens considered)
        :param maxw: width of the per-token character buffer (maximum characters considered)
        :param zero_alloc: allocator called as ``zero_alloc(shape, dtype=int)`` to create zeroed buffers
        :param word_trans_fn: applied to each token before the word-vocabulary lookup
        :return: list of ``(token, label)`` tuples, one per token actually used
        """
        words_vocab = self.get_vocab(vocab_type='word')
        chars_vocab = self.get_vocab(vocab_type='char')
        # This might be inefficient if the label space is large
        label_vocab = revlut(self.get_labels())

        xs = zero_alloc((1, mxlen), dtype=int)
        xs_ch = zero_alloc((1, mxlen, maxw), dtype=int)
        lengths = zero_alloc(1, dtype=int)
        lengths[0] = min(len(tokens), mxlen)

        # Unknown words and characters fall back to index 0.
        for j, w in enumerate(tokens[:mxlen]):
            xs[0, j] = words_vocab.get(word_trans_fn(w), 0)
            for k, ch in enumerate(w[:maxw]):
                xs_ch[0, j, k] = chars_vocab.get(ch, 0)

        indices = self.predict({'x': xs, 'xch': xs_ch, 'lengths': lengths})[0]
        return [(tokens[j], label_vocab[indices[j]]) for j in range(lengths[0])]

Пример #18

0

Показать файл

    def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
        """Split vocabularies and vectorizers into source and target parts.

        The parent initializer receives ``None`` for both vocabs and
        vectorizers; this class keeps them itself.  The entry keyed ``'tgt'``
        becomes the target vocabulary / vectorizer and every other entry is
        stored in the corresponding ``src_*`` dict.

        NOTE(review): despite the ``None`` defaults, ``vocabs`` and
        ``vectorizers`` must be mappings -- ``.items()`` is called on both.
        """
        super(EncoderDecoderService, self).__init__(None, None, model, preproc)

        self.src_vocabs = {}
        self.tgt_vocab = None
        for name, vocab in vocabs.items():
            if name != 'tgt':
                self.src_vocabs[name] = vocab
                continue
            self.tgt_vocab = vocab
        self.tgt_idx_to_token = revlut(self.tgt_vocab)

        self.src_vectorizers = {}
        self.tgt_vectorizer = None
        for name, vectorizer in vectorizers.items():
            if name != 'tgt':
                self.src_vectorizers[name] = vectorizer
                continue
            self.tgt_vectorizer = vectorizer

Пример #19

0

Показать файл

Файл: services.py Проект: dpressel/baseline

    def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
        """Split vocabularies and vectorizers into source and target parts.

        The parent initializer receives ``None`` for both vocabs and
        vectorizers; this class keeps them itself.  The entry keyed ``'tgt'``
        becomes the target vocabulary / vectorizer and every other entry is
        stored in the corresponding ``src_*`` dict.

        NOTE(review): despite the ``None`` defaults, ``vocabs`` and
        ``vectorizers`` must be mappings -- ``.items()`` is called on both.
        """
        super(EncoderDecoderService, self).__init__(None, None, model, preproc)

        self.src_vocabs = {}
        self.tgt_vocab = None
        for name, vocab in vocabs.items():
            if name != 'tgt':
                self.src_vocabs[name] = vocab
                continue
            self.tgt_vocab = vocab
        self.tgt_idx_to_token = revlut(self.tgt_vocab)

        self.src_vectorizers = {}
        self.tgt_vectorizer = None
        for name, vectorizer in vectorizers.items():
            if name != 'tgt':
                self.src_vectorizers[name] = vectorizer
                continue
            self.tgt_vectorizer = vectorizer

Пример #20

0

Показать файл

    def predict_text(self,
                     tokens,
                     mxlen,
                     maxw,
                     zero_alloc=np.zeros,
                     word_trans_fn=lowercase):
        """
        Tag one tokenized sentence and pair every token with its predicted label.

        Tokens are converted to word indices and per-character indices, passed to
        ``predict`` as a batch of size one, and the predicted indices are mapped
        back through the reversed label table.

        :param tokens: sequence of tokens; only the first ``mxlen`` are used
        :param mxlen: width of the word-index buffer (maximum tokens considered)
        :param maxw: width of the per-token character buffer (maximum characters considered)
        :param zero_alloc: allocator called as ``zero_alloc(shape, dtype=int)`` to create zeroed buffers
        :param word_trans_fn: applied to each token before the word-vocabulary lookup
        :return: list of ``(token, label)`` tuples, one per token actually used
        """
        words_vocab = self.get_vocab(vocab_type='word')
        chars_vocab = self.get_vocab(vocab_type='char')
        # This might be inefficient if the label space is large
        label_vocab = revlut(self.get_labels())

        xs = zero_alloc((1, mxlen), dtype=int)
        xs_ch = zero_alloc((1, mxlen, maxw), dtype=int)
        lengths = zero_alloc(1, dtype=int)
        lengths[0] = min(len(tokens), mxlen)

        # Unknown words and characters fall back to index 0.
        for j, w in enumerate(tokens[:mxlen]):
            xs[0, j] = words_vocab.get(word_trans_fn(w), 0)
            for k, ch in enumerate(w[:maxw]):
                xs_ch[0, j, k] = chars_vocab.get(ch, 0)

        indices = self.predict(xs, xs_ch, lengths)[0]
        return [(tokens[j], label_vocab[indices[j]]) for j in range(lengths[0])]

Пример #21

0

Показать файл

    def __init__(self, model, **kwargs):
        """Set up the PyTorch tagger trainer with a torch optimizer and criterion.

        :param model: tagger model exposing ``labels``, ``parameters()``,
            ``get_criterion()`` and ``cuda()``
        :param kwargs: keys read here are ``nogpu`` (default ``False``),
            ``optim`` (default ``'adam'``), ``eta`` (learning rate, default
            ``0.01``), ``mom`` (SGD momentum, default ``0.9``) and ``clip``
            (default ``5``); the numeric ones are coerced to float.
        """
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not kwargs.get('nogpu', False)
        optim = kwargs.get('optim', 'adam')
        eta = float(kwargs.get('eta', 0.01))
        mom = float(kwargs.get('mom', 0.9))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.idx2label = revlut(self.model.labels)

        # Optimizers that only take a learning rate; anything else falls back
        # to SGD with momentum.
        lr_only_optimizers = {
            'adadelta': torch.optim.Adadelta,
            'adam': torch.optim.Adam,
            'rmsprop': torch.optim.RMSprop,
        }
        optimizer_cls = lr_only_optimizers.get(optim)
        if optimizer_cls is None:
            self.optimizer = torch.optim.SGD(model.parameters(), lr=eta, momentum=mom)
        else:
            self.optimizer = optimizer_cls(model.parameters(), lr=eta)

        self.crit = model.get_criterion()
        if self.gpu:
            self.model = model.cuda()
            self.crit.cuda()

Пример #22

0

Показать файл

    def __init__(self, model, **kwargs):
        """Set up the PyTorch tagger trainer, clamping the GPU count to at most one.

        :param model: tagger model exposing ``labels`` and ``to_gpu()``
        :param kwargs: keys read here are ``gpus`` (default ``1``, coerced to
            int), ``span_type`` (default ``'iob'``), ``verbose`` (default
            ``False``), ``clip`` (default ``5``, coerced to float) and
            ``nsteps`` (default ``six.MAXSIZE``); the full dict is also
            forwarded to ``OptimizerManager``.
        """
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        # Logged unconditionally; `verbose` does not gate this message.
        logger.info('Setting span type %s', self.span_type)

        self.model = model
        self.idx2label = revlut(self.model.labels)
        # NOTE(review): presumably a gradient-clipping threshold -- confirm at use site.
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)

        if self.gpus > 1:
            logger.info(
                "Trainer for PyTorch tagger currently doesnt support multiple GPUs.  Setting to 1"
            )
            self.gpus = 1

        if self.gpus <= 0:
            logger.warning("Requested training on CPU.  This will be slow.")
        else:
            self.model = model.to_gpu()

        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

Пример #23

0

Показать файл

    def predict_text(self, tokens, **kwargs):
        """
        Tag one tokenized sentence and pair every token with its predicted label.

        The tokens are turned into model inputs by a featurizer, passed to
        ``predict``, and the predicted indices are mapped back through the
        reversed label table.

        This method is not aware of any input features other than words and characters (and lengths).  If you
        wish to use other features and have a custom model that is aware of those, use `predict` directly.

        :param tokens: (``list``) A list of tokens
        :param kwargs: optional settings:
            ``featurizer`` -- object whose ``run(tokens)`` returns the input
            dict (must contain ``'lengths'``); when absent a ``WordCharLength``
            is built from the settings below.
            ``mxlen`` -- defaults to ``self.mxlen`` if present, else ``len(tokens)``.
            ``maxw`` -- defaults to ``self.maxw`` if present, else the longest token length.
            ``zero_alloc`` -- defaults to ``np.zeros``.
        :return: list of ``(token, label)`` tuples
        :raises ValueError: if ``tokens`` is empty and ``maxw`` is neither
            passed nor set on the instance (nothing to take the maximum of)
        """
        featurizer = kwargs.get('featurizer')
        if featurizer is None:

            def _resolve(name, compute_fallback):
                # Priority: explicit keyword argument, then instance attribute,
                # then a value derived from ``tokens``.  The fallback is only
                # computed when needed, so an explicit ``maxw`` no longer
                # triggers ``max()`` over an empty token list.
                if name in kwargs:
                    return kwargs[name]
                if hasattr(self, name):
                    return getattr(self, name)
                return compute_fallback()

            mxlen = _resolve('mxlen', lambda: len(tokens))
            maxw = _resolve('maxw', lambda: max(len(token) for token in tokens))
            zero_alloc = kwargs.get('zero_alloc', np.zeros)
            featurizer = WordCharLength(self, mxlen, maxw, zero_alloc)

        # This might be inefficient if the label space is large
        label_vocab = revlut(self.get_labels())

        data = featurizer.run(tokens)
        lengths = data['lengths']
        indices = self.predict(data)[0]
        # `.item()` converts each predicted index to a plain Python scalar
        # before it is used as a key into the label table.
        return [(tokens[j], label_vocab[indices[j].item()]) for j in range(lengths[0])]

Пример #24

0

Показать файл

Файл: services.py Проект: wenshuoliu/baseline

 def __init__(self, *args, **kwargs):
     """Initialize via the parent class, then build ``idx_to_token``.

     ``idx_to_token`` is ``revlut`` applied to the vocabulary stored under
     the model's ``tgt_key`` in ``self.vocabs``.
     """
     super().__init__(*args, **kwargs)
     target_vocab = self.vocabs[self.model.tgt_key]
     self.idx_to_token = revlut(target_vocab)

Пример #25

0

Показать файл

Файл: services.py Проект: dpressel/baseline

 def __init__(self, *args, **kwargs):
     """Initialize via the parent class, then build ``idx_to_token``.

     ``idx_to_token`` is ``revlut`` applied to the vocabulary stored under
     the model's ``tgt_key`` in ``self.vocabs``.
     """
     super(LanguageModelService, self).__init__(*args, **kwargs)
     target_vocab = self.vocabs[self.model.tgt_key]
     self.idx_to_token = revlut(target_vocab)

Пример #26

0

Показать файл

 def __init__(self, model):
     """Keep a reference to ``model`` and build ``idx2label`` from its labels.

     NOTE(review): ``revlut`` presumably inverts label->index into
     index->label -- confirm against ``baseline.utils``.
     """
     labels = model.labels
     self.model = model
     self.idx2label = revlut(labels)

Пример #27

0

Показать файл

 def __init__(self, *args, **kwargs):
     """Initialize via the parent class, then build ``idx_to_token``.

     ``idx_to_token`` is ``revlut`` applied to the vocabulary stored under
     the model's ``tgt_key`` in ``self.vocabs``.
     """
     super(LanguageModelService, self).__init__(*args, **kwargs)
     target_vocab = self.vocabs[self.model.tgt_key]
     self.idx_to_token = revlut(target_vocab)

Пример #28

0

Показать файл

Файл: services.py Проект: DevSinghSachan/baseline

 def __init__(self, vocabs=None, vectorizers=None, model=None):
     """Initialize via the parent class, then build the reversed label table.

     The three arguments are forwarded unchanged to the parent initializer;
     ``label_vocab`` is ``revlut`` applied to ``self.get_labels()``.
     """
     super(TaggerService, self).__init__(vocabs, vectorizers, model)
     labels = self.get_labels()
     self.label_vocab = revlut(labels)

Python revlut примеры использования