def _validate_positive_integer(val, argname):
    try:
        val = int(val)
    except ValueError:
        raise exception.Error("%s should be an integer" % argname)
    if val < 0:
        raise exception.Error("%s should be a positive integer" % argname)
    return val
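Here `exception.Error` is a project-specific exception class, so a self-contained sketch of the same convert-then-check pattern, with a stand-in Error class, looks like this:

class Error(Exception):
    """Stand-in for the project-specific exception.Error."""


def validate_positive_integer(val, argname):
    # Convert first, then range-check, and only return the converted value.
    try:
        val = int(val)
    except ValueError:
        raise Error("%s should be an integer" % argname)
    if val < 0:
        raise Error("%s should be a positive integer" % argname)
    return val


print(validate_positive_integer("42", "count"))   # -> 42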
Example #2
def read_all_lines(config, sentences):
    source_to_num, _, _, _ = load_dictionaries(config)
    lines = []
    for sent in sentences:
        line = []
        for w in sent.strip().split():
            if config.factors == 1:
                w = [source_to_num[0][w] if w in source_to_num[0] else 1]
            else:
                w = [
                    source_to_num[i][f] if f in source_to_num[i] else 1
                    for (i, f) in enumerate(w.split('|'))
                ]
                if len(w) != config.factors:
                    raise exception.Error(
                        'Expected {0} factors, but input word has {1}\n'.
                        format(config.factors, len(w)))
            line.append(w)
        lines.append(line)
    lines = numpy.array(lines, dtype=object)
    lengths = numpy.array([len(l) for l in lines])
    idxs = lengths.argsort()
    lines = lines[idxs]

    #merge into batches
    batches = []
    for i in range(0, len(lines), config.valid_batch_size):
        batch = lines[i:i + config.valid_batch_size]
        batches.append(batch)

    return batches, idxs
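The batches come back sorted by sentence length, and `idxs` records that ordering, so per-sentence outputs have to be mapped back to the original input order. A small sketch of that inverse mapping (the helper name is illustrative):

def restore_original_order(outputs_sorted, idxs):
    # idxs[i] is the original position of the i-th (length-sorted) sentence,
    # so writing each output back to idxs[i] undoes the sort.
    outputs = [None] * len(outputs_sorted)
    for i, original_pos in enumerate(idxs):
        outputs[original_pos] = outputs_sorted[i]
    return outputs


print(restore_original_order(['b', 'c', 'a'], [1, 2, 0]))   # -> ['a', 'b', 'c']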
Example #3
    def api_get_long(self, req, target, last_c='', interval=1):
        req['format'] = 'json'
        self.status = ''
        last_cont = {'continue': last_c}
        while True:
            c_req = req.copy()
            c_req.update(last_cont)
            try:
                result = self.s.get(lang_api, params=c_req,
                                    headers=headers).json()
            except Exception:  # e.g. a network error or an invalid JSON body
                print('api_get_long: Try again after %d sec...' % interval)
                print(req, target, last_c)
                time.sleep(interval)
                for t in self.api_get_long(req, target, last_cont['continue'],
                                           interval * 2):
                    yield t
                break
            if 'error' in result:
                print(req, target, last_c)
                raise exception.Error(result['error'])
            if 'warnings' in result:
                print('api_get_long: Warning: %s' % result['warnings'])
                print(req, target, last_c)
            if target in result:
                yield result[target]
            if 'continue' not in result:
                break
            last_cont = result['continue']
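The generator pages through a MediaWiki-style API via its 'continue' mechanism; `lang_api` and `headers` are module-level globals not shown here. A minimal, self-contained sketch of the same pagination loop written directly against requests (the endpoint URL is an assumption):

import requests

API = 'https://zh.wikipedia.org/w/api.php'   # assumed endpoint


def iterate_query(req):
    params = dict(req, format='json')
    cont = {'continue': ''}
    while True:
        result = requests.get(API, params={**params, **cont}).json()
        if 'query' in result:
            yield result['query']
        if 'continue' not in result:
            break
        # Feed every continuation key back into the next request.
        cont = result['continue']


for batch in iterate_query({'action': 'query', 'list': 'allpages', 'aplimit': '50'}):
    print(len(batch.get('allpages', [])))
    break   # first page only, just to illustrate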
Example #4
    def delete_stack_user(self, user_id):

        user = self.client.users.get(user_id)

        # FIXME (shardy) : need to test, do we still need this retry logic?
        # Copied from user.py, but seems like something we really shouldn't
        # need to do, no bug reference in the original comment (below)...
        # temporary hack to work around an openstack bug.
        # seems you can't delete a user first time - you have to try
        # a couple of times - go figure!
        tmo = eventlet.Timeout(10)
        status = 'WAITING'
        reason = 'Timed out trying to delete user'
        try:
            while status == 'WAITING':
                try:
                    user.delete()
                    status = 'DELETED'
                except Exception as ce:
                    reason = str(ce)
                    logger.warning("Problem deleting user %s: %s" %
                                   (user_id, reason))
                    eventlet.sleep(1)
        except eventlet.Timeout as t:
            if t is not tmo:
                # not my timeout
                raise
            else:
                status = 'TIMEDOUT'
        finally:
            tmo.cancel()

        if status != 'DELETED':
            raise exception.Error(reason)
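The retry loop amounts to "keep trying until a deadline passes"; a rough, eventlet-free sketch of the same idea (names are illustrative, `delete_fn` stands in for `user.delete`):

import time


def retry_delete(delete_fn, timeout=10, pause=1):
    deadline = time.monotonic() + timeout
    reason = 'Timed out trying to delete user'
    while time.monotonic() < deadline:
        try:
            delete_fn()
            return
        except Exception as exc:
            reason = str(exc)
            time.sleep(pause)
    raise RuntimeError(reason)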
Example #5
def validate_pid(pid):
    '''Check if we've received a valid PID'''
    p = int(pid)

    if PID_MIN <= p <= PID_MAX:
        return True
    else:
        raise exception.Error('PID out of range %d-%d: %d' %
                              (PID_MIN, PID_MAX, p))
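`PID_MIN` and `PID_MAX` are module-level constants defined elsewhere in the original; assuming that same module, a usage sketch might look like this (the bounds shown are only illustrative):

PID_MIN, PID_MAX = 1, 99999   # illustrative values only

validate_pid(42)              # -> True
try:
    validate_pid(-5)
except exception.Error as err:
    print(err)                # PID out of range 1-99999: -5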
Example #6
    def __init__(self, models, configs, beam_size, beta):
        """Sets some things up then calls _beam_search() to do the real work.

        Args:
            models: a sequence of RNN or Transformer objects.
            configs: a sequence of model configs (argparse.Namespace objects).
            beam_size: an integer specifying the beam width.
            beta: a float between 0.0 and 1.0 specifying the value of the bias.
        """
        self._models = models
        self._configs = configs
        self._beam_size = beam_size
        self.beta = beta

        with tf.name_scope('beam_search'):

            # Define placeholders.
            self.inputs = sampler_inputs.SamplerInputs()

            # Create model adapters to get a consistent interface to
            # Transformer and RNN models.
            model_adapters = []
            for i, (model, config) in enumerate(zip(models, configs)):
                with tf.name_scope('model_adapter_{}'.format(i)) as scope:
                    if config.model_type == 'transformer':
                        adapter = transformer_inference.ModelAdapter(
                            model, config, scope)
                    else:
                        assert config.model_type == 'rnn'
                        adapter = rnn_inference.ModelAdapter(
                            model, config, scope)
                    model_adapters.append(adapter)

            # Check that individual models are compatible with each other.
            vocab_sizes = [a.target_vocab_size for a in model_adapters]
            if len(set(vocab_sizes)) > 1:
                raise exception.Error('Cannot ensemble models with different '
                                      'target vocabulary sizes')
            target_vocab_size = vocab_sizes[0]

            # Build the graph to do the actual work.
            sequences, scores = _beam_search(
                model_adapters=model_adapters,
                beam_size=beam_size,
                batch_size_x=self.inputs.batch_size_x,
                max_translation_len=self.inputs.max_translation_len,
                normalization_alpha=self.inputs.normalization_alpha,
                vocab_size=target_vocab_size,
                eos_id=0,
                last_translation=self.inputs.last_translation,
                last_translation_len=self.inputs.last_translation_len,
                beta=self.beta)

            # print(sequences, scores)

            self._outputs = sequences, scores
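The compatibility check on vocabulary sizes can be illustrated on its own; a standalone sketch using plain namespaces in place of the model adapters:

from argparse import Namespace


def check_same_target_vocab(adapters):
    sizes = [a.target_vocab_size for a in adapters]
    if len(set(sizes)) > 1:
        raise ValueError('Cannot ensemble models with different '
                         'target vocabulary sizes')
    return sizes[0]


print(check_same_target_vocab([Namespace(target_vocab_size=30000),
                               Namespace(target_vocab_size=30000)]))   # -> 30000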
Example #7
    def client_login(self, pwd=None):
        data = {'logintoken': self.query_tokens('login')['logintoken']}
        self.tokens['login'] = data['logintoken']
        data['username'] = bot_name
        data['password'] = pwd if pwd is not None else self.pwd
        data['action'] = 'clientlogin'
        data['loginreturnurl'] = 'https://zh.wikipedia.org/'
        if pwd is not None:
            # Only overwrite the stored password when a new one is supplied.
            self.pwd = pwd

        result = self.api_post(data)['clientlogin']

        if 'error' in result:
            raise exception.Error(result['error'])
        if 'warnings' in result:
            print('Warning: %s' % result['warnings'])
        if result.get('status') == 'PASS':
            self.tokens['csrf'] = self.query_tokens('csrf').get('csrftoken')
            return None
        else:
            raise exception.Error(result.get('message', 'Login Failed'))
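The same clientlogin flow can be written directly against the MediaWiki action API with requests; the endpoint and credentials below are placeholders, not values taken from the original:

import requests

API = 'https://zh.wikipedia.org/w/api.php'   # assumed endpoint
session = requests.Session()

# 1. Fetch a login token.
token = session.get(API, params={
    'action': 'query', 'meta': 'tokens', 'type': 'login', 'format': 'json',
}).json()['query']['tokens']['logintoken']

# 2. Log in with action=clientlogin.
result = session.post(API, data={
    'action': 'clientlogin', 'format': 'json',
    'username': 'ExampleBot', 'password': 'example-password',
    'logintoken': token, 'loginreturnurl': 'https://zh.wikipedia.org/',
}).json()['clientlogin']

if result.get('status') != 'PASS':
    raise RuntimeError(result.get('message', 'Login Failed'))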
Example #8
    def api_get(self, req, target, interval=1):
        req['format'] = 'json'
        self.status = ''
        try:
            result = self.s.get(lang_api, params=req, headers=headers).json()
        except Exception:  # e.g. json.decoder.JSONDecodeError
            print('api_get: Try again after %d sec...' % interval)
            print(req, target)
            time.sleep(interval)
            return self.api_get(req, target, interval * 2)
        # print(json.dumps(result, indent=4, sort_keys=True))
        if 'error' in result:
            print(req, target)
            raise exception.Error(result['error'])
        if 'warnings' in result:
            print('api_get: Warning: %s' % result['warnings'])
            print(req, target)
        return result.get(target)
Example #9
def read_all_lines(config, sentences, batch_size):
    source_to_num, _, _, _ = load_dictionaries(config)

    if config.source_vocab_sizes is not None:
        assert len(config.source_vocab_sizes) == len(source_to_num)
        for d, vocab_size in zip(source_to_num, config.source_vocab_sizes):
            if vocab_size is not None and vocab_size > 0:
                # Copy the items so entries can be deleted while iterating.
                for key, idx in list(d.items()):
                    if idx >= vocab_size:
                        del d[key]

    lines = []
    for sent in sentences:
        line = []
        for w in sent.strip().split():
            if config.factors == 1:
                w = [source_to_num[0][w] if w in source_to_num[0] else 1]
            else:
                w = [
                    source_to_num[i][f] if f in source_to_num[i] else 1
                    for (i, f) in enumerate(w.split('|'))
                ]
                if len(w) != config.factors:
                    raise exception.Error(
                        'Expected {0} factors, but input word has {1}\n'.
                        format(config.factors, len(w)))
            line.append(w)
        lines.append(line)
    lines = numpy.array(lines, dtype=object)
    lengths = numpy.array([len(l) for l in lines])
    idxs = lengths.argsort()
    lines = lines[idxs]

    #merge into batches
    batches = []
    for i in range(0, len(lines), batch_size):
        batch = lines[i:i + batch_size]
        batches.append(batch)

    return batches, idxs
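The vocabulary pruning at the top of this function deletes entries while iterating, which is why the copy via `list(d.items())` matters in Python 3; a small standalone demonstration:

def prune_vocab(d, vocab_size):
    # Keep only entries whose index is below vocab_size; iterating over a
    # copied item list makes deletion safe while looping.
    for key, idx in list(d.items()):
        if idx >= vocab_size:
            del d[key]
    return d


print(prune_vocab({'the': 0, 'cat': 1, 'sat': 2}, 2))   # -> {'the': 0, 'cat': 1}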
Example #10
    def pid(self):
        if self._pid is not None and validate_pid(self._pid):
            return int(self._pid)
        else:
            raise exception.Error('Invalid PID: %s' % self._pid)
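This accessor presumably lives on a class that stores `_pid`; a hypothetical sketch of such an owner (only `_pid` is taken from the snippet, the class name and property decorator are assumptions, and `validate_pid` / `exception.Error` come from the same module):

class Process(object):
    def __init__(self, pid=None):
        self._pid = pid

    @property
    def pid(self):
        if self._pid is not None and validate_pid(self._pid):
            return int(self._pid)
        raise exception.Error('Invalid PID: %s' % self._pid)


print(Process(42).pid)   # -> 42, assuming 42 lies inside PID_MIN..PID_MAX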