Example #1
def main():
    """Example invocation:
    python solution.py \
        --lista='[(900, 1100), (1300, 1500)]' \
        --listb='[(900, 915), (1000, 1015), (1230, 1600)]'
    """
    parser = optparse.OptionParser()
    parser.add_option('-a',
                      '--lista',
                      action='store',
                      dest='list_a',
                      help='First list',
                      default=None)
    parser.add_option('-b',
                      '--listb',
                      action='store',
                      dest='list_b',
                      help='Second List',
                      default=None)
    options, _ = parser.parse_args()

    if py.is_empty(options.list_a) or py.is_empty(options.list_b):
        return

    result = find_diff(ast.literal_eval(options.list_a),
                       ast.literal_eval(options.list_b))

    print('Result of listA - listB: ')
    print(result)
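The excerpt parses the two lists and delegates to find_diff, which is not shown here. Purely as an illustration, a minimal interval-subtraction helper with that signature could look like the sketch below, assuming each tuple is a half-open (start, end) range; this is an assumption, not the original implementation.

def find_diff(list_a, list_b):
    """Return the parts of the intervals in list_a not covered by any interval in list_b."""
    result = []
    for a_start, a_end in list_a:
        segments = [(a_start, a_end)]
        for b_start, b_end in list_b:
            next_segments = []
            for s_start, s_end in segments:
                # keep the part of the segment to the left of (b_start, b_end)
                if b_start > s_start:
                    next_segments.append((s_start, min(s_end, b_start)))
                # keep the part of the segment to the right of (b_start, b_end)
                if b_end < s_end:
                    next_segments.append((max(s_start, b_end), s_end))
            # drop degenerate segments that were fully covered
            segments = [(s, e) for s, e in next_segments if s < e]
        result.extend(segments)
    return result

With the lists from the docstring, this sketch would return [(915, 1000), (1015, 1100)].
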
Example #2
def test_mention_context_batch_sampler():
    mentions_by_page = {
        0: [40, 50, 60, 70, 80, 90],
        1: [100],
        2: [0, 10, 20, 30]
    }
    cursor = get_mock_cursor(mentions_by_page)
    batch_size = 5
    page_id_order = [2, 0, 1]
    mentions_in_page_order = _.mapcat(
        page_id_order, lambda page_id: mentions_by_page[page_id])
    batch_sampler = MentionContextBatchSampler(cursor, page_id_order,
                                               batch_size)
    batches_seen = []
    indexes_seen = []
    for batch_num, batch_indexes in enumerate(batch_sampler):
        assert _.is_empty(_.intersection(batch_indexes, indexes_seen))
        indexes_seen.extend(batch_indexes)
        if batch_num == 0:
            assert len(set(batch_indexes) - {0, 10, 20, 30}) == 1
            assert any([
                mention in set(batch_indexes) - {0, 10, 20, 30}
                for mention in mentions_by_page[0]
            ])
            assert len(batch_indexes) == batch_size
        elif batch_num == 1:
            assert _.is_empty(set(batch_indexes) - set(mentions_by_page[0]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 2:
            assert batch_indexes == [100]
            assert len(batch_indexes) == 1
        batches_seen.append(batch_num)
    assert _.is_empty(_.difference(mentions_in_page_order, indexes_seen))
    assert batches_seen == [0, 1, 2]
Example #3
def _get_existing_device(e: InnerRequestError) -> dict:
    """
    Gets the device encoded in the InnerRequestError after ensuring integrity in the errors.

    Only accepted errors are about already existing unique id fields (like _id or hid), as they are the only ones
    that hint at an already existing device.

    @raise MismatchBetweenUid: When the unique ids point at different devices; this is usually a misspelling error
    by the user, as some uids can be entered manually.
    @raise DeviceNotFound: There is no uid error, or it is not well formatted
    """
    devices = []
    # Model is used when matching components with their parents
    for field in DeviceDomain.uid_fields | {'model'}:
        for error in e.body['_issues'].get(field, []):
            with suppress(ValueError, KeyError):
                device = json_util.loads(error)['NotUnique']
                if not is_empty(
                        devices) and device['_id'] != devices[-1][1]['_id']:
                    raise MismatchBetweenUid(field, device['_id'],
                                             devices[-1][1]['_id'],
                                             devices[-1][0])
                devices.append((field, device))
    if is_empty(devices):
        raise DeviceNotFound()
    return devices[-1][1]
Example #4
def test_mention_context_batch_sampler_many_last_ids():
    mentions_by_page = {
        0: [120],
        1: [130],
        2: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110]
    }
    cursor = get_mock_cursor(mentions_by_page)
    batch_size = 5
    page_id_order = [2, 0, 1]
    mentions_in_page_order = _.mapcat(
        page_id_order, lambda page_id: mentions_by_page[page_id])
    batch_sampler = MentionContextBatchSampler(cursor, page_id_order,
                                               batch_size)
    batches_seen = []
    indexes_seen = []
    for batch_num, batch_indexes in enumerate(batch_sampler):
        assert _.is_empty(_.intersection(batch_indexes, indexes_seen))
        indexes_seen.extend(batch_indexes)
        if batch_num == 0:
            assert _.is_empty(_.difference(batch_indexes, mentions_by_page[2]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 1:
            assert _.is_empty(_.difference(batch_indexes, mentions_by_page[2]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 2:
            assert len(_.intersection(batch_indexes, mentions_by_page[2])) == 2
            assert len(_.intersection(batch_indexes, mentions_by_page[0])) == 1
            assert len(_.intersection(batch_indexes, mentions_by_page[1])) == 1
            assert len(batch_indexes) == 4
        batches_seen.append(batch_num)
    assert _.is_empty(_.difference(mentions_in_page_order, indexes_seen))
    assert batches_seen == [0, 1, 2]
Example #5
async def handle_request(request: web.Request, service: object, endpoint_cacher: object):
    req_ctx = {
        'method': request.method,
        'url': service['targets'][service['cur_target_index']],
        'params': dict(request.rel_url.query),
        'data': await request.text(),
        'cookies': dict(request.cookies),
        'headers': pydash.omit(dict(request.headers), 'Host'),
    }
    req = None
    req_cache = None
    req_ctx_hash = None

    if not pydash.is_empty(endpoint_cacher):
        req_ctx_hash = Hasher.hash_sha_256(json.dumps(req_ctx))
        req_cache = await EndpointCacher.get_cache(req_ctx_hash, DB.get_redis(request))

    if pydash.is_empty(req_cache):
        req = await Api.call(**req_ctx)
        if pydash.is_empty(req_ctx_hash):
            req_ctx_hash = Hasher.hash_sha_256(json.dumps(req_ctx))
        not pydash.is_empty(endpoint_cacher) and queue_async_func.s({
            'func': 'EndpointCacher.set_cache',
            'args': [req_ctx_hash, req, int(endpoint_cacher['timeout']), 'redis'],
            'kwargs': {}
        }).apply_async()
    else:
        req = json.loads(req_cache)

    cache_hit = not pydash.is_empty(req_cache)
    return req, cache_hit
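The cache key above is a SHA-256 hash of the JSON-serialized request context. A standalone sketch of the same idea, using hashlib as a stand-in for the project's Hasher.hash_sha_256 helper (an assumption about that helper):

import hashlib
import json

req_ctx = {'method': 'GET', 'url': 'http://upstream.example/api', 'params': {'q': '1'}}
# sort_keys=True is used in this sketch so the key does not depend on dict ordering
cache_key = hashlib.sha256(json.dumps(req_ctx, sort_keys=True).encode('utf-8')).hexdigest()
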
Example #6
    def __init__(self, host, port, db, mode=MODE_FAST):
        if _.is_empty(host) or _.is_empty(port) or _.is_empty(db):
            raise RuntimeError(
                'Redis host or port or db missing. Please provide all three.')

        self._redis = redis.StrictRedis(host=host, port=port, db=db)
        self._mode = mode
Example #7
async def handle_rate_limiter(request: web.Request, service_id: str, rule: object):
    if not pydash.is_empty(rule):
        entries = await RateLimiter.get_entry_by_rule_id(rule['_id'], DB.get_redis(request))
        if not pydash.is_empty(entries):
            entry = entries[0]
            if int(entry['count']) >= int(rule['max_requests']):
                raise Exception({
                    'message': rule['message'] or 'Too Many Requests',
                    'status_code': int(rule['status_code']) or 429
                })
            queue_async_func.s({
                'func': 'RateLimiter.increment_entry_count',
                'args': [entry['_id'], 'redis'],
                'kwargs': {}
            }).apply_async()
        else:
            entry = {
                'rule_id': rule['_id'],
                'host': request.remote,
                'count': 1,
                'timeout': int(rule['timeout'])
            }
            queue_async_func.s({
                'func': 'RateLimiter.create_entry',
                'args': [entry, 'redis'],
                'kwargs': {}
            }).apply_async()
Example #8
async def proxy(request: web.Request, handler: web.RequestHandler):
    try:
        req_start_time = time()
        if pydash.starts_with(request.path_qs, '/raven'):
            return await handler(request)

        service = Regex.best_match(await Regex.get_matched_paths(request.path, DB.get(request, service_controller.table)))
        await handle_service(service, request.remote)

        rate_limiter_rules = await RateLimiter.get_rule_by_service_id(str(service['_id']), DB.get_redis(request))
        rate_limiter_rule = rate_limiter_rules[0] if rate_limiter_rules else None
        await handle_rate_limiter(request, str(service['_id']), rate_limiter_rule)

        breakers = await CircuitBreaker.get_by_service_id(str(service['_id']), DB.get(request, circuit_breaker_controller.table))
        breaker = breakers[0] if breakers else None

        request_validators = await RequestValidator.get_by_service_id(str(service['_id']), DB.get(request, request_validator_controller.table))
        request_validator = request_validators[0] if request_validators else None

        endpoint_cachers = not pydash.is_empty(service) and await EndpointCacher.get_by_service_id(str(service['_id']), DB.get_redis(request)) or None
        endpoint_cacher = endpoint_cachers[0] if endpoint_cachers else None

        await handle_request_validator(request_validator, json.loads(await request.text()), request.method)
        req, req_cache_hit = await handle_request(request, service, endpoint_cacher)

        checks = []

        if not pydash.is_empty(
                breaker) and breaker['status'] == CircuitBreakerStatus.ON.name:
            if req['status'] in breaker['status_codes']:
                checks.append(handle_circuit_breaker(
                    breaker, service, request, req))
            else:
                await CircuitBreaker.incr_count(str(breaker['_id']), DB.get_redis(request))

        queue_async_func.s({
            'func': 'Service.advance_target',
            'args': [str(service['_id']), f'mongo:{service_controller.table}'],
            'kwargs': {}
        }).apply_async()
        req_finish_time = time()
        req_elapsed_time = req_finish_time - req_start_time
        checks.append(handle_insights(request, req, str(
            service['_id']), req_elapsed_time, req_cache_hit))
        await Async.all(checks)

        return web.Response(
            body=Bytes.decode_bytes(
                req['body_bytes']),
            status=req['status'],
            content_type=req['content_type'],
            headers=CIMultiDict(
                pydash.omit(
                    req['headers'],
                    'Content-Type',
                    'Transfer-Encoding',
                    'Content-Encoding')))
    except Exception as err:
        return Error.handle(err)
Example #9
    def __init__(self, host, port, db):
        if _.is_empty(host) or _.is_empty(port) or _.is_empty(db):
            raise RuntimeError(
                'Redis host or port or db missing. Please provide all three.')

        self._redis = redis.StrictRedis(host=host, port=port, db=db)
        self._pipe = self._redis.pipeline()
        self._count = 0
Example #10
def _cleaned_link_is_valid(sentence_text, cleaned_link):
  '''Checks corner cases where the fields are not present due to
typos in the markup or complex use of templates'''
  link_text_is_in_page = 'text' in cleaned_link and sentence_text.find(cleaned_link['text']) != -1
  link_page_is_in_page = 'page' in cleaned_link and sentence_text.find(cleaned_link['page']) != -1
  link_mention_is_in_page = link_text_is_in_page or (link_page_is_in_page and 'text' not in cleaned_link)
  link_mention_is_blank = _.is_empty(cleaned_link['text'].strip()) if 'text' in cleaned_link else False
  link_page_is_blank = _.is_empty(cleaned_link['page'].strip()) if 'page' in cleaned_link else False
  return not link_page_is_blank and not link_mention_is_blank and link_mention_is_in_page
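A quick illustration of the corner cases handled above, assuming the helper is in scope; the sentence and links are made up for this sketch:

sentence = 'Paris is the capital of France'
# mention text present in the sentence and non-blank -> valid
assert _cleaned_link_is_valid(sentence, {'text': 'Paris', 'page': 'Paris'})
# no 'text' field, but the page title appears in the sentence -> still valid
assert _cleaned_link_is_valid(sentence, {'page': 'France'})
# blank mention text -> rejected
assert not _cleaned_link_is_valid(sentence, {'text': '   ', 'page': 'Paris'})
# neither the text nor the page appears in the sentence -> rejected
assert not _cleaned_link_is_valid(sentence, {'text': 'Berlin', 'page': 'Germany'})
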
Example #11
    async def clear_empty_entries(db: AioRedis):
        empty_entries = []
        entries_keys = await DB.fetch_members(entry_set, db)
        for key in entries_keys:
            entry = await db.hgetall(key, encoding='utf-8')
            pydash.is_empty(entry) and empty_entries.append(key)

        coroutines = []
        for empty_entry in empty_entries:
            coroutines.append(db.srem(entry_set, empty_entry))
            coroutines.append(RateLimiter._clear_indexes(empty_entry, db))

        await Async.all(coroutines)
Example #12
def _apply_match_heuristic(page, link_contexts, to_match, entity):
    '''helper for defining heuristics for finding mentions of an entity'''
    matches = u.match_all(to_match, page['plaintext'])
    mentions = sum(link_contexts.values(), [])
    link_context = {
        entity: [{
            'text': to_match,
            'offset': match_index,
            'page_title': page['title'],
            'preredirect': _.upper_first(entity)
        } for match_index in matches]
    }
    filtered_link_context = {
        entity: [
            mention for mention in link_context[entity]
            if not _mention_overlaps(mentions, mention)
        ]
    }
    concat = lambda dest, src: _.uniq_by(dest + src, 'offset') if dest else src
    if not _.is_empty(filtered_link_context[entity]):
        return _.merge_with(link_contexts,
                            filtered_link_context,
                            iteratee=concat)
    else:
        return link_contexts
Example #13
def _sentence_to_link_contexts_reducer(redirects_lookup, page, contexts_acc,
                                       sentence):
    contexts = _sentence_to_link_contexts(redirects_lookup, page, sentence)
    if not _.is_empty(contexts):
        concat = lambda dest, src: dest + src if dest else src
        _.merge_with(contexts_acc, contexts, iteratee=concat)
    return contexts_acc
Example #14
def retro_eval(predir, session_index=None):
    '''
    Method to run eval sessions by scanning a predir for ckpt files. Used to rerun failed eval sessions.
    @example

    yarn retro_eval data/reinforce_cartpole_2018_01_22_211751
    '''
    logger.info(f'Retro-evaluate sessions from predir {predir}')
    # collect all unique prepaths first
    prepaths = []
    s_filter = '' if session_index is None else f'_s{session_index}_'
    for filename in os.listdir(predir):
        if filename.endswith('model.pth') and s_filter in filename:
            res = re.search(r'.+epi(\d+)-totalt(\d+)', filename)
            if res is not None:
                prepath = f'{predir}/{res[0]}'
                if prepath not in prepaths:
                    prepaths.append(prepath)
    if ps.is_empty(prepaths):
        return

    logger.info(f'Starting retro eval')
    np.random.shuffle(
        prepaths)  # so that CUDA_ID by trial/session index is spread out
    rand_spec = util.prepath_to_spec(
        prepaths[0])  # get any prepath, read its max session
    max_session = rand_spec['meta']['max_session']
    util.parallelize_fn(run_wait_eval, prepaths, num_cpus=max_session)
Example #15
def flatten_dict(obj, delim='.'):
    '''Missing pydash method to flatten dict'''
    nobj = {}
    for key, val in obj.items():
        if ps.is_dict(val) and not ps.is_empty(val):
            strip = flatten_dict(val, delim)
            for k, v in strip.items():
                nobj[key + delim + k] = v
        elif ps.is_list(val) and not ps.is_empty(val) and ps.is_dict(val[0]):
            for idx, v in enumerate(val):
                nobj[key + delim + str(idx)] = v
                if ps.is_object(v):
                    nobj = flatten_dict(nobj, delim)
        else:
            nobj[key] = val
    return nobj
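A small usage sketch with made-up values, showing the key format flatten_dict produces: nested dict keys are joined with the delimiter and list elements get their index in the key:

spec = {
    'agent': {'algorithm': {'name': 'Reinforce', 'gamma': 0.99}},
    'env': [{'name': 'CartPole-v0'}],
}
flat = flatten_dict(spec)
# flat == {
#     'agent.algorithm.name': 'Reinforce',
#     'agent.algorithm.gamma': 0.99,
#     'env.0.name': 'CartPole-v0',
# }
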
Example #16
    def __init__(self, net_spec, in_dim, out_dim):
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        Net.__init__(self, net_spec, in_dim, out_dim)
        # set default
        util.set_attr(self, dict(
            init_fn=None,
            normalize=False,
            batch_norm=False,
            clip_grad_val=None,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_scheduler_spec=None,
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'init_fn',
            'normalize',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # Guard against inappropriate algorithms and environments
        assert isinstance(out_dim, int)

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc layer from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        self.v = nn.Linear(tail_in_dim, 1)  # state value
        self.adv = nn.Linear(tail_in_dim, out_dim)  # action dependent raw advantage
        self.model_tails = nn.ModuleList([self.v, self.adv])

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
Example #17
def test_parse_xml_docs_trouble_fbis():
    doc_path = './tests/fixtures/trouble_fbis'
    doc_lookup = f.parse_xml_docs(doc_path)
    assert len(doc_lookup) == 2
    assert _.is_empty(
        set(doc_lookup.keys()) - set(['FBIS3-10491', 'FBIS3-10081']))
    assert all([len(text) > 100 for text in doc_lookup.values()])
Example #18
 def _get_next_batch(self):
   ids = []
   if len(self.ids_from_last_page) > self.batch_size:
     ids = random.sample(list(self.ids_from_last_page), self.batch_size)
     self.ids_from_last_page = self.ids_from_last_page - set(ids)
     shuffle(ids)
     return ids
   else:
     if not _.is_empty(self.ids_from_last_page):
       ids = list(self.ids_from_last_page)
       self.ids_from_last_page = set()
       if self.page_ctr > len(self.page_id_order):
         return ids
     for page_id in self.page_id_order[self.page_ctr:]:
       self.page_ctr += 1
       page_mention_ids = self._get_page_mention_ids(page_id, self.page_ctr)
       ids.extend(page_mention_ids)
       if len(ids) >= self.batch_size:
         self.ids_from_last_page = set(ids[self.batch_size:])
         ids = ids[:self.batch_size]
         shuffle(ids)
         return ids
       else:
         self.ids_from_last_page = set()
     ids = ids[:]
     shuffle(ids)
     return ids
Example #19
    def calc_df_row(self, env):
        '''Calculate a row for updating train_df or eval_df.'''
        frame = self.env.clock.get('frame')
        wall_t = env.clock.get_elapsed_wall_t()
        fps = 0 if wall_t == 0 else frame / wall_t

        # update debugging variables
        if net_util.to_check_train_step():
            grad_norms = net_util.get_grad_norms(self.agent.algorithm)
            self.mean_grad_norm = np.nan if ps.is_empty(grad_norms) else np.mean(grad_norms)

        row = pd.Series({
            # epi and frame are always measured from training env
            'epi': self.env.clock.get('epi'),
            # t and reward are measured from a given env or eval_env
            't': env.clock.get('t'),
            'wall_t': wall_t,
            'opt_step': self.env.clock.get('opt_step'),
            'frame': frame,
            'fps': fps,
            'total_reward': np.nanmean(self.total_reward),  # guard for vec env
            'avg_return': np.nan,  # update outside
            'avg_len': np.nan,  # update outside
            'avg_success': np.nan,  # update outside
            'loss': self.loss,
            'lr': self.get_mean_lr(),
            'explore_var': self.explore_var,
            'entropy_coef': self.entropy_coef if hasattr(self, 'entropy_coef') else np.nan,
            'entropy': self.mean_entropy,
            'grad_norm': self.mean_grad_norm,
        }, dtype=np.float32)
        assert all(col in self.train_df.columns for col in
                   row.index), f'Mismatched row keys: {row.index} vs df columns {self.train_df.columns}'
        return row
Example #20
File: mlp.py Project: c-w-m/slm-lab
 def build_model_tails(self, out_dim, out_layer_activation):
     '''Build each model_tail. These are stored as Sequential models in model_tails'''
     if not ps.is_list(out_layer_activation):
         out_layer_activation = [out_layer_activation] * len(out_dim)
     model_tails = nn.ModuleList()
     if ps.is_empty(self.tail_hid_layers):
         for out_d, out_activ in zip(out_dim, out_layer_activation):
             tail = net_util.build_fc_model(
                 [self.body_hid_layers[-1], out_d], out_activ)
             model_tails.append(tail)
     else:
         assert len(self.tail_hid_layers) == len(
             out_dim
         ), 'Hydra tail hid_params inconsistent with number out dims'
         for out_d, out_activ, hid_layers in zip(out_dim,
                                                 out_layer_activation,
                                                 self.tail_hid_layers):
             dims = hid_layers
             model_tail = net_util.build_fc_model(
                 dims, self.hid_layers_activation)
             tail_out = net_util.build_fc_model([dims[-1], out_d],
                                                out_activ)
             model_tail.add_module(str(len(model_tail)), tail_out)
             model_tails.append(model_tail)
     return model_tails
Example #21
def compare_candidate_ids_tensor(expected, result):
  assert isinstance(result, torch.Tensor)
  expected_candidate_ids = set(expected.numpy())
  result_candidate_ids = set(result.numpy())
  assert _.is_empty(expected_candidate_ids - result_candidate_ids)
  for generated_candidate in result_candidate_ids - expected_candidate_ids:
    assert generated_candidate not in expected_candidate_ids
  return True
Example #22
 def build_fc_layers(self, fc_hid_layers):
     '''
     Builds all of the fc layers in the network and stores them in a Sequential model
     '''
     assert not ps.is_empty(fc_hid_layers)
     dims = [self.conv_out_dim] + fc_hid_layers
     fc_model = net_util.build_sequential(dims, self.hid_layers_activation)
     return fc_model
Example #23
def get_loss_fn(cls, loss_param):
    '''Helper to parse loss param and construct loss_fn for net'''
    loss_param = loss_param or {}
    loss_fn = getattr(F, _.get(loss_param, 'name', 'mse_loss'))
    loss_param = _.omit(loss_param, 'name')
    if not _.is_empty(loss_param):
        loss_fn = partial(loss_fn, **loss_param)
    return loss_fn
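An illustrative call, assuming the helper above is in scope; cls is unused in the body shown, so any placeholder works, and the remaining keys in loss_param are bound onto the torch.nn.functional loss via partial:

import torch

loss_fn = get_loss_fn(None, {'name': 'mse_loss', 'reduction': 'sum'})
pred = torch.tensor([0.0, 1.0, 2.0])
target = torch.tensor([0.5, 1.0, 1.5])
loss = loss_fn(pred, target)  # equivalent to F.mse_loss(pred, target, reduction='sum')
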
Example #24
async def handle_service(service: object, remote: str):
    if pydash.is_empty(service):
        raise Exception({
            'message': 'Not found',
            'status_code': 404
        })
    if service['state'] in [ServiceState.DOWN.name, ServiceState.OFF.name]:
        raise Exception({
            'message': f"Service is currently {service['state']}",
            'status_code': 503
        })
    if (not pydash.is_empty(service['whitelisted_hosts']) and remote not in service['whitelisted_hosts']) \
            or (not pydash.is_empty(service['blacklisted_hosts']) and remote in service['blacklisted_hosts']):
        raise Exception({
            'message': 'Unauthorized',
            'status_code': 401
        })
Example #25
def get_lr_scheduler(cls, lr_scheduler_spec):
    '''Helper to parse lr_scheduler param and construct Pytorch optim.lr_scheduler'''
    if ps.is_empty(lr_scheduler_spec):
        lr_scheduler = NoOpLRScheduler()
    else:
        LRSchedulerClass = getattr(torch.optim.lr_scheduler, lr_scheduler_spec['name'])
        lr_scheduler_spec = ps.omit(lr_scheduler_spec, 'name')
        lr_scheduler = LRSchedulerClass(cls.optim, **lr_scheduler_spec)
    return lr_scheduler
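A similar sketch for the scheduler helper: cls only needs an optim attribute here, so a simple namespace stands in for the net (NoOpLRScheduler belongs to the surrounding project and is only used for an empty spec):

import torch
from types import SimpleNamespace

model = torch.nn.Linear(4, 2)
holder = SimpleNamespace(optim=torch.optim.Adam(model.parameters(), lr=1e-3))
# a non-empty spec resolves to torch.optim.lr_scheduler.StepLR(holder.optim, step_size=1000, gamma=0.9)
scheduler = get_lr_scheduler(holder, {'name': 'StepLR', 'step_size': 1000, 'gamma': 0.9})
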
Example #26
def get_page_id_order(cursor):
  cursor.execute('select id from pages')
  page_ids = []
  while True:
    results = cursor.fetchmany(10000)
    if _.is_empty(results): break
    page_ids.extend([row['id'] for row in results])
  shuffle(page_ids)
  return page_ids
Example #27
def test_parse_xml_docs_fbis():
    doc_path = './tests/fixtures/fbis_sample'
    doc_lookup = f.parse_xml_docs(doc_path)
    assert len(doc_lookup) == 5
    assert _.is_empty(
        set(doc_lookup.keys()) - set([
            'FBIS3-10491', 'FBIS3-10397', 'FBIS3-10243', 'FBIS3-10082',
            'FBIS3-5'
        ]))
    assert all([len(text) > 1000 for text in doc_lookup.values()])
Example #28
 def __iter__(self):
   while self.page_ctr < len(self.page_id_order) or not _.is_empty(self.ids_from_last_page):
     if (self.limit is not None) and (self.num_mentions_seen >= self.limit): return
     if self.use_fast_sampler:
       self.num_mentions_seen += self.batch_size
       yield [None] * self.batch_size
       continue
     batch = self._get_next_batch()
     yield batch
     self.num_mentions_seen += len(batch)
Example #29
 def best_match(entities: list):
     best = {
         'regex_groups': ()
     }
     for entity in entities:
         if pydash.has(entity, 'regex_groups') and \
                 len(entity['regex_groups']) > len(best['regex_groups']):
             best = entity
     return not pydash.is_empty(best['regex_groups']) and best or None
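An illustrative call with made-up entities, assuming this is the Regex.best_match static method used in the proxy example above: the entity with the longest regex_groups tuple wins, and None is returned when nothing matched.

entities = [
    {'path': '/users', 'regex_groups': ('users',)},
    {'path': '/users/:id', 'regex_groups': ('users', '42')},
    {'path': '/health'},  # no regex_groups key, skipped
]
assert best_match(entities) == {'path': '/users/:id', 'regex_groups': ('users', '42')}
assert best_match([{'path': '/health'}]) is None
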
Example #30
def _execute_register(device: dict, created: str, log: list):
    """
    Tries to POST the device and updates the `device` dict with the resource from the database; if the device could
    not be uploaded, the `device` param will contain the database version of the device, not the one that was input.
    This is because the majority of a device's information is immutable (specifically the fields used to compute
    the ETAG).

    :param device: Input device. It is replaced (keeping the reference) with the db version.
    :param created: Sets the _created value of the device to be the same as for the register.
    :param log: A log to which the resulting device is appended if execute_register was successful.
    :raise InnerRequestError: any internal error in the POST that is not about the device already existing.
    """
    new = True
    try:
        if created:
            device['created'] = created
        db_device = execute_post_internal(Naming.resource(device['@type']),
                                          device)
    except InnerRequestError as e:
        new = False
        try:
            db_device = _get_existing_device(e)
            # We add a benchmark. TODO: move this to another place?
            device['_id'] = db_device['_id']
            ComponentDomain.benchmark(device)
            external_synthetic_id_fields = pick(
                device, *DeviceDomain.external_synthetic_ids)
            # If the db_device was a placeholder
            # We want to override it with the new device
            if db_device.get('placeholder', False):
                # Eve does not generate defaults for sub-resources,
                # and we really need the placeholder default set, especially when
                # discovering a device
                device['placeholder'] = False
                # We create hid when we validate (which is wrong), so we need to set it
                # manually here, as we won't validate in this db operation
                device['hid'] = DeviceDomain.hid(device['manufacturer'],
                                                 device['serialNumber'],
                                                 device['model'])
                DeviceDomain.update_one_raw(db_device['_id'], {'$set': device})
            elif not is_empty(external_synthetic_id_fields):
                # External synthetic identifiers are not intrinsically inherent
                # to devices, and thus can be added later in other Snapshots.
                # Note that the device POST and _get_existing_device() have already validated those ids
                DeviceDomain.update_one_raw(
                    db_device['_id'], {'$set': external_synthetic_id_fields})
        except DeviceNotFound:
            raise e
    else:
        log.append(db_device)
    device.clear()
    device.update(db_device)
    device['new'] = new  # Note that the device is 'cleared' before
    return db_device
Example #31
def test_is_empty(case, expected):
    assert _.is_empty(case) == expected
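The parametrize decorator is not included in this excerpt; a plausible parametrization for pydash.is_empty (illustrative values, not the original ones) would be:

import pydash as _
import pytest


@pytest.mark.parametrize('case,expected', [
    (None, True),
    ('', True),
    ([], True),
    ({}, True),
    (0, True),  # falsey values count as empty
    ('text', False),
    ([1, 2], False),
    ({'a': 1}, False),
])
def test_is_empty(case, expected):
    assert _.is_empty(case) == expected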