def main(): """Example invocation: python solution.py --lista='[(900, 1100), (1300, 1500)]' \ --listb='[(900, 915), (1000, 1015), (1230, 1600)]' """ parser = optparse.OptionParser() parser.add_option('-a', '--lista', action='store', dest='list_a', help='First list', default=None) parser.add_option('-b', '--listb', action='store', dest='list_b', help='Second List', default=None) options, _ = parser.parse_args() if py.is_empty(options.list_a) or py.is_empty(options.list_b): return result = find_diff(ast.literal_eval(options.list_a), ast.literal_eval(options.list_b)) print('Result of listA - listB: ') print(result)
def test_mention_context_batch_sampler():
    mentions_by_page = {0: [40, 50, 60, 70, 80, 90],
                        1: [100],
                        2: [0, 10, 20, 30]}
    cursor = get_mock_cursor(mentions_by_page)
    batch_size = 5
    page_id_order = [2, 0, 1]
    mentions_in_page_order = _.mapcat(page_id_order,
                                      lambda page_id: mentions_by_page[page_id])
    batch_sampler = MentionContextBatchSampler(cursor, page_id_order, batch_size)
    batches_seen = []
    indexes_seen = []
    for batch_num, batch_indexes in enumerate(batch_sampler):
        assert _.is_empty(_.intersection(batch_indexes, indexes_seen))
        indexes_seen.extend(batch_indexes)
        if batch_num == 0:
            assert len(set(batch_indexes) - {0, 10, 20, 30}) == 1
            assert any([mention in set(batch_indexes) - {0, 10, 20, 30}
                        for mention in mentions_by_page[0]])
            assert len(batch_indexes) == batch_size
        elif batch_num == 1:
            assert _.is_empty(set(batch_indexes) - set(mentions_by_page[0]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 2:
            assert batch_indexes == [100]
            assert len(batch_indexes) == 1
        batches_seen.append(batch_num)
    assert _.is_empty(_.difference(mentions_in_page_order, indexes_seen))
    assert batches_seen == [0, 1, 2]
def _get_existing_device(e: InnerRequestError) -> dict:
    """
    Gets the device encoded in the InnerRequestError after ensuring integrity
    of the errors.

    The only accepted errors are about already existing unique id fields
    (like _id or hid), as they are the only ones that hint at an already
    existing device.

    @raise MismatchBetweenUid: The unique ids point at different devices.
    This is usually a misspelling error by the user, as some uids can be
    entered manually.
    @raise DeviceNotFound: There is no uid error, or it is not well formatted.
    """
    devices = []
    # Model is used when matching components with their parents
    for field in DeviceDomain.uid_fields | {'model'}:
        for error in e.body['_issues'].get(field, []):
            with suppress(ValueError, KeyError):
                device = json_util.loads(error)['NotUnique']
                if not is_empty(devices) and device['_id'] != devices[-1][1]['_id']:
                    raise MismatchBetweenUid(field, device['_id'],
                                             devices[-1][1]['_id'], devices[-1][0])
                devices.append((field, device))
    if is_empty(devices):
        raise DeviceNotFound()
    return devices[-1][1]
def test_mention_context_batch_sampler_many_last_ids():
    mentions_by_page = {0: [120],
                        1: [130],
                        2: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110]}
    cursor = get_mock_cursor(mentions_by_page)
    batch_size = 5
    page_id_order = [2, 0, 1]
    mentions_in_page_order = _.mapcat(page_id_order,
                                      lambda page_id: mentions_by_page[page_id])
    batch_sampler = MentionContextBatchSampler(cursor, page_id_order, batch_size)
    batches_seen = []
    indexes_seen = []
    for batch_num, batch_indexes in enumerate(batch_sampler):
        assert _.is_empty(_.intersection(batch_indexes, indexes_seen))
        indexes_seen.extend(batch_indexes)
        if batch_num == 0:
            assert _.is_empty(_.difference(batch_indexes, mentions_by_page[2]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 1:
            assert _.is_empty(_.difference(batch_indexes, mentions_by_page[2]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 2:
            assert len(_.intersection(batch_indexes, mentions_by_page[2])) == 2
            assert len(_.intersection(batch_indexes, mentions_by_page[0])) == 1
            assert len(_.intersection(batch_indexes, mentions_by_page[1])) == 1
            assert len(batch_indexes) == 4
        batches_seen.append(batch_num)
    assert _.is_empty(_.difference(mentions_in_page_order, indexes_seen))
    assert batches_seen == [0, 1, 2]
async def handle_request(request: web.Request, service: object, endpoint_cacher: object):
    req_ctx = {
        'method': request.method,
        'url': service['targets'][service['cur_target_index']],
        'params': dict(request.rel_url.query),
        'data': await request.text(),
        'cookies': dict(request.cookies),
        'headers': pydash.omit(dict(request.headers), 'Host'),
    }
    req = None
    req_cache = None
    req_ctx_hash = None

    if not pydash.is_empty(endpoint_cacher):
        req_ctx_hash = Hasher.hash_sha_256(json.dumps(req_ctx))
        req_cache = await EndpointCacher.get_cache(req_ctx_hash, DB.get_redis(request))

    if pydash.is_empty(req_cache):
        req = await Api.call(**req_ctx)
        if pydash.is_empty(req_ctx_hash):
            req_ctx_hash = Hasher.hash_sha_256(json.dumps(req_ctx))
        not pydash.is_empty(endpoint_cacher) and queue_async_func.s({
            'func': 'EndpointCacher.set_cache',
            'args': [req_ctx_hash, req, int(endpoint_cacher['timeout']), 'redis'],
            'kwargs': {}
        }).apply_async()
    else:
        req = json.loads(req_cache)

    cache_hit = not pydash.is_empty(req_cache)
    return req, cache_hit
def __init__(self, host, port, db, mode=MODE_FAST):
    if _.is_empty(host) or _.is_empty(port) or _.is_empty(db):
        raise RuntimeError(
            'Redis host or port or db missing. Please provide all three.')
    self._redis = redis.StrictRedis(host=host, port=port, db=db)
    self._mode = mode
async def handle_rate_limiter(request: web.Request, service_id: str, rule: object):
    if not pydash.is_empty(rule):
        entries = await RateLimiter.get_entry_by_rule_id(rule['_id'], DB.get_redis(request))
        if not pydash.is_empty(entries):
            entry = entries[0]
            if int(entry['count']) >= int(rule['max_requests']):
                raise Exception({
                    'message': rule['message'] or 'Too Many Requests',
                    'status_code': int(rule['status_code']) or 429
                })
            queue_async_func.s({
                'func': 'RateLimiter.increment_entry_count',
                'args': [entry['_id'], 'redis'],
                'kwargs': {}
            }).apply_async()
        else:
            entry = {
                'rule_id': rule['_id'],
                'host': request.remote,
                'count': 1,
                'timeout': int(rule['timeout'])
            }
            queue_async_func.s({
                'func': 'RateLimiter.create_entry',
                'args': [entry, 'redis'],
                'kwargs': {}
            }).apply_async()
async def proxy(request: web.Request, handler: web.RequestHandler):
    try:
        req_start_time = time()
        if pydash.starts_with(request.path_qs, '/raven'):
            return await handler(request)

        service = Regex.best_match(await Regex.get_matched_paths(
            request.path, DB.get(request, service_controller.table)))
        await handle_service(service, request.remote)

        rate_limiter_rules = await RateLimiter.get_rule_by_service_id(
            str(service['_id']), DB.get_redis(request))
        rate_limiter_rule = rate_limiter_rules[0] if rate_limiter_rules else None
        await handle_rate_limiter(request, str(service['_id']), rate_limiter_rule)

        breakers = await CircuitBreaker.get_by_service_id(
            str(service['_id']), DB.get(request, circuit_breaker_controller.table))
        breaker = breakers[0] if breakers else None

        request_validators = await RequestValidator.get_by_service_id(
            str(service['_id']), DB.get(request, request_validator_controller.table))
        request_validator = request_validators[0] if request_validators else None

        endpoint_cachers = not pydash.is_empty(service) and await EndpointCacher.get_by_service_id(
            str(service['_id']), DB.get_redis(request)) or None
        endpoint_cacher = endpoint_cachers[0] if endpoint_cachers else None

        await handle_request_validator(request_validator, json.loads(await request.text()), request.method)
        req, req_cache_hit = await handle_request(request, service, endpoint_cacher)

        checks = []
        if not pydash.is_empty(breaker) and breaker['status'] == CircuitBreakerStatus.ON.name:
            if req['status'] in breaker['status_codes']:
                checks.append(handle_circuit_breaker(breaker, service, request, req))
            else:
                await CircuitBreaker.incr_count(str(breaker['_id']), DB.get_redis(request))

        queue_async_func.s({
            'func': 'Service.advance_target',
            'args': [str(service['_id']), f'mongo:{service_controller.table}'],
            'kwargs': {}
        }).apply_async()

        req_finish_time = time()
        req_elapsed_time = req_finish_time - req_start_time
        checks.append(handle_insights(request, req, str(service['_id']),
                                      req_elapsed_time, req_cache_hit))
        await Async.all(checks)

        return web.Response(
            body=Bytes.decode_bytes(req['body_bytes']),
            status=req['status'],
            content_type=req['content_type'],
            headers=CIMultiDict(pydash.omit(req['headers'], 'Content-Type',
                                            'Transfer-Encoding', 'Content-Encoding')))
    except Exception as err:
        return Error.handle(err)
def __init__(self, host, port, db):
    if _.is_empty(host) or _.is_empty(port) or _.is_empty(db):
        raise RuntimeError(
            'Redis host or port or db missing. Please provide all three.')
    self._redis = redis.StrictRedis(host=host, port=port, db=db)
    self._pipe = self._redis.pipeline()
    self._count = 0
def _cleaned_link_is_valid(sentence_text, cleaned_link):
    '''Checks corner cases where the fields are not present due to typos in
    the markup or complex use of templates'''
    link_text_is_in_page = 'text' in cleaned_link and sentence_text.find(cleaned_link['text']) != -1
    link_page_is_in_page = 'page' in cleaned_link and sentence_text.find(cleaned_link['page']) != -1
    link_mention_is_in_page = link_text_is_in_page or (link_page_is_in_page and 'text' not in cleaned_link)
    link_mention_is_blank = _.is_empty(cleaned_link['text'].strip()) if 'text' in cleaned_link else False
    link_page_is_blank = _.is_empty(cleaned_link['page'].strip()) if 'page' in cleaned_link else False
    return not link_page_is_blank and not link_mention_is_blank and link_mention_is_in_page
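# Hedged usage sketch for _cleaned_link_is_valid above; the sentence and link
# dicts here are invented for illustration, not fixtures from this project.
# A link is kept only when its mention text (or its page title, when no text
# field is present) actually occurs in the sentence and is not blank.
def _example_cleaned_link_is_valid():
    sentence = 'The cat sat on the mat.'
    assert _cleaned_link_is_valid(sentence, {'text': 'cat', 'page': 'Cat'})
    assert not _cleaned_link_is_valid(sentence, {'text': 'dog', 'page': 'Dog'})
    assert not _cleaned_link_is_valid(sentence, {'text': ' ', 'page': 'Cat'})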
async def clear_empty_entries(db: AioRedis):
    empty_entries = []
    entries_keys = await DB.fetch_members(entry_set, db)
    for key in entries_keys:
        entry = await db.hgetall(key, encoding='utf-8')
        pydash.is_empty(entry) and empty_entries.append(key)
    coroutines = []
    for empty_entry in empty_entries:
        coroutines.append(db.srem(entry_set, empty_entry))
        coroutines.append(RateLimiter._clear_indexes(empty_entry, db))
    await Async.all(coroutines)
def _apply_match_heuristic(page, link_contexts, to_match, entity):
    '''helper for defining heuristics for finding mentions of an entity'''
    matches = u.match_all(to_match, page['plaintext'])
    mentions = sum(link_contexts.values(), [])
    link_context = {
        entity: [{'text': to_match,
                  'offset': match_index,
                  'page_title': page['title'],
                  'preredirect': _.upper_first(entity)}
                 for match_index in matches]
    }
    filtered_link_context = {
        entity: [mention
                 for mention in link_context[entity]
                 if not _mention_overlaps(mentions, mention)]
    }
    concat = lambda dest, src: _.uniq_by(dest + src, 'offset') if dest else src
    if not _.is_empty(filtered_link_context[entity]):
        return _.merge_with(link_contexts, filtered_link_context, iteratee=concat)
    else:
        return link_contexts
def _sentence_to_link_contexts_reducer(redirects_lookup, page, contexts_acc, sentence):
    contexts = _sentence_to_link_contexts(redirects_lookup, page, sentence)
    if not _.is_empty(contexts):
        concat = lambda dest, src: dest + src if dest else src
        _.merge_with(contexts_acc, contexts, iteratee=concat)
    return contexts_acc
def retro_eval(predir, session_index=None):
    '''
    Method to run eval sessions by scanning a predir for ckpt files.
    Used to rerun failed eval sessions.
    @example

    yarn retro_eval data/reinforce_cartpole_2018_01_22_211751
    '''
    logger.info(f'Retro-evaluate sessions from predir {predir}')
    # collect all unique prepaths first
    prepaths = []
    s_filter = '' if session_index is None else f'_s{session_index}_'
    for filename in os.listdir(predir):
        if filename.endswith('model.pth') and s_filter in filename:
            res = re.search(r'.+epi(\d+)-totalt(\d+)', filename)
            if res is not None:
                prepath = f'{predir}/{res[0]}'
                if prepath not in prepaths:
                    prepaths.append(prepath)
    if ps.is_empty(prepaths):
        return

    logger.info('Starting retro eval')
    np.random.shuffle(prepaths)  # so that CUDA_ID by trial/session index is spread out
    rand_spec = util.prepath_to_spec(prepaths[0])  # get any prepath, read its max session
    max_session = rand_spec['meta']['max_session']
    util.parallelize_fn(run_wait_eval, prepaths, num_cpus=max_session)
def flatten_dict(obj, delim='.'):
    '''Missing pydash method to flatten dict'''
    nobj = {}
    for key, val in obj.items():
        if ps.is_dict(val) and not ps.is_empty(val):
            strip = flatten_dict(val, delim)
            for k, v in strip.items():
                nobj[key + delim + k] = v
        elif ps.is_list(val) and not ps.is_empty(val) and ps.is_dict(val[0]):
            for idx, v in enumerate(val):
                nobj[key + delim + str(idx)] = v
                if ps.is_object(v):
                    nobj = flatten_dict(nobj, delim)
        else:
            nobj[key] = val
    return nobj
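# Hedged usage sketch for flatten_dict above; the nested dict is an invented
# example. Nested dicts are joined with the delimiter, and non-empty lists of
# dicts are expanded by index before being flattened again.
def _example_flatten_dict():
    obj = {'a': {'b': 1, 'c': {'d': 2}}, 'e': [{'f': 3}, {'g': 4}], 'h': 5}
    assert flatten_dict(obj) == {'a.b': 1, 'a.c.d': 2, 'e.0.f': 3, 'e.1.g': 4, 'h': 5}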
def __init__(self, net_spec, in_dim, out_dim):
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        init_fn=None,
        normalize=False,
        batch_norm=False,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'init_fn',
        'normalize',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # Guard against inappropriate algorithms and environments
    assert isinstance(out_dim, int)

    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()
    # fc body
    if ps.is_empty(self.fc_hid_layers):
        tail_in_dim = self.conv_out_dim
    else:
        # fc layer from flattened conv
        self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]
    # tails. avoid list for single-tail for compute speed
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim)  # action dependent raw advantage
    # nn.ModuleList expects an iterable of modules, not separate module arguments
    self.model_tails = nn.ModuleList([self.v, self.adv])
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
def test_parse_xml_docs_trouble_fbis():
    doc_path = './tests/fixtures/trouble_fbis'
    doc_lookup = f.parse_xml_docs(doc_path)
    assert len(doc_lookup) == 2
    assert _.is_empty(set(doc_lookup.keys()) - set(['FBIS3-10491', 'FBIS3-10081']))
    assert all([len(text) > 100 for text in doc_lookup.values()])
def _get_next_batch(self):
    ids = []
    if len(self.ids_from_last_page) > self.batch_size:
        ids = random.sample(list(self.ids_from_last_page), self.batch_size)
        self.ids_from_last_page = self.ids_from_last_page - set(ids)
        shuffle(ids)
        return ids
    else:
        if not _.is_empty(self.ids_from_last_page):
            ids = list(self.ids_from_last_page)
            self.ids_from_last_page = set()
        if self.page_ctr > len(self.page_id_order):
            return ids
        for page_id in self.page_id_order[self.page_ctr:]:
            self.page_ctr += 1
            page_mention_ids = self._get_page_mention_ids(page_id, self.page_ctr)
            ids.extend(page_mention_ids)
            if len(ids) >= self.batch_size:
                self.ids_from_last_page = set(ids[self.batch_size:])
                ids = ids[:self.batch_size]
                shuffle(ids)
                return ids
        else:
            # for-else: only reached when all remaining pages are consumed
            # without filling a batch; return the partial batch
            self.ids_from_last_page = set()
            ids = ids[:]
            shuffle(ids)
            return ids
def calc_df_row(self, env):
    '''Calculate a row for updating train_df or eval_df.'''
    frame = self.env.clock.get('frame')
    wall_t = env.clock.get_elapsed_wall_t()
    fps = 0 if wall_t == 0 else frame / wall_t

    # update debugging variables
    if net_util.to_check_train_step():
        grad_norms = net_util.get_grad_norms(self.agent.algorithm)
        self.mean_grad_norm = np.nan if ps.is_empty(grad_norms) else np.mean(grad_norms)

    row = pd.Series({
        # epi and frame are always measured from training env
        'epi': self.env.clock.get('epi'),
        # t and reward are measured from a given env or eval_env
        't': env.clock.get('t'),
        'wall_t': wall_t,
        'opt_step': self.env.clock.get('opt_step'),
        'frame': frame,
        'fps': fps,
        'total_reward': np.nanmean(self.total_reward),  # guard for vec env
        'avg_return': np.nan,  # update outside
        'avg_len': np.nan,  # update outside
        'avg_success': np.nan,  # update outside
        'loss': self.loss,
        'lr': self.get_mean_lr(),
        'explore_var': self.explore_var,
        'entropy_coef': self.entropy_coef if hasattr(self, 'entropy_coef') else np.nan,
        'entropy': self.mean_entropy,
        'grad_norm': self.mean_grad_norm,
    }, dtype=np.float32)
    assert all(col in self.train_df.columns for col in row.index), f'Mismatched row keys: {row.index} vs df columns {self.train_df.columns}'
    return row
def build_model_tails(self, out_dim, out_layer_activation):
    '''Build each model_tail. These are stored as Sequential models in model_tails'''
    if not ps.is_list(out_layer_activation):
        out_layer_activation = [out_layer_activation] * len(out_dim)
    model_tails = nn.ModuleList()
    if ps.is_empty(self.tail_hid_layers):
        for out_d, out_activ in zip(out_dim, out_layer_activation):
            tail = net_util.build_fc_model([self.body_hid_layers[-1], out_d], out_activ)
            model_tails.append(tail)
    else:
        assert len(self.tail_hid_layers) == len(out_dim), 'Hydra tail hid_params inconsistent with number out dims'
        for out_d, out_activ, hid_layers in zip(out_dim, out_layer_activation, self.tail_hid_layers):
            dims = hid_layers
            model_tail = net_util.build_fc_model(dims, self.hid_layers_activation)
            tail_out = net_util.build_fc_model([dims[-1], out_d], out_activ)
            model_tail.add_module(str(len(model_tail)), tail_out)
            model_tails.append(model_tail)
    return model_tails
def compare_candidate_ids_tensor(expected, result):
    assert isinstance(result, torch.Tensor)
    expected_candidate_ids = set(expected.numpy())
    result_candidate_ids = set(result.numpy())
    assert _.is_empty(expected_candidate_ids - result_candidate_ids)
    for generated_candidate in result_candidate_ids - expected_candidate_ids:
        assert generated_candidate not in expected_candidate_ids
    return True
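# Hedged usage sketch for compare_candidate_ids_tensor above; the tensors are
# invented. Every expected candidate id must be present in the result, while
# extra generated candidates are tolerated as long as they are genuinely new ids.
def _example_compare_candidate_ids_tensor():
    expected = torch.tensor([1, 2, 3])
    result = torch.tensor([1, 2, 3, 99])
    assert compare_candidate_ids_tensor(expected, result)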
def build_fc_layers(self, fc_hid_layers):
    '''
    Builds all of the fc layers in the network and stores them in a Sequential model
    '''
    assert not ps.is_empty(fc_hid_layers)
    dims = [self.conv_out_dim] + fc_hid_layers
    fc_model = net_util.build_sequential(dims, self.hid_layers_activation)
    return fc_model
def get_loss_fn(cls, loss_param):
    '''Helper to parse loss param and construct loss_fn for net'''
    loss_param = loss_param or {}
    loss_fn = getattr(F, _.get(loss_param, 'name', 'mse_loss'))
    loss_param = _.omit(loss_param, 'name')
    if not _.is_empty(loss_param):
        loss_fn = partial(loss_fn, **loss_param)
    return loss_fn
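# Hedged usage sketch for get_loss_fn above. The cls argument is unused by the
# body, so None is passed purely for illustration; the loss_param values are
# made-up examples. An empty param falls back to F.mse_loss, and any keys
# beyond 'name' are bound as keyword arguments via functools.partial.
def _example_get_loss_fn():
    mse = get_loss_fn(None, None)  # defaults to F.mse_loss
    summed = get_loss_fn(None, {'name': 'mse_loss', 'reduction': 'sum'})
    assert float(summed(torch.ones(3), torch.zeros(3))) == 3.0
    return mse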
async def handle_service(service: object, remote: str):
    if pydash.is_empty(service):
        raise Exception({
            'message': 'Not found',
            'status_code': 404
        })
    if service['state'] in [ServiceState.DOWN.name, ServiceState.OFF.name]:
        raise Exception({
            'message': f"Service is currently {service['state']}",
            'status_code': 503
        })
    if (not pydash.is_empty(service['whitelisted_hosts']) and remote not in service['whitelisted_hosts']) \
            or (not pydash.is_empty(service['blacklisted_hosts']) and remote in service['blacklisted_hosts']):
        raise Exception({
            'message': 'Unauthorized',
            'status_code': 401
        })
def get_lr_scheduler(cls, lr_scheduler_spec):
    '''Helper to parse lr_scheduler param and construct Pytorch optim.lr_scheduler'''
    if ps.is_empty(lr_scheduler_spec):
        lr_scheduler = NoOpLRScheduler()
    else:
        LRSchedulerClass = getattr(torch.optim.lr_scheduler, lr_scheduler_spec['name'])
        lr_scheduler_spec = ps.omit(lr_scheduler_spec, 'name')
        lr_scheduler = LRSchedulerClass(cls.optim, **lr_scheduler_spec)
    return lr_scheduler
def get_page_id_order(cursor):
    cursor.execute('select id from pages')
    page_ids = []
    while True:
        results = cursor.fetchmany(10000)
        if _.is_empty(results):
            break
        page_ids.extend([row['id'] for row in results])
    shuffle(page_ids)
    return page_ids
def test_parse_xml_docs_fbis():
    doc_path = './tests/fixtures/fbis_sample'
    doc_lookup = f.parse_xml_docs(doc_path)
    assert len(doc_lookup) == 5
    assert _.is_empty(set(doc_lookup.keys()) - set([
        'FBIS3-10491', 'FBIS3-10397', 'FBIS3-10243', 'FBIS3-10082', 'FBIS3-5'
    ]))
    assert all([len(text) > 1000 for text in doc_lookup.values()])
def __iter__(self):
    while self.page_ctr < len(self.page_id_order) or not _.is_empty(self.ids_from_last_page):
        if (self.limit is not None) and (self.num_mentions_seen >= self.limit):
            return
        if self.use_fast_sampler:
            self.num_mentions_seen += self.batch_size
            yield [None] * self.batch_size
            continue
        batch = self._get_next_batch()
        yield batch
        self.num_mentions_seen += len(batch)
def best_match(entities: list):
    best = {'regex_groups': ()}
    for entity in entities:
        if pydash.has(entity, 'regex_groups') and len(entity['regex_groups']) > len(best['regex_groups']):
            best = entity
    return not pydash.is_empty(best['regex_groups']) and best or None
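# Hedged usage sketch for best_match above; the entity dicts are invented for
# illustration. The entity whose 'regex_groups' tuple has the most captures
# wins, and None is returned when no entity captured anything.
def _example_best_match():
    entities = [
        {'path': '/users', 'regex_groups': ()},
        {'path': '/users/:id', 'regex_groups': ('42',)},
        {'path': '/users/:id/posts/:post_id', 'regex_groups': ('42', '7')},
    ]
    assert best_match(entities)['path'] == '/users/:id/posts/:post_id'
    assert best_match([{'path': '/users', 'regex_groups': ()}]) is None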
def _execute_register(device: dict, created: str, log: list):
    """
    Tries to POST the device and updates the `device` dict with the resource from
    the database; if the device could not be uploaded, the `device` param will
    contain the database version of the device, not the input one. This is because
    the majority of the information of a device is immutable (specifically the
    fields used to compute the ETAG).

    :param device: Input device. It is replaced (keeping the reference) with the db version.
    :param created: Set the _created value to be the same for the device as for the register.
    :param log: A log where to append the resulting device if execute_register has been successful.
    :raise InnerRequestError: Any internal error in the POST that is not about the device already existing.
    """
    new = True
    try:
        if created:
            device['created'] = created
        db_device = execute_post_internal(Naming.resource(device['@type']), device)
    except InnerRequestError as e:
        new = False
        try:
            db_device = _get_existing_device(e)
            # We add a benchmark. todo: move to another place?
            device['_id'] = db_device['_id']
            ComponentDomain.benchmark(device)
            external_synthetic_id_fields = pick(device, *DeviceDomain.external_synthetic_ids)
            # If the db_device was a placeholder
            # we want to override it with the new device
            if db_device.get('placeholder', False):
                # Eve does not generate defaults from sub-resources,
                # and we really need the placeholder default set, especially when
                # discovering a device
                device['placeholder'] = False
                # We create hid when we validate (wrong thing) so we need to manually set it here,
                # as we won't validate in this db operation
                device['hid'] = DeviceDomain.hid(device['manufacturer'],
                                                 device['serialNumber'],
                                                 device['model'])
                DeviceDomain.update_one_raw(db_device['_id'], {'$set': device})
            elif not is_empty(external_synthetic_id_fields):
                # External synthetic identifiers are not intrinsically inherent
                # to devices, and thus can be added later in other Snapshots.
                # Note that the device POST and _get_existing_device() have already validated those ids.
                DeviceDomain.update_one_raw(db_device['_id'],
                                            {'$set': external_synthetic_id_fields})
        except DeviceNotFound:
            raise e
    else:
        log.append(db_device)
    device.clear()
    device.update(db_device)
    device['new'] = new  # Note that the device is 'cleared' before
    return db_device
def test_is_empty(case, expected):
    assert _.is_empty(case) == expected