def test_df_schema_flags(csvfile, flags):
    """Every column of the sample CSV yields one 'yesno' flag schema entry.

    NOTE(review): assumes `res` is a sequence of single-key mappings whose
    key is the column name — inferred from the glom specs below; confirm
    against `df_schema_flags`.
    """
    df = csv_sample(csvfile)
    res = df_schema_flags(df, flags)
    # every generated schema entry is a yes/no question
    assert glom(res, ["datatype"]) == ["yesno"] * len(res)
    # NOTE: index 0 relies on deterministic dictionary order
    assert glom(res, [(tuple, "0")]) == df.columns.to_list()
    # flag labels are rendered from the instance name and the column name
    assert glom(res, [(T.values(), tuple, "0")]) == [
        _FLAG_FMT.format(name=flags.instanceName, col=col)
        for col in df.columns
    ]
def test_df_schema_dtype(csvfile, flags):
    """Flagged columns of the sample CSV yield dtype-choice schema entries.

    NOTE(review): assumes `set_flags` returns the (possibly updated) flags
    object plus the list of flagged column names — inferred from usage.
    """
    df = csv_sample(csvfile)
    flags, flagged_cols = set_flags(flags, df.columns)
    res = df_schema_dtype(df, flags)
    # each entry offers the full set of supported datatypes as choices
    assert glom(res, ["choices"]) == [_TYPES] * len(res)
    # NOTE: index 0 relies on deterministic dictionary order
    assert glom(res, [(tuple, "0")]) == flagged_cols
    # labels are rendered from the instance name and the flagged column name
    assert glom(res, [(T.values(), tuple, "0")]) == [
        _FLAG_FMT.format(name=flags.instanceName, col=col)
        for col in flagged_cols
    ]
def get_room_access_codes(url=BBB_ACCESS_LIST_URL, timeout=10):
    """Fetch the room access-code list and key it by upper-cased room name.

    Parameters
    ----------
    url : str
        Endpoint returning a JSON document with a ``rooms`` mapping; each
        room value is expected to be a (name, url, access_code) sequence.
    timeout : float
        Seconds to wait for the HTTP response (new, defaults to 10).

    Returns
    -------
    dict
        ``{ROOM_NAME: {'url': ..., 'access_code': ...}}`` — empty on any
        non-200 response.
    """
    # requests has NO default timeout; without one a stalled server would
    # hang this call forever.
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return {}
    spec = (
        'rooms',
        [T.values()],
        [tuple],
        [{T[0].upper(): {'url': T[1], 'access_code': T[2]}}],
        merge,  # fold the per-room dicts into a single mapping
    )
    return glom(r.json(), spec)
def _spec_to_type(
        key: str, value: Dict[str, Dict],
        bases: Tuple[Type, ...] = ()) -> Type:
    """Build a custom config type object from a type specification.

    Parameters
    ----------
    key : str
        Key name the specification belongs to; used to derive the name of
        the generated type (``{key}_t``).
    value : Dict[str, Dict]
        Type specification of the form::

            {
                "key1": {"type": <type1>, "validator": <validator1>},
                "key2": {"type": <type2>, "validator": <validator2>},
                # ...
            }

    bases : Tuple[Type, ...]
        Base classes for the generated type.

    Returns
    -------
    Type
        The generated type, with validators attached to its namespace.
    """
    type_k = _type_spec[0]
    dflt_k = _type_spec[6]
    # Keep only entries that declare a type; the stable sort pushes entries
    # carrying a default value to the end while otherwise preserving the
    # original insertion order.
    typed_items = [(k, v) for k, v in value.items() if type_k in v]
    typed_items.sort(key=lambda kv: dflt_k in kv[1])
    field_spec = [(
        {
            "k": "0",
            "v": f"1.{type_k}",
            # TODO: non-trivial defaults like mutable types
            "d": Coalesce(f"1.{dflt_k}", default=SKIP),
        },
        T.values(),
        tuple,
    )]
    # (key, type[, default]) tuples, one per field
    fields = glom(typed_items, field_spec)
    # Collect every validator's items and flatten them into one namespace.
    validator_items = glom(
        value.values(),
        [(Coalesce("validator", default_factory=dict), T.items())],
    )
    ns = dict(chain.from_iterable(validator_items))
    return make_typedconfig(f"{key}_t", fields, namespace=ns, bases=bases)
def _spec_to_type(
        key: str, value: Dict[str, Dict],
        bases: Tuple[Type, ...] = ()) -> Type:
    """Create a custom dataconfig type object from a type specification.

    Parameters
    ----------
    key : str
        Key name the specification belongs to; used to derive the name of
        the generated type (``{key}_t``).
    value : Dict[str, Dict]
        Type specification of the form::

            {
                "key1": {"type": <type1>, "validator": <validator1>},
                "key2": {"type": <type2>, "validator": <validator2>},
                # ...
            }

    bases : Tuple[Type, ...]
        Base classes for the generated type.

    Returns
    -------
    Type
        The generated type, with validators attached to its namespace.
    """
    # Per-entry extraction: (key, type[, default]); entries with no default
    # simply drop the "d" slot via SKIP.
    extract_field = {
        "k": "0",
        "v": "1.type",
        # TODO: non-trivial defaults like mutable types
        "d": Coalesce("1.default", default=SKIP),
    }
    fields = glom(value.items(), [(extract_field, T.values(), tuple)])
    # Flatten every entry's validator dict into a single namespace mapping.
    validators = glom(
        value.values(),
        [(Coalesce("validator", default_factory=dict), T.items())],
    )
    ns = dict(chain.from_iterable(validators))
    return make_dataconfig(f"{key}_t", fields, namespace=ns, bases=bases)
def from_api(cls, campaign, timestamp=None):
    """Scan all of *campaign*'s articles via the wiki APIs and build a result object.

    Parameters
    ----------
    campaign
        Object providing ``article_title_list``, ``name``, ``lang`` and
        ``goals`` (a list of dicts with at least a ``name`` key).
    timestamp : datetime.datetime, optional
        Scan timestamp; defaults to "now".
        NOTE(review): ``utcnow()`` returns a *naive* datetime — confirm the
        rest of the pipeline expects naive UTC values.

    Returns
    -------
    An instance of *cls* with ``article_list``, ``campaign_results``,
    ``goal_results`` and ``article_results`` populated.
    """
    timestamp = timestamp if timestamp is not None else datetime.datetime.utcnow()
    ret = cls(campaign=campaign,
              timestamp=timestamp,
              campaign_results=None,
              goal_results=None,
              article_results=None)
    article_list = []
    article_title_list = campaign.article_title_list
    # progress-bar label: campaign name + timestamp with sub-second part dropped
    base_desc = 'Scanning %s @ %s' % (campaign.name,
                                      timestamp.isoformat().split('.')[0])
    article_title_list = tqdm(
        article_title_list,
        desc=base_desc,
        disable=None,  # autodisable on non-tty
        unit='article')

    def async_pta_update(pta, attr_func_map):
        # Fetch several attributes of `pta` concurrently: each greenlet
        # computes func(pta) and stores the result via setattr.
        jobs = []
        for attr, func in attr_func_map.items():
            _debug_log_func = tlog.wrap('debug')(func)
            # defaults bind the loop variables per-greenlet (avoids the
            # late-binding closure pitfall)
            cur = gevent.spawn(
                lambda pta=pta, attr=attr, func=_debug_log_func: setattr(
                    pta, attr, func(pta)))
            jobs.append(cur)
        # NOTE(review): a 20s timeout silently abandons unfinished jobs,
        # leaving those attributes unset — confirm this is intentional.
        gevent.wait(jobs, timeout=20)
        return

    for title in article_title_list:
        # keep the bar label in sync with the article being scanned
        new_desc = base_desc + ' ({:16.16})'.format(title)
        article_title_list.set_description(new_desc)
        pta = PTArticle(lang=campaign.lang, title=title, timestamp=timestamp)
        pta.talk_title = 'Talk:' + title
        # first round: revision ids for the article and its talk page
        async_pta_update(pta, {
            'rev_id': metrics.get_revid,
            'talk_rev_id': metrics.get_talk_revid
        })
        if pta.rev_id:
            # second round only when the article exists
            async_pta_update(
                pta, {
                    'templates': metrics.get_templates,
                    'talk_templates': metrics.get_talk_templates,
                    'assessments': metrics.get_assessments,
                    'citations': metrics.get_citations,
                    'wikidata_item': metrics.get_wikidata_item
                })
            pta.wikiprojects = metrics.get_wikiprojects(
                pta)  # relies on templates (no network)
        pta.results = eval_article_goals(pta, campaign.goals)
        article_list.append(pta)
    ret.article_list = article_list

    gres = {}  # goal results, keyed by slugified goal name
    for goal in campaign.goals:
        key = slugify(goal['name'])
        target_ratio = float(goal.get('ratio', 1.0))
        results = [a.results[key]['done'] for a in article_list]
        # TODO: average/median metric value
        done, not_done = partition(results)
        # TODO: need to integrate start state for progress tracking
        # empty not_done means everything (possibly nothing) is done
        ratio = 1.0 if not not_done else float(
            len(done)) / len(article_list)
        gres[key] = {
            'done_count': len(done),
            'not_done_count': len(not_done),
            'total_count': len(article_list),
            'ratio': ratio,
            'target_ratio': target_ratio,
            'key': key,
            'name': goal['name'],
            'desc': goal.get('desc'),
            'progress': ratio / target_ratio,
            'done': ratio >= target_ratio
        }
    # campaign totals: sum each counter across all goals
    ret.campaign_results = glom(
        gres, {
            'done_count': (T.values(), ['done_count'], sum),
            'not_done_count': (T.values(), ['not_done_count'], sum),
            'total_count': (T.values(), ['total_count'], sum)
        })
    ret.campaign_results['ratio'] = ret.campaign_results[
        'done_count'] / ret.campaign_results['total_count']
    ret.goal_results = gres
    ret.article_results = [attr.asdict(a) for a in article_list]
    return ret