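Every snippet in this listing leans on funcy's cat (imported variously as cat, fn.cat, fy.cat, or funcy.cat) to flatten one level of nesting. A minimal sketch of its behavior, assuming a current funcy where cat() returns an iterator and lcat() a list:

from funcy import cat, lcat

# lcat is eager and returns a list; cat is lazy and must be consumed.
assert lcat([[1, 2], [3], []]) == [1, 2, 3]
assert list(cat({'a': [1], 'b': [2, 3]}.values())) == [1, 2, 3]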
def game_to_milp(g: Game, robust=True, counter_examples=None):
    # TODO: implement counter_example encoding
    if not counter_examples:
        counter_examples = [{}]

    model = Model()
    store = keydefaultdict(lambda x: rob_encode.z(x, g))

    # Add counter examples to store
    for i, ce in enumerate(counter_examples):
        store.update(counter_example_store(g, ce, i))

    # Encode each scenario.
    scenarios = [
        create_scenario(g, i) for i, ce in enumerate(counter_examples)
    ]
    constraints, objs = zip(*(encode_game(g2, store) for g2 in scenarios))

    # Objective is to maximize the minimum robustness of the scenarios.
    if len(objs) > 1:
        obj = stl.andf(*objs)
        constraints = chain(
            rob_encode.encode(obj, store, 0), fn.cat(constraints))
    else:
        obj = objs[0]
        constraints = fn.cat(constraints)

    for i, (constr, kind) in enumerate(constraints):
        if constr is True:
            continue
        add_constr(model, constr, kind, i)

    # TODO: support alternative objective functions
    J = store[obj][0] if isinstance(store[obj], tuple) else store[obj]
    model.objective = Objective(J, direction='max')
    return model, store
async def parse_cityescape() -> Iterator[Item]:
    page = await client.get('https://cityescape.ru/')
    tree = html.fromstring(page.text.encode())
    items = cat(
        x.xpath('a/@href')
        for x in tree.xpath(css('.menu-item-object-post'))
        # 'Ожидается' means "coming soon": skip unannounced tours
        if 'Ожидается' not in x.text_content())
    return cat(await gather_chunks(5, *map(parse_page, items)))
def _extract_atom_values(
    cls, element: Any, prefix: PathType = ()
) -> Iterable[Tuple[PathType, Any]]:
    """Convert nested structure into flat list with tuple capturing nested paths."""
    if isinstance(element, Mapping):
        return funcy.cat(
            cls._extract_atom_values(v, prefix + (k,))
            for k, v in element.items()
        )
    if isinstance(element, list):
        return funcy.cat(
            cls._extract_atom_values(v, prefix + (str(i),))
            for i, v in enumerate(element)
        )
    return [(prefix, element)]
def to_signal(ts_mapping) -> DiscreteSignal:
    if isinstance(ts_mapping, DiscreteSignal):
        return ts_mapping
    start = min(fn.pluck(0, fn.cat(ts_mapping.values())))
    signals = (signal(v, start, OO, tag=k) for k, v in ts_mapping.items())
    return reduce(op.or_, signals)
def validate(weeks):
    # each week: 6 games
    len_six = lambda pair: len(pair[1]) == 6
    # materialize: funcy's remove() is lazy
    bad_teams = list(funcy.remove(len_six, weeks.items()))
    if bad_teams:
        print("have bad teams!!!")
        for week, teams in bad_teams:
            print(week, ':', teams)
        sys.exit(1)

    def tally(hg, game):
        hg[game[1]] += 1
        return hg

    all_games = funcy.cat(weeks.values())
    home_games = reduce(tally, all_games, collections.Counter())
    bad_homes = list(funcy.remove(lambda kv: kv[1] == 6, home_games.items()))
    if bad_homes:
        print("have bad home game count!!!")
        for owner, homes in bad_homes:
            print(owner, ':', homes)
        sys.exit(1)
    return
def get_annotations(case_query, control_query, modifier_query=""):
    # Fetch all relevant data
    queries = [case_query, control_query, modifier_query]
    tokens = set(cat(re_all(r'[a-zA-Z]\w*', query) for query in queries))
    df = query_tags_annotations(tokens)

    # Make tag columns
    df.tag_name = df.tag_name.str.lower()
    df.annotation = df.annotation.str.lower()
    for tag in tokens:
        tag_name = tag.lower()
        df[tag_name] = df[df.tag_name == tag_name].annotation

    # Select only cells with filled annotations
    df = df.drop(['tag_name', 'annotation'], axis=1)
    df = df.groupby(['sample_id', 'series_id', 'platform_id',
                     'gsm_name', 'gpl_name'], as_index=False).first()
    df = df.convert_objects(convert_numeric=True)

    # Apply case/control/modifier
    if modifier_query:
        df = df.query(modifier_query.lower())
    case_df = df.query(case_query.lower())
    control_df = df.query(control_query.lower())

    # Set 0 and 1 for analysis
    overlap_df = df.ix[set(case_df.index).intersection(set(control_df.index))]
    df['sample_class'] = None
    df['sample_class'].ix[case_df.index] = 1
    df['sample_class'].ix[control_df.index] = 0
    df['sample_class'].ix[overlap_df.index] = -1

    return df.dropna(subset=["sample_class"])
def remove(self, search):
    if search not in self.metadata.get('searches', {}):
        return False, "This search is not in"
    self.metadata['searches'].pop(search)
    self.metadata['ids'] = list(set(cat(
        s['ids'] for s in self.metadata['searches'].values())))
    return True, ""
def _drop_duplicates(self):
    """Remove duplicate documents."""
    coll = self.__set_collection()
    c = coll.aggregate([
        {"$group": {
            "_id": {'date': '$date'},
            "count": {'$sum': 1},
            "dups": {'$addToSet': '$_id'},
        }},
        {'$match': {'count': {"$gt": 1}}},
    ])
    data = [i for i in c]
    duplicates = fy.walk(self.__get_dups_id, data)
    dups_id_list = fy.cat(duplicates)
    for i in dups_id_list:
        coll.delete_one({'_id': i})
    print("OK, duplicates dropped! Done!")
def _to_aig(self):
    gate_order, latch_order = self.eval_order_and_gate_lookup

    lookup = fn.merge(
        {0: aig.ConstFalse()},
        {_to_idx(l): aig.Input(n) for n, l in self.inputs.items()},
        {_to_idx(l): aig.LatchIn(n)
         for n, (l, _, init) in self.latches.items()},
    )
    latches = set()
    and_dependencies = {i: (l, r) for i, l, r in self.gates}
    for gate in fn.cat(gate_order):
        if _to_idx(gate) in lookup:
            continue
        inputs = and_dependencies[gate]
        sources = [_polarity(i)(lookup[_to_idx(i)]) for i in inputs]
        lookup[_to_idx(gate)] = aig.AndGate(*sources)

    latch_dependencies = {
        i: (n, dep) for n, (i, dep, _) in self.latches.items()
    }
    for gate in fn.cat(latch_order):
        assert _to_idx(gate) in lookup
        if not isinstance(lookup[_to_idx(gate)], aig.LatchIn):
            continue
        name, dep = latch_dependencies[gate]
        source = _polarity(dep)(lookup[_to_idx(dep)])
        latches.add((name, source))

    def get_output(v):
        idx = _to_idx(v)
        return _polarity(v)(lookup[idx])

    top_level = ((k, get_output(v)) for k, v in self.outputs.items())
    return aig.AIG(
        inputs=frozenset(self.inputs),
        node_map=frozenset(top_level),
        latch_map=frozenset(latches),
        latch2init=frozenset(
            (n, bool(init)) for n, (_, _, init) in self.latches.items()),
        comments=self.comments)
def get_samples_columns(samples):
    preferred = [
        'id', 'description', 'characteristics_ch1', 'characteristics_ch2'
    ]
    exclude = ['attrs', 'supplementary_file', 'geo_accession']

    columns = distinct(cat(s.keys() for s in samples))
    return lift(preferred, lwithout(columns, *exclude))
def get_analysis_df(case_query, control_query, modifier_query=""):
    # Fetch all relevant data
    queries = [case_query, control_query, modifier_query]
    tokens = set(cat(re_all(r'[a-zA-Z]\w*', query) for query in queries))

    df = pd.read_sql_query('''
            SELECT sample_id, sample.gsm_name, annotation,
                   series_annotation.series_id, series.gse_name,
                   series_annotation.platform_id, platform.gpl_name,
                   tag.tag_name
            FROM sample_annotation
            JOIN sample ON (sample_annotation.sample_id = sample.id)
            JOIN series_annotation
                ON (sample_annotation.serie_annotation_id = series_annotation.id)
            JOIN platform ON (series_annotation.platform_id = platform.id)
            JOIN tag ON (series_annotation.tag_id = tag.id)
            JOIN series ON (series_annotation.series_id = series.id)
            WHERE tag.tag_name ~* %(tags)s
        ''', conn,
        params={'tags': '^(%s)$' % '|'.join(map(re.escape, tokens))})

    # Make tag columns
    df.tag_name = df.tag_name.str.lower()
    df.annotation = df.annotation.str.lower()
    for tag in tokens:
        tag_name = tag.lower()
        df[tag_name] = df[df.tag_name == tag_name].annotation

    # Select only cells with filled annotations
    df = df.drop(['tag_name', 'annotation'], axis=1)
    df = df.groupby(['sample_id', 'series_id', 'platform_id',
                     'gsm_name', 'gpl_name'], as_index=False).first()
    df = df.convert_objects(convert_numeric=True)

    # Apply case/control/modifier
    if modifier_query:
        df = df.query(modifier_query.lower())
    case_df = df.query(case_query.lower())
    control_df = df.query(control_query.lower())

    # Set 0 and 1 for analysis
    overlap_df = df.ix[set(case_df.index).intersection(set(control_df.index))]
    df['sample_class'] = None
    df['sample_class'].ix[case_df.index] = 1
    df['sample_class'].ix[control_df.index] = 0
    df['sample_class'].ix[overlap_df.index] = -1

    return df.dropna(subset=["sample_class"])
def _mygene_fetch(queries, scopes, specie):
    # To retry or ignore only one chunk on error
    @ignore(requests.HTTPError, default=[])
    @log_errors(lambda msg: cprint(msg, 'red'), stack=False)
    @retry(10, errors=requests.HTTPError, timeout=lambda n: 5 * 1.4**n)
    @log_errors(lambda msg: cprint(msg, 'yellow'), stack=False)
    def querymany(qs):
        try:
            return mg.querymany(qs, scopes=scopes,
                                fields=['entrezgene', 'symbol'],
                                species=specie, email='*****@*****.**',
                                verbose=False)
        except requests.HTTPError as e:
            # Do not retry on Bad Request
            if e.response.status_code == 400:
                return []
            raise

    cprint('> Going to query %d genes in %s...' % (len(queries), scopes), 'cyan')
    cprint('> sample queries: %s' % ', '.join(take(8, queries)), 'cyan')

    # Read cache
    prefix = '%s-%s:' % (SPECIE_PREFIXES[specie], PREFIXES[scopes])
    keys = [prefix + q for q in queries]
    res = {k: pickle.loads(v) if v else ''
           for k, v in zip(queries, mget(keys))
           if v is not None}
    if res:
        queries = set(queries) - set(res)
        print('Got %d from cache, %d queries left' % (len(res), len(queries)))

    if queries:
        mg = mygene.MyGeneInfo()
        # Looks like sorting groups bad queries
        data = cat(querymany(qs)
                   for qs in chunks(500, tqdm(sorted(queries), leave=False)))
        new = {str(item['query']): (item['entrezgene'], item['symbol'])
               for item in data
               if not item.get('notfound')
               and 'entrezgene' in item and 'symbol' in item}
        res.update(new)

        # Cache results and fails
        pipe = redis_client.pipeline(transaction=False)
        for k, v in new.items():
            pipe.setex(prefix + k, CACHE_TIMEOUT, pickle.dumps(v, -1))
        for k in queries - set(new):
            pipe.setex(prefix + k, CACHE_TIMEOUT, '')
        pipe.execute()

    res = {k: v for k, v in res.items() if v != ''}
    cprint('-> Got %d matches' % len(res), 'yellow')
    return res
def _fleiss_kappa(sample_sets):
    # Materialize: cat() is lazy in current funcy and we iterate twice below.
    all_samples_annos = list(cat(sample_sets))
    categories = distinct(sv.annotation or '' for sv in all_samples_annos)
    category_index = {c: i for i, c in enumerate(categories)}

    stats = defaultdict(lambda: [0] * len(categories))
    for sv in all_samples_annos:
        stats[sv.sample_id][category_index[sv.annotation or '']] += 1

    return fleiss_kappa(stats.values())
async def parse_pro_adventure() -> Iterator[Item]:
    all_items: List[Tuple[Item, ...]] = []
    for i in range(1, 42):
        page = await client.get(f'https://pro-adventure.ru/tours?page={i}')
        items = tuple(parse_page(page.text))
        if not items:
            # No active item found on page
            break
        all_items.append(items)
    return cat(all_items)
def encode_and_run(g, counter_examples=None):
    fs, store = encode_games(g, counter_examples)
    f = reduce(op.and_, fs, TRUE())
    model = get_model(f)
    if model is None:
        return Result(False, None, None, counter_examples)
    sol = {v: extract_ts(v, model, g, store) for v in fn.cat(g.model.vars)}
    return Result(True, 0, sol, counter_examples)
def used_cache(
    self,
    targets=None,
    all_branches=False,
    with_deps=False,
    all_tags=False,
    all_commits=False,
    remote=None,
    force=False,
    jobs=None,
    recursive=False,
):
    """Get the stages related to the given target and collect
    the `info` of its outputs.

    This is useful to know what files from the cache are _in use_
    (namely, a file described as an output on a stage).

    The scope is, by default, the working directory, but you can use
    `all_branches`/`all_tags`/`all_commits` to expand the scope.

    Returns:
        A dictionary with Schemes (representing output's location) mapped
        to items containing the output's `dumpd` names and the output's
        children (if the given output is a directory).
    """
    from dvc.cache import NamedCache

    cache = NamedCache()

    for branch in self.brancher(
        all_branches=all_branches,
        all_tags=all_tags,
        all_commits=all_commits,
    ):
        targets = targets or [None]

        pairs = cat(
            self.collect_granular(
                target, recursive=recursive, with_deps=with_deps
            )
            for target in targets
        )

        suffix = "({})".format(branch) if branch else ""
        for stage, filter_info in pairs:
            used_cache = stage.get_used_cache(
                remote=remote,
                force=force,
                jobs=jobs,
                filter_info=filter_info,
            )
            cache.update(used_cache, suffix=suffix)

    return cache
def _update_total(self, fillevent):
    """Uses the latest profit data, so call this after update_profit."""
    f = fillevent
    d = dict(date=f.date)
    t_re_profit = sum(
        i['re_profit'] for i in fy.cat(self.re_profit_dict.values()))
    t_profit = t_re_profit + self.unre_profit_dict[
        f.instrument][-1]['unre_profit']
    if f.target in ['Forex', 'Futures', 'Stock']:
        d['total'] = self.initial_cash + t_profit
        self.total_list.append(d)
def extract_queries(lines):
    lines = remove(r'^(IMAGE:\d+|--[\w>-]+)$', lines)
    queries = cat(re_iter(r'[\w+.-]+', l) for l in lines)
    queries = remove(r'_at$|^\d+-\d+$', queries)  # No such thing
    return queries

    # NOTE: unreachable legacy cleanup kept from the original (Python 2 era):
    # Clean unicode for mygene
    # http://stackoverflow.com/questions/15321138/removing-unicode-u2026-like-characters
    return [
        q.decode('unicode_escape').encode('ascii', 'ignore') for q in queries
    ]
def adj_list(concept_class, parallel=True):
    if parallel:
        from pathos.multiprocessing import ProcessingPool
        pool = ProcessingPool()
        mapper = pool.map
    else:
        mapper = map

    edge_generator = fn.cat(mapper(get_edges, possible_edges(concept_class)))
    edge_lists = fn.walk_values(set, fn.group_values(edge_generator))
    return defaultdict(set, edge_lists)
def encode_dynamics(g, store):
    A, B, C = g.model.dyn
    dt = g.model.dt

    # Adjust for discrete time
    A = np.eye(len(g.model.vars.state)) + dt * A
    B = dt * B
    C = dt * C

    times = g.times
    yield from fn.cat(
        _encode_dynamics(A, B, C, g.model.vars, store, t)
        for t in times[:-1])
def get_times(x, tau, lo=None, hi=None):
    end = min(v.domain.end() for v in x.values())
    hi = hi + tau if hi + tau <= end else end
    lo = lo + tau if lo + tau <= end else end
    if lo > hi:
        return []
    elif hi == lo:
        return [lo]

    all_times = fn.cat(v.slice(lo, hi).items() for v in x.values())
    return sorted(set(fn.pluck(0, all_times)))
def tee(wordlen, iomap):
    imap = BundleMap({i: wordlen for i in iomap})
    omap = BundleMap({o: wordlen for o in fn.cat(iomap.values())})

    blasted = defaultdict(list)
    for i, outs in iomap.items():
        for o in outs:
            for k, v in zip(imap[i], omap[o]):
                blasted[k].append(v)

    return aigbv.AIGBV(imap=imap, omap=omap, aig=aiger.tee(blasted))
def encode_and_run(g: Game, robust=True, counter_examples=None):
    model, store = game_to_milp(g, robust, counter_examples)
    status = model.optimize()
    if status in ('infeasible', 'unbounded'):
        return Result(False, None, None, counter_examples)
    elif status == "optimal":
        cost = model.objective.value
        sol = {v: extract_ts(v, model, g, store) for v in fn.cat(g.model.vars)}
        return Result(cost > 0, cost, sol, counter_examples)
    else:
        raise NotImplementedError((model, status))
def tee(wordlen, iomap):
    input_map = frozenset((i, named_indexes(wordlen, i)) for i in iomap)
    output_map = frozenset(
        (o, named_indexes(wordlen, o)) for o in fn.cat(iomap.values()))
    blasted_iomap = fn.merge(*(
        {_name_idx(iname, idx): [_name_idx(o, idx) for o in iomap[iname]]}
        for iname, idx in product(iomap, range(wordlen))))

    return aigbv.AIGBV(
        aig=aiger.tee(blasted_iomap),
        input_map=input_map,
        output_map=output_map,
    )
def get_active_port():
    devices = dict(ZTE=Zte, HW=Huawei)
    action = 'get_active_port'
    reader = csv.reader(open('deviceList.csv'))
    next(reader)
    olts = (line[:2] for line in reader)
    funcs = map(
        lambda x: partial(getattr(devices.get(x[1]), action), x[0]), olts)
    with Pool(64) as p:
        rslt = p.map(lambda f: f(), funcs)
    rslt = cat(rslt)
    with open('up_port.csv', 'w') as fb:
        writer = csv.writer(fb)
        writer.writerows(rslt)
async def parse_pik(batch=10) -> Iterator[Item]:
    items = {}
    start = 1
    while True:
        coros = map(get_page, range(start, start + batch))
        done = False
        for item in cat(await asyncio.gather(*coros)):
            done = done or item['id'] in items
            items[item['id']] = item
        if done:
            return map(parse_item, items.values())
        start += batch
def update_tweet_db():
    """
    Use this command after scraping a fresh batch of results using scrape.
    This adds any new tweets from the results to the tweet db, using IDs.
    """
    tweets_w_id = fy.cat(extract_tweets_and_ids(row) for row in TABLE.all())
    for text, id in tweets_w_id:
        ID_TABLE.upsert(dict(id=id, text=text), keys=['id', 'tweet'])
    click.echo("tweet text database updated.")
def _drop_duplicates(self):
    """Remove duplicate documents."""
    coll = self._set_collection()
    c = coll.aggregate([
        # Group by 'id' to find duplicates:
        # $addToSet appends while filtering out repeats,
        # whereas $push would append without filtering.
        {"$group": {
            "_id": {'id': '$id'},
            "count": {'$sum': 1},
            "dups": {'$addToSet': '$_id'},
        }},
        {'$match': {'count': {"$gt": 1}}},
    ])
    data = [i for i in c]
    duplicates = fy.walk(self.__get_dups_id, data)
    dups_id_list = fy.cat(duplicates)
    for i in dups_id_list:
        coll.delete_one({'_id': i})
def _update_total(self, fillevent):
    """Uses the latest profit data, so call this after update_profit."""
    f = fillevent
    d = dict(date=f.date)
    t_re_profit = sum(
        i['re_profit'] for i in fy.cat(self.re_profit_dict.values()))
    cur_unre_profit = self.unre_profit_dict[f.instrument][-1]
    t_profit = t_re_profit + cur_unre_profit['unre_profit']
    t_profit_high = t_re_profit + cur_unre_profit['unre_profit_high']
    t_profit_low = t_re_profit + cur_unre_profit['unre_profit_low']
    t_commission = self.commission_dict[f.instrument][-1]['commission']

    d['total'] = self.initial_cash + t_profit - t_commission
    d['total_high'] = self.initial_cash + t_profit_high - t_commission
    d['total_low'] = self.initial_cash + t_profit_low - t_commission
    self.total_list.append(d)
def encode_game(g, store, ce=None):
    # obj, *non_obj = relabel_specs(g, counter_examples)
    obj, *non_obj = g.specs
    obj = stl.utils.discretize(obj, dt=g.model.dt, distribute=True)
    non_obj = {
        stl.utils.discretize(phi, dt=g.model.dt, distribute=True)
        for phi in non_obj if phi != stl.TOP
    }

    # Constraints
    robustness = rob_encode.encode(obj, store, 0)  # TODO
    dynamics = rob_encode.encode_dynamics(g, store)
    other = cat(bool_encode.encode(psi, store, 0) for psi in non_obj)

    return fn.chain(robustness, dynamics, other), obj
def get_scheduled_component_ids(client, pipeline_id):
    """
    Return ids of component objects of the pipeline
    which are in "SCHEDULED" state.
    """
    paginator = client.get_paginator('query_objects')
    response_iterator = paginator.paginate(
        pipelineId=pipeline_id,
        query={
            "selectors": [{
                "fieldName": "@status",
                "operator": {
                    "type": "EQ",
                    "values": ["SCHEDULED"]
                }
            }]
        },
        sphere="COMPONENT")
    return list(funcy.cat(response["ids"] for response in response_iterator))
def output_csv(output_csv):
    """Write a csv listing of all entrants."""
    # Materialize: cat() is lazy in current funcy and len() is taken below.
    entrants = list(fy.cat(
        extract_entrants(row['page_html']) for row in TABLE.all()))

    # We could technically use the first entry's keys,
    # but I like this column order.
    keys = [
        'bib_number',
        'name',
        'age',
        'gender',
        'city',
        'state',
        'county',
        'origin',
    ]

    writer = csv.DictWriter(output_csv, keys)
    writer.writeheader()
    writer.writerows(entrants)
    click.echo('Wrote %d entrants.' % len(entrants))