def lexicon_features(fset, blend, cw1, cw2, lexicon_dict):
    sw1_d = keyfilter(lambda x: x == cw1, lexicon_dict)
    mother_entry = []
    father_entry = []
    pos_entry = []
    for k, v in sw1_d.items():
        mother_entry.append(v[1])
        father_entry.append(v[2])
        pos_entry.append(v[0])
    # print(pos_entry)
    fset['sw1_pos'] = '_'.join(pos_entry)
    # fset['sw1_father'] = '_'.join(father_entry)
    # fset['sw1_mother'] = '_'.join(mother_entry)

    sw2_d = keyfilter(lambda x: x == cw2, lexicon_dict)
    mother_entry = []
    father_entry = []
    pos_entry = []
    for k, v in sw2_d.items():
        mother_entry.append(v[1])
        father_entry.append(v[2])
        pos_entry.append(v[0])
    fset['sw2_pos'] = '_'.join(pos_entry)
    # fset['sw2_father'] = '_'.join(father_entry)
    # fset['sw2_mother'] = '_'.join(mother_entry)
    return fset
@toolz.curry
def start_thread(func, fn_arg, **kwargs):
    """
    Wrapper function to call ``func`` in a new thread.

    Every kwarg that fits the signature of ``func`` is plugged into
    ``func``; the others are used in the constructor of
    :class:`threading.Thread`.

    Note:
        The :meth:`toolz.curry` decorator enables partial evaluation
        without extra use of partial.

    Examples:
        >>> def f(x, y="y", z="z"):
        ...     print("Hello from", threading.current_thread().name)
        ...     print(x, y, z)
        >>> callback = start_thread(f, z="non_default_z", name="print_thread")
        >>> callback(2)
        Hello from print_thread
        2 y non_default_z
    """
    sig = inspect.signature(func)
    fn_parameters = sig.parameters
    fn_kwargs = toolz.keyfilter(lambda k: k in fn_parameters, kwargs)
    thread_kwargs = toolz.keyfilter(lambda k: k not in fn_parameters, kwargs)
    thread = threading.Thread(target=partial(func, fn_arg, **fn_kwargs),
                              **thread_kwargs)
    thread.start()
def resolve_conflicts(calls, puts, holdings):
    """
    :param calls: list of assets to buy
    :param puts: list of positions to sell
    :param holdings: dict, ledger holdings, asset: position
    :return: (direct_positives, direct_negatives, dual)
    """
    # Check whether the sids to buy conflict with the sids of the positions to
    # sell --- the main difference between a multi-strategy portfolio and
    # multiple strategies running in parallel
    positive_sids = [r.sid for r in calls] if calls else []
    negatives_sids = [p.asset.sid for p in puts] if puts else []
    union_sids = set(positive_sids) & set(negatives_sids)
    assert not union_sids, 'buying and selling the same sid on one day is not allowed'

    # asset tag name means pipeline_name
    call_proxy = {r.tag: r for r in calls} if calls else {}
    hold_proxy = {p.name: p for p in holdings.values()} if holdings else {}

    # Buy targets directly, funded by available capital
    extra = set(call_proxy) - set(hold_proxy)
    if extra:
        extra_mappings = keyfilter(lambda x: x in extra, call_proxy)
    else:
        extra_mappings = dict()
    extra_positives = list(extra_mappings.values())
    # print('engine extra_positives', extra_positives)

    # pipeline --- produces the same asset object (the algorithm adds to the
    # existing position automatically)
    common = set(call_proxy) & set(hold_proxy)
    increment_positives = [
        call_proxy[c] for c in common
        if call_proxy[c] == hold_proxy[c].asset
    ]
    # print('engine increment_positives', increment_positives)

    # direct_positives = set(extra_positives) | set(increment_positives)
    direct_positives = list(
        set(extra_positives) | set(increment_positives))
    # print('engine direct_positives', direct_positives)

    # Sell based on holdings --- two cases, keyed by pipeline name:
    # 1. sell directly, 2. sell then buy
    # Case 2 --- the same pipeline has both a buy and a sell action
    put_proxy = {r.name: r for r in puts} if puts else {}
    common_pipe = set(call_proxy) & set(put_proxy)
    if common_pipe:
        # duals (position, asset)
        conflicts = [
            name for name in common_pipe
            if put_proxy[name].asset == call_proxy[name]
        ]
        assert not conflicts, ValueError(
            'name : %r have conflicts between ump and pipe ' % conflicts)
        dual = [(put_proxy[name], call_proxy[name]) for name in common_pipe]
    else:
        dual = set()
    # print('engine dual', dual)

    # Case 1 --- sell directly
    negatives = set(put_proxy) - set(common_pipe)
    negative_puts = keyfilter(lambda x: x in negatives, put_proxy)
    direct_negatives = list(negative_puts.values())
    # print('engine direct_negatives', direct_negatives)

    # asset positions duals
    return direct_positives, direct_negatives, dual
def resource_bcolz(rootdir, **kwargs):
    if os.path.exists(rootdir):
        kwargs = keyfilter(carray_keywords.__contains__, kwargs)
        return ctable(rootdir=rootdir, **kwargs)
    else:
        if 'dshape' in kwargs:
            dtype = to_numpy_dtype(kwargs['dshape'])
            kwargs = keyfilter(carray_keywords.__contains__, kwargs)
            return ctable(np.empty(0, dtype), rootdir=rootdir, **kwargs)
        else:
            raise ValueError("File does not exist and no `dshape=` given")
def resource_bcolz(rootdir, **kwargs):
    if os.path.exists(rootdir):
        kwargs = keyfilter(keywords(ctable).__contains__, kwargs)
        return ctable(rootdir=rootdir, **kwargs)
    else:
        if 'dshape' in kwargs:
            dtype = to_numpy_dtype(kwargs['dshape'])
            kwargs = keyfilter(keywords(ctable).__contains__, kwargs)
            return ctable(np.empty(0, dtype), rootdir=rootdir, **kwargs)
        else:
            raise ValueError("File does not exist and no `dshape=` given")
def freq_features(w, w_split, freqd, pref=True):
    wsum, affsum = freqd[w], 0
    corpus_sum = sum(freqd.values())

    if pref:
        affsum = sum(
            keyfilter(lambda x: x.startswith(w_split), freqd).values())
    else:
        affsum = sum(keyfilter(lambda x: x.endswith(w_split), freqd).values())

    w1f = wsum / affsum if affsum > 0 else 0.0
    c1f = wsum / corpus_sum

    return [w1f, c1f]
def resource_sql(uri, *args, **kwargs):
    kwargs2 = keyfilter(keywords(sa.create_engine).__contains__, kwargs)
    engine = create_engine(uri, **kwargs2)
    ds = kwargs.get('dshape')
    schema = kwargs.get('schema')

    # we were also given a table name
    if args and isinstance(args[0], (str, unicode)):
        table_name, args = args[0], args[1:]
        metadata = metadata_of_engine(engine, schema=schema)

        with ignoring(sa.exc.NoSuchTableError):
            return attach_schema(
                sa.Table(table_name, metadata, autoload=True,
                         autoload_with=engine, schema=schema),
                schema)
        if ds:
            t = dshape_to_table(table_name, ds, metadata=metadata)
            t.create()
            return t
        else:
            raise ValueError("Table does not exist and no dshape provided")

    # We were not given a table name
    if ds:
        create_from_datashape(engine, ds, schema=schema)
    return engine
def resource_bcolz(uri, dshape=None, expected_dshape=None, **kwargs):
    if os.path.exists(uri):
        try:
            return ctable(rootdir=uri)
        except IOError:
            # __rootdirs__ doesn't exist because we aren't a ctable
            return carray(rootdir=uri)
    else:
        if not dshape:
            raise ValueError("Must specify either existing bcolz directory or"
                             " valid datashape")

        dshape = datashape.dshape(dshape)

        dt = datashape.to_numpy_dtype(dshape)
        shape_tail = tuple(map(int, dshape.shape[1:]))  # tail of shape
        if dshape.shape[0] == datashape.var:
            shape = (0,) + shape_tail
        else:
            shape = (int(dshape.shape[0]),) + shape_tail

        x = np.empty(shape=shape, dtype=dt)

        kwargs = keyfilter(keywords.__contains__, kwargs)
        expectedlen = kwargs.pop(
            'expectedlen',
            int(expected_dshape[0])
            if expected_dshape is not None and
            isinstance(expected_dshape[0], datashape.Fixed)
            else None)

        if datashape.predicates.isrecord(dshape.measure):
            return ctable(x, rootdir=uri, expectedlen=expectedlen, **kwargs)
        else:
            return carray(x, rootdir=uri, expectedlen=expectedlen, **kwargs)
def into(a, b, **kwargs):
    dialect = b.dialect.copy()
    del dialect['lineterminator']
    dates = [i for i, typ in enumerate(b.schema[0].types)
             if 'date' in str(typ)]
    schema = b.schema
    if '?' in str(schema):
        schema = dshape(str(schema).replace('?', ''))

    dtypes = valmap(to_numpy_dtype, schema[0].dict)

    datenames = [name for name in dtypes
                 if np.issubdtype(dtypes[name], np.datetime64)]

    dtypes = dict((k, v) for k, v in dtypes.items()
                  if not np.issubdtype(v, np.datetime64))

    if 'strict' in dialect:
        del dialect['strict']

    # Pass only keyword arguments appropriate for read_csv
    kws = keywords(pd.read_csv)
    options = toolz.merge(dialect, kwargs)
    options = toolz.keyfilter(lambda k: k in kws, options)

    if b.open == gzip.open:
        options['compression'] = 'gzip'

    return pd.read_csv(b.path, skiprows=1 if b.header else 0,
                       dtype=dtypes, parse_dates=datenames,
                       names=b.columns, **options)
async def create_notifications(self, notifications):
    results = []

    # create non-existent users before creating notifications
    usernames = []
    for notification in notifications:
        usernames.append(notification['to_username'])
        usernames.append(notification.get('from_username'))
    usernames = set(u for u in usernames if u)
    results.append(await self.create_users(usernames))

    # group notifications by keys to allow multi-row inserts
    # grouped_notifications = toolz.groupby(lambda x: tuple(x.keys()),
    #                                       notifications)
    # logger.debug('create_notifications',
    #              notification_count=len(notifications),
    #              group_count=len(grouped_notifications.keys()))
    # futures = []
    wwwpoll_columns = set(c.name for c in wwwpoll_table.c._all_columns)
    async with self.async_engine.acquire() as conn:
        for n in notifications:
            results.append(await conn.execute(
                notifications_table.insert().values(**n)))
            n2 = toolz.keyfilter(lambda k: k in wwwpoll_columns, n)
            results.append(await conn.execute(
                wwwpoll_table.insert().values(**n2)))
    return all(results)
def resource_bcolz(uri, dshape=None, **kwargs):
    if os.path.exists(uri):
        return ctable(rootdir=uri)
    else:
        if not dshape:
            raise ValueError("Must specify either existing bcolz directory or"
                             " valid datashape")

        dshape = datashape.dshape(dshape)
        dt = datashape.to_numpy_dtype(dshape)
        x = np.empty(shape=(0,), dtype=dt)

        if datashape.predicates.isrecord(dshape.measure):
            return ctable(x, rootdir=uri,
                          **keyfilter(keywords.__contains__, kwargs))
        else:
            return carray(x, rootdir=uri,
                          **keyfilter(keywords.__contains__, kwargs))
def to_frame(self, columns=None):
    """
    Make a DataFrame with the given columns.

    Parameters
    ----------
    columns : sequence, optional
        Sequence of the column names desired in the DataFrame.
        If None all columns are returned, including registered columns.

    Returns
    -------
    frame : pandas.DataFrame

    """
    extra_cols = _columns_for_table(self.name)

    if columns:
        local_cols = [c for c in self._frame.columns
                      if c in columns and c not in extra_cols]
        extra_cols = toolz.keyfilter(lambda c: c in columns, extra_cols)
        df = self._frame[local_cols].copy()
    else:
        df = self._frame.copy()

    for name, col in extra_cols.items():
        df[name] = col()

    return df
def pick(whitelist, d):
    sub = toolz.keyfilter(lambda key: key in whitelist, d)
    if isinstance(d, DD):
        return DD(sub)
    else:
        return sub
def _collect_injectables(names):
    """
    Find all the injectables specified in `names`.

    Parameters
    ----------
    names : list of str

    Returns
    -------
    injectables : dict
        Keys are the names, values are wrappers if the injectable
        is a table. If it's a plain injectable the value itself is given
        or the injectable function is evaluated.

    """
    names = set(names)
    dicts = toolz.keyfilter(lambda x: x in names,
                            toolz.merge(_INJECTABLES, _TABLES))

    if set(dicts.keys()) != names:
        raise KeyError('not all injectables found. '
                       'missing: {}'.format(names - set(dicts.keys())))

    for name, thing in dicts.items():
        if isinstance(thing, _InjectableFuncWrapper):
            dicts[name] = thing()
        elif isinstance(thing, _TableSourceWrapper):
            dicts[name] = thing.convert()

    return dicts
def append_table_to_csv(csv, selectable, dshape=None, **kwargs):
    kwargs = keyfilter(keywords(CopyToCSV).__contains__,
                       merge(csv.dialect, kwargs))
    stmt = CopyToCSV(selectable, os.path.abspath(csv.path), **kwargs)
    with selectable.bind.connect() as conn:
        conn.execute(stmt)
    return csv
def test_access_data():
    """Demonstrate and test access patterns within the data island."""
    binders = {
        "Polyethylene Glycol 100M": 0.02,
        "Sodium lignosulfonate": 0.004,
        "Polyvinyl Acetate": 0.0001
    }
    powders = {"Al2O3": 0.96}
    island = make_data_island(
        density=1.0,
        bulk_modulus=300.0,
        firing_temperature=750.0,
        binders=binders,
        powders=powders,
        tag="Me"
    )

    # read the density value
    assert(island.measurements[0].properties[0].value == NominalReal(1.0, ''))
    # read the bulk modulus value
    assert(island.measurements[0].properties[1].value == NormalReal(300.0, 3.0, ''))
    # read the firing temperature
    assert(island.process.conditions[0].value == UniformReal(749.5, 750.5, 'degC'))
    assert(island.process.parameters[0].value == DiscreteCategorical({"hot": 1.0}))

    # read the quantity of alumina
    quantities = island.process.ingredients[0].material.process.conditions[0].value.quantities
    assert(list(
        keyfilter(lambda x: x == "Al2O3", quantities).values()
    )[0] == 0.96)

    # check that the serialization results in the correct number of objects
    # in the preface (note that neither measurements nor ingredients are
    # serialized)
    assert(len(json.loads(dumps(island))["context"]) == 26)
def handle_db_get(request):
    try:
        db_method, query_params = parse_uri(request)

        if db_method == '/get':
            response_data = keyfilter(
                lambda key: key in query_params,
                memdb,
            )
        if db_method == '/set':
            values = parse_set_query_params(query_params)
            mutate_merge(memdb, values)
            response_data = ["success"]

        body = json.dumps(response_data)
        response = Response(
            status=200,
            body=body,
        )
    except ValueError as e:
        response = Response(
            status=500,
            body="Server Error",
        )
        print(e)

    return response.to_string()
def to_be_updated(self):
    left = dict()
    existing_assets = self._retrieve_assets_from_sqlite()

    # update equity -- list
    equities = self._request_equities()
    left['equity'] = set(equities) - set(existing_assets.get('equity', []))
    print('update equities', len(left['equity']))

    # update convertible --- dict containing basics
    convertibles = self._request_convertibles()
    # print('full convertible', len(convertibles.keys()))
    update_convertibles = set(convertibles) - set(
        existing_assets.get('convertible', []))
    left['convertible'] = keyfilter(lambda x: x in update_convertibles,
                                    convertibles)
    print('update convertibles', len(left['convertible']))

    # update funds -- frame containing basics
    fund = self._request_funds()
    # print('full fund', len(fund))
    update_funds = set(fund['基金代码'].values) - set(
        existing_assets.get('fund', []))
    fund_frame = fund[fund['基金代码'].isin(
        update_funds)] if update_funds else pd.DataFrame()
    left['fund'] = fund_frame
    print('update funds', len(left['fund']))

    # update duals
    duals = self._request_duals()
    # print('dual', duals)
    left['dual'] = duals
    return left
def _collect_injectables(names):
    """
    Find all the injectables specified in `names`.

    Parameters
    ----------
    names : list of str

    Returns
    -------
    injectables : dict
        Keys are the names, values are wrappers if the injectable
        is a table. If it's a plain injectable the value itself is given
        or the injectable function is evaluated.

    """
    names = set(names)
    dicts = toolz.keyfilter(
        lambda x: x in names, toolz.merge(_INJECTABLES, _TABLES))

    if set(dicts.keys()) != names:
        raise KeyError(
            'not all injectables found. '
            'missing: {}'.format(names - set(dicts.keys())))

    for name, thing in dicts.items():
        if isinstance(thing, _InjectableFuncWrapper):
            dicts[name] = thing()
        elif isinstance(thing, _TableSourceWrapper):
            dicts[name] = thing.convert()

    return dicts
def details(lat_lon):
    """
    Gives more details about a lat_lon from the lat_lons function.
    """
    return {'location': lat_lon,
            # we remove the events field because it contains some user's name
            'data': keyfilter(lambda k: k != 'events',
                              requests.post(DETAILS, data=lat_lon).json())}
def append_table_to_csv(csv, selectable, dshape=None, **kwargs):
    kwargs = keyfilter(keywords(CopyToCSV).__contains__,
                       merge(csv.dialect, kwargs))
    stmt = CopyToCSV(selectable, os.path.abspath(csv.path), **kwargs)
    with selectable.bind.begin() as conn:
        conn.execute(stmt)
    csv.has_header = stmt.header
    return csv
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        if not model.norm_layer_kwargs.get('track_running_stats', True):
            state_dict = keyfilter(lambda k: 'running' not in k, state_dict)
        model.load_state_dict(state_dict)
    return model
def into(a, b, **kwargs):
    if isinstance(a, type):
        kwargs = keyfilter(carray_keywords.__contains__, kwargs)
        return carray(b, **kwargs)
    else:
        a.append(b)
        a.flush()
        return a
def process_other_params(cls, forms):
    # type: (dict) -> dict
    la = [
        'app', 'flashver', 'swfurl', 'tcurl', 'pageurl', 'addr', 'clientid',
        'call', 'name', 'type'
    ]
    other_params = keyfilter(lambda x: x not in la, forms)  # type: dict
    return other_params
def _build_particles(stream_name, parameters, data):
    subset = keyfilter(lambda k: k in parameters, data)
    grouped = OmsExtractor._group_by_timestamp(subset)
    particles = []
    for timestamp, attrs in grouped.iteritems():
        attrs = OmsExtractor._convert_attrs_to_ion(parameters, attrs)
        particles.append(
            OmsExtractor._build_particle(stream_name, timestamp, attrs))
    return particles
def get_test_cases(task):
    kwarglist = toolz.keyfilter(lambda x: x != "return",
                                task.run.__annotations__)
    if kwarglist:
        value = next(kwarglist.itervalues())
        return [toolz.valmap(lambda x: x[i], kwarglist)
                for i in xrange(len(value))]
    else:
        return [{}]
def into(a, b, **kwargs):
    kwargs = keyfilter(carray_keywords.__contains__, kwargs)
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
def _csv_to_DataFrame(c, dshape=None, chunksize=None, **kwargs):
    has_header = kwargs.pop('has_header', c.has_header)
    if has_header is False:
        header = None
    elif has_header is True:
        header = 0
    else:
        header = 'infer'

    sep = kwargs.pop('sep',
                     kwargs.pop('delimiter', c.dialect.get('delimiter', ',')))
    encoding = kwargs.get('encoding', c.encoding)

    if dshape:
        dtypes, parse_dates = dshape_to_pandas(dshape)
        if isrecord(dshape.measure):
            names = kwargs.get('names', dshape.measure.names)
        else:
            names = kwargs.get('names')
    else:
        dtypes = parse_dates = names = None

    usecols = kwargs.pop('usecols', None)
    if parse_dates and usecols:
        parse_dates = [col for col in parse_dates if col in usecols]

    compression = kwargs.pop('compression',
                             {'gz': 'gzip', 'bz2': 'bz2'}.get(ext(c.path)))

    # See read_csv docs for header for reasoning
    if names:
        try:
            found_names = pd.read_csv(c.path, encoding=encoding,
                                      compression=compression, nrows=1)
        except StopIteration:
            found_names = pd.read_csv(c.path, encoding=encoding,
                                      compression=compression)
    if names and header == 'infer':
        if [n.strip() for n in found_names] == [n.strip() for n in names]:
            header = 0
        elif (all(re.match('^\s*\D\w*\s*$', n) for n in found_names) and
              not all(dt == datashape.string for dt in dshape.measure.types)):
            header = 0
        else:
            header = None

    kwargs2 = keyfilter(keywords(pandas.read_csv).__contains__, kwargs)
    return pandas.read_csv(c.path, header=header, sep=sep,
                           encoding=encoding, dtype=dtypes,
                           parse_dates=parse_dates, names=names,
                           compression=compression, chunksize=chunksize,
                           usecols=usecols, **kwargs2)
def retrieve_type_assets(self, category):
    if category == 'fund':
        fund_assets = keyfilter(
            lambda x: x not in ['equity', 'convertible'],
            self._asset_type_cache)
        category_assets = set(chain(*fund_assets.values()))
    else:
        category_assets = self._asset_type_cache[category]
    return category_assets
def into(a, b, **kwargs):
    kwargs = keyfilter(keywords(ctable).__contains__, kwargs)
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
def freq_features(w, w_split, lexicon, corpora, pref=True):
    dataf = f'/home/adam/Documents/lexical_blends_project/lexicon_wordlists/{lexicon}_{corpora}_wordlist_f.pickle'
    with open(dataf, 'rb') as f:
        freqd = pickle.load(f)

    wsum, affsum = freqd[w], 0
    corpus_sum = sum(freqd.values())

    if pref:
        affsum = sum(
            keyfilter(lambda x: x.startswith(w_split), freqd).values())
    else:
        affsum = sum(keyfilter(lambda x: x.endswith(w_split), freqd).values())

    w1f = wsum / affsum if affsum > 0 else 0.0
    c1f = wsum / corpus_sum

    return [w1f, c1f]
def _make_sales_order(customer_id, **kwargs):
    args = keyfilter(lambda x: x in ["transaction_date", "customer_address"],
                     kwargs)
    doc = frappe.get_doc(
        merge(
            {
                "doctype": "Sales Order",
                "customer": customer_id,
                "order_type": "Shopping Cart",
                "company": frappe.defaults.get_user_default("company"),
                "currency": frappe.defaults.get_user_default("currency"),
                "selling_price_list": frappe.get_cached_value(
                    "Selling Settings", None, "selling_price_list"),
            },
            args,
        ))
    warehouse = frappe.get_cached_value("Stock Settings", None,
                                        "default_warehouse")
    for item_args in json.loads(kwargs.get("items", "[]")):
        doc.append(
            "items",
            merge(
                keyfilter(lambda x: x in ["item_code", "qty", "rate"],
                          item_args),
                {
                    "warehouse": warehouse,
                    "uom": frappe.get_cached_value(
                        "Item", item_args.get("item_code"), "stock_uom"),
                },
            ),
        )
    doc.set_missing_values()
    return doc
def _csv_to_dataframe(c, dshape=None, chunksize=None, **kwargs):
    header = {
        False: None,
        True: 0
    }.get(kwargs.pop('has_header', c.has_header), 'infer')
    sep = kwargs.pop('sep',
                     kwargs.pop('delimiter', c.dialect.get('delimiter', ',')))
    encoding = kwargs.pop('encoding', c.encoding)

    if dshape:
        dtypes, parse_dates = dshape_to_pandas(dshape)
        if isrecord(dshape.measure):
            names = kwargs.get('names', dshape.measure.names)
        else:
            names = kwargs.get('names')
    else:
        dtypes = parse_dates = names = None

    usecols = kwargs.pop('usecols', None)
    if parse_dates and usecols:
        parse_dates = [col for col in parse_dates if col in usecols]

    # See read_csv docs for header for reasoning
    if names:
        try:
            with c.open() as f:
                found_names = pd.read_csv(f, nrows=1, encoding=encoding,
                                          sep=sep)
        except StopIteration:
            with c.open() as f:
                found_names = pd.read_csv(f, encoding=encoding, sep=sep)
    if names and header == 'infer':
        if [n.strip() for n in found_names] == [n.strip() for n in names]:
            header = 0
        elif (all(re.match('^\s*\D\w*\s*$', n) for n in found_names) and
              not all(dt == datashape.string
                      for dt in dshape.measure.types)):
            header = 0
        else:
            header = None

    kwargs = keyfilter(keywords(pd.read_csv).__contains__, kwargs)
    with c.open() as f:
        return pd.read_csv(f, header=header, sep=sep, encoding=encoding,
                           dtype=dtypes, parse_dates=parse_dates,
                           names=names, chunksize=chunksize,
                           usecols=usecols, **kwargs)
def fextract(keys: Collection, data_dict: Dict) -> Dict:
    """
    Extracts (key, value) pairs from data_dict if key is in keys

    :param keys: a collection, should support __contains__
    :param data_dict: a data dictionary
    :return: a data dictionary
    """
    return keyfilter(lambda k: k in keys, data_dict)
def process_two(
        data: Tuple,
        fieldfunc: Callable = lambda x: x.startswith("departure")) -> int:
    rules, yours, nearby = data
    valid = validate_tickets(rules, nearby)[0]
    interpreted = interpret_tickets(rules, valid)
    departure_fields = keyfilter(fieldfunc, interpreted)
    answer = reduce(mul, [yours[f[0]] for f in departure_fields.values()])
    return answer
def get_test_cases(task):
    kwarglist = toolz.keyfilter(lambda x: x != "return",
                                task.run.__annotations__)
    if kwarglist:
        value = next(kwarglist.itervalues())
        return [
            toolz.valmap(lambda x: x[i], kwarglist)
            for i in xrange(len(value))
        ]
    else:
        return [{}]
def append_table_to_csv(csv, selectable, dshape=None, bind=None, **kwargs):
    kwargs = keyfilter(
        keywords(CopyToCSV).__contains__, merge(csv.dialect, kwargs))
    stmt = CopyToCSV(selectable, os.path.abspath(csv.path), bind=bind,
                     **kwargs)
    with getbind(selectable, bind).begin() as conn:
        conn.execute(stmt)
    return csv
def test_curried_namespace():
    namespace = {}

    def should_curry(func):
        if not callable(func) or isinstance(func, curry):
            return False
        nargs = num_required_args(func)
        if nargs is None or nargs > 1:
            return True
        else:
            return nargs == 1 and has_keywords(func)

    def curry_namespace(ns):
        return dict((
            name,
            curry(f) if should_curry(f) else f,
        ) for name, f in ns.items() if '__' not in name)

    all_auto_curried = curry_namespace(vars(eth_utils))
    inferred_namespace = valfilter(callable, all_auto_curried)
    curried_namespace = valfilter(callable, eth_utils.curried.__dict__)

    if inferred_namespace != curried_namespace:
        missing = set(inferred_namespace) - set(curried_namespace)
        if missing:
            to_insert = sorted("%s," % f for f in missing)
            raise AssertionError(
                'There are missing functions in eth_utils.curried:\n' +
                '\n'.join(to_insert))
        extra = set(curried_namespace) - set(inferred_namespace)
        if extra:
            raise AssertionError(
                'There are extra functions in eth_utils.curried:\n' +
                '\n'.join(sorted(extra)))
        unequal = merge_with(list, inferred_namespace, curried_namespace)
        unequal = valfilter(lambda x: x[0] != x[1], unequal)
        to_curry = keyfilter(lambda x: should_curry(getattr(eth_utils, x)),
                             unequal)
        if to_curry:
            to_curry_formatted = sorted('{0} = curry({0})'.format(f)
                                        for f in to_curry)
            raise AssertionError(
                'There are missing functions to curry in eth_utils.curried:\n'
                + '\n'.join(to_curry_formatted))
        elif unequal:
            not_to_curry_formatted = sorted(unequal)
            raise AssertionError(
                'Missing functions NOT to curry in eth_utils.curried:\n' +
                '\n'.join(not_to_curry_formatted))
        else:
            raise AssertionError(
                "unexplained difference between %r and %r" % (
                    inferred_namespace,
                    curried_namespace,
                ))
def fremove(keys: Collection, data_dict: Dict) -> Dict:
    """
    Remove keys from data_dict according to a collection of keys

    :param keys: a collection of keys to drop
    :param data_dict: a data dictionary
    :return: a data dictionary without the given keys
    """
    return keyfilter(lambda k: k not in keys, data_dict)
def _csv_to_dataframe(c, dshape=None, chunksize=None, **kwargs):
    header = {False: None, True: 0}.get(
        kwargs.pop('has_header', c.has_header), 'infer')
    sep = kwargs.pop(
        'sep', kwargs.pop('delimiter', c.dialect.get('delimiter', ',')))
    encoding = kwargs.pop('encoding', c.encoding)

    if dshape:
        dtypes, parse_dates = dshape_to_pandas(dshape)
        if isrecord(dshape.measure):
            names = kwargs.get('names', dshape.measure.names)
        else:
            names = kwargs.get('names')
    else:
        dtypes = parse_dates = names = None

    usecols = kwargs.pop('usecols', None)
    if parse_dates and usecols:
        parse_dates = [col for col in parse_dates if col in usecols]

    # See read_csv docs for header for reasoning
    if names:
        try:
            with c.open() as f:
                found_names = pd.read_csv(f, nrows=1, encoding=encoding,
                                          sep=sep)
        except StopIteration:
            with c.open() as f:
                found_names = pd.read_csv(f, encoding=encoding, sep=sep)
    if names and header == 'infer':
        if [n.strip() for n in found_names] == [n.strip() for n in names]:
            header = 0
        elif (all(re.match('^\s*\D\w*\s*$', n) for n in found_names) and
              not all(dt == datashape.string
                      for dt in dshape.measure.types)):
            header = 0
        else:
            header = None

    kwargs = keyfilter(keywords(pd.read_csv).__contains__, kwargs)
    with c.open() as f:
        return pd.read_csv(f, header=header, sep=sep, encoding=encoding,
                           dtype=dtypes, parse_dates=parse_dates,
                           names=names, chunksize=chunksize,
                           usecols=usecols, **kwargs)
def clean_dir_info(obj):
    """
    If needing to display a dictionary of some class that doesn't have
    __dict__, the boltons.dir_dict will work, but this cleans it up for
    serializing
    """
    try:
        d = dir_dict(obj)
    except Exception:
        d = dict()
    d1 = keyfilter(complement(_is_dunder), d)
    return dictfilter(d1, factory=AttributeDict)
def flookup(lookup_map: Dict, keys: Collection, data_dict: Dict) -> Dict:
    result = keyfilter(lambda k: k not in keys, data_dict)
    for key in keys:
        if key in data_dict:
            if data_dict[key] in lookup_map:
                result[key] = lookup_map[data_dict[key]]  # found! :D
            else:
                result[key] = None  # not found :'(
        else:
            pass  # field does not exist :/
    return result
def into(a, b, **kwargs):
    b = iter(b)
    if isinstance(a, type):
        x = into(np.ndarray, next(b), **kwargs)
        kwargs2 = keyfilter(carray_keywords.__contains__, kwargs)
        a = a(x, **kwargs2)
    for chunk in b:
        x = into(np.ndarray, chunk, **kwargs)
        a.append(x)
        a.flush()
    a.flush()
    return a
def _csv_to_dataframe(c, dshape=None, chunksize=None, **kwargs):
    header = {False: None, True: 0}.get(
        kwargs.pop("has_header", c.has_header), "infer")
    sep = kwargs.pop(
        "sep", kwargs.pop("delimiter", c.dialect.get("delimiter", ",")))
    encoding = kwargs.pop("encoding", c.encoding)

    if dshape:
        dtypes, parse_dates = dshape_to_pandas(dshape)
        if isrecord(dshape.measure):
            names = kwargs.get("names", dshape.measure.names)
        else:
            names = kwargs.get("names")
    else:
        dtypes = parse_dates = names = None

    usecols = kwargs.pop("usecols", None)
    if parse_dates and usecols:
        parse_dates = [col for col in parse_dates if col in usecols]

    compression = kwargs.pop("compression",
                             {"gz": "gzip", "bz2": "bz2"}.get(ext(c.path)))

    # See read_csv docs for header for reasoning
    if names:
        try:
            found_names = pd.read_csv(c.path, encoding=encoding,
                                      compression=compression, nrows=1)
        except StopIteration:
            found_names = pd.read_csv(c.path, encoding=encoding,
                                      compression=compression)
    if names and header == "infer":
        if [n.strip() for n in found_names] == [n.strip() for n in names]:
            header = 0
        elif all(re.match("^\s*\D\w*\s*$", n) for n in found_names) and not all(
            dt == datashape.string for dt in dshape.measure.types
        ):
            header = 0
        else:
            header = None

    kwargs = keyfilter(keywords(pd.read_csv).__contains__, kwargs)
    return pd.read_csv(
        c.path,
        header=header,
        sep=sep,
        encoding=encoding,
        dtype=dtypes,
        parse_dates=parse_dates,
        names=names,
        compression=compression,
        chunksize=chunksize,
        usecols=usecols,
        **kwargs
    )
def get_prox_infos(proxes, keys=None):
    if keys is None:
        keys = ['time', 'iter']
    pred = lambda x: x in keys
    out = []
    for prox in proxes:
        if hasattr(prox, 'info'):
            out += [keyfilter(pred, prox.info)]
        else:
            out += [None]
    return out
def from_transposon_fusion(cls, fusion, id_=None, drop_metadata=None):
    """Converts (annotated) transposon fusion to an insertion.

    Requires

    Parameters
    ----------
    id : str
        ID to use for the insertion.

    Returns
    -------
    Insertion
        Insertion object derived from the transposon fusion.

    """
    if drop_metadata is None:
        drop_metadata = {'strand_genome', 'strand_transposon'}

    # Fusion should have annotated transposon feature.
    if 'feature_name' not in fusion.metadata:
        raise ValueError(
            'Fusion does not have an annotated transposon feature')

    strand = fusion.strand_genome * fusion.strand_transposon

    if 'gene_strand' in fusion.metadata:
        gene_strand = fusion.metadata['gene_strand']
        orientation = 'sense' if strand == gene_strand else 'antisense'
    else:
        orientation = None

    ins_metadata = toolz.keyfilter(lambda k: k not in drop_metadata,
                                   fusion.metadata)
    ins_metadata['transposon_anchor'] = fusion.anchor_transposon
    if orientation is not None:
        ins_metadata['orientation'] = orientation

    return Insertion(
        id=id_,
        seqname=fusion.seqname,
        position=fusion.anchor_genome,
        strand=strand,
        support_junction=fusion.support_junction,
        support_spanning=fusion.support_spanning,
        support=fusion.support,
        metadata=frozendict(ins_metadata))
def dataset_from_dshape(file, datapath, ds, **kwargs):
    dtype = varlen_dtype(to_numpy_dtype(ds))
    if datashape.var not in list(ds):
        shape = to_numpy(ds)[0]
    elif len(ds.shape) == 1:
        shape = (0,)
    else:
        raise ValueError("Don't know how to handle varlen nd shapes")

    if shape:
        kwargs['chunks'] = kwargs.get('chunks', True)
        kwargs['maxshape'] = kwargs.get('maxshape', (None,) + shape[1:])

    kwargs2 = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype, **kwargs2)
def resource_sql(uri, *args, **kwargs):
    kwargs2 = keyfilter(keywords(sa.create_engine).__contains__, kwargs)
    engine = sa.create_engine(uri, **kwargs2)
    if args and isinstance(args[0], _strtypes):
        table_name, args = args[0], args[1:]
        metadata = sa.MetaData(engine)
        metadata.reflect()
        if table_name not in metadata.tables:
            if 'dshape' in kwargs:
                t = dshape_to_table(table_name, kwargs['dshape'], metadata)
                t.create()
                return t
            else:
                raise ValueError("Table does not exist and no dshape provided")
        return metadata.tables[table_name]
    else:
        return engine
def filter_kwargs(f, kwargs):
    """Return a dict of valid kwargs for `f` from a subset of `kwargs`

    Examples
    --------
    >>> def f(a, b=1, c=2):
    ...     return a + b + c
    ...
    >>> raw_kwargs = dict(a=1, b=3, d=4)
    >>> f(**raw_kwargs)
    Traceback (most recent call last):
        ...
    TypeError: f() got an unexpected keyword argument 'd'
    >>> kwargs = filter_kwargs(f, raw_kwargs)
    >>> f(**kwargs)
    6
    """
    return keyfilter(keywords(f).__contains__, kwargs)
def append_table_to_csv(csv, selectable, dshape=None, bind=None, **kwargs):
    kwargs = keyfilter(keywords(CopyToCSV).__contains__,
                       merge(csv.dialect, kwargs))
    stmt = CopyToCSV(
        selectable,
        os.path.abspath(csv.path) if csv.path is not None else None,
        bind=bind,
        **kwargs
    )

    bind = getbind(selectable, bind)
    if bind.dialect.name == 'postgresql':
        with csv.open('ab+') as f:
            with bind.begin() as conn:
                conn.connection.cursor().copy_expert(literal_compile(stmt), f)
    else:
        with bind.begin() as conn:
            conn.execute(stmt)

    return csv
def resource_sql(uri, *args, **kwargs):
    kwargs2 = keyfilter(keywords(sa.create_engine).__contains__, kwargs)
    engine = create_engine(uri, **kwargs2)
    ds = kwargs.get('dshape')
    if args and isinstance(args[0], str):
        table_name, args = args[0], args[1:]
        metadata = metadata_of_engine(engine)
        metadata.reflect(views=engine.dialect.supports_views)
        if table_name not in metadata.tables:
            if ds:
                t = dshape_to_table(table_name, ds, metadata)
                t.create()
                return t
            else:
                raise ValueError("Table does not exist and no dshape provided")
        return metadata.tables[table_name]
    if ds:
        create_from_datashape(engine, ds)
    return engine
def _get_broadcasts(tables):
    """
    Get the broadcasts associated with a set of tables.

    Parameters
    ----------
    tables : sequence of str
        Table names for which broadcasts have been registered.

    Returns
    -------
    casts : dict of `_Broadcast`
        Keys are tuples of strings like (cast_name, onto_name).

    """
    tables = set(tables)
    casts = toolz.keyfilter(
        lambda x: x[0] in tables and x[1] in tables, _BROADCASTS)
    if tables - set(toolz.concat(casts.keys())):
        raise ValueError('Not enough links to merge all tables.')
    return casts
def to_frame(self, columns=None):
    """
    Make a DataFrame with the given columns.

    Will always return a copy of the underlying table.

    Parameters
    ----------
    columns : sequence, optional
        Sequence of the column names desired in the DataFrame.
        If None all columns are returned, including registered columns.

    Returns
    -------
    frame : pandas.DataFrame

    """
    extra_cols = _columns_for_table(self.name)

    if columns:
        local_cols = [c for c in self.local.columns
                      if c in columns and c not in extra_cols]
        extra_cols = toolz.keyfilter(lambda c: c in columns, extra_cols)
        df = self.local[local_cols].copy()
    else:
        df = self.local.copy()

    with log_start_finish(
            'computing {!r} columns for table {!r}'.format(
                len(extra_cols), self.name),
            logger):
        for name, col in extra_cols.items():
            with log_start_finish(
                    'computing column {!r} for table {!r}'.format(
                        name, self.name),
                    logger):
                df[name] = col()

    return df
def extract_junction_fusions(chimeric_data, merge_dist=None):
    """Extracts junction fusions from a STAR chimeric read dataframe."""

    # Ensure chimeric data only contains junction reads.
    chimeric_data = chimeric_data.query('junction_type >= 0')

    if len(chimeric_data) > 0:
        # Add flanking annotation.
        chimeric_data = chimeric_data.copy()

        flanks = (_flank_sizes(t) for t in chimeric_data.itertuples())
        chimeric_data['flank_a'], chimeric_data['flank_b'] = zip(*flanks)

        # Group by position and summarize.
        grouped = chimeric_data.groupby([
            'seqname_a', 'location_a', 'strand_a', 'seqname_b', 'location_b',
            'strand_b'
        ])

        summarized = (grouped.agg({
            'flank_a': 'max',
            'flank_b': 'max',
            'read_name': 'nunique'
        }).reset_index().assign(support_spanning=0)
          .rename(columns={'read_name': 'support_junction'}))

        # Transform to Fusions.
        fusions = (Fusion(**toolz.keyfilter(lambda k: k not in {'Index'},
                                            row._asdict()))
                   for row in summarized.itertuples())

        # Merge fusions within dist.
        if merge_dist is not None:
            fusions = Fusion.merge(fusions, max_dist=merge_dist)

        for fusion in fusions:
            yield fusion
def make_wrapper(func):
    # Step 1. Wrap this function in apply_jit
    # from apply_jit

    # Get the input arguments from the function
    in_args = inspect.getargspec(func).args
    try:
        jit_args = inspect.getargspec(jit).args + ['nopython']
    except TypeError:
        # print("This should only be seen in RTD; if not, install numba!")
        return func

    kwargs_for_func = toolz.keyfilter(in_args.__contains__, kwargs)
    kwargs_for_jit = toolz.keyfilter(jit_args.__contains__, kwargs)

    # Any name that is a taxcalc parameter (or the special case 'puf'
    # Boolean flag) is given special treatment. Identify those names here
    dd_key_list = list(Parameters.default_data(metadata=True).keys())
    allowed_parameters = dd_key_list
    allowed_parameters += list(arg[1:] for arg in dd_key_list)
    allowed_parameters.append("puf")

    additional_parameters = [arg for arg in in_args
                             if arg in allowed_parameters]
    additional_parameters += parameters
    # Remove duplicates
    all_parameters = list(set(additional_parameters))

    src = inspect.getsourcelines(func)[0]

    # Discover the return arguments by walking
    # the AST of the function
    all_returned_vals = []
    gnr = GetReturnNode()
    all_out_args = None
    for node in ast.walk(ast.parse(''.join(src))):
        all_out_args = gnr.visit(node)
        if all_out_args:
            break
    if not all_out_args:
        raise ValueError("Can't find return statement in function!")

    # Now create the apply jitted function
    applied_jitted_f = make_apply_function(func,
                                           list(reversed(all_out_args)),
                                           in_args,
                                           parameters=all_parameters,
                                           do_jit=True,
                                           **kwargs_for_jit)

    def wrapper(*args, **kwargs):
        in_arrays = []
        out_arrays = []
        pm_or_pf = []
        for farg in all_out_args + in_args:
            if hasattr(args[0], farg):
                in_arrays.append(getattr(args[0], farg))
                pm_or_pf.append("pm")
            elif hasattr(args[1], farg):
                in_arrays.append(getattr(args[1], farg))
                pm_or_pf.append("pf")
            elif farg not in kwargs_for_func:
                raise ValueError("Unknown arg: " + farg)

        # Create the high level function
        high_level_func = create_toplevel_function_string(all_out_args,
                                                          list(in_args),
                                                          pm_or_pf,
                                                          kwargs_for_func)
        func_code = compile(high_level_func, "<string>", "exec")
        fakeglobals = {}
        eval(func_code, {"applied_f": applied_jitted_f}, fakeglobals)
        high_level_fn = fakeglobals['hl_func']
        ans = high_level_fn(*args, **kwargs)
        return ans

    return wrapper
def convert_numpy_to_bcolz_carray(x, **kwargs):
    return carray(x, **keyfilter(keywords.__contains__, kwargs))