def test_normalize_function():
    def f1(a, b, c=1):
        pass

    def f2(a, b=1, c=2):
        pass

    def f3(a):
        pass

    assert normalize_function(f2)

    f = lambda a: a
    assert normalize_function(f)

    assert (normalize_function(partial(f2, b=2)) ==
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(partial(f2, b=2)) !=
            normalize_function(partial(f2, b=3)))

    assert (normalize_function(partial(f1, b=2)) !=
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(compose(f2, f3)) ==
            normalize_function(compose(f2, f3)))

    assert (normalize_function(compose(f2, f3)) !=
            normalize_function(compose(f2, f1)))

    assert normalize_function(curry(f2)) == normalize_function(curry(f2))
    assert normalize_function(curry(f2)) != normalize_function(curry(f1))
    assert (normalize_function(curry(f2, b=1)) ==
            normalize_function(curry(f2, b=1)))
    assert (normalize_function(curry(f2, b=1)) !=
            normalize_function(curry(f2, b=2)))
def _tree_reduce(x, aggregate, axis, keepdims, dtype, split_every=None,
                 combine=None):
    """Perform the tree reduction step of a reduction.

    Lower level, users should use ``reduction`` or ``arg_reduction`` directly.
    """
    # Normalize split_every
    split_every = split_every or _globals.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, int):
        n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        split_every = dict((k, v) for (k, v) in enumerate(x.numblocks)
                           if k in axis)

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(x.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = compose(partial(combine or aggregate, axis=axis, keepdims=True),
                   partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        x = partial_reduce(func, x, split_every, True, None)
    func = compose(partial(aggregate, axis=axis, keepdims=keepdims),
                   partial(_concatenate2, axes=axis))
    return partial_reduce(func, x, split_every, keepdims=keepdims,
                          dtype=dtype)
def _tree_reduce(x, aggregate, axis, keepdims, dtype, split_every=None,
                 combine=None, name=None, concatenate=True):
    """Perform the tree reduction step of a reduction.

    Lower level, users should use ``reduction`` or ``arg_reduction`` directly.
    """
    # Normalize split_every
    split_every = split_every or config.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, Integral):
        n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        raise ValueError("split_every must be a int or a dict")

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(x.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = partial(combine or aggregate, axis=axis, keepdims=True)
    if concatenate:
        func = compose(func, partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        x = partial_reduce(func, x, split_every, True, dtype=dtype,
                           name=(name or funcname(combine or aggregate)) + '-partial')
    func = partial(aggregate, axis=axis, keepdims=keepdims)
    if concatenate:
        func = compose(func, partial(_concatenate2, axes=axis))
    return partial_reduce(func, x, split_every, keepdims=keepdims, dtype=dtype,
                          name=(name or funcname(aggregate)) + '-aggregate')
def get_service_step(service_recipe):
    """
    Get step timedelta: the smallest duration among service_recipe's periods.
    """
    def diff(start, end):
        return end - start

    res_delta_diffs = compose(map(lambda p: diff(*p)), get('delta_periods'))
    return compose(min, map(min), map(res_delta_diffs))(service_recipe)
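
# A minimal, self-contained sketch of how the helper above could be exercised.
# The shape of `service_recipe` (a list of dicts with a 'delta_periods' list of
# (start, end) pairs) is an assumption inferred from the code, not documented
# behaviour; the helper bodies are restated so the demo runs on its own.
from datetime import datetime, timedelta

from toolz import compose
from toolz.curried import map, get


def _diff(start, end):
    return end - start


_res_delta_diffs = compose(map(lambda p: _diff(*p)), get('delta_periods'))

_service_recipe = [
    {'delta_periods': [(datetime(2020, 1, 1, 9), datetime(2020, 1, 1, 10)),
                       (datetime(2020, 1, 1, 10), datetime(2020, 1, 1, 10, 30))]},
    {'delta_periods': [(datetime(2020, 1, 1, 9), datetime(2020, 1, 1, 9, 15))]},
]

# The smallest period duration across all resources is the "step".
assert compose(min, map(min), map(_res_delta_diffs))(_service_recipe) == timedelta(minutes=15)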
def _common(self, Z, y):
    scale = Scaler(Z)
    transform = compose(prepend_x0, Scaler.normalize)
    X = transform(scale)
    data = zip(X, y)
    h_theta0 = [0.] * len(X[0])
    coeff = compose(scale.denormalize, get(0),
                    lin_reg(J, gradJ, h_theta0, it_max=2000))
    h_thetad = coeff(data)
    return h_thetad
def zhongji(ip='', username='', password=''):
    try:
        result = []
        child = telnet(ip, username, password)
        child.sendline("display cu section bbs-config | in link-aggregation")
        while True:
            index = child.expect([hw_prompt, hw_pager], timeout=120)
            if index == 0:
                result.append(child.before)
                child.sendline('quit')
                child.expect(':')
                child.sendline('y')
                child.close()
                break
            else:
                result.append(child.before)
                child.send(" ")
                continue
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ['fail', None, ip]
    rslt = ''.join(result).split('\r\n')[1:-1]
    rec = [x.replace('\x1b[37D', '').strip().split()[2:]
           for x in rslt if 'add-member' in x]

    def port(x):
        p = x[2].split(',')
        p1 = ['/'.join((x[1], y)) for y in p]
        return list(cons(x[0], p1))

    ff = lambda x, y: merge_with(compose(unique, concat), x, y)
    rec1 = [port(x) for x in rec]
    rec2 = [{x[0]: x} for x in rec1]
    rec3 = reduce(ff, rec2, dict())
    return ['success', rec3, ip]
def parse_people(do_request):
    logger.info('Parsing people')

    def parse_representative(doc):
        doc = doc('div.wpsPortletBody')
        raw_birth_date = doc('fieldset table').eq(0).find('td').eq(1).text().replace(' ', '')
        return {
            'name': doc.find('h3').eq(0).text(),
            'birthDate': arrow.get(raw_birth_date, 'D.M.YYYY') if raw_birth_date else None,
            'image': DZ_RS_URL + doc.find('img').eq(0).attr('src'),
            'group': doc('.panelBox100 a').attr('href'),
            'location': doc(u'*:contains("Volilno okro")').parent().text().split(':')[1].strip(),
            'gender': "F" if 'Poslanka' in str(doc) else "M",
        }

    # get all people
    return toolz.compose(
        # get back metadata
        curried.map(parse_representative),
        # visit person's link
        curried.map(do_request),
        # get a link for each person
        lambda doc: doc("p.podnaslovOsebaLI a").map(lambda i, r: pq(r).attr('href')),
        # get page with a list of people
        do_request,
    )(DZ_RS_PEOPLE_URL)
def doctable(ctx):
    df = pd.read_csv('./docs/flight-options.csv')

    # open an existing document
    doc = docx.Document('./docs/style-reference.docx')

    as_int = partial(format_decimal, format='#')
    as_usd = partial(format_currency, currency='USD')

    s = doc.sections[0]
    width = s.page_width - s.left_margin - s.right_margin

    doc.add_picture('./docs/diagrams_002.png', width=width)

    formatters = {
        'ticket_price': as_usd,
        'total_hours': as_int,
        'trip': as_int,
        'airline': partial(shorten_long_name, width=20),
        'selected': compose({0: 'No', 1: 'Yes'}.get, int),
    }
    add_table(df, doc, table_style='Plain Table 3', formatters=formatters)

    # save the doc
    doc.save('./docs/test.docx')
def create_store(reducer, initial_state=None):
    store = yield from next_handler(reducer, initial_state)
    dispatch = store.dispatch
    middleware_api = dict(dispatch=lambda action: store.dispatch(action),
                          state_func=lambda: store.state)
    chain = map(lambda middleware: middleware(**middleware_api), middlewares)
    store.dispatch = toolz.compose(*chain)(dispatch)
    return store
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names), inds2, tmp, inds, dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0],) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
def interface_check():
    clear_log()
    cmd = "match(s:Switch) where s.model='T64G' or s.model='S9306' or s.model='S9303' or s.model='S8905' return s.ip,s.model"
    # cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    list(map(compose(output_interface, get_interface), switchs))
def test_quantiles_uneven_buckets(self):
    permute = partial(permute_rows, 5)
    shape = (5, 5)
    factor_data = permute(log1p(arange(25, dtype=float).reshape(shape)))
    mask_data = permute(self.eye_mask(shape=shape))

    f = F()
    m = Mask()

    permuted_array = compose(permute, partial(array, dtype=int64_dtype))
    self.check_terms(
        terms={
            '3_masked': f.quantiles(bins=3, mask=m),
            '7_masked': f.quantiles(bins=7, mask=m),
        },
        initial_workspace={
            f: factor_data,
            m: mask_data,
        },
        expected={
            '3_masked': permuted_array([[-1, 0, 0, 1, 2],
                                        [0, -1, 0, 1, 2],
                                        [0, 0, -1, 1, 2],
                                        [0, 0, 1, -1, 2],
                                        [0, 0, 1, 2, -1]]),
            '7_masked': permuted_array([[-1, 0, 2, 4, 6],
                                        [0, -1, 2, 4, 6],
                                        [0, 2, -1, 4, 6],
                                        [0, 2, 4, -1, 6],
                                        [0, 2, 4, 6, -1]]),
        },
        mask=self.build_mask(self.ones_mask(shape=shape)),
    )
def cli(board_source, key, token, to, output, board):
    """Hi, I'm TrelloScribe. I take Trello boards and turn them into documents!"""
    # Compose a sequence of functions based on the options chosen
    # Note toolz.compose() works right to left
    read_phase = {
        'id': download_board(key, token),
        'name': toolz.compose(download_board(key, token), search_boards(key, token)),
        'file': read_board,
    }
    convert_phase = {
        'raw': partial(json.dumps, indent=2),
        'md': ast_to_md,
        'html': toolz.compose(md_to_html, ast_to_md),
    }
    toolz.pipe(board, read_phase[board_source], trello_to_ast,
               convert_phase[to], partial(click.echo, file=output))
def input2output(query: Dict[str, Any], fields: List[str],
                 options: Dict[str, int]) -> None:
    inout = compose(formated_output(fields),
                    get_output(fields),
                    get_info,
                    make_query(query, fields))
    html_text = inout(options)
    viewoutput(html_text)
def build_task_nodes(files=None, select=None, task_uuid=None,
                     human_readable=True):
    """
    Build the task nodes given some input data, query criteria and formatting
    options.
    """
    def task_transformers():
        if human_readable:
            yield _convert_timestamp
        yield json.loads

    def filter_funcs():
        if select is not None:
            for query in select:
                yield filter_by_jmespath(query)

        if task_uuid is not None:
            yield filter_by_uuid(task_uuid)

    if files is None:
        files = [sys.stdin]

    tree = Tree()
    tasks = imap(compose(*task_transformers()),
                 chain.from_iterable(files))
    return tree.nodes(tree.merge_tasks(tasks, filter_funcs())),
def _lookup_symbol_strict(self, symbol, as_of_date):
    # split the symbol into the components, if there are no
    # company/share class parts then share_class_symbol will be empty
    company_symbol, share_class_symbol = split_delimited_symbol(symbol)
    try:
        owners = self.symbol_ownership_map[company_symbol, share_class_symbol]
        assert owners, "empty owners list for %r" % symbol
    except KeyError:
        # no equity has ever held this symbol
        raise SymbolNotFound(symbol=symbol)

    if not as_of_date:
        if len(owners) > 1:
            # more than one equity has held this ticker, this is ambiguous
            # without the date
            raise MultipleSymbolsFound(
                symbol=symbol,
                options=set(map(
                    compose(self.retrieve_asset, attrgetter("sid")),
                    owners,
                )),
            )

        # exactly one equity has ever held this symbol, we may resolve
        # without the date
        return self.retrieve_asset(owners[0].sid)

    for start, end, sid, _ in owners:
        if start <= as_of_date < end:
            # find the equity that owned it on the given asof date
            return self.retrieve_asset(sid)

    # no equity held the ticker on the given asof date
    raise SymbolNotFound(symbol=symbol)
def display_task_tree(args):
    """
    Read the input files, apply any command-line-specified behaviour and
    display the task tree.
    """
    def task_transformers():
        if args.human_readable:
            yield _convert_timestamp
        yield json.loads

    def filter_funcs():
        if args.select:
            for query in args.select:
                yield filter_by_jmespath(query)

        if args.task_uuid:
            yield filter_by_uuid(args.task_uuid)

    if not args.files:
        args.files.append(sys.stdin)

    tree = Tree()
    tasks = imap(compose(*task_transformers()),
                 chain.from_iterable(args.files))
    render_task_nodes(
        write=sys.stdout.write,
        nodes=tree.nodes(tree.merge_tasks(tasks, filter_funcs())),
        ignored_task_keys=set(args.ignored_task_keys) or None,
        field_limit=args.field_limit,
    )
def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use.

    Returns (reference sequence (str), iterable(vcf dicts))
    '''
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you a list of numbers that are randomish and increasing
    ranges = draw(rolling_sum(1, 3, int(size / 2))
                  .map(lambda xs: ifilter(lambda x: x < size, xs)))  # .filter(_not(bool)))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of the sequence
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position of each vcf row
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    # grab the pieces of the reference to build our elts from
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    # random chromosome name
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the positions we have made
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)),
               POSs, chunks)
    # TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator
    # assume(len(vcfs) > 0)
    return (seq, vcfs)
def ngram_tuples(n, string, minlen=3, maxlen=25):
    """
    Creates ngram tuples of size 'n' from 'string'.
    Also changes the string to lowercase, removes generic stopwords and splits
    on all non-alphanumeric characters.

    Ex:
        In [2]: list(ngram_tuples(n=1, string='Just another example text.'))
        Out[2]: [('another',), ('example',), ('text',)]

        In [2]: list(ngram_tuples(n=2, string='Just another example text.'))
        Out[2]: [('another', 'example'), ('example', 'text')]

        In [11]: list(ngram_tuples(3, 'I needed a longer example text for this example.'))
        Out[11]: [('needed', 'longer', 'example'),
                  ('longer', 'example', 'text'),
                  ('example', 'text', 'example')]

    minlen - filter out words that have fewer characters than 'minlen'.
    maxlen - filter out words that have more characters than 'maxlen'.
    """
    return tlz.pipe(string,
                    lower,
                    simple_split,
                    filter_longer_than(maxlen),
                    tlz.compose(tlz.concat, map_c(splitter_of_words)),
                    filter_shorter_than(minlen),
                    filter_stopwords,
                    sliding_window_c(n))
def lookup_by_supplementary_field(self, field_name, value, as_of_date):
    try:
        owners = self.equity_supplementary_map[
            field_name,
            value,
        ]
        assert owners, 'empty owners list for %r, %r' % (field_name, value)
    except KeyError:
        # no equity has ever held this value
        raise ValueNotFoundForField(field=field_name, value=value)

    if not as_of_date:
        if len(owners) > 1:
            # more than one equity has held this value, this is ambiguous
            # without the date
            raise MultipleValuesFoundForField(
                field=field_name,
                value=value,
                options=set(map(
                    compose(self.retrieve_asset, attrgetter('sid')),
                    owners,
                )),
            )
        # exactly one equity has ever held this value, we may resolve
        # without the date
        return self.retrieve_asset(owners[0].sid)

    for start, end, sid, _ in owners:
        if start <= as_of_date < end:
            # find the equity that owned it on the given asof date
            return self.retrieve_asset(sid)

    # no equity held the value on the given asof date
    raise ValueNotFoundForField(field=field_name, value=value)
def format_results(terminal_width, key_list, separator, text_list,
                   left_align=True, min_factor=3, **kwargs):
    """Returns formatted results in two columns.
    """
    key_width = max(map(len, key_list))
    separator_length = len(separator)
    desc_wrap = toolz.identity
    if terminal_width:
        if key_width / terminal_width > .5:
            key_width = terminal_width // 2 - 3
        text_width = terminal_width - key_width - separator_length
        if text_width * min_factor > terminal_width:
            desc_wrap = toolz.compose(
                ('\n' + ' ' * (key_width + separator_length)).join,
                toolz.partial(textwrap.wrap, width=text_width, **kwargs),
            )

    if left_align:
        fmt = '%-*s%s%s'
    else:
        fmt = '%*s%s%s'

    for key, text in zip(key_list, text_list):
        text = desc_wrap(text)
        if len(key) > key_width:
            yield fmt % (key_width, key, separator, '')
            yield fmt % (key_width, '', ' ' * separator_length, text)
        else:
            yield fmt % (key_width, key, separator, text)
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol('chunk', DataShape(*(tuple(map(first, data.chunks)) +
                                        (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
def forcastall(intid):
    data = map(int, read_artist(intid)["action_1"])
    sun = training(data, 4)
    fun = toolz.compose(str, int)
    predictdata = map(fun, toolz.take(60, sun))  # forecast 60 days
    with open("./past_forcast/{aid}.csv".format(aid=intid), "wt") as f:
        f.write(",".join(predictdata))
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None,
              split_every=None, combine=None):
    """General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)
    axis = tuple(i if i >= 0 else x.ndim + i for i in axis)

    if dtype and 'dtype' in getargspec(chunk).args:
        chunk = partial(chunk, dtype=dtype)
    if dtype and 'dtype' in getargspec(aggregate).args:
        aggregate = partial(aggregate, dtype=dtype)

    # Normalize split_every
    split_every = split_every or _globals.get('split_every', 4)
    if isinstance(split_every, dict):
        split_every = dict((k, split_every.get(k, 2)) for k in axis)
    elif isinstance(split_every, int):
        n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2)
        split_every = dict.fromkeys(axis, n)
    else:
        split_every = dict((k, v) for (k, v) in enumerate(x.numblocks)
                           if k in axis)

    # Map chunk across all blocks
    inds = tuple(range(x.ndim))
    tmp = atop(partial(chunk, axis=axis, keepdims=True), inds, x, inds)
    tmp._chunks = tuple((1,) * len(c) if i in axis else c
                        for (i, c) in enumerate(tmp.chunks))

    # Reduce across intermediates
    depth = 1
    for i, n in enumerate(tmp.numblocks):
        if i in split_every and split_every[i] != 1:
            depth = int(builtins.max(depth, ceil(log(n, split_every[i]))))
    func = compose(partial(combine or aggregate, axis=axis, keepdims=True),
                   partial(_concatenate2, axes=axis))
    for i in range(depth - 1):
        tmp = partial_reduce(func, tmp, split_every, True, None)
    func = compose(partial(aggregate, axis=axis, keepdims=keepdims),
                   partial(_concatenate2, axes=axis))
    return partial_reduce(func, tmp, split_every, keepdims=keepdims,
                          dtype=dtype)
def minimize(self, f_df, x0, display=sys.stdout, maxiter=1e3):
    self.display = display
    self.theta = x0

    # setup
    xk = self.algorithm.send(destruct(x0).copy())
    store = defaultdict(list)
    runtimes = []
    if len(self.operators) == 0:
        self.operators = [proxops.identity()]

    # setup
    obj, grad = wrap(f_df, x0)
    transform = compose(destruct, *reversed(self.operators), self.restruct)

    self.optional_print(tp.header(['Iteration', 'Objective', '||Grad||', 'Runtime']))
    try:
        for k in count():

            # setup
            tstart = perf_counter()
            f = obj(xk)
            df = grad(xk)
            xk = transform(self.algorithm.send(df))
            runtimes.append(perf_counter() - tstart)
            store['f'].append(f)

            # Update display
            self.optional_print(tp.row([k,
                                        f,
                                        np.linalg.norm(destruct(df)),
                                        tp.humantime(runtimes[-1])]))

            if k >= maxiter:
                break

    except KeyboardInterrupt:
        pass

    self.optional_print(tp.bottom(4))

    # cleanup
    self.optional_print(u'\u279b Final objective: {}'.format(store['f'][-1]))
    self.optional_print(u'\u279b Total runtime: {}'.format(tp.humantime(sum(runtimes))))
    self.optional_print(u'\u279b Per iteration runtime: {} +/- {}'.format(
        tp.humantime(np.mean(runtimes)),
        tp.humantime(np.std(runtimes)),
    ))

    # result
    return OptimizeResult({
        'x': self.restruct(xk),
        'f': f,
        'df': self.restruct(df),
        'k': k,
        'obj': np.array(store['f']),
    })
def common_subexpression(*tables):
    """ Common sub expression between subtables

    >>> t = TableSymbol('t', '{x: int, y: int}')
    >>> common_subexpression(t['x'], t['y'])
    t
    """
    sets = [set(t.subterms()) for t in tables]
    return builtins.max(set.intersection(*sets),
                        key=compose(len, str))
def wrap(f_df, xref, size=1):
    """
    Memoizes an objective + gradient function, and splits it into
    two functions that return just the objective and gradient, respectively.

    Parameters
    ----------
    f_df : function
        Must be unary (takes a single argument)

    size : int, optional
        Size of the cache (Default=1)
    """
    memoized_f_df = lrucache(lambda x: f_df(restruct(x, xref)), size)
    objective = compose(first, memoized_f_df)
    gradient = compose(destruct, second, memoized_f_df)
    return objective, gradient
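
# A minimal, self-contained sketch of the same memoize-and-split pattern using
# only the standard library, numpy and toolz. lrucache/destruct/restruct from
# the snippet above are replaced with stand-ins here, so this is an
# illustration of the idea rather than the library's implementation.
from functools import lru_cache

import numpy as np
from toolz import compose, first, second


def _toy_f_df(x):
    """Toy objective returning (objective value, gradient)."""
    return float(np.sum(x ** 2)), 2 * x


@lru_cache(maxsize=1)
def _memoized(x_bytes):
    # Use a hashable byte key, recover the array, and evaluate f_df only once.
    return _toy_f_df(np.frombuffer(x_bytes))


_memoized_f_df = lambda x: _memoized(np.asarray(x, dtype=float).tobytes())
_objective = compose(first, _memoized_f_df)   # -> just the objective
_gradient = compose(second, _memoized_f_df)   # -> just the gradient, from the cached pair

_x = np.array([1.0, 2.0])
assert _objective(_x) == 5.0
assert np.allclose(_gradient(_x), [2.0, 4.0])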
def test_top_and_bottom_with_groupby_and_mask(self, dtype, seed):
    permute = partial(permute_rows, seed)
    permuted_array = compose(permute, partial(array, dtype=int64_dtype))

    shape = (8, 8)

    # Shuffle the input rows to verify that we correctly pick out the top
    # values independently of order.
    factor_data = permute(arange(0, 64, dtype=dtype).reshape(shape))
    classifier_data = permuted_array([[0, 0, 1, 1, 2, 2, 0, 0],
                                      [0, 0, 1, 1, 2, 2, 0, 0],
                                      [0, 1, 2, 3, 0, 1, 2, 3],
                                      [0, 1, 2, 3, 0, 1, 2, 3],
                                      [0, 0, 0, 0, 1, 1, 1, 1],
                                      [0, 0, 0, 0, 1, 1, 1, 1],
                                      [0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0]])

    f = self.f
    c = self.c

    self.check_terms(
        terms={
            'top2': f.top(2, groupby=c),
            'bottom2': f.bottom(2, groupby=c),
        },
        initial_workspace={
            f: factor_data,
            c: classifier_data,
        },
        expected={
            # Should be the rightmost two entries in each group of
            # classifier_data, ignoring the off-diagonal.
            'top2': permuted_array([[0, 1, 1, 1, 1, 1, 1, 0],
                                    [0, 1, 1, 1, 1, 1, 0, 1],
                                    [1, 1, 1, 1, 1, 0, 1, 1],
                                    [1, 1, 1, 1, 0, 1, 1, 1],
                                    [0, 1, 1, 0, 0, 0, 1, 1],
                                    [0, 1, 0, 1, 0, 0, 1, 1],
                                    [0, 0, 0, 0, 0, 0, 1, 1],
                                    [0, 0, 0, 0, 0, 0, 1, 1]], dtype=bool),
            # Should be the leftmost two entries in each group of
            # classifier_data, ignoring the off-diagonal.
            'bottom2': permuted_array([[1, 1, 1, 1, 1, 1, 0, 0],
                                       [1, 1, 1, 1, 1, 1, 0, 0],
                                       [1, 1, 1, 1, 1, 0, 1, 1],
                                       [1, 1, 1, 1, 0, 1, 1, 1],
                                       [1, 1, 0, 0, 1, 1, 0, 0],
                                       [1, 1, 0, 0, 1, 1, 0, 0],
                                       [1, 0, 1, 0, 0, 0, 0, 0],
                                       [0, 1, 1, 0, 0, 0, 0, 0]], dtype=bool),
        },
        mask=self.build_mask(permute(rot90(self.eye_mask(shape=shape)))),
    )
def scale_data(train_data, test_data):
    Z_train, y_train = zip(*train_data)
    scale = Scaler()
    scale.fit(Z_train)
    transform = compose(prepend_x0, scale.transform)
    scaledX_train = transform(Z_train)
    scaled_train = list(zip(scaledX_train, y_train))

    Z_test, y_test = zip(*test_data)
    scaledX_test = transform(Z_test)
    scaled_test = list(zip(scaledX_test, y_test))
    return scaled_train, scaled_test
def words():
    yield instructions.LOAD_CONST(compose(
        pprint,
        partial(sorted, key=op.attrgetter('name')),
        dict.values,
    ))
    yield instructions.LOAD_CONST(globals)
    yield instructions.CALL_FUNCTION(0)
    yield instructions.CALL_FUNCTION(1)
    yield instructions.POP_TOP()
    yield next_instruction()
def _get_data(args, columns):
    warehouse_conditions = (
        "warehouse = %(warehouse)s"
        if args.get("warehouse")
        else "warehouse IN (SELECT name FROM `tabWarehouse` WHERE company = %(company)s)"
    )
    items = frappe.db.sql(
        """
            SELECT
                i.item_code AS item_code,
                i.brand AS brand,
                i.item_name AS item_name,
                id.default_supplier AS supplier,
                p.price_list_rate AS price,
                b.actual_qty AS stock
            FROM `tabItem` AS i
            LEFT JOIN `tabItem Price` AS p
                ON p.item_code = i.item_code AND p.price_list = %(price_list)s
            LEFT JOIN (
                SELECT item_code, SUM(actual_qty) AS actual_qty
                FROM `tabBin`
                WHERE {warehouse_conditions}
                GROUP BY item_code
            ) AS b
                ON b.item_code = i.item_code
            LEFT JOIN `tabItem Default` AS id
                ON id.parent = i.name AND id.company = %(company)s
        """.format(warehouse_conditions=warehouse_conditions),
        values={
            "price_list": args.get("price_list"),
            "company": args.get("company"),
            "warehouse": args.get("warehouse"),
        },
        as_dict=1,
    )
    sles = frappe.db.sql(
        """
            SELECT item_code, posting_date, actual_qty
            FROM `tabStock Ledger Entry`
            WHERE docstatus < 2
                AND voucher_type = 'Sales Invoice'
                AND company = %(company)s
                AND {warehouse_conditions}
                AND posting_date BETWEEN %(start_date)s AND %(end_date)s
        """.format(warehouse_conditions=warehouse_conditions),
        values={
            "company": args.get("company"),
            "warehouse": args.get("warehouse"),
            "start_date": args.get("start_date"),
            "end_date": args.get("end_date"),
        },
        as_dict=1,
    )
    keys = compose(list, partial(pluck, "fieldname"))(columns)
    periods = filter(lambda x: x.get("start_date") and x.get("end_date"), columns)
    set_consumption = _set_consumption(sles, periods)
    make_row = compose(partial(keyfilter, lambda k: k in keys), set_consumption)
    return map(make_row, items)
def validator(train_data: pd.DataFrame,
              split_fn: SplitterFnType,
              train_fn: LearnerFnType,
              eval_fn: EvalFnType,
              perturb_fn_train: PerturbFnType = identity,
              perturb_fn_test: PerturbFnType = identity,
              predict_oof: bool = False) -> ValidatorReturnType:
    """
    Splits the training data into folds given by the split function and
    performs a train-evaluation sequence on each fold by calling
    ``validator_iteration``.

    Parameters
    ----------
    train_data : pandas.DataFrame
        A Pandas' DataFrame with training data

    split_fn : function pandas.DataFrame -> list of tuple
        Partially defined split function that takes a dataset and returns
        a list of folds. Each fold is a tuple of arrays. The first array in
        each tuple contains training indexes while the second array
        contains validation indexes.

    train_fn : function pandas.DataFrame -> prediction_function, predictions_dataset, logs
        A partially defined learning function that takes a training set and
        returns a predict function, a dataset with training predictions and
        training logs.

    eval_fn : function pandas.DataFrame -> dict
        A partially defined evaluation function that takes a dataset with
        predictions and returns the evaluation logs.

    perturb_fn_train : PerturbFnType
        A partially defined corruption function that takes a dataset and
        returns a corrupted dataset. Perturbation applied at train-time.

    perturb_fn_test : PerturbFnType
        A partially defined corruption function that takes a dataset and
        returns a corrupted dataset. Perturbation applied at test-time.

    predict_oof : bool
        Whether to return out of fold predictions on the logs

    Returns
    ----------
    A list of log-like dictionary evaluations.
    """
    folds, logs = split_fn(train_data)

    train_fn = compose(train_fn, perturb_fn_train)
    eval_fn = compose(eval_fn, perturb_fn_test)

    def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType:
        (fold_num, (train_index, test_indexes)) = fold
        return validator_iteration(train_data, train_index, test_indexes,
                                   fold_num, train_fn, eval_fn, predict_oof)

    zipped_logs = pipe(folds,
                       enumerate,
                       map(fold_iter),
                       partial(zip, logs))

    def _join_split_log(log_tuple: Tuple[LogType, LogType]) -> Tuple[LogType, LogType]:
        train_log = {}
        split_log, validator_log = log_tuple
        train_log["train_log"] = validator_log["train_log"]
        return train_log, assoc(dissoc(validator_log, "train_log"), "split_log", split_log)

    def get_perturbed_columns(perturbator: PerturbFnType) -> List[str]:
        args = inspect.getfullargspec(perturbator).kwonlydefaults
        return args['cols']

    train_logs, validator_logs = zip(*map(_join_split_log, zipped_logs))
    first_train_log = first(train_logs)

    perturbator_log = {'perturbated_train': [], 'perturbated_test': []}  # type: LogType
    if perturb_fn_train != identity:
        perturbator_log['perturbated_train'] = get_perturbed_columns(perturb_fn_train)
    if perturb_fn_test != identity:
        perturbator_log['perturbated_test'] = get_perturbed_columns(perturb_fn_test)
    first_train_log = assoc(first_train_log, "perturbator_log", perturbator_log)

    return assoc(first_train_log, "validator_log", list(validator_logs))
def test_quantiles_masked(self, seed):
    permute = partial(permute_rows, seed)

    # 7 x 7 so that we divide evenly into 2/3/6-tiles after including the
    # nan value in each row.
    shape = (7, 7)

    # Shuffle the input rows to verify that we don't depend on the order.
    # Take the log to ensure that we don't depend on linear scaling or
    # integrality of inputs
    factor_data = permute(log1p(arange(49, dtype=float).reshape(shape)))
    factor_data_w_nans = where(
        permute(rot90(self.eye_mask(shape=shape))),
        factor_data,
        nan,
    )
    mask_data = permute(self.eye_mask(shape=shape))

    f = F()
    f_nans = OtherF()
    m = Mask()

    # Apply the same shuffle we applied to the input rows to our
    # expectations. Doing it this way makes it obvious that our
    # expectation corresponds to our input, while still testing against
    # a range of input orderings.
    permuted_array = compose(permute, partial(array, dtype=int64_dtype))

    self.check_terms(
        terms={
            '2_masked': f.quantiles(bins=2, mask=m),
            '3_masked': f.quantiles(bins=3, mask=m),
            '6_masked': f.quantiles(bins=6, mask=m),
            '2_nans': f_nans.quantiles(bins=2),
            '3_nans': f_nans.quantiles(bins=3),
            '6_nans': f_nans.quantiles(bins=6),
        },
        initial_workspace={
            f: factor_data,
            f_nans: factor_data_w_nans,
            m: mask_data,
        },
        expected={
            # Expected results here are the same as in
            # test_quantiles_unmasked, except with diagonals of -1s
            # interpolated to match the effects of masking and/or input
            # nans.
            '2_masked': permuted_array([[-1, 0, 0, 0, 1, 1, 1],
                                        [0, -1, 0, 0, 1, 1, 1],
                                        [0, 0, -1, 0, 1, 1, 1],
                                        [0, 0, 0, -1, 1, 1, 1],
                                        [0, 0, 0, 1, -1, 1, 1],
                                        [0, 0, 0, 1, 1, -1, 1],
                                        [0, 0, 0, 1, 1, 1, -1]]),
            '3_masked': permuted_array([[-1, 0, 0, 1, 1, 2, 2],
                                        [0, -1, 0, 1, 1, 2, 2],
                                        [0, 0, -1, 1, 1, 2, 2],
                                        [0, 0, 1, -1, 1, 2, 2],
                                        [0, 0, 1, 1, -1, 2, 2],
                                        [0, 0, 1, 1, 2, -1, 2],
                                        [0, 0, 1, 1, 2, 2, -1]]),
            '6_masked': permuted_array([[-1, 0, 1, 2, 3, 4, 5],
                                        [0, -1, 1, 2, 3, 4, 5],
                                        [0, 1, -1, 2, 3, 4, 5],
                                        [0, 1, 2, -1, 3, 4, 5],
                                        [0, 1, 2, 3, -1, 4, 5],
                                        [0, 1, 2, 3, 4, -1, 5],
                                        [0, 1, 2, 3, 4, 5, -1]]),
            '2_nans': permuted_array([[0, 0, 0, 1, 1, 1, -1],
                                      [0, 0, 0, 1, 1, -1, 1],
                                      [0, 0, 0, 1, -1, 1, 1],
                                      [0, 0, 0, -1, 1, 1, 1],
                                      [0, 0, -1, 0, 1, 1, 1],
                                      [0, -1, 0, 0, 1, 1, 1],
                                      [-1, 0, 0, 0, 1, 1, 1]]),
            '3_nans': permuted_array([[0, 0, 1, 1, 2, 2, -1],
                                      [0, 0, 1, 1, 2, -1, 2],
                                      [0, 0, 1, 1, -1, 2, 2],
                                      [0, 0, 1, -1, 1, 2, 2],
                                      [0, 0, -1, 1, 1, 2, 2],
                                      [0, -1, 0, 1, 1, 2, 2],
                                      [-1, 0, 0, 1, 1, 2, 2]]),
            '6_nans': permuted_array([[0, 1, 2, 3, 4, 5, -1],
                                      [0, 1, 2, 3, 4, -1, 5],
                                      [0, 1, 2, 3, -1, 4, 5],
                                      [0, 1, 2, -1, 3, 4, 5],
                                      [0, 1, -1, 2, 3, 4, 5],
                                      [0, -1, 1, 2, 3, 4, 5],
                                      [-1, 0, 1, 2, 3, 4, 5]]),
        },
        mask=self.build_mask(self.ones_mask(shape=shape)),
    )
"voucher_no", "batch_no", ] def execute(filters=None): columns, data = stock_ledger(filters) return _get_columns(columns), _get_data(data, filters) _get_columns = compose( list, partial(filter, lambda x: x.get("fieldname") in _fields), lambda x: x[:5] + [{ "label": _("Default Supplier"), "fieldname": "default_supplier", "fieldtype": "Link", "options": "Supplier", "width": 100, }] + x[5:], ) def _get_data(data, filters): item_codes = compose(list, unique, partial(pluck, "item_code"))(data) if not item_codes: return data query = frappe.db.sql( """ SELECT i.item_code AS item_code,
def lookup_symbol(self, symbol, as_of_date, fuzzy=False):
    """
    Return matching Equity of name symbol in database.

    If multiple Equities are found and as_of_date is not set,
    raises MultipleSymbolsFound.

    If no Equity was active at as_of_date raises SymbolNotFound.
    """
    # Format inputs
    if as_of_date is not None:
        as_of_date = pd.Timestamp(normalize_date(as_of_date))

    company_symbol, share_class_symbol, fuzzy_symbol = \
        split_delimited_symbol(symbol)

    equities_cols = self.equities.c
    if as_of_date:
        ad_value = as_of_date.value

        if fuzzy:
            # Search for a single exact match on the fuzzy column
            fuzzy_candidates = sa.select((equities_cols.sid,)).where(
                (equities_cols.fuzzy_symbol == fuzzy_symbol) &
                (equities_cols.start_date <= ad_value) &
                (equities_cols.end_date >= ad_value),
            ).execute().fetchall()

            # If exactly one SID exists for fuzzy_symbol, return that sid
            if len(fuzzy_candidates) == 1:
                return self._retrieve_equity(fuzzy_candidates[0]['sid'])

        # Search for exact matches of the split-up company_symbol and
        # share_class_symbol
        candidates = sa.select((equities_cols.sid,)).where(
            (equities_cols.company_symbol == company_symbol) &
            (equities_cols.share_class_symbol == share_class_symbol) &
            (equities_cols.start_date <= ad_value) &
            (equities_cols.end_date >= ad_value),
        ).execute().fetchall()

        # If exactly one SID exists for symbol, return that symbol
        if len(candidates) == 1:
            return self._retrieve_equity(candidates[0]['sid'])

        # If no SID exists for symbol, return SID with the
        # highest-but-not-over end_date
        elif not candidates:
            sid = sa.select((equities_cols.sid,)).where(
                (equities_cols.company_symbol == company_symbol) &
                (equities_cols.share_class_symbol == share_class_symbol) &
                (equities_cols.start_date <= ad_value),
            ).order_by(
                equities_cols.end_date.desc(),
            ).scalar()
            if sid is not None:
                return self._retrieve_equity(sid)

        # If multiple SIDs exist for symbol, return latest start_date with
        # end_date as a tie-breaker
        elif len(candidates) > 1:
            sid = sa.select((equities_cols.sid,)).where(
                (equities_cols.company_symbol == company_symbol) &
                (equities_cols.share_class_symbol == share_class_symbol) &
                (equities_cols.start_date <= ad_value),
            ).order_by(
                equities_cols.start_date.desc(),
                equities_cols.end_date.desc(),
            ).scalar()
            if sid is not None:
                return self._retrieve_equity(sid)

        raise SymbolNotFound(symbol=symbol)

    else:
        # If this is a fuzzy look-up, check if there is exactly one match
        # for the fuzzy symbol
        if fuzzy:
            fuzzy_sids = sa.select((equities_cols.sid,)).where(
                (equities_cols.fuzzy_symbol == fuzzy_symbol)
            ).execute().fetchall()
            if len(fuzzy_sids) == 1:
                return self._retrieve_equity(fuzzy_sids[0]['sid'])

        sids = sa.select((equities_cols.sid,)).where(
            (equities_cols.company_symbol == company_symbol) &
            (equities_cols.share_class_symbol == share_class_symbol)
        ).execute().fetchall()
        if len(sids) == 1:
            return self._retrieve_equity(sids[0]['sid'])
        elif not sids:
            raise SymbolNotFound(symbol=symbol)
        else:
            raise MultipleSymbolsFound(
                symbol=symbol,
                options=list(map(
                    compose(self._retrieve_equity, itemgetter('sid')),
                    sids,
                )),
            )
    return keyfilter(lambda k: k in whitelist, d)


def sum_by(key):
    return compose(sum, partial(map, lambda x: x.get(key)))


def key_by(key, items):
    return reduceby(key, lambda a, x: merge(a, x), items, {})


split_to_list = excepts(
    AttributeError,
    compose(
        list,
        partial(filter, lambda x: x),
        partial(map, lambda x: x.strip()),
        lambda x: x.split(","),
    ),
    lambda x: None,
)


def with_report_error_check(data_fn):
    def fn(*args, **kwargs):
        try:
            return data_fn(*args, **kwargs)
        except ProgrammingError:
            return []

    return fn
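
# A quick, standalone demo of the split_to_list helper above (restated here so
# this block runs on its own): it splits on commas, strips whitespace, drops
# empty pieces, and falls back to None when the input has no .split (e.g. None),
# because toolz.excepts routes the AttributeError to the handler.
from functools import partial

from toolz import compose, excepts

_split_to_list = excepts(
    AttributeError,
    compose(
        list,
        partial(filter, lambda x: x),
        partial(map, lambda x: x.strip()),
        lambda x: x.split(","),
    ),
    lambda exc: None,
)

assert _split_to_list("a, b, , c") == ["a", "b", "c"]
assert _split_to_list(None) is None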
def get_pet_relations(pet):
    return compose(list, partial(pluck, 'customer'))(
        frappe.get_all('Pet Relation', filters={'parent': pet}, fields=['customer'])
    )
from skimage import color
from funcy import iffy, constantly, tap, rpartial
from toolz import memoize, curry, compose, pipe
from toolz.curried import map, juxt, mapcat, concatv
from toolz.sandbox.core import unzip
from geopandas import gpd
from osgeo import ogr, gdal, osr
from lenses import lens

from abfs.path import *
from abfs.constants import *
from abfs.group_data_split import GroupDataSplit, DEFAULT_SPLIT_CONFIG
from abfs.conversions import area_in_square_feet
from abfs.segmentation_augmentation import SegmentationAugmentation, MOVE_SCALE_ROTATE

list_unzip = compose(map(list), unzip)
list_concatv = compose(list, concatv)

BLACK = 0
BINARY_WHITE = 1
ALWAYS_TRUE = lambda df: df.index != -1


class Data():
    def __init__(self, config,
                 split_config=DEFAULT_SPLIT_CONFIG,
                 seg_aug_config=MOVE_SCALE_ROTATE,
                 batch_size=16,
                 override_df=None,
                 aug_random_seed=None,
@dispatch(Selection, RDD)
def compute_up(t, rdd, **kwargs):
    predicate = optimize(t.predicate, rdd)
    predicate = rrowfunc(predicate, t._child)
    return rdd.filter(predicate)


rdd_reductions = {
    reductions.sum: RDD.sum,
    reductions.min: RDD.min,
    reductions.max: RDD.max,
    reductions.count: RDD.count,
    reductions.mean: RDD.mean,
    reductions.var: RDD.variance,
    reductions.std: RDD.stdev,
    reductions.nunique: compose(RDD.count, RDD.distinct),
}


@dispatch(tuple(rdd_reductions), RDD)
def compute_up(t, rdd, **kwargs):
    return rdd_reductions[type(t)](rdd)


def istruthy(x):
    return not not x


@dispatch(reductions.any, RDD)
def compute_up(t, rdd, **kwargs):
    return istruthy(rdd.filter(identity).take(1))
def encode_coord_to_str(coord):
    return compose(convert_to_str,
                   fix_integers,
                   split_str_to_int,
                   invert_if_negative(coord),
                   left_shift,
                   to_binary,
                   multiply_round)(coord)
def truncate_categorical(df: pd.DataFrame,
                         columns_to_truncate: List[str],
                         percentile: float,
                         replacement: Union[str, float] = -9999,
                         replace_unseen: Union[str, float] = -9999,
                         store_mapping: bool = False) -> LearnerReturnType:
    """
    Truncate infrequent categories and replace them by a single one.
    You can think of it like an "others" category.

    The default behaviour is to replace the original values. To store
    the transformed values in a new column, specify `prefix` or `suffix`
    in the parameters, or specify a dictionary with the desired column
    mapping using the `columns_mapping` parameter.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_truncate` columns.

    columns_to_truncate : list of str
        The df columns names to perform the truncation.

    percentile : float
        Categories less frequent than the percentile will be replaced by the
        same one.

    replacement : int, str, float or nan
        The value to use when a category is less frequent than the percentile
        variable.

    replace_unseen : int, str, float, or nan
        The value to impute unseen categories.

    store_mapping : bool (default: False)
        Whether to store the feature value -> integer dictionary in the log.
    """
    get_categs = lambda col: (df[col].value_counts() / len(df)).to_dict()
    update = lambda d: map(
        lambda kv: (kv[0], replacement) if kv[1] <= percentile else (kv[0], kv[0]),
        d.items())
    categs_to_dict = lambda categ_dict: dict(categ_dict)

    vec = {column: compose(categs_to_dict, update, get_categs)(column)
           for column in columns_to_truncate}

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        return apply_replacements(new_df, columns_to_truncate, vec, replace_unseen)

    p.__doc__ = learner_pred_fn_docstring("truncate_categorical")

    log: LearnerLogType = {
        'truncate_categorical': {
            'transformed_column': columns_to_truncate,
            'replace_unseen': replace_unseen,
        }
    }

    if store_mapping:
        log["truncate_categorical"]["mapping"] = vec

    return p, p(df), log
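
# A tiny, self-contained illustration (toy data, not the library itself) of
# what the composed mapping `compose(categs_to_dict, update, get_categs)`
# above produces for a single column: frequent categories map to themselves,
# infrequent ones map to the replacement value.
import pandas as pd
from toolz import compose

_df = pd.DataFrame({"city": ["sp", "sp", "sp", "rj", "bh"]})
_percentile, _replacement = 0.25, "other"

_get_categs = lambda col: (_df[col].value_counts() / len(_df)).to_dict()
_update = lambda d: map(
    lambda kv: (kv[0], _replacement) if kv[1] <= _percentile else (kv[0], kv[0]),
    d.items())

_mapping = compose(dict, _update, _get_categs)("city")
# 'sp' has frequency 0.6 and is kept; 'rj' and 'bh' (0.2 each) are truncated.
assert _mapping == {"sp": "sp", "rj": "other", "bh": "other"}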
    Check if the word is in the vocab of the model

    Parameters
    ----------
    word : str

    Returns
    -------
    str
        it returns the original word or an empty string if the word is not
        in the vocabulary
    """
    return word if word in nlp.vocab else ""


parse_ = fp.compose(vocab, lemma, keyword, nlp)


def parse(texts):
    """
    Map the parsing function across the list of texts.

    The parsing function currently is:
        - pass through a spacy model (nlp)
        - get the keyword of the sentence (keyword)
        - lemma the resulting word (lemma)
        - vocab check that the word is in the vocabulary (vocab)

    For the single document version use: ``parse_``

    Parameters
def sumDigits(values: Iterable[int]) -> int:
    curryMap = curry(map)
    return compose(sum, concat, curryMap(toDigits))(values)
def _write_internal(self, iterator, assets):
    """
    Internal implementation of write.

    `iterator` should be an iterator yielding pairs of (asset, ctable).
    """
    total_rows = 0
    first_row = {}
    last_row = {}
    calendar_offset = {}

    # Maps column name -> output carray.
    columns = {
        k: carray(array([], dtype=uint32))
        for k in US_EQUITY_PRICING_BCOLZ_COLUMNS
    }

    earliest_date = None
    sessions = self._calendar.sessions_in_range(
        self._start_session, self._end_session
    )

    if assets is not None:
        @apply
        def iterator(iterator=iterator, assets=set(assets)):
            for asset_id, table in iterator:
                if asset_id not in assets:
                    raise ValueError('unknown asset id %r' % asset_id)
                yield asset_id, table

    for asset_id, table in iterator:
        nrows = len(table)
        for column_name in columns:
            if column_name == 'id':
                # We know what the content of this column is, so don't
                # bother reading it.
                columns['id'].append(
                    full((nrows,), asset_id, dtype='uint32'),
                )
                continue

            columns[column_name].append(table[column_name])

        if earliest_date is None:
            earliest_date = table["day"][0]
        else:
            earliest_date = min(earliest_date, table["day"][0])

        # Bcolz doesn't support ints as keys in `attrs`, so convert
        # assets to strings for use as attr keys.
        asset_key = str(asset_id)

        # Calculate the index into the array of the first and last row
        # for this asset. This allows us to efficiently load single
        # assets when querying the data back out of the table.
        first_row[asset_key] = total_rows
        last_row[asset_key] = total_rows + nrows - 1
        total_rows += nrows

        table_day_to_session = compose(
            self._calendar.minute_to_session_label,
            partial(Timestamp, unit='s', tz='UTC'),
        )
        asset_first_day = table_day_to_session(table['day'][0])
        asset_last_day = table_day_to_session(table['day'][-1])

        asset_sessions = sessions[
            sessions.slice_indexer(asset_first_day, asset_last_day)
        ]
        assert len(table) == len(asset_sessions), (
            'Got {} rows for daily bars table with first day={}, last '
            'day={}, expected {} rows.\n'
            'Missing sessions: {}\n'
            'Extra sessions: {}'.format(
                len(table),
                asset_first_day.date(),
                asset_last_day.date(),
                len(asset_sessions),
                asset_sessions.difference(
                    to_datetime(
                        np.array(table['day']),
                        unit='s',
                        utc=True,
                    )
                ).tolist(),
                to_datetime(
                    np.array(table['day']),
                    unit='s',
                    utc=True,
                ).difference(asset_sessions).tolist(),
            )
        )

        # Calculate the number of trading days between the first date
        # in the stored data and the first date of **this** asset. This
        # offset is used for output alignment by the reader.
        calendar_offset[asset_key] = sessions.get_loc(asset_first_day)

    # This writes the table to disk.
    full_table = ctable(
        columns=[
            columns[colname]
            for colname in US_EQUITY_PRICING_BCOLZ_COLUMNS
        ],
        names=US_EQUITY_PRICING_BCOLZ_COLUMNS,
        rootdir=self._filename,
        mode='w',
    )

    full_table.attrs['first_trading_day'] = (
        earliest_date if earliest_date is not None else iNaT
    )

    full_table.attrs['first_row'] = first_row
    full_table.attrs['last_row'] = last_row
    full_table.attrs['calendar_offset'] = calendar_offset
    full_table.attrs['calendar_name'] = self._calendar.name
    full_table.attrs['start_session_ns'] = self._start_session.value
    full_table.attrs['end_session_ns'] = self._end_session.value
    full_table.flush()
    return full_table
def _get_keys(args):
    return compose(list, partial(pluck, "fieldname"), _get_columns)(args)
def visitor(self):
    return compose(reversed, list)
def sum_by(key):
    return compose(sum, partial(map, lambda x: x.get(key)))
def checkSum(n: int) -> int:
    return compose(sumDigits, doubleEveryOther, toDigits)(n)
def __zip_candles_data_frame(self, df):
    return compose(zip)(
        *map(df.get, candles_utils.columns_with_ticker.keys())
    )
f1(2)(3)

from functools import partial
f3 = partial(f, 2)
f3(3)

# From toolz
def stem(word):
    """ Stem word to primitive form """
    return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")

from toolz import compose, frequencies, partial
wordcount = compose(frequencies, partial(map, stem), str.split)

sentence = "This cat jumped over this other cat!"
wordcount(sentence)
# {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}

########################################

############
# Python precompiled modules: http://www.lfd.uci.edu/~gohlke/pythonlibs/
#######################################

# http://web.comlab.ox.ac.uk/oucl/work/jeremy.gibbons/publications/spigot.pdf
def pi_digits():
    """generator for digits of pi"""
def isValid(n: int) -> bool:
    curryEquals = curry(operator.eq)
    curryMod = curry(operator.mod)
    return compose(curryEquals(0), flip(curryMod, 10), checkSum)(n)
def _lookup_symbol_fuzzy(self, symbol, as_of_date):
    symbol = symbol.upper()
    company_symbol, share_class_symbol = split_delimited_symbol(symbol)
    try:
        owners = self.fuzzy_symbol_ownership_map[
            company_symbol + share_class_symbol
        ]
        assert owners, 'empty owners list for %r' % symbol
    except KeyError:
        # no equity has ever held a symbol matching the fuzzy symbol
        raise SymbolNotFound(symbol=symbol)

    if not as_of_date:
        if len(owners) == 1:
            # only one valid match
            return self.retrieve_asset(owners[0].sid)

        options = []
        for _, _, sid, sym in owners:
            if sym == symbol:
                # there are multiple options, look for exact matches
                options.append(self.retrieve_asset(sid))

        if len(options) == 1:
            # there was only one exact match
            return options[0]

        # there is more than one exact match for this fuzzy symbol
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=set(options),
        )

    options = []
    for start, end, sid, sym in owners:
        if start <= as_of_date < end:
            # see which fuzzy symbols were owned on the asof date.
            options.append((sid, sym))

    if not options:
        # no equity owned the fuzzy symbol on the date requested
        raise SymbolNotFound(symbol=symbol)

    if len(options) == 1:
        # there was only one owner, return it
        return self.retrieve_asset(options[0][0])

    for sid, sym in options:
        if sym == symbol:
            # look for an exact match on the asof date
            return self.retrieve_asset(sid)

    # multiple equities held tickers matching the fuzzy ticker but
    # there are no exact matches
    raise MultipleSymbolsFound(
        symbol=symbol,
        options=set(map(
            compose(self.retrieve_asset, itemgetter(0)),
            options,
        )),
    )
def doubleEveryOther(values: Iterable[int]) -> Iterable[int]:
    return compose(myZipWith(operator.mul, cycle([1, 2])), reverse)(values)
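
# The sumDigits/doubleEveryOther/checkSum/isValid helpers above read like the
# classic credit-card (Luhn-style) checksum exercise. A self-contained sketch,
# with assumed stand-ins for the undefined toDigits/reverse/myZipWith helpers,
# showing how the composed pieces fit together end to end:
import operator
from itertools import cycle

from toolz import compose, concat, curry


def _toDigits(n):
    return [int(d) for d in str(n)]


def _reverse(xs):
    return list(reversed(list(xs)))


def _myZipWith(f, xs):
    # Returns a function that zips its argument against xs using f.
    return lambda ys: [f(x, y) for x, y in zip(xs, ys)]


def _doubleEveryOther(values):
    # Weight digits 1, 2, 1, 2, ... starting from the rightmost digit.
    return compose(_myZipWith(operator.mul, cycle([1, 2])), _reverse)(values)


_sumDigits = compose(sum, concat, curry(map)(_toDigits))
_checkSum = compose(_sumDigits, _doubleEveryOther, _toDigits)
_isValid = lambda n: _checkSum(n) % 10 == 0

assert _isValid(4012888888881881)       # a widely used Luhn-valid test number
assert not _isValid(4012888888881882)   # changing the last digit breaks it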
def decode(coord_str):
    return compose(fix_coords,
                   group_pairs,
                   maybe_invert_shift,
                   coords_arr_to_bin,
                   split_str)(coord_str)
    Parameters
    ----------
    query
        the querylike object to evaluate
    api
        the API to handle the request
    loaders
        the registry of object loaders
    auth
        the authentication object
    sender
        the request sender
    """
    return thread_last(
        query,
        attrgetter('__req__'),
        api.prepare,
        (flip(api.add_auth), auth),
        sender,
        api.parse,
        loaders(query.__rtype__))


_simple_json_api = Api(
    prepare=methodcaller('add_prefix', 'https://'),
    parse=compose(json.loads, methodcaller('decode'), attrgetter('content')),
    add_auth=lambda req, auth: (req if auth is None
                                else req.add_basic_auth(auth)))

simple_resolve = partial(
    resolve,
    api=_simple_json_api,
    loaders=load.simple_registry,
    auth=None,
    sender=http.urllib_sender())
"""a basic resolver"""
def execute(filters=None):
    columns = _get_columns()
    keys = compose(list, partial(pluck, "fieldname"))(columns)
    clauses, values = _get_filters(filters)
    data = _get_data(clauses, values, keys)
    return columns, data
def filter_whitespace(tokenset):
    """
    Filters out tokens that are only whitespace.
    """
    return tlz.filter(tlz.compose(bool, str.strip), tokenset)
        (
            # different Python class, right side is param
            dt.date,
            ibis.param(dt.timestamp),
            False,
        ),
    ],
)
def test_scalar_parameter_compare(left, right, expected):
    assert left.equals(right) == expected


@pytest.mark.parametrize(
    ('case', 'creator'),
    [
        (datetime.now(), toolz.compose(methodcaller('time'), ibis.timestamp)),
        ('now', toolz.compose(methodcaller('time'), ibis.timestamp)),
        (datetime.now().time(), ibis.time),
        ('10:37', ibis.time),
    ],
)
@pytest.mark.parametrize(
    ('left', 'right'),
    [(1, 'a'), ('a', 1), (1.0, 2.0), (['a'], [1])],
)
def test_between_time_failure_time(case, creator, left, right):
    value = creator(case)
    with pytest.raises(TypeError):
        value.between(left, right)


def test_custom_type_binary_operations():
    class Foo(ir.ValueExpr):
def from_blaze(expr,
               deltas='auto',
               checkpoints='auto',
               loader=None,
               resources=None,
               odo_kwargs=None,
               missing_values=None,
               no_deltas_rule='warn',
               no_checkpoints_rule='warn',
               apply_deltas_adjustments=True):
    """Create a Pipeline API object from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression to use.
    deltas : Expr, 'auto' or None, optional
        The expression to use for the point in time adjustments.
        If the string 'auto' is passed, a deltas expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr._name`` + '_deltas'. If None is passed, no
        deltas will be used.
    checkpoints : Expr, 'auto' or None, optional
        The expression to use for the forward fill checkpoints.
        If the string 'auto' is passed, a checkpoints expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr._name`` + '_checkpoints'. If None is passed,
        no checkpoints will be used.
    loader : BlazeLoader, optional
        The blaze loader to attach this pipeline dataset to. If None is
        passed, the global blaze loader is used.
    resources : dict or any, optional
        The data to execute the blaze expressions against. This is used as the
        scope for ``bz.compute``.
    odo_kwargs : dict, optional
        The keyword arguments to pass to odo when evaluating the expressions.
    missing_values : dict[str -> any], optional
        A dict mapping column names to missing values for those columns.
        Missing values are required for integral columns.
    no_deltas_rule : {'warn', 'raise', 'ignore'}, optional
        What should happen if ``deltas='auto'`` but no deltas can be found.
        'warn' says to raise a warning but continue.
        'raise' says to raise an exception if no deltas can be found.
        'ignore' says take no action and proceed with no deltas.
    no_checkpoints_rule : {'warn', 'raise', 'ignore'}, optional
        What should happen if ``checkpoints='auto'`` but no checkpoints can be
        found. 'warn' says to raise a warning but continue.
        'raise' says to raise an exception if no deltas can be found.
        'ignore' says take no action and proceed with no deltas.
    apply_deltas_adjustments : bool, optional
        Whether or not deltas adjustments should be applied for this dataset.
        True by default because not applying deltas adjustments is an
        exception rather than the rule.

    Returns
    -------
    pipeline_api_obj : DataSet or BoundColumn
        Either a new dataset or bound column based on the shape of the expr
        passed in. If a table shaped expression is passed, this will return
        a ``DataSet`` that represents the whole table. If an array-like shape
        is passed, a ``BoundColumn`` on the dataset that would be constructed
        from passing the parent is returned.
    """
    if 'auto' in {deltas, checkpoints}:
        invalid_nodes = tuple(filter(is_invalid_deltas_node, expr._subterms()))
        if invalid_nodes:
            raise TypeError(
                'expression with auto %s may only contain (%s) nodes,'
                " found: %s" % (
                    ' or '.join(
                        ['deltas'] if deltas is not None else [] +
                        ['checkpoints'] if checkpoints is not None else [],
                    ),
                    ', '.join(map(get__name__, valid_deltas_node_types)),
                    ', '.join(set(map(compose(get__name__, type), invalid_nodes))),
                ),
            )
    deltas = _get_metadata(
        'deltas',
        expr,
        deltas,
        no_deltas_rule,
    )
    checkpoints = _get_metadata(
        'checkpoints',
        expr,
        checkpoints,
        no_checkpoints_rule,
    )

    # Check if this is a single column out of a dataset.
    if bz.ndim(expr) != 1:
        raise TypeError(
            'expression was not tabular or array-like,'
            ' %s dimensions: %d' % (
                'too many' if bz.ndim(expr) > 1 else 'not enough',
                bz.ndim(expr),
            ),
        )

    single_column = None
    if isscalar(expr.dshape.measure):
        # This is a single column. Record which column we are to return
        # but create the entire dataset.
        single_column = rename = expr._name
        field_hit = False
        if not isinstance(expr, traversable_nodes):
            raise TypeError(
                "expression '%s' was array-like but not a simple field of"
                " some larger table" % str(expr),
            )
        while isinstance(expr, traversable_nodes):
            if isinstance(expr, bz.expr.Field):
                if not field_hit:
                    field_hit = True
                else:
                    break
            rename = expr._name
            expr = expr._child
        dataset_expr = expr.relabel({rename: single_column})
    else:
        dataset_expr = expr

    measure = dataset_expr.dshape.measure
    if not isrecord(measure) or AD_FIELD_NAME not in measure.names:
        raise TypeError(
            "The dataset must be a collection of records with at least an"
            " '{ad}' field. Fields provided: '{fields}'\nhint: maybe you need"
            " to use `relabel` to change your field names".format(
                ad=AD_FIELD_NAME,
                fields=measure,
            ),
        )
    _check_datetime_field(AD_FIELD_NAME, measure)
    dataset_expr, deltas, checkpoints = _ensure_timestamp_field(
        dataset_expr,
        deltas,
        checkpoints,
    )

    if deltas is not None and (sorted(deltas.dshape.measure.fields) !=
                               sorted(measure.fields)):
        raise TypeError(
            'baseline measure != deltas measure:\n%s != %s' % (
                measure,
                deltas.dshape.measure,
            ),
        )
    if (checkpoints is not None and
            (sorted(checkpoints.dshape.measure.fields) !=
             sorted(measure.fields))):
        raise TypeError(
            'baseline measure != checkpoints measure:\n%s != %s' % (
                measure,
                checkpoints.dshape.measure,
            ),
        )

    # Ensure that we have a data resource to execute the query against.
    _check_resources('expr', dataset_expr, resources)
    _check_resources('deltas', deltas, resources)
    _check_resources('checkpoints', checkpoints, resources)

    # Create or retrieve the Pipeline API dataset.
    if missing_values is None:
        missing_values = {}
    ds = new_dataset(dataset_expr, deltas, frozenset(missing_values.items()))

    # Register our new dataset with the loader.
    (loader if loader is not None else global_loader)[ds] = ExprData(
        bind_expression_to_resources(dataset_expr, resources),
        bind_expression_to_resources(deltas, resources)
        if deltas is not None else
        None,
        bind_expression_to_resources(checkpoints, resources)
        if checkpoints is not None else
        None,
        odo_kwargs=odo_kwargs,
        apply_deltas_adjustments=apply_deltas_adjustments,
    )
    if single_column is not None:
        # We were passed a single column, extract and return it.
        return getattr(ds, single_column)
    return ds
import requests
from toolz import (
    compose,
)

fetch_rates = lambda: requests.get('https://api.gemini.com/v1/pricefeed')
parse_json = lambda x: x.json()
pred = lambda x: x['pair'] == 'ETHUSD'
filter_on_ethusd_pair = lambda x: filter(pred, x)
get_only_element = lambda x: x[0]
get_price = lambda x: x.get('price')

fetch_exchange_rate = compose(
    float,
    get_price,
    get_only_element,
    list,
    filter_on_ethusd_pair,
    parse_json,
    fetch_rates,
)
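
# The same pipeline can be written with toolz.pipe, which reads left to right
# (data first, then each step in the order it runs) instead of compose's
# right-to-left order. This sketch reuses the helpers defined above and makes
# a live HTTP request when called, just like fetch_exchange_rate does.
from toolz import pipe


def fetch_exchange_rate_pipe():
    return pipe(
        fetch_rates(),
        parse_json,
        filter_on_ethusd_pair,
        list,
        get_only_element,
        get_price,
        float,
    )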