# Requires (at module level): import sys, toolz; from collections import OrderedDict
def make_batches(self, trans_text, src, dest, parent):
    """
    The Cloud Translation API is optimized for a recommended length for each
    request of 5K characters (code points). For Cloud Translation - Advanced,
    the maximum number of code points for a single request is 30K.
    """
    template_request = {
        "parent": parent,
        "contents": [],
        "mime_type": "text/plain",  # mime types: text/plain, text/html
        "target_language_code": dest,
    }
    if src != "auto":
        template_request.update({"source_language_code": src})
    # Indices at which the text list should be cut to respect the size limit.
    cuts = self.count_chars(trans_text)
    data_dict = OrderedDict()
    for i in cuts:
        data_dict.update({str(i): template_request.copy()})
    request_batches = []
    if len(cuts) < 1:
        # Everything fits in a single request.
        tr = template_request.copy()
        tr["contents"] = trans_text
        request_batches.append(tr)
    else:
        x = toolz.first(cuts)
        rest = list(toolz.drop(1, cuts))
        data_dict[str(x)]["contents"] = []
        for et in enumerate(trans_text):
            if et[0] < x:
                data_dict[str(x)]["contents"].append(et[1])
            else:
                # Reached a cut point: advance to the next batch.
                cuts = rest
                if len(cuts) > 0:
                    x = toolz.first(cuts)
                    rest = list(toolz.drop(1, cuts))
                    data_dict[str(x)]["contents"] = []
                    data_dict[str(x)]["contents"].append(et[1])
                else:
                    data_dict[str(x)]["contents"].append(et[1])
    for idx, req in data_dict.items():
        # Note: getsizeof measures the list object itself, not the strings it holds.
        print(
            f"req {idx} -> {sys.getsizeof(req['contents']) / 1024} kbytes "
            f"({len(req['contents'])})"
        )
        request_batches.append(req)
    return request_batches
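# `count_chars` is not shown here; the sketch below is a hypothetical
# reconstruction of what it plausibly returns -- the indices at which a list
# of text segments must be cut so that no batch exceeds the recommended
# budget. The name, signature, and the 5000-code-point default are assumptions.
def count_chars_sketch(trans_text, limit=5000):
    cuts, total = [], 0
    for i, segment in enumerate(trans_text):
        if total and total + len(segment) > limit:
            cuts.append(i)  # this index starts a new batch
            total = 0
        total += len(segment)
    return cuts  # an empty list means everything fits in one request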
from typing import Any, List, Tuple, Union

from toolz import drop, first, second


def runLengthEncode(lst: Union[List, str]) -> List[Tuple[Any, int]]:
    # Collapse runs of equal elements into (element, run_length) pairs.
    n: int = 1
    newList: List = []
    while len(lst) > 0:
        if len(lst) == 1:
            newList.append((first(lst), n))
            return newList
        elif first(lst) == second(lst):
            n += 1
            lst = list(drop(1, lst))
        else:
            newList.append((first(lst), n))
            lst = list(drop(1, lst))
            n = 1
    return newList
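# A quick usage sketch (assumes the toolz imports above):
assert runLengthEncode("aaabb") == [("a", 3), ("b", 2)]
assert runLengthEncode([1, 1, 2]) == [(1, 2), (2, 1)]
assert runLengthEncode([]) == []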
def scatter_to_workers(ncores, data, rpc=rpc, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    assert isinstance(ncores, dict)
    assert isinstance(data, dict)

    workers = list(concat([w] * nc for w, nc in ncores.items()))
    names, data = list(zip(*data.items()))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {worker: {key: value for _, key, value in v}
         for worker, v in d.items()}

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([rpcs[address].update_data(data=v, report=report)
                         for address, v in d.items()])
    finally:
        for r in rpcs.values():
            r.close_rpc()

    nbytes = merge(o['nbytes'] for o in out)

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
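# A standalone sketch of the core-weighted round-robin used above, with
# illustrative names and none of the distributed/RPC machinery: each worker
# appears in the pool once per core, so a 2-core worker receives twice as
# many keys as a 1-core worker.
from itertools import cycle
from toolz import concat, groupby

ncores = {"w1": 1, "w2": 2}
keys = ["a", "b", "c", "d", "e", "f"]
pool = list(concat([w] * nc for w, nc in ncores.items()))  # ['w1', 'w2', 'w2']
assignment = groupby(0, zip(cycle(pool), keys))
per_worker = {w: [k for _, k in pairs] for w, pairs in assignment.items()}
assert per_worker == {"w1": ["a", "d"], "w2": ["b", "c", "e", "f"]}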
import toolz
from typing import Callable, Iterable


def skip_while(predicate: Callable, xs: Iterable):
    # This implementation seems hokey: it iterates `xs` twice (once to count,
    # once via drop), so it only works for re-iterable sequences, not for
    # one-shot iterators.
    skip_count = 0
    for x in xs:
        if not predicate(x):
            break
        skip_count += 1
    yield from toolz.drop(skip_count, xs)
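# For one-shot iterators, itertools.dropwhile gives the same behavior in a
# single pass -- an alternative sketch, not the original author's version:
from itertools import dropwhile

def skip_while_iter(predicate, xs):
    yield from dropwhile(predicate, xs)

assert list(skip_while_iter(lambda x: x < 3, iter([1, 2, 3, 4]))) == [3, 4]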
def scatter_to_workers(center, ncores, data, key=None, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    if isinstance(center, str):
        ip, port = center.split(':')
    elif isinstance(center, rpc):
        ip, port = center.ip, center.port
    elif isinstance(center, tuple):
        ip, port = center
    else:
        raise TypeError("Bad type for center")

    if key is None:
        key = str(uuid.uuid1())

    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        k = len(data) // len(ncores)
        ncores = {worker: k for worker in ncores}

    workers = list(concat([w] * nc for w, nc in ncores.items()))
    in_type = type(data)
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = ('%s-%d' % (key, i) for i in count(0))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {k: {b: c for a, b, c in v} for k, v in d.items()}

    out = yield All([rpc(ip=w_ip, port=w_port).update_data(data=v, close=True,
                                                           report=report)
                     for (w_ip, w_port), v in d.items()])
    nbytes = merge([o[1]['nbytes'] for o in out])

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    result = [RemoteData(b, ip, port, result=c) for a, b, c in L]
    if in_type is dict:
        result = dict(zip(names, result))

    raise Return((result, who_has, nbytes))
def scatter_to_workers(ncores, data, rpc, report=True, serialize=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        k = len(data) // len(ncores)
        ncores = {coerce_to_address(worker): k for worker in ncores}

    workers = list(concat([w] * nc for w, nc in ncores.items()))

    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except:
                names.append(str(uuid.uuid1()))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {worker: {key: dumps(value) if serialize else value
                  for _, key, value in v}
         for worker, v in d.items()}

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([rpcs[address].update_data(data=v, report=report)
                         for address, v in d.items()])
    finally:
        for r in rpcs.values():
            r.close_rpc()

    nbytes = merge(o['nbytes'] for o in out)

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
def splicehtmlmap(f, html):
    """
    Generator that takes html in sexpr form and applies a function f to the
    leaf nodes (which are strings), then splices the result into the body.

    `f` should have signature `str` -> `list[sexpr form html elements]`
    eg f = lambda: [['div', 'hello world']]
       splicehtmlmap(f, ['body', '']) -> ['body', ['div', 'hello world']]
    """
    yield t.first(html)
    for e in t.drop(1, html):
        if isinstance(e, (list, tuple)):
            yield splicehtmlmap(f, e)
        elif isinstance(e, str):
            yield from f(e)
        else:
            yield e
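# A usage sketch for splicehtmlmap: sub-elements come back as nested
# generators, so this small helper (hypothetical, not part of the original)
# realizes the tree into plain lists for inspection.
import types

def realize(node):
    if isinstance(node, types.GeneratorType):
        return [realize(child) for child in node]
    return node

f = lambda s: [["div", "hello world"]]
assert realize(splicehtmlmap(f, ["body", ""])) == ["body", ["div", "hello world"]]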
def scatter_to_workers(ncores, data, report=True, serialize=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        k = len(data) // len(ncores)
        ncores = {coerce_to_address(worker): k for worker in ncores}

    workers = list(concat([w] * nc for w, nc in ncores.items()))

    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except:
                names.append(str(uuid.uuid1()))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {worker: {key: dumps(value) if serialize else value
                  for _, key, value in v}
         for worker, v in d.items()}

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([rpcs[address].update_data(data=v, close=True,
                                                   report=report)
                         for address, v in d.items()])
    finally:
        for r in rpcs.values():
            r.close_rpc()

    nbytes = merge(o['nbytes'] for o in out)

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
def scatter_to_workers(ncores, data, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        k = len(data) // len(ncores)
        ncores = {worker: k for worker in ncores}

    workers = list(concat([w] * nc for w, nc in ncores.items()))
    in_type = type(data)
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except:
                names.append(str(uuid.uuid1()))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {k: {b: c for a, b, c in v} for k, v in d.items()}

    out = yield All([rpc(ip=w_ip, port=w_port).update_data(data=v, close=True,
                                                           report=report)
                     for (w_ip, w_port), v in d.items()])
    nbytes = merge([o[1]['nbytes'] for o in out])

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
# Step, roadStep, pvector (pyrsistent), and accumulate are defined/imported
# elsewhere in this module.
def optimalPath(threes: [[int]]) -> ([Step], [Step]):
    forwardPriceToA = threes[0][0]
    crossPriceToA = threes[0][1] + threes[0][2]
    forwardPriceToB = threes[0][1]
    crossPriceToB = threes[0][0] + threes[0][2]
    newPathToA = ([Step("A", forwardPriceToA)]
                  if forwardPriceToA <= crossPriceToA
                  else [Step("B", forwardPriceToB), Step("C", threes[0][2])])
    newPathToB = ([Step("B", forwardPriceToB)]
                  if forwardPriceToB <= crossPriceToB
                  else [Step("A", forwardPriceToA), Step("C", threes[0][2])])
    accumulator = (pvector(newPathToA), pvector(newPathToB))
    newThrees = drop(1, threes)
    return accumulate(roadStep, newThrees, accumulator)
def make_payment_entry(source_name):
    member = frappe.get_doc('Gym Member', source_name)
    invoices = frappe.get_all(
        'Sales Invoice',
        filters=[
            ['customer', '=', member.customer],
            ['docstatus', '=', '1'],
            ['status', '!=', 'Paid'],
        ],
    )
    # Materialize with `list`: a lazy map object is always truthy and can only
    # be consumed once, which would break both the `if pes` check and the
    # first/drop traversal below.
    pes = compose(
        list,
        partial(map, lambda x: get_payment_entry('Sales Invoice', x)),
        partial(pluck, 'name'),
    )(invoices)
    pe = first(pes) if pes else _make_new_pe(member)
    for entry in drop(1, pes):
        pe.set('paid_amount', pe.paid_amount + entry.paid_amount)
        pe.set('received_amount', pe.received_amount + entry.received_amount)
        for ref in entry.references:
            pe.append('references', ref)
    pe.set_amounts()
    return pe
def update_summary_qc(data, key, base=None, secondary=None):
    """
    Updates summary_qc, keyed by key. key is generally the program the
    quality control metrics came from. If key already exists, the specified
    base/secondary files are added as secondary files to the existing key,
    removing duplicates.

    Stick files into summary_qc if you want them propagated forward and
    available for multiqc.
    """
    summary = deepish_copy(get_summary_qc(data, {}))
    files = [[base], [secondary],
             tz.get_in([key, "base"], summary, []),
             tz.get_in([key, "secondary"], summary, [])]
    files = list(set([x for x in flatten(files) if x]))
    base = tz.first(files)
    secondary = list(tz.drop(1, files))
    if base and secondary:
        summary[key] = {"base": base, "secondary": secondary}
    elif base:
        summary[key] = {"base": base}
    data = set_summary_qc(data, summary)
    return data
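# For reference, tz.get_in (used above) walks nested containers and falls
# back to a default when the path is missing -- a standalone illustration:
import toolz as tz

summary = {"fastqc": {"base": "r1_fastqc.html"}}
assert tz.get_in(["fastqc", "base"], summary, []) == "r1_fastqc.html"
assert tz.get_in(["samtools", "base"], summary, []) == []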
def make_payment_entry(source_name):
    member = frappe.get_doc("Gym Member", source_name)
    invoices = frappe.get_all(
        "Sales Invoice",
        filters=[
            ["customer", "=", member.customer],
            ["docstatus", "=", "1"],
            ["status", "!=", "Paid"],
        ],
    )
    pes = compose(
        list,
        partial(map, lambda x: get_payment_entry("Sales Invoice", x)),
        partial(pluck, "name"),
    )(invoices)
    pe = first(pes) if pes else _make_new_pe(member)
    for entry in drop(1, pes):
        pe.set("paid_amount", pe.paid_amount + entry.paid_amount)
        pe.set("received_amount", pe.received_amount + entry.received_amount)
        for ref in entry.references:
            pe.append("references", ref)
    pe.set_amounts()
    return pe
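# compose (used above) applies right-to-left: pluck("name") runs first, its
# iterator feeds map(...), and list materializes the result. A minimal
# standalone illustration with plain data:
from functools import partial
from toolz import compose, pluck

rows = [{"name": "SINV-0001"}, {"name": "SINV-0002"}]
assert compose(list, partial(pluck, "name"))(rows) == ["SINV-0001", "SINV-0002"]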
def scatter_to_workers(ncores, data, rpc=rpc, report=True, serializers=None):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based
    on how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    See scatter for parameter docstring
    """
    assert isinstance(ncores, dict)
    assert isinstance(data, dict)

    workers = list(concat([w] * nc for w, nc in ncores.items()))
    names, data = list(zip(*data.items()))

    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    d = {worker: {key: value for _, key, value in v}
         for worker, v in d.items()}

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([rpcs[address].update_data(data=v, report=report,
                                                   serializers=serializers)
                         for address, v in d.items()])
    finally:
        for r in rpcs.values():
            r.close_rpc()

    nbytes = merge(o['nbytes'] for o in out)

    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
def rest(x):
    # Everything but the first element, as a lazy iterator.
    return drop(1, x)
def cases(f):
    # Skip the first item of `f` (often a count/header line) and parse the
    # remaining items as ints, lazily.
    return map(int, drop(1, f))
def drop(seq):
    # `n` is expected to come from the enclosing scope (e.g. a closure or a
    # curried helper); as written, this shadows toolz.drop with a fixed count.
    return toolz.drop(n, seq)
def get_points(self):
    points = self.stencil.to_grid(self.point + self.stepsize * self.simplex)
    return (SimplexPoint(x, self, i) for i, x in drop(1, enumerate(points)))
def until_convergence(it: Iterator[Params],
                      eq: Callable = lambda x: x[0] != x[1]) -> Params:
    # Pair each element with its successor and return the first value that
    # equals its successor. `tee` is required here: zipping `it` with
    # drop(1, it) on the same underlying iterator would pull from one shared
    # stream and compare non-adjacent elements.
    it1, it2 = itertools.tee(it)
    pairs = zip(it1, tz.drop(1, it2))
    return tz.first(itertools.dropwhile(eq, pairs))[0]
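# A quick check of the fixed version: the halving sequence below stabilizes
# at 0, and that first repeated value is what comes back.
assert until_convergence(iter([40, 20, 10, 5, 2, 1, 0, 0])) == 0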