Пример #1
0
    def make_batches(self, trans_text, src, dest, parent):
        """
        The Cloud Translation API is optimized for a recommended length for each request of 5K characters (code points).
        For Cloud Translation - Advanced, the maximum number of code points for a single request is 30K.
        """
        template_request = {
            "parent": parent,
            "contents": [],
            "mime_type": "text/plain",  # mime types: text/plain, text/html
            "target_language_code": dest,
        }
        if not src == "auto":
            template_request.update({"source_language_code": src})

        cuts = self.count_chars(trans_text)

        data_dict = OrderedDict()
        for i in cuts:
            data_dict.update({str(i): template_request.copy()})

        request_batches = []

        if len(cuts) < 1:
            tr = template_request.copy()
            tr["contents"] = trans_text
            request_batches.append(tr)
        else:
            x = toolz.first(cuts)
            rest = list(toolz.drop(1, cuts))
            data_dict[str(x)]["contents"] = []
            for et in enumerate(trans_text):
                if et[0] < x:
                    data_dict[str(x)]["contents"].append(et[1])
                else:
                    cuts = rest
                    if len(cuts) > 0:
                        x = toolz.first(cuts)
                        rest = list(toolz.drop(1, cuts))
                        data_dict[str(x)]["contents"] = []
                        data_dict[str(x)]["contents"].append(et[1])
                    else:
                        data_dict[str(x)]["contents"].append(et[1])

        for idx, req in data_dict.items():
            print(
                f"req {idx} -> {int(sys.getsizeof(req['contents'])) / 1024} kbytes ({len(req['contents'])} )"
            )
            request_batches.append(req)

        return request_batches
Пример #2
0
def runLengthEncode(lst: Union[List, str]) -> List[Tuple[Any, int]]:
    """Run-length encode ``lst`` into ``(item, run_length)`` tuples.

    Neighbouring items that compare equal are collapsed into one tuple whose
    second field is the length of the run.  The input is walked exactly once
    and never copied, so the cost is linear in its length; any iterable is
    accepted, not only sized sequences.

    Args:
        lst: The items to encode (a list, a string, or any other iterable).

    Returns:
        One ``(item, count)`` tuple per run, in input order.  The item stored
        is the last element of its run.  Empty input gives an empty list.
    """
    newList: List[Tuple[Any, int]] = []
    # Sentinel marking "no item seen yet"; a plain None would be confused
    # with a genuine None element in the input.
    nothing = object()
    previous: Any = nothing
    n: int = 0
    for item in lst:
        if previous is not nothing and previous == item:
            n += 1
        else:
            if previous is not nothing:
                newList.append((previous, n))
            n = 1
        previous = item
    if previous is not nothing:
        # Flush the final run.
        newList.append((previous, n))
    return newList
Пример #3
0
def scatter_to_workers(ncores, data, rpc=rpc, report=True):
    """ Scatter data directly to workers

    Items of ``data`` are handed out round-robin over the workers, each worker
    appearing once per core.  ``ncores`` must be a dict mapping worker
    identities to numbers of cores and ``data`` a dict mapping names to
    values; both are checked with ``assert``.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    assert isinstance(ncores, dict)
    assert isinstance(data, dict)

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([worker] * n_slots
                          for worker, n_slots in ncores.items()))
    names, data = list(zip(*data.items()))

    # Resume the rotation where the previous call stopped.
    offset = _round_robin_counter[0] % len(workers)
    worker_iter = drop(offset, cycle(workers))
    _round_robin_counter[0] += len(data)

    assignments = list(zip(worker_iter, names, data))

    # worker -> {name: value} payload for that worker
    payloads = {}
    for worker, triples in groupby(0, assignments).items():
        payloads[worker] = {key: value for _, key, value in triples}

    rpcs = {addr: rpc(addr) for addr in payloads}
    try:
        calls = [rpcs[address].update_data(data=payload, report=report)
                 for address, payload in payloads.items()]
        out = yield All(calls)
    finally:
        # Close every connection, whether or not the update succeeded.
        for connection in rpcs.values():
            connection.close_rpc()
    nbytes = merge(o['nbytes'] for o in out)

    # name -> list of workers that were given that name
    who_has = {}
    for key, triples in groupby(1, assignments).items():
        who_has[key] = [worker for worker, _, _ in triples]

    raise Return((names, who_has, nbytes))
Пример #4
0
def skip_while(predicate: Callable, xs: Iterable):
    """Yield the items of ``xs`` left after its leading run matching ``predicate``.

    Items are skipped for as long as ``predicate(item)`` is true; from the
    first item for which it is false onwards, everything is yielded
    unchanged (later matching items included).

    ``xs`` is walked only once, so one-shot iterators and generators behave
    the same as lists, and the predicate is evaluated once per skipped item.

    Args:
        predicate: Called with one item; a true result means "skip it".
        xs: Any iterable.

    Yields:
        The remaining items of ``xs`` in their original order.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import dropwhile

    yield from dropwhile(predicate, xs)
Пример #5
0
def scatter_to_workers(center, ncores, data, key=None, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based on
    how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    ``center`` may be an ``"ip:port"`` string, an ``rpc`` instance or an
    ``(ip, port)`` tuple; anything else raises ``TypeError``.  ``ncores`` may
    also be a plain iterable of workers, in which case every worker gets the
    same weight ``len(data) // len(ncores)``, but never less than 1.
    ``data`` may be a dict (its keys become the names) or a sized iterable of
    values, which are named ``"<key>-<index>"``; ``key`` defaults to a fresh
    ``uuid1`` string.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(result, who_has, nbytes)`` by raising ``Return``.  ``result`` holds one
    ``RemoteData`` per item, as a dict keyed by name when ``data`` was exactly
    a ``dict`` and as a list otherwise.

    See scatter for parameter docstring
    """
    if isinstance(center, str):
        ip, port = center.split(':')
    elif isinstance(center, rpc):
        ip, port = center.ip, center.port
    elif isinstance(center, tuple):
        ip, port = center
    else:
        raise TypeError("Bad type for center")

    if key is None:
        key = str(uuid.uuid1())

    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        # Floor the weight at 1: with fewer items than workers the integer
        # division gives 0, which used to leave `workers` empty and make the
        # modulo below raise ZeroDivisionError.
        k = max(1, len(data) // len(ncores))
        ncores = {worker: k for worker in ncores}

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([w] * nc for w, nc in ncores.items()))
    in_type = type(data)
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        # Endless name generator; zip() below stops it at len(data).
        names = ('%s-%d' % (key, i) for i in count(0))

    # Resume the rotation where the previous call stopped.
    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    # worker -> {name: value} payload for that worker
    d = {worker: {name: value for _, name, value in triples}
         for worker, triples in groupby(0, L).items()}

    out = yield All([rpc(ip=w_ip, port=w_port).update_data(data=v,
                                             close=True, report=report)
                 for (w_ip, w_port), v in d.items()])
    nbytes = merge([o[1]['nbytes'] for o in out])

    # name -> list of workers that were given that name
    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    result = [RemoteData(name, ip, port, result=value)
              for _, name, value in L]
    if in_type is dict:
        result = dict(zip(names, result))

    raise Return((result, who_has, nbytes))
Пример #6
0
def scatter_to_workers(ncores, data, rpc, report=True, serialize=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based on
    how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    ``ncores`` may also be a plain iterable of workers; each one is then
    passed through ``coerce_to_address`` and given the same weight
    ``len(data) // len(ncores)``, but never less than 1.  ``data`` may be a
    dict (its keys become the names) or a sized iterable of values, which are
    named with ``tokenize(value)`` or, when that raises, a fresh ``uuid1``
    string.  When ``serialize`` is true every value goes through ``dumps``
    before it is sent.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        # Floor the weight at 1: with fewer items than workers the integer
        # division gives 0, which used to leave `workers` empty and make the
        # modulo below raise ZeroDivisionError.
        k = max(1, len(data) // len(ncores))
        ncores = {coerce_to_address(worker): k for worker in ncores}

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([w] * nc for w, nc in ncores.items()))
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except Exception:
                # Value cannot be tokenized: fall back to a unique name.
                # Only Exception is caught so KeyboardInterrupt/SystemExit
                # still propagate.
                names.append(str(uuid.uuid1()))

    # Resume the rotation where the previous call stopped.
    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    # worker -> {name: (serialized) value} payload for that worker
    d = {
        worker:
        {key: dumps(value) if serialize else value
         for _, key, value in v}
        for worker, v in d.items()
    }

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([
            rpcs[address].update_data(data=v, report=report)
            for address, v in d.items()
        ])
    finally:
        # Close every connection, whether or not the update succeeded.
        for r in rpcs.values():
            r.close_rpc()
    nbytes = merge(o['nbytes'] for o in out)

    # name -> list of workers that were given that name
    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
Пример #7
0
def splicehtmlmap(f, html):
  """
  Walk html given in sexpr form and splice the output of f into it.

  The first element of `html` is yielded unchanged.  Every later element is
  handled by type:
    * list or tuple: a nested `splicehtmlmap(f, element)` generator is
      yielded (it is not expanded into a list here);
    * str: each item of `f(element)` is yielded in its place;
    * anything else: yielded unchanged.

  `f` should have signature `str` -> `list[sexpr form html elements]`
  eg f = lambda s: [['div', 'hello world']]
  """
  tag = t.first(html)
  yield tag
  for child in t.drop(1, html):
    if isinstance(child, str):
      yield from f(child)
    elif isinstance(child, (list, tuple)):
      yield splicehtmlmap(f, child)
    else:
      yield child
Пример #8
0
def splicehtmlmap(f, html):
    """
    Walk html given in sexpr form and splice the output of f into it.

    The first element of `html` is yielded unchanged.  Every later element is
    handled by type:
      * list or tuple: a nested `splicehtmlmap(f, element)` generator is
        yielded (it is not expanded into a list here);
      * str: each item of `f(element)` is yielded in its place;
      * anything else: yielded unchanged.

    `f` should have signature `str` -> `list[sexpr form html elements]`
    eg f = lambda s: [['div', 'hello world']]
    """
    tag = t.first(html)
    yield tag
    for child in t.drop(1, html):
        if isinstance(child, str):
            yield from f(child)
        elif isinstance(child, (list, tuple)):
            yield splicehtmlmap(f, child)
        else:
            yield child
Пример #9
0
def scatter_to_workers(ncores, data, report=True, serialize=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based on
    how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    ``ncores`` may also be a plain iterable of workers; each one is then
    passed through ``coerce_to_address`` and given the same weight
    ``len(data) // len(ncores)``, but never less than 1.  ``data`` may be a
    dict (its keys become the names) or a sized iterable of values, which are
    named with ``tokenize(value)`` or, when that raises, a fresh ``uuid1``
    string.  When ``serialize`` is true every value goes through ``dumps``
    before it is sent.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        # Floor the weight at 1: with fewer items than workers the integer
        # division gives 0, which used to leave `workers` empty and make the
        # modulo below raise ZeroDivisionError.
        k = max(1, len(data) // len(ncores))
        ncores = {coerce_to_address(worker): k for worker in ncores}

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([w] * nc for w, nc in ncores.items()))
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except Exception:
                # Value cannot be tokenized: fall back to a unique name.
                # Only Exception is caught so KeyboardInterrupt/SystemExit
                # still propagate.
                names.append(str(uuid.uuid1()))

    # Resume the rotation where the previous call stopped.
    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    d = groupby(0, L)
    # worker -> {name: (serialized) value} payload for that worker
    d = {worker: {key: dumps(value) if serialize else value
                   for _, key, value in v}
          for worker, v in d.items()}

    rpcs = {addr: rpc(addr) for addr in d}
    try:
        out = yield All([rpcs[address].update_data(data=v,
                                                 close=True, report=report)
                     for address, v in d.items()])
    finally:
        # Close every connection, whether or not the update succeeded.
        for r in rpcs.values():
            r.close_rpc()
    nbytes = merge(o['nbytes'] for o in out)

    # name -> list of workers that were given that name
    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
Пример #10
0
def scatter_to_workers(ncores, data, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based on
    how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    ``ncores`` may also be a plain iterable of workers; each one is then given
    the same weight ``len(data) // len(ncores)``, but never less than 1.
    Workers are unpacked as ``(ip, port)`` pairs when the connections are
    opened.  ``data`` may be a dict (its keys become the names) or a sized
    iterable of values, which are named with ``tokenize(value)`` or, when
    that raises, a fresh ``uuid1`` string.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        # Floor the weight at 1: with fewer items than workers the integer
        # division gives 0, which used to leave `workers` empty and make the
        # modulo below raise ZeroDivisionError.
        k = max(1, len(data) // len(ncores))
        ncores = {worker: k for worker in ncores}

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([w] * nc for w, nc in ncores.items()))
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except Exception:
                # Value cannot be tokenized: fall back to a unique name.
                # Only Exception is caught so KeyboardInterrupt/SystemExit
                # still propagate.
                names.append(str(uuid.uuid1()))

    # Resume the rotation where the previous call stopped.
    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    # worker -> {name: value} payload for that worker
    d = {worker: {name: value for _, name, value in triples}
         for worker, triples in groupby(0, L).items()}

    out = yield All([
        rpc(ip=w_ip, port=w_port).update_data(data=v,
                                              close=True,
                                              report=report)
        for (w_ip, w_port), v in d.items()
    ])
    nbytes = merge([o[1]['nbytes'] for o in out])

    # name -> list of workers that were given that name
    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
def optimalPath(threes: [[int]]) -> ([Step], [Step]):
    """Seed the two candidate paths from the first section and fold the rest.

    The first entry of ``threes`` is read as three prices by index: 0 is the
    direct price to A, 1 the direct price to B and 2 the price of crossing
    between them.  For each side the direct step is kept when it costs no
    more than going along the other side and crossing; otherwise the path is
    the other side's step followed by a ``"C"`` step.

    The two starting paths are wrapped in ``pvector`` and used as the initial
    value of ``accumulate(roadStep, ...)`` over the remaining entries; the
    result of that call is returned as is.

    Raises:
        IndexError: if ``threes`` is empty or its first entry has fewer than
            three items.
    """
    opening = threes[0]
    direct_a = opening[0]
    direct_b = opening[1]
    crossing = opening[2]

    # Reaching A: go straight, or take B's road and cross over.
    if direct_a <= direct_b + crossing:
        start_a = [Step("A", direct_a)]
    else:
        start_a = [Step("B", direct_b), Step("C", crossing)]

    # Reaching B: go straight, or take A's road and cross over.
    if direct_b <= direct_a + crossing:
        start_b = [Step("B", direct_b)]
    else:
        start_b = [Step("A", direct_a), Step("C", crossing)]

    seed = (pvector(start_a), pvector(start_b))
    remaining = drop(1, threes)
    return accumulate(roadStep, remaining, seed)
Пример #12
0
def scatter_to_workers(ncores, data, report=True):
    """ Scatter data directly to workers

    This distributes data in a round-robin fashion to a set of workers based on
    how many cores they have.  ncores should be a dictionary mapping worker
    identities to numbers of cores.

    ``ncores`` may also be a plain iterable of workers; each one is then given
    the same weight ``len(data) // len(ncores)``, but never less than 1.
    Workers are unpacked as ``(ip, port)`` pairs when the connections are
    opened.  ``data`` may be a dict (its keys become the names) or a sized
    iterable of values, which are named with ``tokenize(value)`` or, when
    that raises, a fresh ``uuid1`` string.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    if isinstance(ncores, Iterable) and not isinstance(ncores, dict):
        # Floor the weight at 1: with fewer items than workers the integer
        # division gives 0, which used to leave `workers` empty and make the
        # modulo below raise ZeroDivisionError.
        k = max(1, len(data) // len(ncores))
        ncores = {worker: k for worker in ncores}

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([w] * nc for w, nc in ncores.items()))
    if isinstance(data, dict):
        names, data = list(zip(*data.items()))
    else:
        names = []
        for x in data:
            try:
                names.append(tokenize(x))
            except Exception:
                # Value cannot be tokenized: fall back to a unique name.
                # Only Exception is caught so KeyboardInterrupt/SystemExit
                # still propagate.
                names.append(str(uuid.uuid1()))

    # Resume the rotation where the previous call stopped.
    worker_iter = drop(_round_robin_counter[0] % len(workers), cycle(workers))
    _round_robin_counter[0] += len(data)

    L = list(zip(worker_iter, names, data))
    # worker -> {name: value} payload for that worker
    d = {worker: {name: value for _, name, value in triples}
         for worker, triples in groupby(0, L).items()}

    out = yield All([rpc(ip=w_ip, port=w_port).update_data(data=v,
                                             close=True, report=report)
                 for (w_ip, w_port), v in d.items()])
    nbytes = merge([o[1]['nbytes'] for o in out])

    # name -> list of workers that were given that name
    who_has = {k: [w for w, _, _ in v] for k, v in groupby(1, L).items()}

    raise Return((names, who_has, nbytes))
Пример #13
0
def make_payment_entry(source_name):
    """Build one combined payment entry for a gym member's open invoices.

    Loads the 'Gym Member' document named ``source_name`` and fetches the
    'Sales Invoice' records for its customer whose ``docstatus`` is '1' and
    whose ``status`` is not 'Paid'.  A payment entry is created for each
    invoice with ``get_payment_entry``; the first one becomes the result and
    the paid amount, received amount and references of the others are merged
    into it.  When there are no matching invoices, ``_make_new_pe(member)``
    supplies the entry instead.

    Returns:
        The merged payment entry, after ``set_amounts()`` has been called on
        it.
    """
    member = frappe.get_doc('Gym Member', source_name)
    invoices = frappe.get_all(
        'Sales Invoice',
        filters=[
            ['customer', '=', member.customer],
            ['docstatus', '=', '1'],
            ['status', '!=', 'Paid'],
        ],
    )
    # Materialise the entries: a lazy map object is always truthy (so the
    # "no invoices" fallback never ran) and is consumed by first(), which
    # made drop(1, ...) skip one more entry than intended.
    pes = compose(
        list,
        partial(map, lambda x: get_payment_entry('Sales Invoice', x)),
        partial(pluck, 'name'),
    )(invoices)
    pe = first(pes) if pes else _make_new_pe(member)
    # Fold every further entry into the first one.
    for entry in drop(1, pes):
        pe.set('paid_amount', pe.paid_amount + entry.paid_amount)
        pe.set('received_amount', pe.received_amount + entry.received_amount)
        for ref in entry.references:
            pe.append('references', ref)
    pe.set_amounts()
    return pe
Пример #14
0
def update_summary_qc(data, key, base=None, secondary=None):
    """
    updates summary_qc, keyed by key. key is generally the program the quality
    control metrics came from. if key already exists, the specified
    base/secondary files are added as secondary files to the existing
    key, removing duplicates.

    stick files into summary_qc if you want them propagated forward
    and available for multiqc

    The given ``base``/``secondary`` and any files already stored under
    ``key`` are flattened, stripped of falsy entries and de-duplicated through
    a ``set``; the first file of that collection is stored as "base" and the
    rest as "secondary".  When no file is left at all, the summary is written
    back unchanged.
    NOTE(review): because of the ``set``, which file ends up as "base" does
    not follow the order the files were given in -- confirm this is intended.

    Returns ``data`` as returned by ``set_summary_qc``.
    """
    summary = deepish_copy(get_summary_qc(data, {}))
    files = [[base], [secondary],
             tz.get_in([key, "base"], summary, []),
             tz.get_in([key, "secondary"], summary, [])]
    files = list(set([x for x in flatten(files) if x]))
    # Guard the empty case: taking the first item of an empty list used to
    # raise StopIteration before the `if base` checks below could handle it.
    base = files[0] if files else None
    secondary = files[1:]
    if base and secondary:
        summary[key] = {"base": base, "secondary": secondary}
    elif base:
        summary[key] = {"base": base}
    data = set_summary_qc(data, summary)
    return data
Пример #15
0
def make_payment_entry(source_name):
    """Build one combined payment entry for a gym member's open invoices.

    Loads the "Gym Member" document named ``source_name`` and fetches the
    "Sales Invoice" records for its customer whose ``docstatus`` is "1" and
    whose ``status`` is not "Paid".  A payment entry is created for each
    invoice with ``get_payment_entry``; the first one becomes the result and
    the paid amount, received amount and references of the others are merged
    into it.  When there are no matching invoices, ``_make_new_pe(member)``
    supplies the entry instead.

    Returns:
        The merged payment entry, after ``set_amounts()`` has been called on
        it.
    """
    member = frappe.get_doc("Gym Member", source_name)
    open_invoice_filters = [
        ["customer", "=", member.customer],
        ["docstatus", "=", "1"],
        ["status", "!=", "Paid"],
    ]
    invoices = frappe.get_all("Sales Invoice", filters=open_invoice_filters)

    # One payment entry per invoice name, built eagerly into a list.
    pes = [
        get_payment_entry("Sales Invoice", invoice_name)
        for invoice_name in pluck("name", invoices)
    ]

    if pes:
        pe = first(pes)
    else:
        pe = _make_new_pe(member)

    # Fold every further entry into the first one.
    for extra in drop(1, pes):
        pe.set("paid_amount", pe.paid_amount + extra.paid_amount)
        pe.set("received_amount", pe.received_amount + extra.received_amount)
        for reference in extra.references:
            pe.append("references", reference)

    pe.set_amounts()
    return pe
Пример #16
0
def scatter_to_workers(ncores, data, rpc=rpc, report=True, serializers=None):
    """ Scatter data directly to workers

    Items of ``data`` are handed out round-robin over the workers, each worker
    appearing once per core.  ``ncores`` must be a dict mapping worker
    identities to numbers of cores and ``data`` a dict mapping names to
    values; both are checked with ``assert``.  ``serializers`` is forwarded
    untouched to every ``update_data`` call.

    This is a generator-based coroutine: it yields ``All([...])`` over the
    per-worker ``update_data`` calls and hands back
    ``(names, who_has, nbytes)`` by raising ``Return``.

    See scatter for parameter docstring
    """
    assert isinstance(ncores, dict)
    assert isinstance(data, dict)

    # One slot per core, so workers with more cores come up more often.
    workers = list(concat([worker] * n_slots
                          for worker, n_slots in ncores.items()))
    names, data = list(zip(*data.items()))

    # Resume the rotation where the previous call stopped.
    offset = _round_robin_counter[0] % len(workers)
    worker_iter = drop(offset, cycle(workers))
    _round_robin_counter[0] += len(data)

    assignments = list(zip(worker_iter, names, data))

    # worker -> {name: value} payload for that worker
    payloads = {}
    for worker, triples in groupby(0, assignments).items():
        payloads[worker] = {key: value for _, key, value in triples}

    rpcs = {addr: rpc(addr) for addr in payloads}
    try:
        calls = [rpcs[address].update_data(data=payload, report=report,
                                           serializers=serializers)
                 for address, payload in payloads.items()]
        out = yield All(calls)
    finally:
        # Close every connection, whether or not the update succeeded.
        for connection in rpcs.values():
            connection.close_rpc()

    nbytes = merge(o['nbytes'] for o in out)

    # name -> list of workers that were given that name
    who_has = {}
    for key, triples in groupby(1, assignments).items():
        who_has[key] = [worker for worker, _, _ in triples]

    raise Return((names, who_has, nbytes))
Пример #17
0
def rest(x):
    """Return ``drop(1, x)``: ``x`` with its first element left out.

    NOTE(review): ``drop`` is presumably ``toolz.drop``, in which case the
    result is a lazy iterator rather than a list -- confirm against the
    file's imports.
    """
    return drop(1, x)
Пример #18
0
def cases(f):
    """Return ``map(int, ...)`` over every item of ``f`` except the first.

    NOTE(review): ``f`` is presumably an iterable of lines whose first line
    is a header or count that must not be parsed -- confirm with the caller.
    """
    return map(int, drop(1, f))
Пример #19
0
 def drop(seq):
     """Return ``toolz.drop(n, seq)``.

     ``n`` is not a parameter of this function; it is looked up in the
     surrounding scope when the function is called.
     """
     return toolz.drop(n, seq)
Пример #20
0
def rest(x):
    """Return ``drop(1, x)``: ``x`` with its first element left out.

    NOTE(review): ``drop`` is presumably ``toolz.drop``, in which case the
    result is a lazy iterator rather than a list -- confirm against the
    file's imports.
    """
    return drop(1, x)
Пример #21
0
 def get_points(self):
     """Return a generator of SimplexPoint objects around the current point.

     The candidate positions are ``self.point + self.stepsize * self.simplex``
     passed through ``self.stencil.to_grid``.  The position at index 0 is
     left out; every other one becomes ``SimplexPoint(position, self, index)``
     with its original enumeration index.
     """
     grid_points = self.stencil.to_grid(
         self.point + self.stepsize * self.simplex)
     # Skip the (index, position) pair for index 0.
     indexed = drop(1, enumerate(grid_points))
     return (SimplexPoint(position, self, index)
             for index, position in indexed)
Пример #22
0
def until_convergence(it: Iterator[Params],
                      eq: Callable = lambda x: x[0] != x[1]) -> Params:
    """Return the first value of ``it`` at which the sequence has converged.

    Consecutive values are paired as ``(previous, current)``.  Pairs are
    skipped for as long as ``eq(pair)`` is true; the first element of the
    first pair for which it is false is returned.  With the default ``eq``
    that is the first value equal to its successor.

    The input is split with ``itertools.tee`` so that one-shot iterators are
    paired correctly as well; only as many values as needed are consumed, so
    endless iterators are fine as long as they converge.

    Args:
        it: Iterable of successive values.
        eq: "Keep going" test, called with one ``(previous, current)`` pair.
            Despite its name, a true result means *not yet converged*.

    Returns:
        The ``previous`` element of the first pair for which ``eq`` is false.

    Raises:
        StopIteration: if ``it`` ends before any pair satisfies the
            convergence test.
    """
    current, upcoming = itertools.tee(it)
    # Shift the second copy by one so zip() lines up neighbours.
    next(upcoming, None)
    pairs = zip(current, upcoming)
    return next(itertools.dropwhile(eq, pairs))[0]