def test() -> None:
    from my.coding.commits import commits

    all_commits = list(commits())
    assert len(all_commits) > 100

    buckets = bucket(all_commits, key=lambda c: c.repo)
    by_repo = {k: list(buckets[k]) for k in buckets}
def create_partition_buffers(stream):
    bucketed_stream = more_itertools.bucket(stream, key=attrgetter("partition"))
    partition_buffers: Dict[int, Iterator[StreamEvent]] = {
        p: more_itertools.peekable(iter(bucketed_stream[p]))
        for p in range(partition_count)
    }
    global_event_buffer = bucketed_stream[StreamEvent.ALL_PARTITIONS]
    return partition_buffers, global_event_buffer
def test() -> None:
    from my.youtube.takeout import watched, Watched

    videos = [w for w in watched() if not isinstance(w, Exception)]
    assert len(videos) > 1000

    # results in nicer errors, otherwise annoying to check against thousands of videos
    grouped = bucket(videos, key=lambda w: (w.url, w.title))

    w1 = Watched(
        url='https://www.youtube.com/watch?v=hTGJfRPLe08',
        title='Jamie xx - Gosh',
        when=pytz.timezone('Europe/London').localize(
            datetime(year=2018, month=6, day=21, hour=6, minute=48, second=34)),
    )
    assert w1 in list(grouped[(w1.url, w1.title)])

    w2 = Watched(
        url='https://www.youtube.com/watch?v=IZ_8b_Ydsv0',
        title='Why LESS Sensitive Tests Might Be Better',
        when=pytz.utc.localize(
            datetime(year=2021, month=1, day=15, hour=17, minute=54, second=12)),
    )
    assert w2 in list(grouped[(w2.url, w2.title)])
def select_results(version, url_list):
    """Prepare the processing of test results for documents whose tests have all been completed.

    Fetches the results from the results collection and the required values
    from the documents produced by the scraping. The final test results are
    selected from the testers' majority choices.
    """
    dbfinder = mongo.MongoLoad(
        {'img_url': {'$in': url_list}, 'search_version': version},
        {'img_url': 1, 'locations_selected': 1, 'sufficient': 1, '_id': 0})
    group_results = bucket(dbfinder.retrieve('Resultats_Test_Expert_1'),
                           key=lambda x: x['img_url'])

    dbfinder.reinit(
        {'img_url': {'$in': url_list}, 'search_version': version},
        {'search_version': 1, 'country': 1, 'img_url': 1,
         'tag_list': 1, 'location_list': 1, '_id': 0})

    final_results = []
    for doc in dbfinder.retrieve('Resultats_RGN'):
        # Materialize the iterator since it is traversed twice below.
        result = list(group_results[doc['img_url']])
        doc['locations_selected'] = [
            comp.count(True) > len(comp) / 2
            for comp in zip(*[res['locations_selected'] for res in result])
        ]
        doc['sufficient'] = sum(1 if b else -1 for b in [res['sufficient'] for res in result]) > 0
        doc['processed'] = False
        final_results.append(doc)

    return final_results
def radix_sort(A, max_digits):
    """More elegant, but far too slow due to the call to bucket()."""
    radix_keys = list(map(str, range(10)))
    B = [str(a_i).zfill(max_digits) for a_i in A]
    for digit_index in range(-1, -max_digits - 1, -1):
        buckets = bucket(B, key=lambda b_i: b_i[digit_index])
        B = []
        for j in radix_keys:
            B.extend(buckets[j])
    return [int(b_i) for b_i in B]
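
# A quick sanity check for radix_sort above -- hypothetical input, assuming
# `bucket` is imported from more_itertools and all values are non-negative
# integers with at most `max_digits` digits:
from more_itertools import bucket

values = [170, 45, 75, 90, 802, 24, 2, 66]
assert radix_sort(values, max_digits=3) == sorted(values)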
def research():
    flask.current_app.logger.info('Serving RESEARCH')
    page = pages.get_or_404('research')
    articles = mitt.bucket(sorted(topics, key=lambda t: t.path),
                           key=lambda a: a.meta['group'])
    refs = get_refs()
    return flask.render_template('research.html',
                                 active='research',
                                 page=page,
                                 groups=groups,
                                 articles=articles,
                                 refs=refs)
def by_night() -> Dict[date, Emfit]:
    res: Dict[date, Emfit] = {}
    # TODO shit. I need some sort of interrupted sleep detection?
    grouped = bucket(get_datas(), key=lambda s: s.date)
    for dd in grouped:
        sleeps = list(grouped[dd])
        if len(sleeps) > 1:
            logger.warning("multiple sleeps per night, not handled yet: %s", sleeps)
            continue
        [s] = sleeps
        res[s.date] = s
    return res
def bucket_merge(
    iterable: Iterable[T],
    sort_key: Callable[[T], Any],
    bucket_key: Callable[[T], U],
    buckets: Iterable[U],
) -> Iterator[T]:
    """Sort a partially sorted iterable lazily.

    If the iterable can be split into individually sorted buckets
    then this function will sort it.
    """
    buckets_ = set(buckets)
    iterables = more_itertools.bucket(iterable, bucket_key, lambda x: x in buckets_)
    yield from imerge((iterables[bucket] for bucket in buckets_), key=sort_key)
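
# A usage sketch for bucket_merge above. `imerge` is not shown in the snippet;
# this stand-in assumes it is a k-way merge of individually sorted iterators,
# which heapq.merge provides.
import heapq

def imerge(iterables, key):
    return heapq.merge(*iterables, key=key)

# Evens and odds are each internally sorted, so the stream sorts lazily.
data = [1, 0, 3, 2, 5, 4]
merged = bucket_merge(data, sort_key=lambda x: x, bucket_key=lambda x: x % 2, buckets=[0, 1])
assert list(merged) == [0, 1, 2, 3, 4, 5]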
def CompleteSystem(S, context):
    """
    Algorithm C1, p. 385

    >>> tvars = var("x y z")
    >>> w = function("w")(*tvars)
    >>> # these DPs are constructed from C1, pp 384
    >>> h1 = diff(w, x,x,x, y,y,z,z)
    >>> h2 = diff(w, x,x,x, z,z,z)
    >>> h3 = diff(w, x, y, z,z,z)
    >>> h4 = diff(w, x, y)
    >>> ctx = Context((w,), (x,y,z), Mgrlex)
    >>> dps = [_Differential_Polynomial(_, ctx) for _ in [h1, h2, h3, h4]]
    >>> cs = CompleteSystem(dps, ctx)
    >>> # things are sorted up
    >>> for _ in cs: _.show()
    diff(w(x, y, z), x, y)
    diff(w(x, y, z), x, y, z)
    diff(w(x, y, z), x, x, y)
    diff(w(x, y, z), x, y, z, z)
    diff(w(x, y, z), x, x, y, z)
    diff(w(x, y, z), x, x, x, y)
    diff(w(x, y, z), x, y, z, z, z)
    diff(w(x, y, z), x, x, y, z, z)
    diff(w(x, y, z), x, x, x, y, z)
    diff(w(x, y, z), x, x, x, y, y)
    diff(w(x, y, z), x, x, y, z, z, z)
    diff(w(x, y, z), x, x, x, z, z, z)
    diff(w(x, y, z), x, x, x, y, z, z)
    diff(w(x, y, z), x, x, x, y, y, z)
    diff(w(x, y, z), x, x, x, y, z, z, z)
    diff(w(x, y, z), x, x, x, y, y, z, z)

    >>> # example from Schwarz, pp 54
    >>> w = function("w")(x,y)
    >>> z = function("z")(x,y)
    >>> g1 = diff(z,y,y) + diff(z,y)/(2*y)
    >>> g5 = diff(z,x,x,x) + diff(w,y,y)*8*y**2 + diff(w,x,x)/y - diff(z,x,y)*4*y**2 - diff(z,x)*32*y - 16*w
    >>> g6 = diff(z,x,x,y) - diff(z,y,y)*4*y**2 - diff(z,y)*8*y
    >>> ctx = Context((w,z), (x,y), Mgrlex)
    >>> dps = [_Differential_Polynomial(_, ctx) for _ in [g1, g5, g6]]
    >>> cs = CompleteSystem(dps, ctx)
    >>> for _ in cs: print(_)
    diff(z(x, y), y, y) + (1/2/y) * diff(z(x, y), y)
    diff(z(x, y), x, y, y) + (1/2/y) * diff(z(x, y), x, y)
    diff(z(x, y), x, x, y) + (-4*y^2) * diff(z(x, y), y, y) + (-8*y) * diff(z(x, y), y)
    diff(z(x, y), x, x, x) + (1/y) * diff(w(x, y), x, x) + (8*y^2) * diff(w(x, y), y, y) + (-4*y^2) * diff(z(x, y), x, y) + (-32*y) * diff(z(x, y), x) + (-16) * w(x, y)
    """
    s = bucket(S, key=lambda d: d.Lfunc())
    res = flatten([complete(s[k], context) for k in s])
    return Reorder(res, context, ascending=True)
def get_spot(self, t: float) -> float:
    if not self.spots:
        raise RuntimeError("Cannot interpolate without spot rates")
    elif t in self.spots:
        return self.spots[t]
    else:
        groups = more_itertools.bucket(self.spots.keys(), key=lambda x: x > t)
        L = max(groups[False], default=None)  # closest key on the left
        R = min(groups[True], default=None)   # closest key on the right
        if L is None:  # if none are smaller
            return self.spots[R]  # flat interpolation
        elif R is None:  # if none are bigger
            return self.spots[L]  # flat interpolation
        else:
            m = (self.spots[R] - self.spots[L]) / (R - L)  # get slope
            return m * (t - L) + self.spots[L]  # linear interpolation
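
# The host class isn't shown in the snippet above; this hypothetical `Curve`
# just carries the `spots` dict (maturity -> rate) the method expects.
import more_itertools

class Curve:
    def __init__(self, spots):
        self.spots = spots

    get_spot = get_spot  # bind the standalone function above as a method

curve = Curve({1.0: 0.02, 2.0: 0.04})
assert abs(curve.get_spot(1.5) - 0.03) < 1e-12  # linear interpolation
assert curve.get_spot(0.5) == 0.02              # flat extrapolation on the left
assert curve.get_spot(3.0) == 0.04              # flat extrapolation on the right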
async def handle_initial(self, filename):
    """Handles reading the special 'initial' file.

    The file contains org unit data, as well as data on the associated details.
    The initial org unit file contains historic data, so a minimal set of
    create/edit payloads are created accordingly.
    """
    org_units = los_files.read_csv(filename, OrgUnit)
    await self.handle_addresses(org_units, filename)

    unit_payloads = self.create_unit_payloads(org_units)
    detail_payloads = await self.create_detail_payloads(org_units)
    payloads = list(unit_payloads) + list(detail_payloads)

    # Bucket all payloads referring to the same object
    uuid_buckets = bucket(payloads, key=lambda payload: payload["uuid"])
    sorted_buckets = map(
        lambda uuid_key: sorted(uuid_buckets[uuid_key], key=lambda x: x["validity"]["from"]),
        uuid_buckets,
    )

    consolidated_buckets = list(map(self.consolidate_payloads, sorted_buckets))
    split_lists = map(lambda x: (x[0], x[1:]), consolidated_buckets)
    heads, tails = unzip(split_lists)

    # OS2mo reads an object before performing an edit to it, so we need to ensure
    # that we don't perform multiple edits to an object in parallel, which could
    # cause one edit to be overwritten by another.
    # We create layers containing at most one edit request for each org unit UUID,
    # and execute the layers sequentially, while allowing the importer to submit
    # the individual requests in a layer in parallel.
    edit_payloads = map(partial(map, mo_payloads.convert_create_to_edit), tails)
    edit_layers = zip_longest(*edit_payloads)
    edit_layers_filtered = map(partial(filter, None.__ne__), edit_layers)

    async with util.get_client_session() as session:
        await util.create_details(session, heads)
        for edit_layer in edit_layers_filtered:
            await util.edit_details(session, edit_layer)
def get_kle(org_unit_uuid: str, mh: MoraHelper) -> Tuple[List[str], List[str]]:
    present = mh._mo_lookup(org_unit_uuid, "ou/{}/details/kle?validity=present")
    future = mh._mo_lookup(org_unit_uuid, "ou/{}/details/kle?validity=future")
    kles = present + future

    def get_kle_tuples(
        kles: List[dict],
    ) -> Generator[Tuple[str, str], None, None]:
        for kle in kles:
            number = kle["kle_number"]["user_key"]
            for aspect in kle["kle_aspect"]:
                yield number, aspect["scope"]

    kle_tuples = get_kle_tuples(kles)
    buckets = bucket(kle_tuples, key=itemgetter(1))

    interest = map(itemgetter(0), buckets["INDSIGT"])
    performing = map(itemgetter(0), buckets["UDFOERENDE"])

    return list(interest), list(performing)
def _get_forms(
    self,
) -> (forms.AccountUpdateForm, forms.PasswordChangeForm, forms.TOTPCheckForm):
    """Bind forms appropriately for method."""
    request = self.request
    # TODO: switch to normal attribute access after this is fixed
    # https://youtrack.jetbrains.com/issue/PY-37457
    post_data: QueryDict = getattr(request, "POST")

    # Bucket into new QueryDicts based on prefix. Must use MultiValueDict.update
    # to enforce list containers for values.
    buckets = bucket(post_data.items(), lambda pair: pair[0].partition("-")[0])
    account_update = QueryDict(mutable=True)
    account_update.update(dict(buckets[self.ACCOUNT_FORM_PREFIX]))
    password_change = QueryDict(mutable=True)
    password_change.update(dict(buckets[self.PASSWORD_FORM_PREFIX]))
    otp_check = QueryDict(mutable=True)
    otp_check.update(dict(buckets[self.OTP_FORM_PREFIX]))

    # When data is set to None, the form will not bind.
    return (
        self.update_account_form_class(
            instance=request.user,
            data=account_update or None,
            prefix=self.ACCOUNT_FORM_PREFIX,
        ),
        self.change_password_form_class(
            request.user,
            data=password_change or None,
            prefix=self.PASSWORD_FORM_PREFIX,
        ),
        self.otp_check_form_class(
            request=request,
            data=otp_check or None,
            prefix=self.OTP_FORM_PREFIX,
        ),
    )
def create_employee_payloads(self, persons):
    cpr_buckets = bucket(persons, key=lambda x: x.cpr)
    # Every person row contains the same info, so we just pick one
    unique_persons = map(lambda key: first(cpr_buckets[key]), cpr_buckets)
    return map(self.generate_employee_payload, unique_persons)
def get_items(self) -> Mirror.Results:
    from my import zotero

    errors = []
    good = []
    for a in zotero.annotations():
        if isinstance(a, Exception):
            errors.append(a)
        else:
            good.append(a)
    for e in errors:
        yield error(e)

    groups = bucket(good, key=lambda a: a.item)
    for item in groups:
        file_annotations = groups[item]

        def chit():
            for a in file_annotations:
                parts = []
                text = a.text
                if text is not None:
                    # todo not sure about it here... maybe should rely on softwrap in emacs instead?
                    text = '\n'.join(wrap(text, width=config.MAX_LINE_WIDTH))
                    text = literal(text)
                    parts.append(text)
                comment = a.comment
                if comment is not None:
                    parts.append(comment)

                page1 = a.page + 1  # NOTE: zotero uses 0-indexing, pdfview uses 1-indexing
                body = '\n'.join(parts)
                color = a.color_human
                tags = list(a.tags)
                # todo not sure which is best?
                tags.append(color)
                properties = {
                    'ZOTERO_COLOR': color,
                }
                if len(a.tags) > 0:
                    # zotero tags can be multi-word? guess worth adding just in case
                    properties['ZOTERO_TAGS'] = ', '.join(a.tags)  # not sure what's the best separator...
                # todo not sure about it...
                mtodo: Optional[str] = None
                if 'todo' in {t.lower() for t in tags}:
                    mtodo = 'TODO'
                heading = docview_link(path=item.file, title=f'page {page1}', page1=page1)
                if comment is not None:
                    # try to display first few words?
                    cline = wrap(comment, width=config.MAX_LINE_WIDTH)[0]
                    heading = heading + ' ' + cline
                # todo would be nice to align tags, maybe...
                yield node(
                    todo=mtodo,
                    heading=dt_heading(
                        a.added,
                        heading,
                    ),
                    tags=tags,
                    properties=properties,
                    body=body,
                )

        body = ''
        if url := item.url:
            body = url
        yield node(
            heading=docview_link(path=item.file, title=item.title),
            tags=item.tags,
            body=body,
            children=list(chit()),
        )
def compute_hal_or_glove_co_occurrences(
    stream: Iterable[Tuple[str, Iterable[str]]],
    *,
    document_index: DocumentIndex,
    token2id: Mapping[str, int],
    window_size: int,
    distance_metric: int,  # 0, 1, 2
    normalize: str = 'size',
    method: str = 'HAL',
    zero_diagonal: bool = True,
    direction_sensitive: bool = False,
    partition_column: str = 'year',
):
    """Computes co-occurrence as specified by either `Glove` or `Hyperspace Analogue to Language` (HAL)

    NOTE:
    - The passed document index MUST be in the same sequence as the passed sequence of tokens

    Parameters
    ----------
    stream : Iterable[Tuple[str, Iterable[str]]]
        Sequence of (filename, tokens) tuples
    document_index : DocumentIndex
        Document catalogue
    window_size : int
        [description]
    distance_metric : int
        [description]
    normalize : str, optional
        [description], by default 'size'
    method : str, optional
        [description], by default 'HAL'
    zero_diagonal : bool, optional
        [description], by default True
    direction_sensitive : bool, optional
        [description], by default False

    Returns
    -------
    [type]
        [description]
    """
    # if issubclass(type(corpus), CorpusABC):
    #     doc_terms = [[t.lower().strip('_') for t in terms if len(t) > 2] for terms in corpus.get_texts()]

    if document_index is None:
        raise CoOccurrenceError("expected document index found None")

    if partition_column not in document_index.columns:
        raise CoOccurrenceError(f"expected `{partition_column}` not found in document index")

    if token2id is None:
        raise CoOccurrenceError("expected `token2id` found None")

    # token2id = generate_token2id(doc_terms)

    def get_bucket_key(item: Tuple[str, Iterable[str]]) -> int:
        if not isinstance(item, tuple):
            raise CoOccurrenceError(f"expected stream of (name, tokens) tuples found {type(item)}")

        filename = item[0]
        if not isinstance(filename, str):
            raise CoOccurrenceError(f"expected filename (str) found {type(filename)}")

        return int(document_index.loc[filename][partition_column])

    total_results = []
    key_streams = more_itertools.bucket(stream, key=get_bucket_key, validator=None)
    keys = sorted(list(key_streams))

    metadata = []
    for i, key in tqdm(enumerate(keys), position=0, leave=True):
        key_stream: FilenameTokensTuple = key_streams[key]
        keyed_document_index = document_index[document_index[partition_column] == key]

        metadata.append(
            dict(
                document_id=i,
                filename='year_{year}.txt',
                document_name='year_{year}',
                year=key,
                n_docs=len(keyed_document_index),
            ))

        logger.info(f'Processing {key}...')

        tokens_stream = (tokens for _, tokens in key_stream)

        vectorizer = (
            HyperspaceAnalogueToLanguageVectorizer(token2id=token2id).fit(
                tokens_stream, size=window_size, distance_metric=distance_metric)
            if method == "HAL"
            else GloveVectorizer(token2id=token2id).fit(tokens_stream, size=window_size))

        co_occurrence = vectorizer.to_dataframe(
            normalize=normalize,
            zero_diagonal=zero_diagonal,
            direction_sensitive=direction_sensitive)

        co_occurrence[partition_column] = key

        total_results.append(
            co_occurrence[['year', 'x_term', 'y_term', 'nw_xy', 'nw_x', 'nw_y', 'cwr']],
        )

        # if i == 5: break

    co_occurrences = pd.concat(total_results, ignore_index=True)
    co_occurrences['cwr'] = co_occurrences.cwr / np.max(co_occurrences.cwr, axis=0)

    return co_occurrences
        [[3, 64, 96, 92], [64, 96, 19, 128, 0, 64, 0, 0, 0, 71, 38, 2, 250, 0, 160, 0, 27, 242, 118, 0, 0]],
        [[3, 64, 97, 91], [64, 97, 18, 128, 0, 78, 1, 10, 1, 11, 1, 17, 1, 18, 2, 4, 1, 0, 0, 59]],
        [[3, 64, 98, 90], [64, 98, 19, 128, 0, 128, 94, 1, 210, 0, 0, 1, 253, 255, 0, 100, 0, 0, 0, 0, 184]],
        [[3, 64, 99, 89], [64, 99, 10, 128, 0, 1, 112, 100, 50, 21, 1, 181]],
        [[3, 64, 100, 88], [64, 100, 14, 128, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203]],
        [[3, 64, 97, 91], [64, 97, 18, 128, 0, 79, 1, 10, 1, 11, 1, 17, 1, 18, 2, 4, 1, 0, 0, 58]],
    ]
]

three_dollars = b'$$$'

# Some commands have multiple responses, in which case we might receive any one of them.
command_to_responses = bucket(
    simulated_command_responses,
    key=lambda command_and_response: command_and_response[0])
command_to_responses = {
    k: [list(response[1]) for response in command_to_responses[k]]
    for k in command_to_responses
}


class DaikinSimulator:
    """
    Simulates a Daikin Altherma's responses to serial commands.

    Used by tests.
    """
    response_buffer = b''

    def write(self, command):
        """
        Write a simulated command.
        """
async def bulk_ensure(
    ctx,
    dry_run: bool,
    filename: str,
):
    """Ensure the entries in the json file exist in MOX.

    Currently only bulk loads classes.
    """
    mox_helper = await create_mox_helper(ctx.obj["mox.base"])

    # Load file and fetch
    with open(filename) as json_file:
        data = json.load(json_file)

    # Construct classes by applying __apply_to_all__ to all elements within a
    # single block of classes and flattening the structure to be a simple
    # list of classes
    def construct_entry(bvn, item, apply_to_all):
        return {**item, **apply_to_all, "bvn": bvn}

    def construct_block(block):
        apply_to_all = block.pop("__apply_to_all__", {})
        classes = map(lambda entry: construct_entry(*entry, apply_to_all), block.items())
        return classes

    facets = []
    if "facets" in data:
        facets = flatten(map(construct_block, data["facets"]))
        facets = list(facets)

    classes = []
    if "classes" in data:
        classes = flatten(map(construct_block, data["classes"]))
        classes = list(classes)

    # Fetch default organisation
    org_uuid = None
    org_uuid = org_uuid or await mox_helper.read_element_organisation_organisation(bvn="%")

    def enrich_with_org_unit(entry):
        entry["org_uuid"] = org_uuid
        return entry

    # Enrich facets with default organisation
    facets = map(enrich_with_org_unit, facets)
    # Translate facet json to lora_facet
    facets = map(lambda facet: lora_facet(**facet), facets)
    # Prepare to output
    facets = list(facets)

    # Print for dry run
    if dry_run:
        for facet in facets:
            mox_helper.validate_klassifikation_facet(facet)
            message = json.dumps(facet, indent=4, sort_keys=True)
            click.secho(message, fg="green")
        return

    # POST for non-dry runs
    tasks = list(map(mox_helper.get_or_create_klassifikation_facet, facets))
    results = await asyncio.gather(*tasks)
    for uuid, created in results:
        print_created(uuid, created)

    # Find all unique facet bvns used by the classes, and translate to UUIDs
    required_facets = set(map(itemgetter("facet"), classes))

    async def construct_facet_bvn_to_uuid_map(facet_bvns):
        async def create_bvn_to_uuid_tuple(facet_bvn):
            return (
                facet_bvn,
                await mox_helper.read_element_klassifikation_facet(bvn=facet_bvn),
            )

        tasks = list(map(create_bvn_to_uuid_tuple, facet_bvns))
        return dict(await asyncio.gather(*tasks))

    facet_map = await construct_facet_bvn_to_uuid_map(required_facets)

    async def enrich_classes(classes):
        # Find all unique parent bvns used by the classes, and translate to UUIDs
        required_parents = set(
            {clazz["parent"] for clazz in classes if "parent" in clazz})

        async def construct_parent_bvn_to_uuid_map(parent_bvns):
            async def create_bvn_to_uuid_tuple(parent_bvn):
                return (
                    parent_bvn,
                    await mox_helper.read_element_klassifikation_klasse(bvn=parent_bvn),
                )

            tasks = list(map(create_bvn_to_uuid_tuple, parent_bvns))
            return dict(await asyncio.gather(*tasks))

        parent_map = await construct_parent_bvn_to_uuid_map(required_parents)

        # Enrich classes with default organisation
        classes = map(enrich_with_org_unit, classes)

        # Translate class facet to facet_uuid
        def class_facet_to_facet_uuid(clazz):
            facet_bvn = clazz.pop("facet")
            clazz["facet_uuid"] = facet_map[facet_bvn]
            return clazz

        classes = map(class_facet_to_facet_uuid, classes)

        # Translate class parent to parent_uuid
        def class_parent_to_parent_uuid(clazz):
            parent_bvn = clazz.pop("parent", None)
            if parent_bvn:
                clazz["parent_uuid"] = parent_map[parent_bvn]
            return clazz

        classes = map(class_parent_to_parent_uuid, classes)
        return classes

    # Partition into buckets by layer
    def set_layer(clazz):
        if "__layer__" not in clazz:
            clazz["__layer__"] = 1
        return clazz

    classes = map(set_layer, classes)
    buckets = bucket(classes, key=itemgetter("__layer__"))
    layers = sorted(list(buckets))
    for layer in layers:
        classes = list(buckets[layer])
        classes = await enrich_classes(classes)

        # Remove the layer key
        def remove_key(key):
            def worker(clazz):
                del clazz[key]
                return clazz

            return worker

        classes = map(remove_key("__layer__"), classes)
        # Translate class json to lora_klasse
        classes = map(lambda clazz: lora_klasse(**clazz), classes)
        # Prepare to output
        classes = list(classes)

        # Print for dry run
        if dry_run:
            for clazz in classes:
                mox_helper.validate_klassifikation_klasse(clazz)
                message = json.dumps(clazz, indent=4, sort_keys=True)
                click.secho(message, fg="green")
            return

        # POST for non-dry runs
        tasks = list(map(mox_helper.get_or_create_klassifikation_klasse, classes))
        results = await asyncio.gather(*tasks)
        for uuid, created in results:
            print_created(uuid, created)
def bucket(container, bucket_key=lambda x: x, sort_key=None):
    b = more_itertools.bucket(container, bucket_key)
    return {k: sorted(b[k], key=sort_key) for k in b}
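
# For example (hypothetical data): group words by first letter, with each
# bucket coming back sorted.
words = ['pear', 'apple', 'plum', 'apricot']
assert bucket(words, bucket_key=lambda w: w[0]) == {
    'p': ['pear', 'plum'],
    'a': ['apple', 'apricot'],
}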
""" import docopt import shutil from tempfile import mkdtemp from pathlib import Path from more_itertools import bucket ROOT_DIR = Path(__file__).parent CASE_DIR = ROOT_DIR / 'test_cases' import sys; sys.path.append(str(ROOT_DIR / 'api')) import nestedtext_official_tests as official if __name__ == '__main__': args = docopt.docopt(__doc__) tmp_dir = Path(mkdtemp(prefix='renumber_test_cases_')) cases = official.load_test_cases(args['<cases>']) families = bucket(cases, key=lambda x: x.family) for key in families: sorted_cases = sorted(families[key], key=lambda x: x.num) d = len(str(len(sorted_cases))) for i, case in enumerate(sorted_cases, 1): shutil.move(case.dir, tmp_dir / f'{case.family}_{i:0{d}}') for dir in tmp_dir.iterdir(): shutil.move(dir, CASE_DIR / dir.name) tmp_dir.rmdir()
# Split based on Object Type
import more_itertools

class Cube: pass
class Circle: pass
class Triangle: pass

shapes = [Circle(), Cube(), Circle(), Circle(), Cube(), Triangle(), Triangle()]
s = more_itertools.bucket(shapes, key=lambda x: type(x))
# s -> <more_itertools.more.bucket object at 0x7fa65323f210>

list(s[Cube])
# [<__main__.Cube object at 0x7f394a0633c8>, <__main__.Cube object at 0x7f394a063278>]
list(s[Circle])
# [<__main__.Circle object at 0x7f394a063160>, <__main__.Circle object at 0x7f394a063198>, <__main__.Circle object at 0x7f394a063320>]
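
# One caveat worth noting: iterating the bucket object itself yields the
# distinct keys, which consumes the rest of the source iterable; items for
# keys not yet requested are cached, so they can still be retrieved afterwards.
set(s)
# {<class '__main__.Cube'>, <class '__main__.Circle'>, <class '__main__.Triangle'>}
list(s[Triangle])
# [<__main__.Triangle object at ...>, <__main__.Triangle object at ...>]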
def offer_uw_testing(*, at: str, log_offers: bool, db: DatabaseSession,
                     action: DatabaseSessionAction):
    LOG.debug(f"Offering UW Husky Coronavirus Testing @ {at}")

    dry_run = action is DatabaseSessionAction.DRY_RUN

    # This uses a mutable quota to track available vs. used testing capacity
    # for given time periods. An alternate approach would be to use a
    # log/ledger (like we keep in receiving.* tables) which records credits
    # (test capacity scheduled for release at a certain time) and debits
    # (tests offered at a certain time). While this requires recalculating the
    # balance every run, we would be able to query when tests were released and
    # keep more metadata about that. These same benefits could be realized by
    # turning our normal logging output into structured event logs. I think
    # that's preferable, so decided not to implement as a ledger right now.
    #   -trs, 17 Sept & 13 Oct 2020

    # Look up the quota for the current time, locking it for update at the end
    # after we make offers.
    #
    # XXX TODO: As a future improvement, automatically pick up any remaining
    # quota left from _past_ timespans in the current day.
    #   -trs, 17 Sept 2020
    quota = db.fetch_row(
        """
        select name, timespan, max, used, max - used as remaining
          from operations.test_quota
         where name = 'uw'
           and timespan @> timestamp with time zone %s
           for update
        """, (at,))

    if not quota:
        LOG.info("No quota row found, aborting")
        return

    if not quota.remaining > 0:
        LOG.info(f"No quota remaining for {quota.name} during {quota.timespan}, aborting")
        return

    LOG.info(
        f"Quota for {quota.name} during {quota.timespan} "
        f"is now {quota.remaining:,} = {quota.max:,} - {quota.used:,} (remaining = max - used)")

    # Offer testing to the top entries in our priority queue.
    next_in_queue = db.fetch_all(
        """
        select redcap_url, redcap_project_id, redcap_record_id,
               redcap_event_name, redcap_repeat_instance,
               priority, priority_reason
          from shipping.uw_priority_queue_v1
         limit %s
        """, (quota.remaining,))

    if not next_in_queue:
        LOG.info("Nothing in the queue")
        return

    LOG.info(f"Fetched {len(next_in_queue):,} entries from the head of the queue")

    # Use the REDCap URL and project id from the queue rather than hardcoding.
    buckets = bucket(next_in_queue, lambda q: (q.redcap_url, q.redcap_project_id))
    queued_by_project = {key: list(buckets[key]) for key in buckets}

    offer_count = 0

    for (url, project_id), queued in queued_by_project.items():
        offers = [offer(q) for q in queued]

        LOG.info(
            f"Making {len(offers):,} offers for {url} project {project_id} "
            f"{'(dry run)' if dry_run else ''}")

        if log_offers:
            dump_ndjson(offers)

        # Token will automatically come from the environment. If we're doing a
        # dry run, then Project will make sure update_records() doesn't
        # actually update records.
        project = Project(url, project_id, dry_run=dry_run)

        batches = list(chunked(offers, REDCAP_BATCH_SIZE))

        for i, batch in enumerate(batches, 1):
            LOG.info(f"Updating REDCap record batch {i:,}/{len(batches):,} of size {len(batch):,}")
            offer_count += project.update_records(batch)

        # Insert synthetic DETs into our receiving table to trigger a new
        # import. This helps complete the roundtrip data update for the REDCap
        # records we just updated since API imports don't trigger natural DETs.
        insert_dets(db, project, offers)

    # XXX TODO: Maybe also update an internal testing_offered flag (in
    # encounter.details?) to avoid the delay of a roundtrip thru REDCap? If we
    # don't do this, then worst case we try to offer testing to the same
    # records more than once? This is probably more complicated than we want
    # to deal with on the first iteration and involves cooperation between
    # this command and the priority queue definition. I think timing will work
    # out most of the time and the worst case is we offer less testing than we
    # can handle (better than offering more!). If it happens commonly, we can
    # address later.
    #   -trs, 17 Sept & 13 Oct 2020

    updated_quota = db.fetch_row(
        """
        update operations.test_quota
           set used = used + %s
         where (name, timespan) = (%s, %s)
        returning name, timespan, max, used, max - used as remaining
        """, (offer_count, quota.name, quota.timespan))

    LOG.info(
        f"Quota for {updated_quota.name} during {updated_quota.timespan} "
        f"is now {updated_quota.remaining:,} = {updated_quota.max:,} - {updated_quota.used:,} (remaining = max - used)")
def split_by_function(S, context):
    s = bucket(S, key=lambda d: d.Lfunc())
    return flatten([FindIntegrableConditions(s[k], context) for k in s])
def _entities() -> Iterator[Res[Union[User, _Message]]]:
    from ..core.kompress import ZipPath
    last = ZipPath(max(inputs()))
    # TODO make sure it works both with plain directory
    # ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here
    # e.g. possible options are:
    # - if packed things are detected, just return ZipPath
    # - if packed things are detected, possibly return match_structure_wrapper
    #   it might be a bit tricky because it's a context manager -- who will recycle it?
    # - if unpacked things are detected, just return the dir as it is
    #   (possibly detect them via match_structure? e.g. what if we have a bunch of unpacked dirs)
    #
    # I guess the goal for core.structure module was to pass it to other functions that expect unpacked structure
    # https://github.com/karlicoss/HPI/pull/175
    # whereas here I don't need it..
    # so for now will just implement this adhoc thing and think about properly fixing later

    j = json.loads((last / 'account_information/personal_information.json').read_text())
    [profile] = j['profile_user']
    pdata = profile['string_map_data']
    username = pdata['Username']['value']
    full_name = _decode(pdata['Name']['value'])

    # just make up something :shrug:
    self_id = username
    self_user = User(
        id=self_id,
        username=username,
        full_name=full_name,
    )
    yield self_user

    files = list(last.rglob('messages/inbox/*/message_*.json'))
    assert len(files) > 0, last

    buckets = bucket(files, key=lambda p: p.parts[-2])
    file_map = {k: list(buckets[k]) for k in buckets}

    for fname, ffiles in file_map.items():
        for ffile in sorted(ffiles, key=lambda p: int(p.stem.split('_')[-1])):
            j = json.loads(ffile.read_text())

            id_len = 10
            # NOTE: no match in android db/api responses?
            other_id = fname[-id_len:]
            # NOTE: no match in android db?
            other_username = fname[:-id_len - 1]
            other_full_name = _decode(j['title'])
            yield User(
                id=other_id,
                username=other_username,
                full_name=other_full_name,
            )

            # todo "thread_type": "Regular" ?
            for jm in j['messages']:
                # todo defensive?
                try:
                    mtype = jm['type']  # Generic/Share?
                    content = None
                    if 'content' in jm:
                        content = _decode(jm['content'])
                    else:
                        share = jm.get('share')
                        photos = jm.get('photos')
                        videos = jm.get('videos')
                        cc = share or photos or videos
                        if cc is not None:
                            content = str(cc)
                    assert content is not None, jm
                    timestamp_ms = jm['timestamp_ms']
                    sender_name = _decode(jm['sender_name'])
                    user_id = other_id if sender_name == other_full_name else self_id
                    yield _Message(
                        created=datetime.fromtimestamp(timestamp_ms / 1000),
                        text=content,
                        user_id=user_id,
                        thread_id=fname,  # meh.. but no better way?
                    )
                except Exception as e:
                    # TODO sometimes messages are just missing content?? even with Generic type
                    yield e