Python bucketize示例，boltons.iterutils.bucketize Python示例

示例#1

0

显示文件

文件： test_task_hyperparams.py 项目： allegroai/clearml-server

 def _param_dict_from_list(params: Sequence[dict]) -> dict:
     """
     Index a flat list of parameter dicts by section and then by name.

     The result has the shape ``{section: {name: param}}``, where ``section``
     and ``name`` are read from the "section" and "name" keys of each param.
     """
     by_section = iterutils.bucketize(params, key=itemgetter("section"))
     nested = {}
     for section, section_params in by_section.items():
         nested[section] = {param["name"]: param for param in section_params}
     return nested

示例#2

0

显示文件

文件： targets.py 项目： returntocorp/semgrep-action

    def get_dirty_paths_by_status(self) -> Dict[str, List[Path]]:
        """
        Returns all paths that have a git status, grouped by change type.

        These can be staged, unstaged, or untracked. Paths under
        ``.semgrep_logs/`` are excluded through a git exclude pathspec.

        The result is cached on the instance, so ``git status`` is executed
        at most once per instance.

        :return: mapping from the first character of each status entry to the
            list of paths reported with that character
        """
        if self._dirty_paths_by_status is not None:
            return self._dirty_paths_by_status

        debug_echo("Initializing dirty paths")
        sub_out = subprocess.run(
            # The command is passed as a list and no shell is involved, so the
            # pathspec must not be wrapped in shell quotes: git would receive
            # the quote characters literally and treat the argument as an
            # ordinary path instead of an exclude pattern.
            ["git", "status", "--porcelain", "-z", ":!.semgrep_logs/"],
            timeout=GIT_SH_TIMEOUT,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Undecodable bytes are replaced rather than raising, so unusual file
        # names cannot abort the scan.
        git_status_output = sub_out.stdout.decode("utf-8", errors="replace")
        debug_echo(f"Git status output: {git_status_output}")
        output = zsplit(git_status_output)
        debug_echo("finished getting dirty paths")

        # Each entry is expected to look like "XY <path>": the first character
        # is used as the bucket key and everything after the 3-character
        # prefix is the path.
        # NOTE(review): with -z, rename/copy entries are followed by a separate
        # source-path field that has no status prefix - confirm this is
        # handled by zsplit or by the callers.
        dirty_paths = bucketize(
            output,
            key=lambda line: line[0],
            value_transform=lambda line: Path(line[3:]),
        )
        debug_echo(str(dirty_paths))

        # Cache dirty paths
        self._dirty_paths_by_status = dirty_paths
        return dirty_paths

示例#3

0

显示文件

    def _extract_results(data: dict, request_items: Sequence[StatItem],
                         split_by_variant: bool) -> dict:
        """
        Clean results returned from elastic search (remove "aggregations", "buckets" etc.),
        leave only aggregation types requested by the user and return a clean dictionary
        and return a "clean" dictionary of
        :param data: aggregation data retrieved from ES
        :param request_items: aggs types requested by the user
        :param split_by_variant: if False then aggregate by metric type, otherwise metric type + variant
        """
        if "aggregations" not in data:
            return {}

        items_by_key = bucketize(request_items, key=attrgetter("key"))
        aggs_per_metric = {
            key: [item.aggregation for item in items]
            for key, items in items_by_key.items()
        }

        def extract_date_stats(date: dict, metric_key) -> dict:
            return {
                "date": date["key"],
                "count": date["doc_count"],
                **{
                    agg: date[agg]["value"]
                    for agg in aggs_per_metric[metric_key]
                },
            }

        def extract_metric_results(metric_or_variant: dict,
                                   metric_key: str) -> Sequence[dict]:
            return [
                extract_date_stats(date, metric_key)
                for date in metric_or_variant["dates"]["buckets"]
                if date["doc_count"]
            ]

        def extract_variant_results(metric: dict) -> dict:
            metric_key = metric["key"]
            return {
                variant["key"]: extract_metric_results(variant, metric_key)
                for variant in metric["variants"]["buckets"]
            }

        def extract_worker_results(worker: dict) -> dict:
            return {
                metric["key"]:
                extract_variant_results(metric) if split_by_variant else
                extract_metric_results(metric, metric["key"])
                for metric in worker["metrics"]["buckets"]
            }

        return {
            worker["key"]: extract_worker_results(worker)
            for worker in data["aggregations"]["workers"]["buckets"]
        }

示例#4

0

显示文件

文件： hyperparams.py 项目： yjshen1982/trains-server

 def _db_dicts_from_list(
         cls, items: Sequence[HyperParamItem]) -> Dict[str, dict]:
     """
     Turn a flat list of hyper parameter items into a nested dictionary of
     the form ``{section: {name: ParamsItem}}``.

     Both the section keys and the parameter name keys are passed through
     ParameterKeyEscaper.escape; each value is a ParamsItem built from the
     item's to_struct() output.
     """
     escape = ParameterKeyEscaper.escape
     grouped = iterutils.bucketize(items, key=attrgetter("section"))
     db_dicts = {}
     for section, section_items in grouped.items():
         section_key = escape(section)
         db_dicts[section_key] = {
             escape(item.name): ParamsItem(**item.to_struct())
             for item in section_items
         }
     return db_dicts

示例#5

0

显示文件

    def get_dirty_paths_by_status(self) -> Dict[str, List[Path]]:
        """
        Group every path reported by ``git status`` by its change type.

        Staged, unstaged and untracked paths are all included. The first
        character of each status entry is the grouping key; the text after the
        3-character prefix becomes the Path.
        """
        raw_status = git.status("--porcelain", "-z").stdout.decode()

        def status_code(entry):
            return entry[0]

        def entry_path(entry):
            return Path(entry[3:])

        return bucketize(
            zsplit(raw_status),
            key=status_code,
            value_transform=entry_path,
        )

示例#6

0

显示文件

文件： workers.py 项目： yjshen1982/trains-server

    def _get_worker_metrics(stats: dict) -> Sequence[MetricStats]:
        """
        Convert the worker statistics data from the internal format of lists of structs
        to a more "compact" format for json transfer (arrays of dates and arrays of values)

        Requested metrics whose key is missing from the stats data are ignored.
        """
        # keep only the requested metrics that actually appear in stats
        present_metrics = [item for item in request.items if item.key in stats]

        # metric key -> aggregations requested for that metric
        aggs_by_metric = bucketize(
            present_metrics,
            key=attrgetter("key"),
            value_transform=attrgetter("aggregation"),
        )

        worker_metrics = []
        for metric, metric_stats in stats.items():
            worker_metrics.extend(
                _get_metric_stats(metric, metric_stats, aggs_by_metric[metric])
            )
        return worker_metrics

示例#7

0

显示文件

    def _collect_lib_deps(self, sky_dep_paths):
        '''
        Gather the library dependencies of the meta requirements, of every sky
        dependency and of the current service, and merge them by package name.

        Sources are processed in that order and a later source replaces an
        earlier entry for the same package name.

        sky_dep_paths : {name: /path/to/sky.yaml}

        Returns a dict mapping each package type to the list of its packages.
        '''
        merged = collections.OrderedDict()
        overwrites = []

        def add_deps(dependencies, path):
            with self.logger.info('pull_transitive_deps', config=path):
                for pkg_type, packages in dependencies.items():
                    # sky dependencies are not library packages
                    if pkg_type == 'sky':
                        continue
                    for pkg in packages:
                        if isinstance(pkg, basestring):
                            pkg_name = pkg
                        else:
                            pkg_name = pkg.pkg
                        if pkg_name in merged:
                            # TODO: something smart with overrides
                            previous_path = merged[pkg_name][2]
                            overwrites.append((pkg_name, previous_path))
                        merged[pkg_name] = (pkg_type, pkg, path)

        add_deps(META_REQUIREMENTS, '<SKY_REQUIREMENTS>')

        for path in sky_dep_paths.values():
            path += '/sky.yaml'
            with self.logger.info('parse_lib_config', path=path):
                lib_conf = config.parse(path)
                add_deps(lib_conf['library_deps'], path)

        add_deps(self.config['library_deps'], self.config_path)

        by_type = iterutils.bucketize(merged.values(), lambda e: e[0])
        by_type.pop('sky', None)

        finalized_dependencies = {}
        for pkg_type, entries in by_type.items():
            finalized_dependencies[pkg_type] = [entry[1] for entry in entries]
        return finalized_dependencies

示例#8

0

显示文件

    def get_list_field_query(cls, field: str, data: Sequence[Optional[str]]) -> Q:
        """
        Build a mongoengine Q object representing an "or" query for the provided
        values with respect to the given list field, with support for "none or
        empty" in case a None value is included.

        - Exclusion can be specified by a leading "-" for each value (API versions <2.8)
            or by a preceding "__$not" value (operator)
        - AND can be achieved using a preceding "__$all" or "__$and" value (operator)

        Raises MakeGetAllQueryError when data is neither a list nor a tuple.
        """
        if not isinstance(data, (list, tuple)):
            raise MakeGetAllQueryError("expected list", field)

        # TODO: backwards compatibility only for older API versions
        bucket_helper = cls.ListFieldBucketHelper(legacy=True)
        actions = bucketize(
            data,
            key=bucket_helper.key,
            value_transform=bucket_helper.value_transform,
        )

        allow_empty = None in actions.get("in", {})
        mongoengine_field = field.replace(".", "__")

        q = RegexQ()
        for action in actions:
            # falsy action keys do not translate into a query operator
            if not action:
                continue
            # drop falsy values and duplicates before building the condition
            unique_values = list(set(filter(None, actions[action])))
            condition = {f"{mongoengine_field}__{action}": unique_values}
            q &= RegexQ(**condition)

        if allow_empty:
            # a None value was passed: also match a missing or empty field
            field_missing = Q(**{f"{mongoengine_field}__exists": False})
            field_empty = Q(**{mongoengine_field: []})
            return q | field_missing | field_empty

        return q

示例#9

0

显示文件

文件： event_metrics.py 项目： shomratalon/trains-server

    def _split_metrics_by_max_aggs_count(
        self, task_metrics: Sequence[TaskMetric]
    ) -> Iterable[Sequence[TaskMetric]]:
        """
        Return task metrics in groups where amount of task metrics in each group
        is roughly limited by MAX_AGGS_ELEMENTS_COUNT. The split is done on metrics and
        variants while always preserving all their tasks in the same group
        """
        if len(task_metrics) < self.MAX_AGGS_ELEMENTS_COUNT:
            yield task_metrics
            return

        tm_grouped = bucketize(task_metrics, key=itemgetter(1, 2))
        groups = []
        for group in tm_grouped.values():
            groups.append(group)
            if sum(map(len, groups)) >= self.MAX_AGGS_ELEMENTS_COUNT:
                yield list(itertools.chain(*groups))
                groups = []

        if groups:
            yield list(itertools.chain(*groups))

        return

示例#10

0

显示文件

 def _update_variants_invalid_iterations(
     variants: Sequence[VariantScrollState], iterations: Sequence[dict]
 ) -> Sequence[dict]:
     """
     This code is currently not in use since the invalid iterations
     are calculated during MetricState initialization

     - A variant without a recycle url marker gets it from the first event
       seen for that variant
     - A variant whose recycle marker is met again on some iteration gets
       its last_invalid_iteration set to that iteration
     - Events that belong to invalid iterations of their variant are
       removed from the iterations

     Return the updated iterations list, without the iterations that were
     left with no events
     """
     states_by_name = bucketize(variants, attrgetter("name"))
     for entry in iterations:
         current_iter = entry["iter"]
         stale_events = []
         for ev in entry["events"]:
             # only the first state registered under a variant name is used
             state = states_by_name[ev["variant"]][0]
             last_invalid = state.last_invalid_iteration
             if last_invalid and last_invalid >= current_iter:
                 stale_events.append(ev)
                 continue
             url = ev.get("url")
             if not state.recycle_url_marker:
                 state.recycle_url_marker = url
             elif state.recycle_url_marker == url:
                 state.last_invalid_iteration = current_iter
                 stale_events.append(ev)
         if stale_events:
             entry["events"] = [
                 ev for ev in entry["events"] if ev not in stale_events
             ]
     return [entry for entry in iterations if entry["events"]]

示例#11

0

显示文件

文件： events.py 项目： ainoam/trains-server

            navigate_earlier=False,
            from_key_value=from_key_value,
            metric_variants=metric_variants,
            key=key,
        )
        if not res.events:
            break
        events.extend(res.events)
        from_key_value = str(events[-1][scalar_key.field])

    key = str(key)
    variants = {
        variant: extract_properties_to_lists(["value", scalar_key.field],
                                             events,
                                             target_keys=["y", key])
        for variant, events in bucketize(events, key=itemgetter(
            "variant")).items()
    }

    call.kpis["events"] = len(events)

    scroll = ScalarMetricsIterRawScroll(
        from_key_value=str(events[-1][scalar_key.field]) if events else None,
        total=total,
        request=request,
    )

    return make_response(
        returned=len(events),
        total=total,
        scroll_id=scroll.get_scroll_id(),
        variants=variants,