Пример #1
0
 def resolve_value(self, info, keys=None, k=None, last=None, window=None):
     """Return the columns of this object's data file as plain Python lists.

     The file is loaded with ``read_dataframe`` from ``Args.logdir`` joined
     with ``self.id`` minus its first character (presumably a leading
     path separator -- confirm against how ids are built).

     Args:
         info: Resolver context; not used by this method.
         keys: Optional list of column names. When truthy, only these
             columns are kept, and rows with a missing value in any of
             them are dropped. When falsy, every column is returned and
             rows with a missing value in any column are dropped.
         k, last, window: Accepted for interface compatibility; not used
             by this method.

     Returns:
         A dict mapping each column name to a list of its values, or
         ``None`` when ``read_dataframe`` returns ``None``.

     Raises:
         KeyError: If ``keys`` names a column that is not in the file.
     """
     frame = read_dataframe(join(Args.logdir, self.id[1:]))
     if frame is None:
         # Other callers of read_dataframe in this file treat None as
         # "no data"; report it as a null value instead of crashing.
         return None

     if keys:
         selected = frame[keys].dropna()
         return {name: selected[name].values.tolist() for name in keys}

     cleaned = frame.dropna()
     return {name: column.values.tolist() for name, column in cleaned.items()}
Пример #2
0
 def resolve_keys(self, info):
     """Return the column names available in this object's data file.

     The file is loaded with ``read_dataframe`` from ``Args.logdir`` joined
     with ``self.id`` minus its first character (presumably a leading
     path separator -- confirm against how ids are built).

     Args:
         info: Resolver context; not used by this method.

     Returns:
         A list of column labels, or ``None`` when ``read_dataframe``
         returns ``None``.
     """
     frame = read_dataframe(join(Args.logdir, self.id[1:]))
     if frame is None:
         # Other callers of read_dataframe in this file treat None as
         # "no data"; report it as a null value instead of crashing.
         return None
     # Column labels do not depend on row content, so no row filtering
     # (and no copy of the frame) is needed to list them.
     return list(frame.keys())
Пример #3
0
def get_series(
        metrics_files=tuple(),
        prefix=None,
        head=None,
        tail=None,
        x_low=None,
        x_high=None,
        x_edge=None,  # OneOf('right', 'left', 'mean'); None behaves like 'right'
        k=None,
        x_align=None,  # OneOf(int, 'start', 'end')
        x_key=None,
        y_key=None,
        y_keys=None,
        label=None):
    """Load several metrics files and aggregate them into one ``Series``.

    Each entry of ``metrics_files`` is joined with ``prefix``, stripped of
    its first character, and loaded with ``read_dataframe`` relative to
    ``Args.logdir``. Files for which ``read_dataframe`` returns ``None``
    are skipped. The remaining frames are trimmed, concatenated, grouped
    along x, and summarised with ``describe``.

    Args:
        metrics_files: Iterable of metrics file paths. Must be absolute
            when ``prefix`` is falsy.
        prefix: Optional path prepended to every metrics file.
        head: Keep only the first ``head`` rows of each file.
        tail: Keep only the last ``tail`` rows of each file. Mutually
            exclusive with ``head``.
        x_low: Inclusive lower bound on x; rows below are dropped.
        x_high: Inclusive upper bound on x; rows above are dropped.
        x_edge: Which point of a quantile bin is stored in the ``__x``
            column when ``k`` is given: ``'right'`` (also used for
            ``None``), ``'left'`` or ``'mean'`` (the bin midpoint).
        k: Number of quantile bins used to group x. When ``None`` rows
            are grouped by their exact x value.
        x_align: Per-file shift applied to the ``x_key`` column:
            ``'start'`` subtracts the first value, ``'end'`` subtracts
            the last value, anything else is subtracted as-is. Ignored
            when ``x_key`` is ``None``.
        x_key: Column used as x. When ``None`` the row index is used.
        y_key: Single column to aggregate. Exactly one of ``y_key`` and
            ``y_keys`` must be truthy.
        y_keys: List of columns to aggregate.
        label: Passed through to the returned ``Series``.

    Returns:
        A ``Series`` whose ``_df`` holds the per-group statistics sorted
        by ``__x``, or ``None`` when no file produced a dataframe.

    Raises:
        AssertionError: If the ``y_key``/``y_keys`` or ``head``/``tail``
            constraints are violated, or a metrics file is not absolute
            while ``prefix`` is falsy.
        KeyError: If a requested column is missing from a file, or
            ``x_edge`` is not one of the supported values.
    """
    warning = None
    assert not y_key or not y_keys, "yKey and yKeys can not be trueful at the same time"
    assert y_key or y_keys, "yKey and yKeys can not be both falseful."
    assert head is None or tail is None, "head and tail can not be trueful at the same time"
    if not prefix:
        for metrics_file in metrics_files:
            assert isabs(
                metrics_file
            ), f"metricFile need to be absolute path is prefix is {prefix}. It is {metrics_file} instead."

    ids = [join(prefix or "", metrics_file) for metrics_file in metrics_files]
    dfs = [read_dataframe(join(Args.logdir, _id[1:])) for _id in ids]

    y_keys = y_keys or [y_key]
    # De-duplicate while keeping a deterministic column order (x first).
    join_keys = list(
        dict.fromkeys(key for key in (x_key, *y_keys) if key is not None))
    x_column = x_key or "index"

    dataframes = []
    for df in dfs:
        if df is None:
            continue
        if x_key is not None:
            # todo: this needs to be part of the join
            if x_align is not None and len(df):
                # Work on a copy so the frame handed out by read_dataframe
                # is not shifted in place.
                df = df.copy()
                if x_align == "start":
                    offset = df[x_key].iloc[0]
                elif x_align == "end":
                    # Positional lookup: a label lookup of -1 fails on a
                    # default integer index.
                    offset = df[x_key].iloc[-1]
                else:
                    offset = x_align
                df[x_key] -= offset
        else:
            # Copy before adding a column so we never write into a slice.
            df = df[y_keys].copy()
            # Expose the row index as a column so the x_low / x_high
            # filters below can address it.
            df['index'] = df.index

        # todo: maybe apply tail and head *after* dropna??
        if tail is not None:
            df = df.tail(tail)
        if head is not None:
            df = df.head(head)

        mask = None
        if x_low is not None:
            mask = df[x_column] >= x_low
        if x_high is not None:
            within_high = df[x_column] <= x_high
            mask = within_high if mask is None else mask & within_high
        if mask is not None:
            df = df.loc[mask]

        try:
            selected = df[join_keys]
        except KeyError as e:
            raise KeyError(
                f"{join_keys} contain keys that is not in the dataframe. "
                f"Keys available include {df.keys()}") from e

        # todo: only dropna if we are not using ranges. <need to test>
        if head is None and tail is None:
            selected = selected.dropna()
        dataframes.append(selected)

    if not dataframes:  # No dataframe, return `null`.
        return None

    merged = pd.concat(dataframes)

    if x_key:
        merged = merged.set_index(x_key)

    if k is not None:
        bins = pd.qcut(merged.index, k, duplicates='drop')
        grouped = merged.groupby(bins)
    else:
        grouped = merged.groupby(level=0)

    # treat all numbers in bin as equal. For raw (not averaged, or averaged)
    df = grouped[y_keys].describe(
        percentiles=[0.25, 0.75, 0.5, 0.05, 0.95]).reset_index()

    if k is not None:
        if x_edge == "right" or x_edge is None:
            df['__x'] = df['index'].apply(lambda r: r.right)
        elif x_edge == "left":
            df['__x'] = df['index'].apply(lambda r: r.left)
        elif x_edge == "mean":
            df['__x'] = df['index'].apply(lambda r: 0.5 * (r.left + r.right))
        # todo: use mode of each bin
        else:
            raise KeyError(
                f"x_edge {[x_edge]} should be OneOf['right', 'left', 'mean']"
            )
    else:
        # NOTE(review): after reset_index() this is the row position, not
        # the x value; rows are already in group-key order, so the sort
        # below keeps that order. Confirm whether consumers expect x here.
        df['__x'] = df.index

    return Series(metrics_files,
                  _df=df.sort_values(by="__x"),
                  metrics_files=metrics_files,
                  prefix=prefix,
                  x_key=x_key or "index",
                  y_key=y_key,
                  y_keys=y_keys,
                  label=label,
                  warning=warning)