Пример #1
0
def test_summarizer(config):
    """Smoke-test summarizing and formatting one series per type name.

    Every result is discarded, so the test only verifies that
    ``summarize`` followed by ``format_summary`` completes without raising.
    """
    summarizer = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))

    def run(series, type_name):
        # Summarize ``series`` as ``type_name`` and pass the result through
        # the formatter; the formatted summary itself is not inspected.
        format_summary(summarizer.summarize(config, series, type_name))

    def located(suffix):
        # ``suffix`` is appended to ``base_path`` verbatim (plain string
        # concatenation, not a path join) before normalization.
        return os.path.abspath(base_path + suffix)

    run(pd.Series([1, 2, 3, 4, 5]), "Unsupported")
    run(pd.Series([1, 2, 3, 4, 5]), "Numeric")
    run(pd.Series(pd.date_range(start="1/1/2018", end="1/08/2018")), "DateTime")
    run(pd.Series(["abc", "abc", "abba"]), "Categorical")
    run(pd.Series(["https://www.example.com"]), "URL")

    # File-system flavoured types, each fed a single-element series.
    missing_module = located(
        r"../../../src/pandas_profiling/model/typeset_does_not_exist.py"
    )
    run(pd.Series([missing_module]), "Path")

    typeset_module = located(r"../../../src/pandas_profiling/model/typeset.py")
    run(pd.Series([typeset_module]), "File")

    logo_image = located(r"../../../docsrc/assets/lambda-labs.png")
    run(pd.Series([logo_image]), "Image")

    run(pd.Series([True, False, True, False, False]), "Boolean")
Пример #2
0
    def _render_json(self):
        """Render ``self.description_set`` as an indented JSON string.

        The description is first passed through ``format_summary`` and then
        converted by ``encode_it`` into values ``json.dumps`` can serialize.
        The progress bar is disabled unless the ``progress_bar`` entry of the
        ``config`` object in the enclosing scope is truthy.

        Returns:
            The JSON document produced by ``json.dumps(..., indent=4)``.
        """

        def encode_it(o):
            """Recursively convert ``o`` into JSON-serializable values."""
            if isinstance(o, dict):
                return {encode_it(k): encode_it(v) for k, v in o.items()}
            if isinstance(o, (bool, int, float, str)):
                return o
            if isinstance(o, (list, set)):
                # json.dumps raises TypeError on a set, so sets are emitted
                # as lists (element order follows set iteration order).
                return [encode_it(v) for v in o]
            if isinstance(o, (pd.DataFrame, pd.Series)):
                # Embedded as the JSON *string* returned by pandas.
                return o.to_json()
            if isinstance(o, np.ndarray):
                return encode_it(o.tolist())
            # Fallback for every other type: its string representation.
            return str(o)

        description = self.description_set

        disable_progress_bar = not config["progress_bar"].get(bool)
        with tqdm(total=1, desc="Render JSON",
                  disable=disable_progress_bar) as pbar:
            description = format_summary(description)
            description = encode_it(description)
            data = json.dumps(description, indent=4)
            pbar.update()
        return data
Пример #3
0
    def _render_json(self) -> str:
        """Render ``self.description_set`` as an indented JSON string.

        The description is first passed through ``format_summary`` and then
        converted by ``encode_it`` into values ``json.dumps`` can serialize.
        The progress bar is disabled when ``self.config.progress_bar`` is
        falsy.

        Returns:
            The JSON document produced by ``json.dumps(..., indent=4)``.
        """

        def encode_it(o: Any) -> Any:
            """Recursively convert ``o`` into JSON-serializable values."""
            if isinstance(o, dict):
                return {encode_it(k): encode_it(v) for k, v in o.items()}
            if isinstance(o, (bool, int, float, str)):
                return o
            if isinstance(o, (list, set)):
                # json.dumps raises TypeError on a set, so sets are emitted
                # as lists (element order follows set iteration order).
                return [encode_it(v) for v in o]
            if isinstance(o, pd.DataFrame):
                # One dict per row, then encoded recursively.
                return encode_it(o.to_dict(orient="records"))
            if isinstance(o, pd.Series):
                # Series.to_dict() accepts no ``orient`` argument, so a
                # Series is encoded as the list of its values instead.
                return encode_it(o.tolist())
            if isinstance(o, np.ndarray):
                return encode_it(o.tolist())
            if isinstance(o, Sample):
                return encode_it(o.dict())
            if isinstance(o, np.generic):
                # NumPy scalar -> equivalent Python scalar.
                return o.item()
            # Fallback for every other type: its string representation.
            return str(o)

        description = self.description_set

        with tqdm(total=1,
                  desc="Render JSON",
                  disable=not self.config.progress_bar) as pbar:
            description = format_summary(description)
            description = encode_it(description)
            data = json.dumps(description, indent=4)
            pbar.update()
        return data
Пример #4
0
def test_summarizer():
    """Smoke-test summarizing and formatting one series per type.

    Every result is discarded, so the test only verifies that
    ``summarize`` followed by ``format_summary`` completes without raising.
    """
    summarizer = PandasProfilingSummarizer(typeset=ProfilingTypeSet())

    def run(series, dtype):
        # Summarize ``series`` as ``dtype`` and pass the result through the
        # formatter; the formatted summary itself is not inspected.
        format_summary(summarizer.summarize(series, dtype))

    def located(suffix):
        # ``suffix`` is appended to ``base_path`` verbatim (plain string
        # concatenation, not a path join) before normalization.
        return os.path.abspath(base_path + suffix)

    run(pd.Series([1, 2, 3, 4, 5]), Unsupported)
    run(pd.Series([1, 2, 3, 4, 5]), Numeric)
    run(pd.Series(pd.date_range(start="1/1/2018", end="1/08/2018")), DateTime)
    run(pd.Series(["abc", "abc", "abba"]), Categorical)
    run(pd.Series(["https://www.example.com"]), URL)

    # File-system flavoured types, each fed a single-element series.
    missing_module = located(
        r"../../../src/pandas_profiling/model/typeset_does_not_exist.py"
    )
    run(pd.Series([missing_module]), Path)

    typeset_module = located(r"../../../src/pandas_profiling/model/typeset.py")
    run(pd.Series([typeset_module]), File)

    logo_image = located(r"../../../docsrc/assets/lambda-labs.png")
    run(pd.Series([logo_image]), Image)

    run(pd.Series([True, False, True, False, False]), Boolean)