Python Clumper示例，clumper.Clumper Python示例

示例#1

0

显示文件

def test_write_missing_keys(tmp_path):
    """Round-trip records that do not share the same keys through CSV.

    The data written to disk and read back must compare equal to a Clumper
    built from the same records.
    """
    rows = [{"a": "1", "b": "2"}, {"c": "3"}]
    csv_path = temp_file(tmp_path)

    Clumper(rows).write_csv(csv_path)
    loaded = Clumper.read_csv(csv_path)

    expected = Clumper(rows)
    assert expected.equals(loaded)

示例#2

0

显示文件

文件： test_grouping_utils.py 项目： afiqmuzaffar/clumper

def test_group_combos_one_group():
    """Grouping on one key gives one combination per distinct value of it."""
    combos = it.product([1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    records = [
        {"r": 1, "i": i, "j": j, "a": a, "b": b}
        for i, j, a, b in combos
    ]
    clump = Clumper(records)

    # Same check for both single-key groupings: the values found in the
    # group combinations must match the values that went into the data.
    for key, values in (("a", [True, False]), ("b", ["a", "b"])):
        found = clump.group_by(key)._group_combos()
        assert sorted(row[key] for row in found) == sorted(values)

示例#3

0

显示文件

文件： test_explode.py 项目： afiqmuzaffar/clumper

def test_correct_keys_kept():
    """
    Check which key names remain after `explode` for each way of calling it.
    """
    data = [
        {"a": 1, "b": 1, "items": [1, 2]},
        {"a": 2, "b": 1, "items": [3, 4]},
    ]

    # Column passed positionally.
    positional = Clumper(data).explode("items")
    assert set(positional.keys()) == {"items", "a", "b"}

    # Positional column plus a keyword that targets the same column.
    mixed = Clumper(data).explode("items", foobar="items")
    assert set(mixed.keys()) == {"items", "a", "b", "foobar"}

    # Keyword whose name equals the source column.
    same_name = Clumper(data).explode(items="items")
    assert set(same_name.keys()) == {"items", "a", "b"}

    # Keyword with a new name: only the new name is expected afterwards.
    renamed = Clumper(data).explode(item="items")
    assert set(renamed.keys()) == {"item", "a", "b"}

    # Keyword that reuses the name of an existing key.
    reused = Clumper(data).explode(a="items")
    assert set(reused.keys()) == {"a", "b"}

示例#4

0

显示文件

文件： test_write_json.py 项目： afiqmuzaffar/clumper

def test_local_read_write_same_lines(tmp_path, lines, expected):
    """Test that a locally written JSON file reads back with as many items as were written.

    NOTE(review): `expected` is accepted but never used in the body --
    confirm whether the assertion was meant to compare against it.
    """
    source = Clumper.read_json("tests/data/pokemon.json", lines)
    target = str(tmp_path / "pokemon_copy.json")

    source.write_json(target)
    reloaded = Clumper.read_json(target)

    assert len(reloaded) == len(source)

示例#5

0

显示文件

文件： test_write_json.py 项目： afiqmuzaffar/clumper

def test_local_read_write_content_same(tmp_path):
    """Test that a JSON file written locally has the same content when read back."""
    source = Clumper.read_json("tests/data/pokemon.json")
    target = str(tmp_path / "pokemon_copy.json")

    source.write_json(target)
    reloaded = Clumper.read_json(target)

    assert reloaded.collect() == source.collect()

示例#6

0

显示文件

def test_length_list():
    """
    len() of a Clumper must equal the number of items it was built from.
    """
    # Empty, single-item and many-item cases.
    for size in (0, 1, 100):
        items = [{"a": 1} for _ in range(size)]
        assert len(Clumper(items)) == size

示例#7

0

显示文件

文件： __main__.py 项目： koaning/pytest-duration-insights

def explore(
        report_path: str = typer.Argument(...,
                                          help="Report log to visualise."),
        no_trim: bool = typer.Option(
            False, is_flag=True,
            help="Flag to not reduce parametrized calls."),
        port: int = typer.Option(8002, help="Port to serve the dashboard on."),
):
    """Starts up a pytest reportlog dashboard."""
    # Imported locally so this function does not depend on a module-level
    # `import sys` being present.
    import sys

    # Use clumper to arrange the data appropriately: parse the report log and
    # fold it into a hierarchy keyed on "hierarchy" with "duration" values.
    res = (
        Clumper.read_jsonl(report_path)
        .pipe(parse_test_info, trim=not no_trim)
        .pipe(to_hierarchy_dict, hierarchy_col="hierarchy", value_col="duration")
    )

    # We serve everything as static files from a temporary folder.
    tmpdir = tempfile.mkdtemp()
    static_dir = Path(tmpdir) / "static"
    try:
        # Copy the packaged static assets, then drop the two data files the
        # dashboard reads next to them.
        orig = resource_filename("pytest_duration_insights", "static")
        shutil.copytree(src=orig, dst=static_dir)
        Clumper(res, listify=False).write_json(static_dir / "data.json")
        tree_res = Node.from_dict(res).to_value_dict()
        Clumper(tree_res, listify=False).write_json(static_dir / "treedata.json")

        # This is a bit hacky but does the job. `sys.executable` is used
        # instead of a bare "python" so the server runs on the interpreter
        # that is running this command, even when no `python` is on PATH.
        # The call blocks until the server process exits.
        subprocess.run([
            sys.executable,
            "-m",
            "http.server",
            str(port),
            "--directory",
            str(static_dir),
        ])
    finally:
        # mkdtemp never cleans up after itself; remove the folder once the
        # server stops (including on Ctrl+C).
        shutil.rmtree(tmpdir, ignore_errors=True)

示例#8

0

显示文件

文件： test_write_jsonl.py 项目： afiqmuzaffar/clumper

def test_local_read_write_content_same(tmp_path):
    """Test that a JSONL file written locally has the same content when read back."""
    source = Clumper.read_jsonl("tests/data/cards.jsonl")
    target = str(tmp_path / "cards_copy.jsonl")

    source.write_jsonl(target)
    reloaded = Clumper.read_jsonl(target)

    assert reloaded.collect() == source.collect()

示例#9

0

显示文件

def test_with_groups(n):
    """
    Summing the per-group totals of a constant 1 column must give back
    exactly the number of rows in the original data.
    """
    combos = it.product(range(1, n + 1), [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    rows = [
        {"r": 1, "i": i, "j": j, "a": a, "b": b}
        for i, j, a, b in combos
    ]
    clump = Clumper(rows)
    total_rows = len(clump)

    # Every row carries r == 1, so the grouped sums add up to the row count.
    per_group = clump.group_by("a", "b").agg(r=("r", "sum"))
    assert per_group.sum("r") == total_rows

示例#10

0

显示文件

文件： test_grouping_utils.py 项目： afiqmuzaffar/clumper

def test_mutate_group_aware():
    """
    `row_number` should restart per group when mutating a grouped Clumper.
    """
    # 20 rows alternating False/True, i.e. 10 rows in each group.
    data = [{"bool": bool(i % 2)} for i in range(20)]

    grouped = Clumper(data).group_by("bool")
    numbered = grouped.mutate(r=row_number())

    assert len(numbered) == len(data)
    assert numbered.groups == ("bool",)
    # With a reset per group the row numbers never exceed the group size.
    assert set(numbered.unique("r")) == set(range(1, 11))

示例#11

0

显示文件

文件： test_write_jsonl.py 项目： afiqmuzaffar/clumper

def test_local_read_write_same_lines(tmp_path, lines, expected):
    """Test that a locally written JSONL file reads back with as many items as were written.

    NOTE(review): `expected` is accepted but never used in the body --
    confirm whether the assertion was meant to compare against it.
    """
    target = tmp_path / "cards_copy.jsonl"
    source = Clumper.read_jsonl("tests/data/cards.jsonl", lines)

    # Written via the path object, read back via its string form.
    source.write_jsonl(target)
    reloaded = Clumper.read_jsonl(str(target))

    assert len(reloaded) == len(source)

示例#12

0

显示文件

def test_read_csv_nulls():
    """Test that reading the null fixture yields only the two expected records."""
    loaded = Clumper.read_csv("tests/data/null.csv")
    expected = Clumper([
        {"a": "1", "b": "2"},
        {"a": "2", "c": "4"},
    ])
    assert loaded.equals(expected)

示例#13

0

显示文件

def test_no_mutate_query(pokemon):
    """
    Regression test: running the same keep/mutate chain twice on the same
    input must give results of equal length.
    """

    def run_query():
        # Keep rows with exactly two types, then replace "type" by the first.
        two_types = Clumper(pokemon).keep(lambda d: len(d["type"]) == 2)
        return two_types.mutate(type=lambda d: d["type"][0])

    first = run_query()
    second = run_query()

    assert len(first) == len(second)

示例#14

0

显示文件

文件： test_multifile.py 项目： synapticarbors/clumper

def test_read_multiple_yaml(tmp_path, copies):
    """
    Test that yaml files can be read given a pattern
    """
    source = Clumper.read_yaml("tests/data/demo-nested.yml")

    # Write the same content `copies` times under distinct names.
    for idx in range(copies):
        source.write_yaml(tmp_path / f"demo-nested-{idx}.yml")

    pattern = str(tmp_path / "*.yml")
    combined = Clumper.read_yaml(pattern)
    assert len(combined) == len(source) * copies

示例#15

0

显示文件

文件： test_multifile.py 项目： synapticarbors/clumper

def test_read_multiple_csv(tmp_path, copies):
    """
    Reading a glob pattern of csv files must return the rows of every match.
    """
    source = Clumper.read_csv("tests/data/monopoly.csv")

    # Write the same content `copies` times under distinct names.
    for idx in range(copies):
        source.write_csv(tmp_path / f"monopoly_copy_{idx}.csv")

    pattern = str(tmp_path / "*.csv")
    combined = Clumper.read_csv(pattern)
    assert len(combined) == len(source) * copies

示例#16

0

显示文件

文件： test_multifile.py 项目： synapticarbors/clumper

def test_read_multiple_json(tmp_path, copies):
    """
    Reading a glob pattern of json files must return the items of every match.
    """
    source = Clumper.read_json("tests/data/pokemon.json")

    # Write the same content `copies` times under distinct names.
    for idx in range(copies):
        source.write_json(tmp_path / f"pokemon_copy_{idx}.json")

    pattern = str(tmp_path / "*.json")
    combined = Clumper.read_json(pattern)
    assert len(combined) == len(source) * copies

示例#17

0

显示文件

文件： test_return_values.py 项目： afiqmuzaffar/clumper

def test_case_zero():
    """Check what the summary methods return on an empty Clumper."""
    empty_c = Clumper([])

    # The numeric summaries have nothing to work with and give None.
    for summary in (empty_c.mean, empty_c.max, empty_c.min, empty_c.sum):
        assert summary("i") is None

    # The uniqueness helpers give their empty equivalents instead.
    assert empty_c.unique("i") == []
    assert empty_c.n_unique("i") == 0

示例#18

0

显示文件

文件： test_multifile.py 项目： afiqmuzaffar/clumper

def test_read_multiple_jsonl(tmp_path, copies):
    """
    Reading several jsonl files must work both for a glob pattern and for
    an explicit list of paths.
    """
    source = Clumper.read_jsonl("tests/data/cards.jsonl")

    # Write the same content `copies` times under distinct names.
    for idx in range(copies):
        source.write_jsonl(tmp_path / f"cards_copy_{idx}.jsonl")

    expected_rows = copies * len(source)

    from_pattern = Clumper.read_jsonl(str(tmp_path / "*.jsonl"))
    assert len(from_pattern) == expected_rows

    matching_files = list(Path(tmp_path).glob("*.jsonl"))
    from_list = Clumper.read_jsonl(matching_files)
    assert len(from_list) == expected_rows

示例#19

0

显示文件

def history(
    n: int = typer.Option(10, help="How many rows should the table show."),
    only_failures: bool = typer.Option(
        False, is_flag=True, help="Only show failures."),
    # NOTE(review): `is_flag=True` on the two `str` options below looks
    # unintended -- confirm against the typer documentation.
    date: str = typer.Option(
        None, is_flag=True, help="Only show specific date."),
    name: str = typer.Option(
        None, is_flag=True, help="Only show jobs with specific name."),
):
    """Shows a table with job status."""
    # Sort on the "start" field, descending.
    jobs = Clumper.read_jsonl(heartbeat_path()).sort(
        lambda job: job["start"], reverse=True)

    # Each filter is applied only when its option is truthy.
    if only_failures:
        jobs = jobs.keep(lambda job: job["status"] != "success")
    if name:
        jobs = jobs.keep(lambda job: name in job["name"])
    if date:
        jobs = jobs.keep(lambda job: date in job["start"])

    table = Table(title=None)
    for column in ("status", "date", "name", "logfile"):
        table.add_column(column)

    for job in jobs.head(n).collect():
        status = job["status"]
        # "fail" is wrapped in a red tag, any other status in a green one.
        colour = "red" if status == "fail" else "green"
        table.add_row(
            f"[{colour}]{status}[/]",
            job["start"],
            job["name"],
            job["logpath"],
        )
    print(table)

示例#20

0

显示文件

文件： conftest.py 项目： afiqmuzaffar/clumper

def base_clumper():
    """Build a Clumper with one row per lowercase ASCII letter.

    Each row holds the letter under "c", its position in the alphabet
    (starting at 0) under "i", and a two-item list repeating that position
    under "data".
    """
    rows = []
    for i, c in enumerate("abcdefghijklmnopqrstuvwxyz"):
        rows.append({"data": [i, i], "i": i, "c": c})
    return Clumper(rows)

示例#21

0

显示文件

文件： test_impute.py 项目： afiqmuzaffar/clumper

def test_correct_values_value():
    """With strategy="value", rows missing "b" receive the fallback value."""
    list_dicts = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]

    imputer = impute("b", strategy="value", fallback=0)
    res = Clumper(list_dicts).mutate(b=imputer).collect()

    # The third and fifth rows had no "b" and get the fallback 0.
    imputed = [row["b"] for row in res]
    assert imputed == [2, 3, 0, 6, 0]

示例#22

0

显示文件

文件： test_impute.py 项目： afiqmuzaffar/clumper

def test_correct_values_prev():
    """With strategy="prev", rows missing "b" reuse the previous row's value."""
    list_dicts = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]

    imputer = impute("b", strategy="prev")
    res = Clumper(list_dicts).mutate(b=imputer).collect()

    # The third and fifth rows had no "b" and repeat the value before them.
    imputed = [row["b"] for row in res]
    assert imputed == [2, 3, 3, 6, 6]

示例#23

0

显示文件

def test_mutability_insurance():
    """
    The Clumper must not hold on to the very list object it was given,
    so the caller's data is never the thing being worked on.
    """
    data = [{"a": 1}, {"b": 2}]
    assert Clumper(data).blob is not data

示例#24

0

显示文件

def test_multi_file_add_path_many():
    """With add_path=True, flat yaml data read via a pattern records its source path."""
    expected = {"tests/data/demo-flat-1.yaml", "tests/data/demo-flat-2.yaml"}

    loaded = Clumper.read_yaml("tests/data/demo-flat-*.yaml", add_path=True)
    seen = loaded.map(lambda row: row["read_path"]).collect()

    assert set(seen) == expected

示例#25

0

显示文件

def test_iteration():
    """
    Iterating over a Clumper must yield the original items in order.
    """
    data = [1, 2, 3, 4, 5]
    assert list(Clumper(data)) == data

示例#26

0

显示文件

def notifications(request):
    """Collect the personal feed and tag cloud for the request's course.

    Redirects to the login page when the request carries no course.
    Otherwise returns a dict with:

    * ``'my_feed'``: a ``Clumper`` over the comment, annotation, project and
      discussion querysets related to the user's assets.
    * ``'tag_cloud'``: the result of ``calculate_cloud`` for the ten tags
      with the highest count among the course's notes.
    """
    c = request.course

    if not c:
        return HttpResponseRedirect('/accounts/login/')

    user = request.user
    # Staff may look at another user's feed via the `as` query parameter.
    # `in` replaces the Python 2-only `has_key` and behaves the same.
    if user.is_staff and 'as' in request.GET:
        user = get_object_or_404(User, username=request.GET['as'])

    #personal feed
    # Keys are asset ids (as strings) the user annotated or commented on;
    # the values are unused, the dict only serves as a set.
    my_assets = {}
    for n in SherdNote.objects.filter(author=user, asset__course=c):
        my_assets[str(n.asset_id)] = 1
    for comment in Comment.objects.filter(user=user):
        if c == getattr(comment.content_object, 'course', None):
            my_assets[str(comment.object_pk)] = 1
    my_discussions = [
        d.collaboration_id for d in DiscussionIndex.objects.filter(
            participant=user,
            collaboration__context=request.collaboration_context)
    ]

    my_feed = Clumper(
        Comment.objects.filter(
            content_type=ContentType.objects.get_for_model(Asset),
            object_pk__in=my_assets.keys()).order_by(
                '-submit_date'),  #so the newest ones show up
        SherdNote.objects.filter(
            asset__in=my_assets.keys(),
            #no global annotations
            #warning: if we include global annotations
            #we need to stop it from autocreating one on-view
            #of the asset somehow
            range1__isnull=False).order_by('-added'),
        Project.objects.filter(Q(participants=user.pk) | Q(author=user.pk),
                               course=c).order_by('-modified'),
        DiscussionIndex.with_permission(
            request,
            DiscussionIndex.objects.filter(
                Q(Q(asset__in=my_assets.keys())
                  | Q(collaboration__in=my_discussions)
                  | Q(collaboration__user=request.user)
                  | Q(collaboration__group__user=request.user),
                  participant__isnull=False)).order_by('-modified')),
    )

    tags = Tag.objects.usage_for_queryset(
        SherdNote.objects.filter(asset__course=c), counts=True)

    #only top 10 tags
    # Descending by count. `key=`/`reverse=True` gives the same (stable)
    # order as the old cmp function but also works on Python 3, where
    # `cmp` and positional comparison functions no longer exist.
    top_tags = sorted(tags, key=lambda t: t.count, reverse=True)[:10]
    tag_cloud = calculate_cloud(top_tags)

    return {
        'my_feed': my_feed,
        'tag_cloud': tag_cloud,
    }

示例#27

0

显示文件

文件： test_explode.py 项目： afiqmuzaffar/clumper

def test_explode_many(n, k):
    """
    Exploding one nested list of length k on each of n rows must
    produce n * k counted values.
    """
    data = [{"i": i, "nested": list(range(k))} for i in range(n)]
    exploded = Clumper(data).explode(j="nested")
    assert exploded.count("j") == n * k

示例#28

0

显示文件

文件： test_implode.py 项目： afiqmuzaffar/clumper

def test_not_keep_correct_keys():
    """Check the keys present after imploding "item" into "items" per (a, b) group."""
    data = [
        {"a": 1, "b": 1, "item": 1},
        {"a": 1, "b": 1, "item": 2},
        {"a": 1, "b": 1, "item": 1},
        # Only the rows of the second group carry the extra key "c".
        {"a": 2, "b": 2, "c": 2, "item": 3},
        {"a": 2, "b": 2, "c": 2, "item": 2},
    ]

    imploded = Clumper(data).group_by("a", "b").implode(items="item")
    assert set(imploded.keys()) == {"a", "b", "c", "items"}

示例#29

0

显示文件

def test_keep_does_not_mutate():
    """
    Filtering with `keep` must leave the input list untouched while the
    resulting Clumper only holds the matching rows.
    """
    data = [{"a": 1}, {"a": 2}]
    filtered = Clumper(data).keep(lambda row: row["a"] == 1)

    # The source list still has both rows; the filtered result has one.
    assert len(data) == 2
    assert len(filtered) == 1

示例#30

0

显示文件

def test_paths_are_added():
    """With add_path=True every row read must carry its source path under "read_path"."""
    expected = {"tests/data/cards.jsonl", "tests/data/cards-more.jsonl"}

    loaded = Clumper.read_jsonl("tests/data/*.jsonl", add_path=True)
    seen = loaded.map(lambda row: row["read_path"]).collect()

    assert set(seen) == expected