Exemplo n.º 1
0
def test_datamation_groupby_multiple():
    """Check aggregations after grouping a DatamationFrame by several columns.

    The small-salary data is grouped by ``Degree`` and ``Work``; for mean, sum
    and product the test verifies that both operations are recorded, that two
    states are kept with the original frame first, and that the ``Salary``
    value of each group matches the expected number. The mean check is then
    repeated on the penguins data grouped by ``species``, ``island`` and
    ``sex``.

    Expected floats are compared through ``approx`` rather than exact ``==``,
    matching the penguin assertions, so that last-digit rounding differences
    do not fail the test.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Group by Degree, Work
    mean = df.groupby(['Degree', 'Work']).mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.Salary.Masters.Academia == approx(84.0298831968801)
    assert mean.Salary.Masters.Industry == approx(91.22576155606282)
    assert mean.Salary.PhD.Academia == approx(85.55796571969728)
    assert mean.Salary.PhD.Industry == approx(93.08335885824636)

    # sum (named ``total`` so the builtin ``sum`` is not shadowed)
    total = df.groupby(['Degree', 'Work']).sum()

    assert "groupby" in total.operations
    assert "sum" in total.operations

    assert len(total.states) == 2
    assert df.equals(total.states[0])

    assert total.Salary.Masters.Academia == approx(840.2988319688011)
    assert total.Salary.Masters.Industry == approx(5655.997216475895)
    assert total.Salary.PhD.Academia == approx(1540.043382954551)
    assert total.Salary.PhD.Industry == approx(930.8335885824636)

    # product
    product = df.groupby(['Degree', 'Work']).prod()

    assert "groupby" in product.operations
    assert "product" in product.operations

    assert len(product.states) == 2
    assert df.equals(product.states[0])

    assert product.Salary.Masters.Academia == approx(1.753532557780977e+19)
    assert product.Salary.Masters.Industry == approx(3.3602152421057308e+121)
    assert product.Salary.PhD.Academia == approx(6.027761935702164e+34)
    assert product.Salary.PhD.Industry == approx(4.8818435443657834e+19)

    # Group by species, island, sex
    df = DatamationFrame(load_penguins())
    mean = df.groupby(['species', 'island', 'sex']).mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.bill_length_mm.Adelie.Biscoe.male == approx(40.5909090909091)
    assert mean.bill_length_mm.Adelie.Biscoe.female == approx(
        37.35909090909092)
def test_datamation_frame_groupby():
    """Grouping a DatamationFrame records the operation and the source frame.

    After ``groupby('Work')`` the result must list ``'groupby'`` among its
    operations and hold a first state equal to the original frame.
    """
    frame = DatamationFrame(small_salary().df)

    by_work = frame.groupby('Work')
    first_state = by_work.states[0]

    assert 'groupby' in by_work.operations
    assert frame.equals(first_state)
Exemplo n.º 3
0
def test_small_salary(capsys):
    """Print the per-``Work`` mean of the small-salary data and check stdout.

    The printed output must mention the column names, both work categories
    and the two expected mean values.
    """
    salaries = small_salary().df
    mean_by_work = salaries.groupby('Work').mean()
    print(mean_by_work)

    printed = capsys.readouterr().out

    # Checked in this order so the first missing fragment is the one reported.
    expected_fragments = (
        "Work",
        "Salary",
        "Academia",
        "Industry",
        "85.012222",
        "91.483761",
    )
    for fragment in expected_fragments:
        assert fragment in printed
def test_datamation_frame_datamation_sanddance():
    """Check the object returned by ``datamation_sanddance()``.

    Built from ``groupby('Work').mean()`` on the small-salary data, the
    datamation must expose two states (the original frame, then a
    ``DatamationGroupBy``), the operations ``'groupby'`` and ``'mean'`` in
    that order, a ``DatamationFrame`` output holding the expected mean
    salaries, and a string form that mentions ``'Salary'``.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    datamation = df.groupby('Work').mean().datamation_sanddance()

    assert len(datamation.states) == 2
    assert len(datamation.operations) == 2

    assert df.equals(datamation.states[0])
    assert isinstance(datamation.states[1], DatamationGroupBy)

    assert datamation.operations[0] == 'groupby'
    assert datamation.operations[1] == 'mean'

    assert isinstance(datamation.output, DatamationFrame)

    # Computed means are floats: compare with a tolerance, not exact equality.
    assert datamation.output.Salary.Academia == approx(85.01222196154829)
    assert datamation.output.Salary.Industry == approx(91.48376118136609)

    assert 'Salary' in str(datamation)
def test_datamation_frame_specs():
    """Compare generated specs with the stored reference JSON files.

    For mean, min, max, sum, product, count and quantile, the small-salary
    data is grouped by one column and by two columns; the ``specs()`` of each
    result is passed to ``compare_specs_with_file`` together with the matching
    reference file, located relative to this test module.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Resolved once: every reference path below is relative to this directory.
    script_dir = os.path.dirname(__file__)

    def check(result, relative_path):
        """Compare ``result.specs()`` with the reference at ``relative_path``."""
        specs = result.specs()
        # The references are JSON files; read them as UTF-8 instead of
        # relying on the platform's default encoding.
        with open(os.path.join(script_dir, relative_path), 'r',
                  encoding='utf-8') as specs_file:
            compare_specs_with_file(specs, specs_file)

    # One call per case, so a failure's traceback points at the exact case.

    # Mean
    check(df.groupby('Degree').mean(),
          '../../inst/specs/raw_spec.json')
    check(df.groupby('Work').mean(),
          '../../inst/specs/groupby_work.json')
    check(df.groupby(['Degree', 'Work']).mean(),
          '../../inst/specs/groupby_degree_work.json')
    check(df.groupby(['Work', 'Degree']).mean(),
          '../../inst/specs/groupby_work_degree.json')

    # Min
    check(df.groupby('Degree').min('Salary'),
          '../../sandbox/custom_animations/custom-animations-min-R.json')
    check(df.groupby(['Degree', 'Work']).min('Salary'),
          '../../inst/specs/min_specs_two_columns.json')

    # Max
    check(df.groupby('Degree').max('Salary'),
          '../../sandbox/custom_animations/custom-animations-max-R.json')
    check(df.groupby(['Degree', 'Work']).max('Salary'),
          '../../inst/specs/max_specs_two_columns.json')

    # Sum
    check(df.groupby('Degree').sum(),
          '../../inst/specs/sum_specs.json')
    check(df.groupby(['Degree', 'Work']).sum(),
          '../../inst/specs/sum_specs_two_columns.json')

    # Product
    check(df.groupby('Degree').prod(),
          '../../inst/specs/prod_specs.json')
    check(df.groupby(['Degree', 'Work']).prod(),
          '../../inst/specs/prod_specs_two_columns.json')

    # Count
    check(df.groupby('Degree').count('Salary'),
          '../../inst/specs/count_specs_one_column.json')
    check(df.groupby(['Degree', 'Work']).count('Salary'),
          '../../inst/specs/count_specs_two_columns.json')

    # Quantile
    check(df.groupby('Degree').quantile('Salary', 0.01),
          '../../sandbox/custom_animations/custom-animations-quantile-R.json')
    check(df.groupby(['Degree', 'Work']).quantile('Salary', 0.01),
          '../../inst/specs/quantile_specs_two_columns.json')
Exemplo n.º 6
0
def test_datamation_groupby():
    """Check aggregations after grouping a DatamationFrame by one column.

    The small-salary data is grouped by ``Degree``; for mean, median, sum and
    product the test verifies that both operations are recorded, that two
    states are kept with the original frame first, and that the ``Salary``
    value of each group matches the expected number. The mean check is then
    repeated with the data grouped by ``Work``.

    Expected floats are compared through ``approx`` rather than exact ``==``
    so that last-digit rounding differences do not fail the test.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Group by Degree
    mean = df.groupby('Degree').mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.Salary.Masters == approx(90.22633400617633)
    assert mean.Salary.PhD == approx(88.24560612632195)

    # median
    median = df.groupby('Degree').median()

    assert "groupby" in median.operations
    assert "median" in median.operations

    assert len(median.states) == 2
    assert df.equals(median.states[0])

    assert median.Salary.Masters == approx(91.13211765489541)
    assert median.Salary.PhD == approx(86.40630845562555)

    # sum (named ``total`` so the builtin ``sum`` is not shadowed)
    total = df.groupby('Degree').sum()

    assert "groupby" in total.operations
    assert "sum" in total.operations

    assert len(total.states) == 2
    assert df.equals(total.states[0])

    assert total.Salary.Masters == approx(6496.296048444696)
    assert total.Salary.PhD == approx(2470.8769715370145)

    # product
    product = df.groupby('Degree').prod()

    assert "groupby" in product.operations
    assert "product" in product.operations

    assert len(product.states) == 2
    assert df.equals(product.states[0])

    assert product.Salary.PhD == approx(2.9426590692781414e+54)
    assert product.Salary.Masters == approx(5.892246828184284e+140)

    # Group by Work
    mean = df.groupby('Work').mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.Salary.Academia == approx(85.01222196154829)
    assert mean.Salary.Industry == approx(91.48376118136609)