示例#1
0
def test_source_without_identifer_raises_if_data_changes(cases):
    """Restacking different rows on a source built without id mapping fails.

    The first ``stack`` call for the case succeeds. A second call for the same
    case with different rows is expected to raise
    ``CannotStackStaticSourceError``, and the first line of its message must
    mention the case by name.
    """
    first_case = cases[0]
    static_source = Source()
    original_frame = markdown_to_df("""
            | date       | season      |
            | -          | -           |
            | 2001-09-08 | Fall 2001   |
            | 2002-01-09 | Spring 2002 |
        """)
    static_source.stack(first_case, original_frame)

    with pytest.raises(dtspec.core.CannotStackStaticSourceError) as excinfo:
        changed_frame = markdown_to_df("""
                | date       | season      |
                | -          | -           |
                | 2002-06-01 | Summer 2002 |
                | 2002-09-07 | Fall 2002   |
            """)
        static_source.stack(first_case, changed_frame)

    # Only the first line of the message is required to name the case.
    message_head = str(excinfo.value).split("\n")[0]
    assert "TestCase1" in message_head
示例#2
0
def test_source_without_identifier_generates_data(cases):
    """A source built without id mapping exposes the stacked rows unchanged.

    The same markdown table is parsed twice: once to stack onto the source
    and once to build the frame that ``source.data`` is compared against.
    """
    season_table = """
        | date       | season      |
        | -          | -           |
        | 2001-09-08 | Fall 2001   |
        | 2002-01-09 | Spring 2002 |
    """

    plain_source = Source()
    stacked_frame = markdown_to_df(season_table)
    plain_source.stack(cases[0], stacked_frame)

    assert_frame_equal(plain_source.data, markdown_to_df(season_table))
示例#3
0
def test_inheritance_w_multiple_composite_sources(sources):
    """A factory inheriting from a base can redefine one table and add another.

    The composite factory overrides the base ``students`` table and introduces
    an ``organizations`` table; each resulting dataframe must equal the table
    supplied to the composite factory.
    """
    base_students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base_factory = Factory(
        data={"students": {"table": base_students_table}},
        sources=sources,
    )

    modified_students_table = """
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Buffy      | Summers   |
        | s2 | Xander     | Harris    |
    """

    new_organizations_table = """
        | id | name           |
        | -  | -              |
        | o1 | Sunnydale High |
    """

    composite_data = {
        "students": {"table": deepcopy(modified_students_table)},
        "organizations": {"table": deepcopy(new_organizations_table)},
    }
    composite_factory = Factory(
        data=composite_data,
        inherit_from=[base_factory],
        sources=sources,
    )

    # Check each source in turn against the table the composite declared.
    declared_tables = (
        ("students", modified_students_table),
        ("organizations", new_organizations_table),
    )
    for source_name, declared_table in declared_tables:
        expected = markdown_to_df(declared_table)
        actual = composite_factory.data[source_name]["dataframe"]
        assert_frame_equal(actual, expected)
示例#4
0
def test_incompatible_keys_raise_specific_exception(target, case):
    """Key comparison raises ``MissingExpectedKeysAssertionError`` on mismatch.

    The expectation is keyed by ``id`` with values ``1``/``2``/``3``, while
    the loaded actual data carries ids written as ``0.0`` through ``3.0``.
    """
    integer_keyed_table = """
        | id | name   |
        | -  | -      |
        | 1  | Buffy  |
        | 2  | Willow |
        | 3  | Xander |
        """

    float_keyed_actual = markdown_to_df("""
        | id  | name      |
        | -   | -         |
        | 0.0 | The First |
        | 1.0 | Buffy     |
        | 2.0 | Willow    |
        | 3.0 | Xander    |
        """)

    keyed_expectation = DataExpectation(
        target,
        integer_keyed_table,
        by=["id"],
        compare_via="keys",
    )
    keyed_expectation.load_actual(float_keyed_actual)

    with pytest.raises(MissingExpectedKeysAssertionError):
        keyed_expectation.assert_expected(case)
示例#5
0
def test_actuals_are_loaded(api_w_actuals):
    """The ``student_classes`` target holds the expected rows after loading.

    Only the columns present in the expected table are compared; the trailing
    ``#`` notes in the table label which scenario each group of rows is for.
    """
    expected_rows = """
        | card_id | name   | school_name | class_name        | season       |
        | -       | -      | -           | -                 | -            |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Fall 2001    | # BasicDenormalization
        | stu2    | Willow | Sunnydale   |     Good Spells   | Spring 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Fall 2002    |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Fall 2002    |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Summer 2002  | # MissingClasses
        | stu2    | Willow | Sunnydale   |     Good Spells   | Summer 2002  |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Summer 2002  |
        | stu2    | Willow | Sunnydale   |     Good Spells   | Summer 2002  | # MultipleClasses
        | stu2    | Willow | Sunnydale   | Season 6 Spells   | Summer 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Summer 2002  |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Summer 2002  |
        | stu4    | Ted    | San Dimas   |         Station   | Summer 2002  |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Fall 2001    | # IdConcatenation
        | stu2    | Willow | Sunnydale   |     Good Spells   | Spring 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Fall 2002    |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Fall 2002    |
        """
    expected = markdown_to_df(expected_rows)

    student_classes = api_w_actuals.spec["targets"]["student_classes"]
    # Restrict the actual frame to the columns the expectation declares.
    actual = student_classes.data[expected.columns]
    assert_frame_equal(actual, expected)
示例#6
0
def test_inheritance_w_new_data(sources):
    """A composite factory's table replaces the inherited one for that source.

    The base factory declares a two-column ``students`` table; the composite
    declares a three-column one, and the resulting dataframe must equal the
    composite's table.
    """
    base_students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base_factory = Factory(
        data={"students": {"table": base_students_table}},
        sources=sources,
    )

    modified_table = """
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Buffy      | Summers   |
        | s2 | Xander     | Harris    |
    """

    overriding_data = {"students": {"table": deepcopy(modified_table)}}
    composite_factory = Factory(
        data=overriding_data,
        inherit_from=[base_factory],
        sources=sources,
    )

    expected = markdown_to_df(modified_table)
    students_entry = composite_factory.data["students"]
    assert_frame_equal(students_entry["dataframe"], expected)
示例#7
0
def test_null_identifiers_go_to_the_right_case(multiple_identifier_target, stu,
                                               cases):
    """
    A row whose identifying column is null can only be tied to a case through
    another identifying column that is not null. Here the second row has a
    null ``uuid`` but a usable ``id``, and must show up under ``cases[1]``.
    """
    fully_identified_row = {
        "id": stu["c1stu1"]["id"],
        "uuid": stu["c1stu1"]["uuid"],
        "first_name": "Buffy",
    }
    null_uuid_row = {
        "id": stu["c2stu2"]["id"],
        "uuid": None,
        "first_name": "Willow",
    }
    multiple_identifier_target.load_actual(
        [fully_identified_row, null_uuid_row])

    second_case = cases[1]
    actual = multiple_identifier_target.case_data(second_case)
    expected_table = """
        | id   | uuid   | first_name |
        | -    | -      | -          |
        | stu2 | {NULL} | Willow     |
        """
    expected = markdown_to_df(expected_table)

    assert_frame_equal(actual, expected)
示例#8
0
def test_setting_values(identifiers, cases):
    """``values`` passed to ``stack`` appear as a constant column in the data.

    The ``id`` column is mapped to the student identifier, so the expected ids
    are the ones the identifier generates for named ids ``s1`` and ``s2``.
    """
    first_case = cases[0]
    id_mapping = {
        "id": {
            "identifier": identifiers["student"],
            "attribute": "id"
        }
    }
    source = Source(id_mapping=id_mapping)

    stacked_frame = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bob        |
        | s2 | Nancy      |
        """)
    source.stack(first_case, stacked_frame, values={"last_name": "Summers"})

    actual = source.data
    # Generated in the same order as the placeholders: s1, then s2.
    translated_ids = {
        named_id: identifiers["student"].generate(case=first_case,
                                                  named_id=named_id)["id"]
        for named_id in ("s1", "s2")
    }
    expected = markdown_to_df("""
        | id   | first_name | last_name |
        | -    | -          | -         |
        | {s1} | Bob        | Summers   |
        | {s2} | Nancy      | Summers   |
        """.format(**translated_ids))
    assert_frame_equal(actual, expected)
示例#9
0
def test_null_in_source_translated_correctly(simple_source, identifiers,
                                             cases):
    """``{NULL}`` cells in a stacked table serialize as ``None``.

    This covers both a null ordinary value (``first_name``) and a null in the
    ``id`` column, which is compared against a generated student id in the
    non-null row.
    """
    first_case = cases[0]
    table_with_nulls = markdown_to_df("""
        | id     | first_name |
        | -      | -          |
        | s1     | {NULL}     |
        | {NULL} | Nancy      |
        """)
    simple_source.stack(first_case, table_with_nulls)

    actual = simple_source.serialize()

    s1_id = identifiers["student"].generate(case=first_case,
                                            named_id="s1")["id"]
    expected = [
        {"id": s1_id, "first_name": None},
        {"id": None, "first_name": "Nancy"},
    ]

    assert actual == expected
示例#10
0
def test_multiple_identifers_are_translated(source_w_multiple_ids, identifiers,
                                            cases):
    """Each identifier-backed column is replaced by its generated value.

    The ``id`` and ``uuid`` columns are compared against the student
    identifier's ``id`` and ``uuid`` attributes, and ``organization_id``
    against the organization identifier's ``id``.
    """
    first_case = cases[0]

    def generated(identifier_name, named_id, attribute):
        # Look up the value the named identifier produces for the first case.
        entry = identifiers[identifier_name].generate(case=first_case,
                                                      named_id=named_id)
        return entry[attribute]

    stacked_frame = markdown_to_df("""
        | id | uuid | organization_id |first_name  |
        | -  | -    | -               | -          |
        | s1 | s1   | o1              | Bob        |
        | s2 | s2   | o1              | Nancy      |
        """)
    source_w_multiple_ids.stack(first_case, stacked_frame)

    actual = source_w_multiple_ids.data
    expected_table = """
        | id   | uuid  | organization_id | first_name |
        | -    | -     | -               | -          |
        | {s1} | {su1} | {o1}            | Bob        |
        | {s2} | {su2} | {o1}            | Nancy      |
        """.format(
        s1=generated("student", "s1", "id"),
        s2=generated("student", "s2", "id"),
        su1=generated("student", "s1", "uuid"),
        su2=generated("student", "s2", "uuid"),
        o1=generated("organization", "o1", "id"),
    )
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)
示例#11
0
def test_multiple_embedded_identifiers_are_translated(identifiers, cases):
    """Several embedded identifier references in one cell are all replaced.

    The ``prefixed_id`` cells combine an organization reference and a student
    reference; the expected value joins the two generated ids with ``-``.
    """
    first_case = cases[0]
    student_mapping = {
        "id": {
            "identifier": identifiers["student"],
            "attribute": "id"
        }
    }
    source = Source(id_mapping=student_mapping, identifiers=identifiers)

    stacked_frame = markdown_to_df("""
        | id | prefixed_id                            | first_name |
        | -  | -                                      | -          |
        | s1 | {organization.id[o1]}-{student.id[s1]} | Bob        |
        | s2 | {organization.id[o1]}-{student.id[s2]} | Nancy      |
        """)
    source.stack(first_case, stacked_frame)

    actual = source.data

    def generated_id(identifier_name, named_id):
        # Value of the "id" attribute generated for the first case.
        return identifiers[identifier_name].generate(
            case=first_case, named_id=named_id)["id"]

    expected_table = """
        | id   | prefixed_id | first_name |
        | -    | -           | -          |
        | {s1} | {o1}-{s1}   | Bob        |
        | {s2} | {o1}-{s2}   | Nancy      |
        """.format(
        s1=generated_id("student", "s1"),
        s2=generated_id("student", "s2"),
        o1=generated_id("organization", "o1"),
    )
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)
示例#12
0
def test_data_converts_to_json(simple_source, identifiers, cases):
    """``serialize`` returns the stacked rows as a list of plain dicts.

    Each dict carries the generated student id for the row's named id along
    with the row's ``first_name``.
    """
    simple_source.stack(
        cases[0],
        markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bob        |
        | s2 | Nancy      |
        """),
    )

    actual = simple_source.serialize()
    expected = [
        {
            "id":
            identifiers["student"].generate(case=cases[0],
                                            named_id="s1")["id"],
            "first_name":
            "Bob",
        },
        {
            "id":
            identifiers["student"].generate(case=cases[0],
                                            named_id="s2")["id"],
            "first_name":
            "Nancy",
        },
    ]

    # Without this comparison the test could never fail on a wrong payload.
    assert actual == expected
示例#13
0
def test_overriding_defaults(identifiers, cases):
    """A column present in the stacked table wins over the source default.

    The source defaults ``last_name`` to ``Jones``, but the stacked rows give
    ``Not Jones`` and that is the value expected in the resulting data.
    """
    first_case = cases[0]
    student_mapping = {
        "id": {
            "identifier": identifiers["student"],
            "attribute": "id"
        }
    }
    source = Source(defaults={"last_name": "Jones"},
                    id_mapping=student_mapping)

    stacked_frame = markdown_to_df("""
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Bob        | Not Jones |
        | s2 | Nancy      | Not Jones |
        """)
    source.stack(first_case, stacked_frame)

    actual = source.data
    # Generated in the same order as the placeholders: s1, then s2.
    translated_ids = {
        named_id: identifiers["student"].generate(case=first_case,
                                                  named_id=named_id)["id"]
        for named_id in ("s1", "s2")
    }
    expected = markdown_to_df("""
        | id   | first_name | last_name |
        | -    | -          | -         |
        | {s1} | Bob        | Not Jones |
        | {s2} | Nancy      | Not Jones |
        """.format(**translated_ids))
    assert_frame_equal(actual, expected)
示例#14
0
def test_setting_constant_values(expected_table, target, case):
    """An expectation built with ``values`` passes when actuals carry them.

    The actual frame is the parsed expected table plus a ``school_name``
    column holding the same constant that was given to the expectation.
    """
    constants = {"school_name": "Sunnydale High"}
    expectation = DataExpectation(target, expected_table, values=constants)

    frame_with_constants = markdown_to_df(expected_table)
    frame_with_constants["school_name"] = "Sunnydale High"

    # Hand over a copy so the expectation does not share the local frame.
    expectation.load_actual(frame_with_constants.copy())
    expectation.assert_expected(case)
示例#15
0
    def _build_expected_data(self, table):
        """Parse the expectation's markdown table into a dataframe.

        Args:
            table: Markdown table text passed to ``markdown_to_df``.

        Returns:
            The parsed dataframe, after ``self._add_constants`` has been
            called on it.

        Raises:
            BadMarkdownTableError: If the table cannot be parsed. The message
                names ``self.target`` and embeds the original error text; the
                original error is chained as the cause.
        """
        try:
            expected_df = markdown_to_df(table)
        except BadMarkdownTableError as err:
            # Chain explicitly so the traceback shows the parse failure as the
            # direct cause rather than as an incidental error during handling.
            raise BadMarkdownTableError(
                f"Unable to generate data for target {self.target}:\n{err}"
            ) from err

        self._add_constants(expected_df)
        return expected_df
示例#16
0
def test_empty_data_can_be_loaded_with_columns_specified(simple_target):
    """Loading no rows with explicit columns yields an empty frame with them.

    The ``__dtspec_case__`` column is dropped before comparing against a
    header-only markdown table.
    """
    declared_columns = ["id", "first_name"]
    simple_target.load_actual([], columns=declared_columns)

    actual = simple_target.data.drop(columns="__dtspec_case__")
    header_only_table = """
        | id   | first_name |
        | -    | -          |
        """
    expected = markdown_to_df(header_only_table)

    assert_frame_equal(actual, expected)
示例#17
0
def test_defaults_override_identifiers(identifiers, cases):
    """
    When an identifier column also has a default, the default is what gets
    used as the named id (anonymous id generation does not kick in), so both
    rows end up with the same generated id.
    """
    first_case = cases[0]
    student_mapping = {
        "id": {
            "identifier": identifiers["student"],
            "attribute": "id"
        }
    }
    source = Source(id_mapping=student_mapping, defaults={"id": "stu1"})

    names_only_frame = markdown_to_df("""
            | first_name |
            | -          |
            | Bob        |
            | Still Bob  |
            """)
    source.stack(first_case, names_only_frame)

    # The identifier caches generated ids per case, keyed by id(case).
    ids_after_stack = identifiers["student"].cached_ids[id(
        first_case)].named_ids
    generated_id = list(ids_after_stack.values())[0]["id"]

    actual = source.data
    expected_table = """
        | first_name | id   |
        | -          | -    |
        | Bob        | {s1} |
        | Still Bob  | {s1} |
        """.format(s1=generated_id)
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)

    # The first cached named id must be the default itself.
    cached_names = list(identifiers["student"].cached_ids[id(
        first_case)].named_ids.keys())
    assert cached_names[0] == source.defaults["id"]
示例#18
0
def test_target_can_be_split_into_case(simple_target, simple_data, cases):
    """``case_data`` returns only the rows belonging to the requested case.

    After loading ``simple_data``, the rows for ``cases[1]`` are expected to
    be the two listed in the table below.
    """
    simple_target.load_actual(simple_data)

    second_case = cases[1]
    actual = simple_target.case_data(second_case)
    second_case_table = """
        | id   | first_name |
        | -    | -          |
        | stu1 | Faith      |
        | stu2 | Willow     |
        """
    expected = markdown_to_df(second_case_table)

    assert_frame_equal(actual, expected)
示例#19
0
def test_sources_stack(simple_source, identifiers, cases):
    """Stacking two cases appends their rows, each with its own generated ids.

    Both cases use the named ids ``s1`` and ``s2``; the expected frame uses
    the id generated for each (case, named id) pair, first case first.
    """
    first_case, second_case = cases[0], cases[1]

    first_frame = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bob        |
        | s2 | Nancy      |
        """)
    simple_source.stack(first_case, first_frame)

    second_frame = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bobob      |
        | s2 | Nanci      |
        """)
    simple_source.stack(second_case, second_frame)

    actual = simple_source.data

    def student_id(case, named_id):
        # Id the student identifier generates for this case and named id.
        return identifiers["student"].generate(case=case,
                                               named_id=named_id)["id"]

    expected_table = """
        | id    | first_name |
        | -     | -          |
        | {s11} | Bob        |
        | {s12} | Nancy      |
        | {s21} | Bobob      |
        | {s22} | Nanci      |
        """.format(
        s11=student_id(first_case, "s1"),
        s12=student_id(first_case, "s2"),
        s21=student_id(second_case, "s1"),
        s22=student_id(second_case, "s2"),
    )
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)
示例#20
0
def test_actual_data_is_loaded_ids_translated(simple_target, simple_data):
    """Loaded actual data matches the expected table once the case column is dropped.

    Per the test name, ids are expected to appear as their named ids
    (``stu1``, ``stu2``) in the target's data.
    """
    simple_target.load_actual(simple_data)

    loaded_frame = simple_target.data
    actual = loaded_frame.drop(columns=["__dtspec_case__"])
    all_rows_table = """
        | id   | first_name |
        | -    | -          |
        | stu1 | Buffy      |
        | stu2 | Willow     |
        | stu1 | Faith      |
        | stu2 | Willow     |
        """
    expected = markdown_to_df(all_rows_table)

    assert_frame_equal(actual, expected)
示例#21
0
def test_identifiers_generate_defaults(identifiers, cases):
    """
    When a column is mapped to an identifier but the stacked table gives no
    named id for it, "anonymous" named ids are generated at stack time and
    their ids fill the column.
    """
    first_case = cases[0]
    student_mapping = {
        "id": {
            "identifier": identifiers["student"],
            "attribute": "id"
        }
    }
    source = Source(id_mapping=student_mapping)

    names_only_frame = markdown_to_df("""
            | first_name |
            | -          |
            | Bob        |
            | Nancy      |
            """)
    source.stack(first_case, names_only_frame)

    # Read back whatever ids the identifier cached for this case.
    cached_for_case = identifiers["student"].cached_ids[id(first_case)]
    anonymous_ids = [
        entry["id"] for entry in cached_for_case.named_ids.values()
    ]

    actual = source.data
    expected_table = """
        | first_name | id   |
        | -          | -    |
        | Bob        | {s1} |
        | Nancy      | {s2} |
        """.format(s1=anonymous_ids[0], s2=anonymous_ids[1])
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)
示例#22
0
def test_ignores_trailing_comments():
    """A ``# ...`` note after a row's closing pipe is not part of the data."""
    table_with_comment = """
        | id | name  |
        | -  | -     |
        | 1  | one   |
        | 2  | two   | # Some comment
        | 3  | three |
        """

    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "two", "three"]
    }
    expected = pd.DataFrame(expected_columns)

    assert_frame_equal(markdown_to_df(table_with_comment), expected)
示例#23
0
def test_convert_table_to_df():
    """A plain markdown table parses into a frame of string-valued columns."""
    plain_table = """
        | id | name  |
        | -  | -     |
        | 1  | one   |
        | 2  | two   |
        | 3  | three |
        """

    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "two", "three"]
    }
    expected = pd.DataFrame(expected_columns)

    assert_frame_equal(markdown_to_df(plain_table), expected)
示例#24
0
def test_honors_embedded_octothorpes():
    """A ``#`` inside a cell is kept as data, not treated as a comment."""
    table_with_hash_cell = """
        | id | name  |
        | -  | -     |
        | 1  | one   |
        | 2  | #2    |
        | 3  | three |
        """

    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "#2", "three"]
    }
    expected = pd.DataFrame(expected_columns)

    assert_frame_equal(markdown_to_df(table_with_hash_cell), expected)
示例#25
0
def test_cases_assert_expectations():
    """A case fails its assertions when an expectation's actual data differs.

    The actual frame starts as the parsed expected table and has one ``name``
    cell changed, so ``Case.assert_expectations`` must raise
    ``AssertionError``.
    """
    table = """
        | id | name   |
        | -  | -      |
        | 1  | Buffy  |
        | 2  | Willow |
        | 3  | Xander |
    """

    actual_data = markdown_to_df(table)
    # Write through the frame in one indexing step. The chained form
    # `actual_data["name"].iloc[1] = ...` can assign to a temporary and leave
    # the frame untouched (always so under pandas Copy-on-Write).
    name_position = actual_data.columns.get_loc("name")
    actual_data.iloc[1, name_position] = "Evil Willow"

    expectation = DataExpectation(Target(), table)
    expectation.load_actual(actual_data)

    case = Case(expectations=[expectation])
    with pytest.raises(AssertionError):
        case.assert_expectations()
示例#26
0
def actual_data(expected_table):
    """Return the dataframe ``markdown_to_df`` parses from ``expected_table``."""
    parsed_frame = markdown_to_df(expected_table)
    return parsed_frame