def test_determine_join_columns():
    df_1 = pd.DataFrame(
        [], columns=["year", "source_detail1", "source_detail2", "value"])
    df_2 = pd.DataFrame(
        [],
        columns=["year", "GES", "source_detail1", "source_detail2", "value"])
    df_3 = pd.DataFrame(
        [], columns=["year", "source_detail1", "source_detail2", "value"])
    frames_inp = [df_1, df_2, df_3]
    output = QueryOutputTransformer._determine_join_columns(frames_inp)
    expected_output = set(["year"])
    assert output == expected_output

    df_1 = pd.DataFrame([],
                        columns=[
                            "year", "NAT", "GES", "source_detail1",
                            "source_detail2", "value"
                        ])
    df_2 = pd.DataFrame(
        [],
        columns=["year", "GES", "source_detail1", "source_detail2", "value"])
    frames_inp = [df_1, df_2]
    output = QueryOutputTransformer._determine_join_columns(frames_inp)
    expected_output = set(["year", "GES"])
    assert output == expected_output
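
The test above implies that _determine_join_columns keeps only the columns that are shared by every frame and are not statistic-specific (the source_* detail columns and value). A minimal sketch consistent with that behaviour, offered as an assumption rather than the library's actual implementation:

def determine_join_columns_sketch(frames):
    # columns present in every frame
    shared = set.intersection(*(set(f.columns) for f in frames))
    # drop statistic-specific columns; whatever remains serves as join keys
    return {c for c in shared if c != "value" and not c.startswith("source_")}
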
def test_output_transformer():
    """ prepare test of output transformer """

    testquery = buildQuery()

    query_result = runQuery(testquery)
    """ start test of output transformer """
    qOutTrans = QueryOutputTransformer(query_result)

    # test whether input data arrive in correct format
    assert type(query_result) == dict, "input data not dict type"

    data_transformed = qOutTrans.transform()

    # test whether transformed output data is a dataframe
    assert (type(data_transformed) == pandas.DataFrame
            ), "transformed data is not a dataframe"

    assert "id" in data_transformed.columns, "no id colum"
    assert "name" in data_transformed.columns, "no name colum"
    assert "year" in data_transformed.columns, "no year colum"

    # columns of outdata should not contain json format
    checklist = ["." in col for col in data_transformed.columns]
    assert not any(checklist), "hierarchy not properly transformed"
def test_duplicate_removal_multi(query_duplicates_for_states_multi_stat):
    qOutTrans = QueryOutputTransformer(query_duplicates_for_states_multi_stat)
    data_transformed = qOutTrans.transform(remove_duplicates=False)
    assert all(data_transformed.name.value_counts() == 4)

    data_transformed = qOutTrans.transform(remove_duplicates=True)
    assert all(data_transformed.name.value_counts() == 1)
def test_output_transformer_with_one_statistic_and_units(
        query_results_one_statistic_with_units):
    """check if units were added correctly"""
    qOutTrans = QueryOutputTransformer(query_results_one_statistic_with_units)
    data_transformed = qOutTrans.transform(add_units=True)

    assert data_transformed.loc[0, "TIE003_unit"] == "Anzahl"
    assert data_transformed.columns[5] == "TIE003_unit"
Example #5
def test_build_execute_transform_integration_all_regions(query_all_regions):
    """
    Smoke test covering all_regions
    """
    q_exec = QueryExecutioner()

    res = q_exec.run_query(query_all_regions)

    output_transf = QueryOutputTransformer(res)
    output_transf.transform()
def test_output_transformer_auto_join_enum(
        query_result_with_autojoin_and_one_enum):
    qOutTrans = QueryOutputTransformer(query_result_with_autojoin_and_one_enum)
    data_transformed = qOutTrans.transform(verbose_enum_values=False)
    assert "BEVSTD_GES" in data_transformed
    assert data_transformed.columns.get_loc("BEVSTD_GES") <= 8
    assert list(data_transformed.BEVSTD_GES.unique()) == [None]

    data_transformed = qOutTrans.transform(verbose_enum_values=True)
    assert "BEVSTD_GES" in data_transformed
    assert list(data_transformed.BEVSTD_GES.unique()) == ["Gesamt"]
def test_output_transformer_format_options_multi_enum(
        query_results_with_mult_enum):
    qOutTrans = QueryOutputTransformer(query_results_with_mult_enum)
    data_transformed = qOutTrans.transform(verbose_enum_values=False)
    assert data_transformed["ADVNW2"].iloc[0] == "ADVTN420"
    assert data_transformed["ADVNW1"].iloc[0] is None

    data_transformed = qOutTrans.transform(verbose_enum_values=True)
    print(data_transformed.head())
    assert data_transformed["ADVNW2"].iloc[0] == "Grünanlage"
    assert data_transformed["ADVNW1"].iloc[0] == "Gesamt"
Example #8
def test_build_execute_transform_integration_multi_region(query_multi_regions):
    """
    Smoke test covering multiple regions in
    region query.
    """

    q_exec = QueryExecutioner()

    res = q_exec.run_query(query_multi_regions)

    output_transf = QueryOutputTransformer(res)
    output_transf.transform()
Example #9
    def results(self,
                verbose_statistics: bool = False,
                verbose_enums: bool = False) -> DataFrame:
        """Runs the query and returns a Pandas DataFrame with the results.
           It also fills the instance variable result_meta_data with meta
           data specific to the query instance.

        Arguments:
            verbose_statistics -- Toggles whether statistic column names are
            displayed with their short description in the result data frame
            verbose_enums -- Toggles whether enum values are displayed
            with their short description in the result data frame

        :raises RuntimeError: If the query fails.
        :return: A DataFrame with the queried data.
        :rtype: DataFrame
        """

        result = QueryExecutioner(
            statistics_meta_data_provider=self._stat_meta_data_provider
        ).run_query(self)
        if result:
            # It is currently assumed that all graphql queries
            # that are generated internally for the Query instance
            # at hand yield the same meta data.
            self.result_meta_data = result[0].meta_data
            return QueryOutputTransformer(result).transform(
                verbose_statistic_names=verbose_statistics,
                verbose_enum_values=verbose_enums,
            )
        else:
            raise RuntimeError("No results could be returned for this Query.")
Example #10
def test_determine_column_order():
    input_columns = ["source_A", "source_B", "stat_A_value", "stat_B_value", "year"]
    input_frame = pd.DataFrame([], columns=input_columns)
    join_columns = set(["year"])

    output = QueryOutputTransformer._determine_column_order(input_frame, join_columns)
    expected_output = ["year", "stat_A_value", "stat_B_value", "source_A", "source_B"]

    assert output == expected_output
Example #11
def test_prefix_frame_columns():
    cols = ["year", "stat_value", "source"]
    df = pd.DataFrame([], columns=cols)
    output = list(
        QueryOutputTransformer._prefix_frame_cols(df,
                                                  prefix="A",
                                                  exceptions=["year"]).columns)
    expected_output = ["year", "A_stat_value", "A_source"]

    assert output == expected_output
Example #12
def test_get_general_fields():
    meta_dict = {"stat_1": "stat_1 description", "stat_2": "stat_2 description"}
    region_json = {
        "id": "11",
        "stat_1": [],
        "name": "Berlin",
        "stat_2": [{"year": 2000, "value": 1}, {"year": 2001, "value": 2}],
    }
    output = QueryOutputTransformer._get_general_fields(region_json, meta_dict)
    expected_output = ["id", "name"]
    assert output == expected_output
Example #13
    def results(
        self,
        verbose_statistics: bool = False,
        verbose_enums: bool = False,
        add_units: bool = False,
        remove_duplicates: bool = True,
    ) -> DataFrame:
        """Runs the query and returns a Pandas DataFrame with the results.
           It also fills the instance variable result_meta_data with meta
           data specific to the query instance.

        :param verbose_statistics: Toggles whether statistic column names are
            displayed with their short description in the result data frame
        :param verbose_enums: Toggles whether enum values are displayed
            with their short description in the result data frame
        :param add_units: Adds units available in the metadata to the
            result dataframe. Care should be taken, because not every
            statistic specifies these correctly. When in doubt, refer to
            the statistic description.
        :param remove_duplicates: Removes duplicates from the query results,
            i.e. if the exact same number has been reported for the same
            statistic, year, region etc. from the same source, it gets removed.
            Such duplications are sometimes caused on the API side, and this is
            a convenience to remove them. The removal happens before several
            different statistics are potentially joined. Unless diagnosing the
            API, the default (True) is generally in the user's interest.

        :raises RuntimeError: If the query fails.
        :return: A DataFrame with the queried data.
        :rtype: DataFrame
        """
        if not self._contains_statistic_field():
            raise Exception(
                "No statistic field is defined in query, please add statistic field "
                "via method add_field.")

        result = QueryExecutioner(
            statistics_meta_data_provider=self._stat_meta_data_provider
        ).run_query(self)
        if result:
            # It is currently assumed that all graphql queries
            # that are generated internally for the Query instance
            # at hand yield the same meta data.
            if self._query_result_contains_undefined_region(result):
                raise ValueError("Queried region is invalid.")
            self.result_meta_data = result[0].meta_data
            return QueryOutputTransformer(result).transform(
                verbose_statistic_names=verbose_statistics,
                verbose_enum_values=verbose_enums,
                add_units=add_units,
                remove_duplicates=remove_duplicates,
            )
        else:
            raise RuntimeError("No results could be returned for this Query.")
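
For reference, a hedged usage sketch of this results method; the Query.region constructor, the region id and the statistic code BEVMK3 below are assumptions drawn from the surrounding tests, not from this snippet:

from datenguidepy import Query  # assumed import path

q = Query.region("11")   # assumed constructor for a single-region query (Berlin)
q.add_field("BEVMK3")    # statistic code borrowed from the tests above
df = q.results(
    verbose_statistics=True,   # statistic short descriptions in the column names
    add_units=True,            # append *_unit columns from the metadata
    remove_duplicates=True,    # drop API-side duplicate rows (the default)
)
print(df.head())
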
def test_output_transformer_defaults(query_result):
    """ start test of output transformer """
    qOutTrans = QueryOutputTransformer(query_result)

    data_transformed = qOutTrans.transform()

    # test whether transformed output data is a dataframe
    assert type(data_transformed
                ) == pd.DataFrame, "transformed data is not a dataframe"

    assert "id" in data_transformed.columns, "no id colum"
    assert "name" in data_transformed.columns, "no name colum"
    assert "year" in data_transformed.columns, "no year colum"
    assert "BEVMK3" in data_transformed.columns, "statistic values are missing"
    assert (
        "BEVMK3_value"
        not in data_transformed.columns), "old statistics name still present"

    # columns of outdata should not contain json format
    checklist = ["." in col for col in data_transformed.columns]
    assert not any(checklist), "hierarchy not properly transformed"
def test_output_transformer_with_multiple_statistics_and_units(
        query_results_multiple_statistics_with_units):
    """check if units were added correctly"""
    qOutTrans = QueryOutputTransformer(
        query_results_multiple_statistics_with_units)
    data_transformed = qOutTrans.transform(add_units=True)

    assert data_transformed.iloc[1, range(4, 15, 2)].to_list() == [
        "Prozent",
        "Prozent",
        "Prozent",
        "Prozent",
        "Prozent",
        "kg",
    ]
    assert data_transformed.columns[range(4, 15, 2)].to_list() == [
        "AI0203_unit",
        "AI0204_unit",
        "AI0205_unit",
        "AI0206_unit",
        "AI0207_unit",
        "AI1902_unit",
    ]
Example #16
    def results(self) -> DataFrame:
        """Runs the query and returns a Pandas DataFrame with the results.

        Raises:
            RuntimeError: If the Query did not return any results,
                e.g. if the Query was ill-formed.

        Returns:
            DataFrame: A DataFrame with the queried data.
        """
        result = QueryExecutioner().run_query(self)
        if result:
            # TODO: adapt QueryOutputTransformer to process list of results
            return QueryOutputTransformer(result[0].query_results[0]).transform()
        else:
            raise RuntimeError("No results could be returned for this Query.")
def test_output_transformer_format_options(query_result,
                                           query_results_with_enum):

    qOutTrans = QueryOutputTransformer(query_result)
    data_transformed = qOutTrans.transform(verbose_statistic_names=True)
    assert (
        # "Von der Scheidung betroffene Kinder (BEVMK3)" in data_transformed.columns
        "BEVMK3 (BEVMK3)" in
        data_transformed.columns), "statistic values are missing"

    enum_values = {
        "AFD",
        "B90_GRUENE",
        "CDU",
        "DIELINKE",
        "FDP",
        "SONSTIGE",
        "SPD",
        "GESAMT",
        None,
    }
    enum_descriptions = {
        "AfD",
        "GRÜNE",
        "CDU/CSU",
        "DIE LINKE",
        "FDP",
        "Sonstige Parteien",
        "SPD",
        "Gesamt",
    }

    qOutTrans = QueryOutputTransformer(query_results_with_enum)
    data_transformed = qOutTrans.transform()
    assert set(data_transformed["PART04"]).issubset(enum_values)

    qOutTrans = QueryOutputTransformer(query_results_with_enum)
    data_transformed = qOutTrans.transform(verbose_enum_values=True)
    assert set(data_transformed["PART04"]).issubset(enum_descriptions)

    qOutTrans = QueryOutputTransformer(query_results_with_enum)
    data_transformed = qOutTrans.transform(verbose_enum_values=True,
                                           verbose_statistic_names=True)
    # assert "Gültige Zweitstimmen (WAHL09)" in data_transformed
    assert "WAHL09 (WAHL09)" in data_transformed