Пример #1
0
 def _get_date_time_code_list_for_dimension(
     self,
     column_title: str,
     new_dimension: NewQbDimension,
 ) -> CompositeQbCodeList:
     """Wrap a new dimension's concepts in a composite code list.

     Every concept of ``new_dimension.code_list`` is re-expressed as a
     ``DuplicatedQbConcept`` that keeps the concept's label and code and
     whose ``existing_concept_uri`` is obtained by expanding the URI
     template held in ``self.value``. The template variable is the
     CSV-W safe form of ``column_title`` and is bound to the concept's
     label.

     The resulting code list's catalog metadata is created from the
     dimension's label. ``new_dimension.code_list`` is asserted to be a
     ``NewQbCodeList``.
     """
     template_variable = csvw_column_name_safe(column_title)
     source_code_list = new_dimension.code_list
     assert isinstance(source_code_list, NewQbCodeList)

     metadata = CatalogMetadata(new_dimension.label)
     duplicated_concepts = [
         DuplicatedQbConcept(
             existing_concept_uri=uritemplate.expand(
                 self.value, {template_variable: concept.label}
             ),
             label=concept.label,
             code=concept.code,
         )
         for concept in source_code_list.concepts
     ]
     return CompositeQbCodeList(metadata, duplicated_concepts)
Пример #2
0
def _get_code_list(
    column_label: str,
    maybe_code_list: Optional[Union[bool, str]],
    info_json_parent_dir: Path,
    maybe_parent_uri: Optional[str],
    column_data: PandasDataTypes,
    maybe_property_value_url: Optional[str],
) -> Optional[QbCodeList]:
    is_date_time_code_list = (
        (maybe_code_list is None or
         (isinstance(maybe_code_list, bool) and maybe_code_list))
        and maybe_parent_uri
        == "http://purl.org/linked-data/sdmx/2009/dimension#refPeriod"
        and maybe_property_value_url is not None and
        maybe_property_value_url.startswith("http://reference.data.gov.uk/id/")
    )

    if is_date_time_code_list:
        column_name_csvw_safe = csvw_column_name_safe(column_label)
        columnar_data = pandas_input_to_columnar_str(column_data)
        concepts = [
            DuplicatedQbConcept(
                existing_concept_uri=uritemplate.expand(
                    maybe_property_value_url, {column_name_csvw_safe: c}),
                label=c,
            ) for c in sorted(set(columnar_data))
        ]
        return CompositeQbCodeList(
            CatalogMetadata(column_label),
            concepts,
        )
    elif maybe_code_list is not None:
        if isinstance(maybe_code_list, str):
            return ExistingQbCodeList(maybe_code_list)
        elif isinstance(maybe_code_list, bool) and not maybe_code_list:
            return None
        else:
            raise Exception(f"Unexpected codelist value '{maybe_code_list}'")

    return NewQbCodeListInCsvW(info_json_parent_dir / "codelists" /
                               f"{uri_safe(column_label)}.csv-metadata.json")
Пример #3
0
 def map_to_multi_measure_dimension(
     self,
     column_title: str,
     data: PandasDataTypes,
 ) -> QbMultiMeasureDimension:
     """Create a multi-measure dimension from a column's data.

     Delegates to ``QbMultiMeasureDimension.existing_measures_from_data``,
     handing it the column data, the CSV-W safe form of ``column_title``
     and ``self.value``.
     """
     safe_title = csvw_column_name_safe(column_title)
     return QbMultiMeasureDimension.existing_measures_from_data(
         data,
         safe_title,
         self.value,
     )
Пример #4
0
 def map_to_qb_multi_units(
     self,
     data: PandasDataTypes,
     column_title: str,
 ) -> QbMultiUnits:
     """Create a multi-units component from a column's data.

     Delegates to ``QbMultiUnits.existing_units_from_data``, handing it
     the column data, the CSV-W safe form of ``column_title`` and
     ``self.value``.
     """
     build_units = QbMultiUnits.existing_units_from_data
     safe_title = csvw_column_name_safe(column_title)
     return build_units(data, safe_title, self.value)
Пример #5
0
def _get_column_for_metadata_config(
    column_name: str,
    col_config: Optional[Union[dict, bool]],
    column_data: PandasDataTypes,
    info_json_parent_dir: Path,
) -> CsvColumn:
    """Translate one column's metadata configuration into a column definition.

    The first matching rule wins:

    * ``dict`` with a non-``None`` ``"type"``: handed to
      ``v1point1.map_column_to_qb_component`` unchanged.
    * ``dict`` with ``"dimension"`` and ``"value"``: a multi-measure
      dimension when the dimension URI is ``qb:measureType`` (measure URIs
      are the ``"types"`` entries expanded through the value template),
      otherwise an existing dimension.
    * ``dict`` with any of ``"parent"``, ``"description"``, ``"label"``: a
      new dimension whose code list comes from ``_get_code_list``. The value
      template is dropped from the column when that code list is a
      ``CompositeQbCodeList``.
    * ``dict`` with ``"attribute"`` and ``"value"``: multi-units when the
      attribute URI is SDMX ``unitMeasure`` (one unit per distinct column
      value, in sorted order), otherwise an existing attribute.
    * ``dict`` with ``"unit"`` and ``"measure"``: a single-measure
      observation value; ``"datatype"`` defaults to ``"decimal"``.
    * ``dict`` with only ``"datatype"``: a multi-measure observation value.
    * ``True``: a suppressed column.
    * anything else: a new dimension built from the column data; a string
      configuration is used as the dimension's description.

    Raises:
        Exception: when a ``dict`` configuration matches none of the rules,
            or when a measure-type column has no (or empty) ``"types"``.
    """
    if isinstance(col_config, dict):
        if col_config.get("type") is not None:
            return v1point1.map_column_to_qb_component(
                column_name, col_config, column_data, info_json_parent_dir
            )

        csv_safe_column_name = csvw_column_name_safe(column_name)

        maybe_dimension_uri = col_config.get("dimension")
        maybe_property_value_url = col_config.get("value")
        maybe_parent_uri = col_config.get("parent")
        maybe_description = col_config.get("description")
        maybe_label = col_config.get("label")
        maybe_attribute_uri = col_config.get("attribute")
        maybe_unit_uri = col_config.get("unit")
        maybe_measure_uri = col_config.get("measure")
        maybe_data_type = col_config.get("datatype")

        if maybe_dimension_uri is not None and maybe_property_value_url is not None:
            if maybe_dimension_uri != "http://purl.org/linked-data/cube#measureType":
                return QbColumn(
                    column_name,
                    ExistingQbDimension(maybe_dimension_uri),
                    maybe_property_value_url,
                )

            # Multi-measure cube. Validate before expanding so that a missing,
            # empty or null "types" entry yields the descriptive error below.
            defined_measure_types: List[str] = col_config.get("types", [])
            if not defined_measure_types:
                raise Exception(
                    f"Property 'types' was not defined in measure types column '{column_name}'."
                )

            measures = QbMultiMeasureDimension(
                [
                    ExistingQbMeasure(
                        uritemplate.expand(
                            maybe_property_value_url,
                            {csv_safe_column_name: measure_type},
                        )
                    )
                    for measure_type in defined_measure_types
                ]
            )
            return QbColumn(column_name, measures, maybe_property_value_url)

        if (
            maybe_parent_uri is not None
            or maybe_description is not None
            or maybe_label is not None
        ):
            label: str = column_name if maybe_label is None else maybe_label
            code_list = _get_code_list(
                label,
                col_config.get("codelist"),
                info_json_parent_dir,
                maybe_parent_uri,
                column_data,
                maybe_property_value_url,
            )
            new_dimension = NewQbDimension(
                label,
                description=maybe_description,
                parent_dimension_uri=maybe_parent_uri,
                source_uri=col_config.get("source"),
                code_list=code_list,
            )
            # No value template is passed on when the code list is composite.
            csv_column_value_url_template = (
                None
                if isinstance(code_list, CompositeQbCodeList)
                else maybe_property_value_url
            )
            return QbColumn(
                column_name,
                new_dimension,
                csv_column_value_url_template,
            )

        if maybe_attribute_uri is not None and maybe_property_value_url is not None:
            if (
                maybe_attribute_uri
                == "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure"
            ):
                # Sort the distinct values so the resulting unit order does not
                # depend on set iteration order (matches _get_code_list).
                distinct_values = sorted(
                    set(pandas_input_to_columnar_str(column_data))
                )
                dsd_component = QbMultiUnits(
                    [
                        ExistingQbUnit(
                            uritemplate.expand(
                                maybe_property_value_url,
                                {csv_safe_column_name: value},
                            )
                        )
                        for value in distinct_values
                    ]
                )
            else:
                dsd_component = ExistingQbAttribute(maybe_attribute_uri)

            return QbColumn(column_name, dsd_component, maybe_property_value_url)

        if maybe_unit_uri is not None and maybe_measure_uri is not None:
            observation_value = QbSingleMeasureObservationValue(
                measure=ExistingQbMeasure(maybe_measure_uri),
                unit=ExistingQbUnit(maybe_unit_uri),
                data_type=maybe_data_type or "decimal",
            )
            return QbColumn(column_name, observation_value)

        if maybe_data_type is not None:
            return QbColumn(
                column_name, QbMultiMeasureObservationValue(maybe_data_type)
            )

        raise Exception(f"Unmatched column definition: {col_config}")

    if isinstance(col_config, bool) and col_config:
        return SuppressedCsvColumn(column_name)

    # If not a known/expected type/value (or is a string), treat it as a dimension.
    fallback_description: Optional[str] = (
        col_config if isinstance(col_config, str) else None
    )
    new_dimension = NewQbDimension.from_data(
        column_name, column_data, description=fallback_description
    )
    return QbColumn(column_name, new_dimension)