def _ExpandCodeList(self, dim):
    """Load a code list from CSV and return a list of JSON-LD objects.

    Each CSV row becomes one entry that starts out as a copy of the row and
    is then tagged with ``@type``, ``@id`` and ``dimension``. For every
    ``dimensionProperty`` of the dimension that has a ``propertyID``:

    * if the property carries a fixed ``value``, that value is stored under
      the property ID;
    * otherwise the CSV column for the property (its ``tableMapping``'s
      ``columnIdentifier`` when one is declared, else the property ID) is
      renamed to the property ID, and columns named ``<column>.<suffix>``
      are folded into a nested object stored under ``<column>``.

    Args:
      dim: Dimension definition. Its ``codeList`` property is handed to
        ``self.getter.Fetch``.

    Returns:
      A list with one dict per CSV row.
    """
    codeList = []
    dimProps = list(AsList(GetSchemaProp(dim, 'dimensionProperty')))
    # Index the table mappings by the URL of the entity they describe.
    tableMappings = {
        GetUrl(tableMapping['sourceEntity']): tableMapping
        for tableMapping in AsList(GetSchemaProp(dim, 'tableMapping'))
    }
    with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
        reader = DictReader(f)
        for row in reader:
            # Work on a copy so `row` keeps the original column values while
            # `entry` is being restructured.
            entry = dict(row)
            equivalentType = GetSchemaProp(dim, 'equivalentType')
            if equivalentType:
                entry['@type'] = ['DimensionValue']
                entry['@type'] += AsList(equivalentType)
            else:
                entry['@type'] = 'DimensionValue'
            entry['@id'] = GetSchemaId(dim) + '='
            entry['@id'] += row['codeValue']
            entry['dimension'] = GetSchemaId(dim)
            for dimProp in dimProps:
                propId = GetSchemaProp(dimProp, 'propertyID')
                value = dimProp.get('value')
                if not propId:
                    continue
                if value:
                    # Constant property: same value for every row.
                    entry[dimProp['propertyID']] = value
                    continue
                # Default to a column named after the property unless a
                # table mapping for this property names another column.
                columnId = propId
                dimPropId = GetSchemaId(dimProp)
                if dimPropId:
                    tableMapping = tableMappings.get(dimPropId)
                    if tableMapping and 'columnIdentifier' in tableMapping:
                        columnId = tableMapping['columnIdentifier']
                for field in row:
                    if field == columnId:
                        if columnId != propId:
                            entry[propId] = entry[columnId]
                            del entry[columnId]
                    elif field.startswith(columnId + '.'):
                        entry[columnId] = entry.get(
                            columnId, {'@type': dimProp['propertyType']})
                        if isinstance(entry[columnId], str):
                            # A plain column value already sits here; wrap it
                            # as the object's name. The original looked up
                            # the literal key 'columnId' in the row, which
                            # raises KeyError for ordinary CSV files.
                            entry[columnId] = {
                                '@type': dimProp['propertyType'],
                                'name': entry[columnId]
                            }
                        # Strip the "<column>." prefix to get the sub-field.
                        entry[columnId][field[len(columnId) + 1:]] = entry[field]
                        del entry[field]
            codeList.append(entry)
    return codeList
def _ExpandFootnotes(self, filename, json_val):
    """Read footnotes from a CSV file and return them as JSON-LD objects.

    Each row is tagged in place as a ``StatisticalAnnotation``, given an
    ``@id`` made of the ID of ``json_val``, ``#footnote=`` and the row's
    ``codeValue``, and linked to ``json_val`` through ``dataset``.

    Args:
      filename: Location handed to ``self.getter.Fetch``.
      json_val: Object whose schema ID identifies the owning dataset.

    Returns:
      A list of the augmented row dicts, in file order.
    """
    annotations = []
    with self.getter.Fetch(filename) as csv_file:
        for record in DictReader(csv_file):
            record.update({
                '@type': 'StatisticalAnnotation',
                '@id': (GetSchemaId(json_val) + '#footnote='
                        + record['codeValue']),
                'dataset': GetSchemaId(json_val),
            })
            annotations.append(record)
    return annotations
def CheckDimension(warnings, dim, dsid):
    """Validate one dimension definition, appending problems to `warnings`.

    Checks that the dimension has an ID, that its type is ``TimeDimension``
    or ``CategoricalDimension``, that its ``dataset`` property passes
    ``_CheckUrlPresent`` against `dsid`, and that the type-specific property
    is present (``dateFormat`` for time dimensions, ``codeList`` for
    categorical ones).

    Args:
      warnings: List that warning strings are appended to.
      dim: Dimension definition to check.
      dsid: ID of the enclosing dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Dimension', dim)
    _CheckType(warnings, 'Dimension', dim,
               ['TimeDimension', 'CategoricalDimension'])
    # GetSchemaId() can return None (that is what _CheckIdPresent reports).
    # Format the ID instead of concatenating it so a missing ID yields
    # warnings rather than a TypeError that aborts validation.
    requirement = f'required for id {GetSchemaId(dim)}'
    _CheckUrlPresent(warnings, 'Dimension', dim, 'dataset', requirement, dsid)
    dim_type = GetSchemaType(dim)
    if dim_type == 'TimeDimension':
        _CheckPropertyPresent(warnings, 'Dimension', dim, 'dateFormat',
                              requirement)
    elif dim_type == 'CategoricalDimension':
        _CheckPropertyPresent(warnings, 'Dimension', dim, 'codeList',
                              requirement)
def CheckStatisticalDataset(warnings, dataset):
    """Validate a dataset and everything declared under it.

    Checks the dataset's type and ID, then for each of ``dimension``,
    ``measure`` and ``slice`` (in that order) checks that the property is
    present and runs the matching checker on every value.

    Args:
      warnings: List that warning strings are appended to.
      dataset: Dataset definition to check.
    """
    kind = 'StatisticalDataset'
    _CheckType(warnings, kind, dataset, ['StatisticalDataset'])
    _CheckIdPresent(warnings, kind, dataset)
    child_checks = (
        ('dimension', CheckDimension),
        ('measure', CheckMeasure),
        ('slice', CheckSlice),
    )
    for prop_name, check_child in child_checks:
        _CheckPropertyPresent(warnings, kind, dataset, prop_name, 'required')
        for child in AsList(GetSchemaProp(dataset, prop_name)):
            check_child(warnings, child, GetSchemaId(dataset))
def _ExpandCodeList(self, dim):
    """Load a code list from CSV and return a list of JSON-LD objects.

    Each row is augmented in place with ``@type``, ``@id`` (the dimension
    ID, ``=`` and the row's ``codeValue``) and ``dimension``.

    Args:
      dim: Dimension definition. Its ``codeList`` property is handed to
        ``self.getter.Fetch``.

    Returns:
      A list of the augmented row dicts, in file order.
    """
    codeList = []
    with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
        reader = DictReader(f)
        for row in reader:
            equivalentType = GetSchemaProp(dim, 'equivalentType')
            if equivalentType:
                # Extend with AsList() so an equivalentType that is itself a
                # list gives a flat @type list instead of a nested one.
                row['@type'] = ['DimensionValue']
                row['@type'] += AsList(equivalentType)
            else:
                row['@type'] = 'DimensionValue'
            row['@id'] = GetSchemaId(dim) + '='
            row['@id'] += row['codeValue']
            row['dimension'] = GetSchemaId(dim)
            codeList.append(row)
    return codeList
def CheckMeasure(warnings, measure, dsid):
    """Validate one measure definition, appending problems to `warnings`.

    Checks that the measure has an ID, that its type is
    ``StatisticalMeasure``, that its ``dataset`` property passes
    ``_CheckUrlPresent`` against `dsid`, and that at least one of
    ``unitType``/``unitText`` passes ``_CheckAnyPropertyPresent``.

    Args:
      warnings: List that warning strings are appended to.
      measure: Measure definition to check.
      dsid: ID of the enclosing dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Measure', measure)
    _CheckType(warnings, 'Measure', measure, ['StatisticalMeasure'])
    # GetSchemaId() can return None (that is what _CheckIdPresent reports).
    # Format the ID instead of concatenating it so a missing ID yields
    # warnings rather than a TypeError that aborts validation.
    _CheckUrlPresent(warnings, 'Measure', measure, 'dataset',
                     f'required for id {GetSchemaId(measure)}', dsid)
    _CheckAnyPropertyPresent(warnings, 'Measure', measure,
                             ['unitType', 'unitText'], 'recommended')
def CheckSlice(warnings, slice, dsid):
    """Validate one slice definition, appending problems to `warnings`.

    Checks the slice's ID and type, its ``dataset`` property, that
    ``dimension`` and ``measure`` are present and that each of their values
    has a URL, and that ``data`` is present. When ``data`` is a dict or a
    list (rather than a string), each item is passed to ``CheckSliceData``.

    Args:
      warnings: List that warning strings are appended to.
      slice: Slice definition to check.
      dsid: ID of the enclosing dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Slice', slice)
    slice_id = GetSchemaId(slice)
    _CheckType(warnings, 'Slice', slice, ['DataSlice'])
    # slice_id can be None (that is what _CheckIdPresent reports). Format it
    # instead of concatenating so a missing ID yields warnings rather than a
    # TypeError that aborts validation.
    _CheckUrlPresent(warnings, 'Slice', slice, 'dataset',
                     f'required for id {slice_id}', dsid)

    # Dimensions and measures follow the same rule: every value must
    # resolve to a URL.
    for prop_name in ('dimension', 'measure'):
        _CheckPropertyPresent(warnings, 'Slice', slice, prop_name, 'required')
        for ref in AsList(GetSchemaProp(slice, prop_name)):
            if GetUrl(ref) is None:
                warnings.append(
                    f'Slice property "{prop_name}" values must have URLs '
                    f'for {slice_id}'
                )

    _CheckPropertyPresent(warnings, 'Slice', slice, 'data', 'required')
    data = GetSchemaProp(slice, 'data')
    # A string value is not inspected here; only dict/list data is checked.
    if isinstance(data, dict):
        CheckSliceData(warnings, data, slice_id)
    elif isinstance(data, list):
        for datum in data:
            CheckSliceData(warnings, datum, slice_id)
def _ExpandSliceData(self, slice, dim_defs_by_id):
    """Load a slice's CSV data and return a list of observation objects.

    Each CSV row becomes one ``Observation`` with a ``dimensionValues``
    entry per slice dimension and a ``measureValues`` entry per slice
    measure. The column for a dimension or measure is the fragment of its
    URL. A ``<column>*`` column, when non-empty, is split on ``;`` into
    footnote references attached to that measure value.

    Args:
      slice: Slice definition. Its ``data`` property is handed to
        ``self.getter.Fetch``.
      dim_defs_by_id: Mapping from dimension URL to dimension definition.
        Dimensions with no definition here get no value attached.

    Returns:
      A list with one observation dict per CSV row.
    """
    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        reader = DictReader(f)
        for row in reader:
            val = {}
            val['@type'] = 'Observation'
            val['slice'] = GetSchemaId(slice)
            val['dimensionValues'] = []
            val['measureValues'] = []
            for dim in AsList(GetSchemaProp(slice, 'dimension')):
                dim = GetUrl(dim)
                fragment = urlparse(dim).fragment
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                dim_def = dim_defs_by_id.get(dim)
                if dim_def:
                    dim_type = GetSchemaProp(dim_def, '@type')
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[fragment]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[fragment]
                            }
                        else:
                            # Set the value on the entry being built. The
                            # original wrote to dimensionValues[-1], which at
                            # this point is the previous dimension's entry
                            # (or raises IndexError for the first one).
                            dim_val['value'] = row[fragment]
                val['dimensionValues'].append(dim_val)
            for measure in AsList(GetSchemaProp(slice, 'measure')):
                measure = GetUrl(measure)
                fragment = urlparse(measure).fragment
                measure_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[fragment]
                }
                # Optional "<column>*" column holds ';'-separated footnotes.
                footnotes = row.get(fragment + '*')
                if footnotes:
                    measure_val['footnote'] = [
                        {
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote
                        }
                        for footnote in footnotes.split(';')
                    ]
                val['measureValues'].append(measure_val)
            data.append(val)
    return data
def _CheckIdPresent(warnings, name, obj):
    """Append a warning to `warnings` when `obj` has no schema ID.

    Args:
      warnings: List that warning strings are appended to.
      name: Label for the kind of object, used as the message prefix.
      obj: Object whose ID is looked up with ``GetSchemaId``.
    """
    identifier = GetSchemaId(obj)
    if identifier is not None:
        return
    warnings.append(f'{name} has no "@id"')
def test_GetSchemaId(self):
    """GetSchemaId finds the ID under each of the accepted key spellings."""
    for id_key in ('@id', 'id', 'schema:id'):
        self.assertEqual(GetSchemaId({id_key: 'val'}), 'val')
def _ExpandSliceData(self, slice, dim_defs_by_id, meas_defs_by_id):
    """Load a slice's CSV data and return a list of observation objects.

    Each CSV row becomes one ``Observation`` with a ``dimensionValue`` entry
    per slice dimension and a ``measureValue`` entry per slice measure. The
    CSV column for a dimension or measure is the ``columnIdentifier`` of the
    slice's ``tableMapping`` for it when one exists, otherwise the fragment
    of its URL. A ``<column>*`` column, when non-empty, is split on ``;``
    into footnote references attached to that measure value.

    Args:
      slice: Slice definition. Its ``data`` property is handed to
        ``self.getter.Fetch``.
      dim_defs_by_id: Mapping from dimension URL to dimension definition.
      meas_defs_by_id: Mapping from measure URL to measure definition.
        Accepted for interface compatibility; not consulted here.

    Returns:
      A list with one observation dict per CSV row.

    Raises:
      RuntimeError: if a slice dimension has no entry in `dim_defs_by_id`.
    """
    data = []
    tableMappings = {
        GetUrl(tableMapping['sourceEntity']): tableMapping
        for tableMapping in AsList(GetSchemaProp(slice, 'tableMapping'))
    }

    def column_for(entity_url):
        # Prefer an explicit table mapping; fall back to the URL fragment.
        mapping = tableMappings.get(entity_url)
        if mapping:
            return mapping['columnIdentifier']
        return urlparse(entity_url).fragment

    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        reader = DictReader(f)
        for row in reader:
            val = {}
            val['@type'] = 'Observation'
            val['slice'] = GetSchemaId(slice)
            val['dimensionValue'] = []
            val['measureValue'] = []
            for dim in AsList(GetSchemaProp(slice, 'dimension')):
                dim = GetUrl(dim)
                dim_def = dim_defs_by_id.get(dim)
                if dim_def is None:
                    # Format rather than concatenate: `dim` may be None when
                    # the reference has no URL, and the caller should still
                    # get this RuntimeError instead of a TypeError.
                    raise RuntimeError(
                        f"Unable to find definition for dimension {dim}")
                col_id = column_for(dim)
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                # An empty (falsy) definition gets no value, as before.
                if dim_def:
                    dim_type = GetSchemaProp(dim_def, '@type')
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[col_id]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[col_id]
                            }
                        else:
                            dim_val['value'] = row[col_id]
                val['dimensionValue'].append(dim_val)
            for measure in AsList(GetSchemaProp(slice, 'measure')):
                measure = GetUrl(measure)
                col_id = column_for(measure)
                measure_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[col_id]
                }
                # Optional "<column>*" column holds ';'-separated footnotes.
                footnotes = row.get(col_id + '*')
                if footnotes:
                    measure_val['footnote'] = [{
                        '@type': 'StatisticalAnnotation',
                        'codeValue': footnote
                    } for footnote in footnotes.split(';')]
                val['measureValue'].append(measure_val)
            data.append(val)
    return data