def CheckSlice(warnings, slice, dsid):
    """Validate one slice object, appending any problems to `warnings`.

    Checks run in this order: the slice id, the slice type, the `dataset`
    URL (compared against `dsid`), the `dimension` and `measure` properties
    (each must be present and every value must yield a URL), and finally the
    `data` property. Inline data (a dict, or a list of items) is passed to
    CheckSliceData; string data is not inspected here.

    Args:
        warnings: List of warning strings; extended in place.
        slice: The slice object to validate.
        dsid: Dataset URL expected in the slice's `dataset` property.
    """
    _CheckIdPresent(warnings, 'Slice', slice)
    slice_id = GetSchemaId(slice)
    _CheckType(warnings, 'Slice', slice, ['DataSlice'])
    _CheckUrlPresent(warnings, 'Slice', slice, 'dataset',
                     'required for id ' + slice_id, dsid)

    # Dimensions and measures follow the same rule: the property is required
    # and each of its values must resolve to a URL.
    url_valued_props = (
        ('dimension',
         f'Slice property "dimension" values must have URLs for {slice_id}'),
        ('measure',
         f'Slice property "measure" values must have URLs for {slice_id}'),
    )
    for prop, missing_url_warning in url_valued_props:
        _CheckPropertyPresent(warnings, 'Slice', slice, prop, 'required')
        for item in AsList(GetSchemaProp(slice, prop)):
            if GetUrl(item) is None:
                warnings.append(missing_url_warning)

    _CheckPropertyPresent(warnings, 'Slice', slice, 'data', 'required')
    data = GetSchemaProp(slice, 'data')
    if isinstance(data, str):
        # String data is not validated by this function.
        return
    if isinstance(data, dict):
        CheckSliceData(warnings, data, slice_id)
    elif isinstance(data, list):
        for datum in data:
            CheckSliceData(warnings, datum, slice_id)
def _ExpandSliceData(self, slice, dim_defs_by_id):
    """Expand a slice's tabular data into a list of Observation objects.

    Fetches the resource named by the slice's `data` property through
    `self.getter`, reads it with DictReader, and builds one Observation per
    row. The value for each dimension or measure is read from the column
    named after the fragment of that dimension's or measure's URL. Footnotes
    for a measure are read from the column `<fragment>*` and split on `;`.

    Args:
        slice: The slice object whose data is expanded.
        dim_defs_by_id: Mapping from dimension URL to dimension definition.
            A dimension with no (or an empty) definition yields a
            DimensionValue that carries no value.

    Returns:
        A list of dicts, one per row, each with the keys `@type`, `slice`,
        `dimensionValues` and `measureValues`.

    Raises:
        KeyError: If the table has no column for a measure, or for a
            categorical/time dimension that has a definition.
    """
    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        reader = DictReader(f)
        for row in reader:
            val = {
                '@type': 'Observation',
                'slice': GetSchemaId(slice),
                'dimensionValues': [],
                'measureValues': [],
            }
            for dim in AsList(GetSchemaProp(slice, 'dimension')):
                dim = GetUrl(dim)
                fragment = urlparse(dim).fragment
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                dim_def = dim_defs_by_id.get(dim)
                if dim_def:
                    dim_type = GetSchemaProp(dim_def, '@type')
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[fragment]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[fragment],
                            }
                        else:
                            # Write to this dimension's own entry. It has not
                            # been appended yet, so indexing the list with
                            # [-1] would hit the previous dimension (or fail
                            # on an empty list).
                            dim_val['value'] = row[fragment]
                val['dimensionValues'].append(dim_val)
            for measure in AsList(GetSchemaProp(slice, 'measure')):
                measure = GetUrl(measure)
                fragment = urlparse(measure).fragment
                meas_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[fragment],
                }
                # An optional "<column>*" column carries ';'-separated
                # footnote codes for the measure.
                footnotes = row.get(fragment + '*')
                if footnotes:
                    meas_val['footnote'] = [
                        {
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote,
                        }
                        for footnote in footnotes.split(';')
                    ]
                val['measureValues'].append(meas_val)
            data.append(val)
    return data
def _CheckUrlPresent(warnings, name, obj, prop, category, expected=None):
    """Warn when a property yields no URL, or a URL other than `expected`.

    Args:
        warnings: List of warning strings; extended in place.
        name: Label for the kind of object, used in the warning text.
        obj: The object whose property is inspected.
        prop: Name of the property expected to hold a URL.
        category: Text appended to the "missing" warning.
        expected: Optional URL the property must equal. The comparison is
            skipped when this is falsy.
    """
    val = GetUrl(GetSchemaProp(obj, prop))

    # Missing URL: report it and stop; there is nothing to compare.
    if val is None:
        warnings.append(f'{name} property "{prop}" is {category}')
        return

    if not expected or val == expected:
        return
    warnings.append(
        f'{name} property "{prop}" has value "{val}" but expected "{expected}"'
    )
def _ExpandCodeList(self, dim):
    """Load a code list from CSV and return a list of JSON-LD objects.

    Fetches the resource named by the dimension's `codeList` property through
    `self.getter` and turns every row into a DimensionValue entry. Each entry
    starts as a copy of the row, then receives `@type`, `@id`
    (`<dimension id>=<codeValue>`) and `dimension`. For every dimension
    property that has a `propertyID`:

    * a fixed `value` on the property is copied into the entry as-is;
    * otherwise the column for the property is the `columnIdentifier` of the
      table mapping whose `sourceEntity` is the property's id, or the
      `propertyID` itself when no such mapping applies;
    * a column named exactly that is renamed to `propertyID` when the two
      differ;
    * columns named `<column>.<sub>` are folded into a nested object of type
      `propertyType`, with `<sub>` as the key.

    Args:
        dim: The dimension object whose code list is expanded.

    Returns:
        A list of dicts, one per row of the code list.

    Raises:
        KeyError: If a row has no `codeValue` column, or a dimension property
            needing a nested object has no `propertyType`.
    """
    dimProps = list(AsList(GetSchemaProp(dim, 'dimensionProperty')))
    tableMappings = {
        GetUrl(tableMapping['sourceEntity']): tableMapping
        for tableMapping in AsList(GetSchemaProp(dim, 'tableMapping'))
    }
    codeList = []
    with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
        reader = DictReader(f)
        for row in reader:
            entry = dict(row)
            equivalentType = GetSchemaProp(dim, 'equivalentType')
            if equivalentType:
                entry['@type'] = ['DimensionValue', *AsList(equivalentType)]
            else:
                entry['@type'] = 'DimensionValue'
            entry['@id'] = GetSchemaId(dim) + '=' + row['codeValue']
            entry['dimension'] = GetSchemaId(dim)
            for dimProp in dimProps:
                propId = GetSchemaProp(dimProp, 'propertyID')
                if not propId:
                    continue
                value = dimProp.get('value')
                if value:
                    # A fixed value on the property overrides anything in
                    # the table.
                    entry[dimProp['propertyID']] = value
                    continue

                # Default to the property id as the column name unless a
                # table mapping names a different column.
                columnId = propId
                dimPropId = GetSchemaId(dimProp)
                if dimPropId:
                    tableMapping = tableMappings.get(dimPropId)
                    if tableMapping and 'columnIdentifier' in tableMapping:
                        columnId = tableMapping.get('columnIdentifier')

                # Iterate the untouched row while rewriting the entry copy.
                for field in row:
                    if field == columnId:
                        if columnId != propId:
                            entry[propId] = entry[columnId]
                            del entry[columnId]
                    elif field.startswith(columnId + '.'):
                        nested = entry.get(
                            columnId, {'@type': dimProp['propertyType']})
                        if isinstance(nested, str):
                            # The plain column value is still in place: keep
                            # it as the nested object's name. (This used to
                            # look up the literal key 'columnId' in the row,
                            # which raised KeyError.)
                            nested = {
                                '@type': dimProp['propertyType'],
                                'name': nested,
                            }
                        # NOTE(review): nested values are stored under
                        # columnId even when a mapping renamed the plain
                        # column to propId -- confirm this is intended.
                        entry[columnId] = nested
                        nested[field[len(columnId) + 1:]] = entry[field]
                        del entry[field]
            codeList.append(entry)
    return codeList
def test_GetUrl(self):
    """GetUrl yields 'val' for an object with that '@id' and for the bare string."""
    inputs = (
        {'@id': 'val'},  # object form: URL carried in '@id'
        'val',           # plain string form
    )
    for given in inputs:
        self.assertEqual(GetUrl(given), 'val')
def _ExpandSliceData(self, slice, dim_defs_by_id, meas_defs_by_id):
    """Expand a slice's tabular data into a list of Observation objects.

    Fetches the resource named by the slice's `data` property through
    `self.getter`, reads it with DictReader, and builds one Observation per
    row. The column for a dimension or measure is the `columnIdentifier` of
    the slice table mapping whose `sourceEntity` is that dimension's or
    measure's URL; without such a mapping (or when the mapping has no
    `columnIdentifier`) the fragment of the URL is used. Footnotes for a
    measure are read from the column `<column>*` and split on `;`.

    Args:
        slice: The slice object whose data is expanded.
        dim_defs_by_id: Mapping from dimension URL to dimension definition.
        meas_defs_by_id: Mapping from measure URL to measure definition.
            Not consulted by this method; kept so existing callers keep
            working.

    Returns:
        A list of dicts, one per row, each with the keys `@type`, `slice`,
        `dimensionValue` and `measureValue`.

    Raises:
        RuntimeError: If a dimension of the slice has no entry in
            `dim_defs_by_id`.
        KeyError: If the table lacks a column needed for a measure or for a
            categorical/time dimension.
    """
    tableMappings = {
        GetUrl(tableMapping['sourceEntity']): tableMapping
        for tableMapping in AsList(GetSchemaProp(slice, 'tableMapping'))
    }

    def column_for(url):
        # Column holding the values for `url`: the mapped identifier when a
        # usable mapping exists, otherwise the URL fragment.
        tableMapping = tableMappings.get(url)
        if tableMapping and 'columnIdentifier' in tableMapping:
            return tableMapping['columnIdentifier']
        return urlparse(url).fragment

    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
        reader = DictReader(f)
        for row in reader:
            val = {
                '@type': 'Observation',
                'slice': GetSchemaId(slice),
                'dimensionValue': [],
                'measureValue': [],
            }
            for dim in AsList(GetSchemaProp(slice, 'dimension')):
                dim = GetUrl(dim)
                dim_def = dim_defs_by_id.get(dim)
                if dim_def is None:
                    raise RuntimeError(
                        "Unable to find definition for dimension " + dim)
                col_id = column_for(dim)
                dim_val = {
                    '@type': 'DimensionValue',
                    'dimension': dim,
                }
                # An empty definition is not None, so it passes the check
                # above but contributes no value here.
                if dim_def:
                    dim_type = GetSchemaProp(dim_def, '@type')
                    if dim_type == 'CategoricalDimension':
                        dim_val['codeValue'] = row[col_id]
                    elif dim_type == 'TimeDimension':
                        equivalent_type = GetSchemaProp(
                            dim_def, 'equivalentType')
                        if equivalent_type:
                            dim_val['value'] = {
                                '@type': equivalent_type,
                                '@value': row[col_id],
                            }
                        else:
                            dim_val['value'] = row[col_id]
                val['dimensionValue'].append(dim_val)
            for measure in AsList(GetSchemaProp(slice, 'measure')):
                measure = GetUrl(measure)
                col_id = column_for(measure)
                meas_val = {
                    '@type': 'MeasureValue',
                    'measure': measure,
                    'value': row[col_id],
                }
                # An optional "<column>*" column carries ';'-separated
                # footnote codes for the measure.
                footnotes = row.get(col_id + '*')
                if footnotes:
                    meas_val['footnote'] = [
                        {
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote,
                        }
                        for footnote in footnotes.split(';')
                    ]
                val['measureValue'].append(meas_val)
            data.append(val)
    return data