def _read_stat_var():
    """Read all the statistical variables and group them by key.

    Fetches every statistical-variable dcid via ``dc.get_sv_dcids()``,
    loads their triples with ``dc.get_triples()``, builds one ``StatVar``
    per dcid and groups the objects by ``StatVar.key``.

    Returns:
        Whatever ``removeDuplicateStatsVar`` returns when given the
        ``defaultdict(list)`` mapping ``sv.key`` to the list of ``StatVar``
        objects sharing that key.

    Raises:
        Exception: if a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()
    # Example of triples for one statistical variable:
    #   ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'income')
    #   ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
    #   ('dc/014es05x0d5l', 'age', 'Years15Onwards')
    #   ('dc/014es05x0d5l', 'statType', 'measuredValue')
    #   ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
    #   ('dc/014es05x0d5l', 'measuredProperty', 'count')
    #   ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
    #   ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
    #   ('dc/014es05x0d5l', 'populationType', 'Person')
    #   ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'age')
    sv_triples = dc.get_triples(sv_dcid)

    # Leaf crimeType values and the super enum value they roll up to.
    violent_crimes = ('AggravatedAssault', 'ForcibleRape', 'Robbery',
                      'MurderAndNonNegligentManslaughter')
    property_crimes = ('MotorVehicleTheft', 'LarcenyTheft', 'Burglary')

    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # defaultdict(str): core properties that are absent read as ''.
        sv_dict = collections.defaultdict(str)
        for dcid_, prop, val in triples:
            if dcid_ != dcid:
                # Triples include measurementDenominator info of other
                # stat vars, e.g. we will get
                # "dc/gywfwwmg5gsrg, measurementDenominator, Count_Person"
                # in the triples of "Count_Person"; skip those.
                continue
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # Constraint property -> value pairs of this statistical variable.
        prop_val = {}
        for constraint in constraint_properties:
            if constraint not in sv_dict:
                raise Exception(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]
        if "measurementDenominator" in sv_dict:
            # The denominator is carried along under the short key "md".
            prop_val["md"] = sv_dict["measurementDenominator"]

        se = {}  # Super enum
        crime_type = prop_val.get('crimeType')
        if crime_type in violent_crimes:
            se = {'crimeType': 'ViolentCrime'}
        elif crime_type in property_crimes:
            se = {'crimeType': 'PropertyCrime'}

        sv = StatVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                     sv_dict["statType"], prop_val, dcid, se)
        stat_vars[sv.key].append(sv)

    stat_vars = removeDuplicateStatsVar(stat_vars)
    return stat_vars
def _read_stat_var():
    """Read all the statistical variables and group them by key.

    Fetches every statistical-variable dcid via ``dc.get_sv_dcids()``,
    loads their triples with ``dc.get_triples()``, builds one ``StatVar``
    per dcid and groups the objects by ``StatVar.key``.

    Returns:
        A ``collections.defaultdict(list)`` mapping ``sv.key`` to the list
        of ``StatVar`` objects sharing that key.

    Raises:
        Exception: if a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()
    # Example of triples for one statistical variable:
    #   ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'income')
    #   ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
    #   ('dc/014es05x0d5l', 'age', 'Years15Onwards')
    #   ('dc/014es05x0d5l', 'statType', 'measuredValue')
    #   ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
    #   ('dc/014es05x0d5l', 'measuredProperty', 'count')
    #   ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
    #   ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
    #   ('dc/014es05x0d5l', 'populationType', 'Person')
    #   ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'age')
    sv_triples = dc.get_triples(sv_dcid)

    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # defaultdict(str): core properties that are absent read as ''.
        sv_dict = collections.defaultdict(str)
        # NOTE(review): the subject of each triple is ignored here; if
        # dc.get_triples() also returns triples whose subject is another
        # node, their values are mixed into sv_dict -- confirm.
        for _, prop, val in triples:
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # Constraint property -> value pairs of this statistical variable.
        prop_val = {}
        for constraint in constraint_properties:
            if constraint not in sv_dict:
                raise Exception(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]

        sv = StatVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                     sv_dict["statType"], prop_val, dcid)
        stat_vars[sv.key].append(sv)
    return stat_vars
def _read_placeType_mapping():
    """Map every statistical-variable dcid to ``PLACE_TYPES``.

    Returns:
        A dict keyed by each dcid from ``dc.get_sv_dcids()``; every value
        is the same module-level ``PLACE_TYPES`` object (not a copy).
    """
    return {stat_var_id: PLACE_TYPES for stat_var_id in dc.get_sv_dcids()}
def read_stat_var():
    """Read all the statistical variables and group them by key.

    Fetches every statistical-variable dcid via ``dc.get_sv_dcids()``,
    loads their triples in batches with ``dc.get_triples_processed()``,
    builds one ``StatsVar`` per dcid and groups the objects by
    ``StatsVar.key``.

    Returns:
        Whatever ``removeDuplicateStatsVar`` returns when given the
        ``defaultdict(list)`` mapping ``sv.key`` to the list of
        ``StatsVar`` objects sharing that key.

    Raises:
        Exception: if a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()
    # Example of triples for one statistical variable:
    #   ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'income')
    #   ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
    #   ('dc/014es05x0d5l', 'age', 'Years15Onwards')
    #   ('dc/014es05x0d5l', 'statType', 'measuredValue')
    #   ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
    #   ('dc/014es05x0d5l', 'measuredProperty', 'count')
    #   ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
    #   ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
    #   ('dc/014es05x0d5l', 'populationType', 'Person')
    #   ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
    #   ('dc/014es05x0d5l', 'constraintProperties', 'age')

    # Split the stat var dcids into smaller batches and fetch the triples
    # batch by batch. Stepping through the list by batch size never yields
    # an empty batch, so no request is issued when the number of dcids is
    # zero or an exact multiple of the batch size.
    trunk_size = 10000
    sv_triples = {}
    for start in range(0, len(sv_dcid), trunk_size):
        trunk_triples = dc.get_triples_processed(
            sv_dcid[start:start + trunk_size])
        sv_triples.update(trunk_triples)

    # Super enums, i.e. group stat vars with different p-v pairs
    # (p, v1); (p, v2) by adding a common value (p, v), so that v1, v2
    # become leaf nodes of the value node v.
    # Each entry is (property, leaf values, super enum value).
    super_enums = (
        ('crimeType',
         ('AggravatedAssault', 'ForcibleRape', 'Robbery',
          'MurderAndNonNegligentManslaughter'),
         'ViolentCrime'),
        ('crimeType',
         ('MotorVehicleTheft', 'LarcenyTheft', 'Burglary'),
         'PropertyCrime'),
        ('testResult',
         ('Negative', 'Positive', 'Ready'),
         'TestResults'),
        ('medicalStatus',
         ('ConfirmedCase', 'ConfirmedOrProbableCase', 'PatientDeceased',
          'PatientHospitalized', 'PatientInICU', 'PatientOnVentilator',
          'PatientRecovered'),
         'PatientStatus'),
    )

    # Group all the stat vars according to their triples.
    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # sv_dict keeps all the triples of the stat var; as a
        # defaultdict(str), properties that are absent read as ''.
        sv_dict = collections.defaultdict(str)
        for dcid_, prop, val in triples:
            if dcid_ != dcid:
                # Triples include measurementDenominator info of other
                # stat vars, e.g. we will get
                # "dc/gywfwwmg5gsrg, measurementDenominator, Count_Person"
                # in the triples of "Count_Person"; skip those.
                continue
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # prop_val keeps all the constraint property-value pairs.
        prop_val = {}
        for constraint in constraint_properties:
            if constraint not in sv_dict:
                raise Exception(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]

        # Only one super enum is kept per stat var: when several table
        # entries match, the last matching entry wins.
        se = {}
        for se_prop, leaf_values, super_value in super_enums:
            if prop_val.get(se_prop) in leaf_values:
                se = {se_prop: super_value}

        # Create the stat var object.
        sv = StatsVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                      sv_dict["statType"], sv_dict["measurementQualifier"],
                      sv_dict["measurementDenominator"], prop_val, dcid, se)
        stat_vars[sv.key].append(sv)

    stat_vars = removeDuplicateStatsVar(stat_vars)
    return stat_vars