def __init__(self, service, name='', position=geopy.Point(45.4641, 9.1919),
             ageDiffusionIn={}, scaleIn=1, attributesIn=None):
    """Create a service unit sited at ``position``.

    Args:
        service: ServiceType member provided by this unit.
        name: unit name, must be a string.
        position: geopy.Point where the unit is located.
        ageDiffusionIn: mapping AgeGroup -> diffusion multiplier. Passing
            ``None`` draws, for every AgeGroup, a slightly perturbed random
            multiplier. The mapping is copied, so the default dict and
            dicts shared by the caller are never aliased by the instance.
        scaleIn: positive scalar stored as the unit scale.
        attributesIn: optional dict of extra attributes; ``None`` gives
            each instance its own empty dict.

    Raises:
        AssertionError: if any argument fails the checks below.
    """
    assert isinstance(position, geopy.Point), 'Position must be a geopy Point'
    assert isinstance(service, ServiceType), 'Service must belong to the Eum'
    assert isinstance(name, str), 'Name must be a string'
    # `and` short-circuits: a non-scalar never reaches the comparison
    assert np.isscalar(scaleIn) and scaleIn > 0, \
        'Scale must be a positive scalar'
    if attributesIn is None:
        # fresh dict per instance instead of a shared mutable default
        attributesIn = {}
    assert isinstance(attributesIn, dict), 'Attributes can be provided in a dict'

    self.name = name
    self.service = service

    # A ServiceType can have many sites, so each unit has its own.
    # Moreover, a site is not uniquely assigned to a service
    self.site = position

    self.scale = scaleIn  # store scale info
    self.attributes = attributesIn  # dictionary

    # how the service availability area varies for different age groups
    if ageDiffusionIn is None:
        # NOTE(review): these multipliers already include self.scale and the
        # kernel below multiplies by self.scale again - confirm intended.
        self.ageDiffusion = {
            g: (1 + .005 * np.round(np.random.normal(), 2)) * self.scale
            for g in AgeGroup.all()
        }
    else:
        assert set(ageDiffusionIn.keys()) <= set(AgeGroup.all()), \
            'Diffusion keys should be AgeGroups'
        # copy so that units never share (or mutate) the caller's dict
        self.ageDiffusion = dict(ageDiffusionIn)

    # define kernel taking scale into account
    self.kernel = {
        g: gaussKern(length_scale=l * self.scale)
        for g, l in self.ageDiffusion.items()
    }
def __init__(self, mappedPositions):
    """Store the positions and map every ServiceType to a zero-filled frame.

    Each frame has one row per entry of ``mappedPositions`` (sharing its
    index) and one column per AgeGroup.

    Args:
        mappedPositions: MappedPositionsFrame the values refer to.
    """
    assert isinstance(mappedPositions, MappedPositionsFrame), 'Expected MappedPositionsFrame'
    self.mappedPositions = mappedPositions

    n_positions = mappedPositions.shape[0]
    frames = {}
    for service in ServiceType:
        age_groups = AgeGroup.all()
        blank = np.zeros([n_positions, len(age_groups)])
        frames[service] = pd.DataFrame(
            blank, index=mappedPositions.index, columns=age_groups)

    # hand the prepared mapping over to the parent initialiser
    super().__init__(frames)
def compute_kpi_for_localized_services(self):
    """Average the service values per quartiere, weighted by age counts.

    For every service the values are multiplied by ``self.agesFrame``,
    summed by quartiere and divided by ``self.agesTotals``. Age groups
    outside ``service.demandAges`` or with a zero total are set to NaN.
    The result is stored in ``self.quartiereKPI[service]``.

    Returns:
        dict: ``self.quartiereKPI``.

    Raises:
        AssertionError: if values were not evaluated yet, or if a weighted
            mean falls outside the [min, max] range of its quartiere.
    """
    assert self.bEvaluated, 'Have you evaluated service values before making averages for KPIs?'

    for service, data in self.serviceValues.items():
        # weight each age column; columns are visited one by one because
        # Enums are not orderable
        for col in self.agesFrame.columns:
            if col in service.demandAges:
                weighted_col = data[col].multiply(self.agesFrame[col])
            else:
                weighted_col = np.nan * data[col]
            self.weightedValues[service][col] = weighted_col

        # admissible range per quartiere, widened by machine epsilon
        tolerance = np.finfo(float).eps
        lower_bound = data.groupby(
            common_cfg.IdQuartiereColName).min() - tolerance
        upper_bound = data.groupby(
            common_cfg.IdQuartiereColName).max() + tolerance

        # sum weighted fractions by neighbourhood
        weightedSums = self.weightedValues[service].groupby(
            common_cfg.IdQuartiereColName).sum()
        # NaN where nobody lives or the service has no demand
        weightedSums[self.agesTotals == 0] = np.nan
        no_demand = ~weightedSums.columns.isin(service.demandAges)
        weightedSums.iloc[:, no_demand] = np.nan

        self.quartiereKPI[service] = (weightedSums / self.agesTotals).reindex(
            columns=AgeGroup.all(), copy=False)

        # sanity check: a weighted mean must lie between min and max
        for col in self.quartiereKPI[service].columns:
            kpi_col = self.quartiereKPI[service][col]
            in_range = kpi_col.between(lower_bound[col], upper_bound[col])
            assert all(in_range | kpi_col.isnull()), 'Unexpected error in mean computation'

    return self.quartiereKPI
def __init__(self, dfIn, bDuplicatesCheck=True):
    """Wrap ``dfIn`` and append per-AgeGroup totals and centroid positions.

    Args:
        dfIn: pandas DataFrame; it needs a 'geometry' column whose items
            expose ``centroid.x`` and ``centroid.y``.
        bDuplicatesCheck: when true, assert that no position is repeated
            (this check takes a while).

    Raises:
        AssertionError: on a non-DataFrame input or a repeated position.
    """
    assert isinstance(dfIn, pd.DataFrame), 'Input DataFrame expected'

    # take over the state of a copy of the input frame
    super().__init__()
    self.__dict__.update(dfIn.copy().__dict__)

    # relabel the sample-age columns by AgeGroup and sum within each group
    label = 'ageGroup'
    by_sample_age = common_cfg.fill_sample_ages_in_cpa_columns(self)
    by_group = by_sample_age.rename(
        AgeGroup.find_AgeGroup, axis='columns').T
    by_group.index.name = label  # index now holds AgeGroup items
    by_group = by_group.reset_index()
    by_group[label] = by_group[label].astype('category')
    ages_by_section = by_group.groupby(label).sum().T

    self['PeopleTot'] = ages_by_section.sum(axis=1)

    # report every AgeGroup, falling back to zeros when a group is absent
    for age_group in AgeGroup.all():
        fallback = np.zeros_like(self.iloc[:, 0])
        self[age_group] = ages_by_section.get(age_group, fallback)

    def centroid_point(shape):
        # geopy expects (latitude, longitude), i.e. (y, x)
        return geopy.Point(shape.centroid.y, shape.centroid.x)

    self[common_cfg.positionsCol] = self['geometry'].apply(centroid_point)

    if bDuplicatesCheck:
        has_repeats = any(self[common_cfg.positionsCol].duplicated())
        assert not has_repeats, 'Repeated position found'
def __init__(self, df_input, b_duplicates_check=True):
    """Wrap ``df_input`` and append AgeGroup totals, coordinates, positions.

    Besides the added columns, two cached helpers are stored on the
    instance: ``ages_frame`` (AgeGroup columns indexed by quartiere id and
    position tuple) and ``mapped_positions`` (a MappedPositionsFrame).

    Args:
        df_input: pandas DataFrame; it needs a 'geometry' column whose
            items expose ``centroid.x`` and ``centroid.y``.
        b_duplicates_check: when true, assert that no position is repeated
            (this check takes a while).

    Raises:
        AssertionError: on a non-DataFrame input or a repeated position.
    """
    assert isinstance(df_input, pd.DataFrame), 'Input DataFrame expected'

    # initialise and assign base DataFrame properties
    # FIXME: this is not nice at all. Refactor to properly inherit from df
    super().__init__()
    self.__dict__.update(df_input.copy().__dict__)

    # prepare the AgeGroups cardinalities
    groups_col = 'ageGroup'
    people_by_sample_age = common_cfg.fill_sample_ages_in_cpa_columns(self)
    data_by_group = people_by_sample_age.rename(
        AgeGroup.find_age_group, axis='columns').T
    # index is now given by AgeGroup items
    data_by_group.index.name = groups_col
    # extract to convert to categorical and groupby
    data_by_group = data_by_group.reset_index()
    data_by_group[groups_col] = data_by_group[groups_col].astype('category')
    ages_by_section = data_by_group.groupby(groups_col).sum().T
    self['PeopleTot'] = ages_by_section.sum(axis=1)

    # report all ages, with zeros for groups missing from the input
    for col in self.OUTPUT_AGES:
        self[col] = ages_by_section.get(col, np.zeros_like(self.iloc[:, 0]))

    # extract long and lat and build geopy locations
    long_col = common_cfg.coord_col_names[0]
    lat_col = common_cfg.coord_col_names[1]
    self[long_col] = self['geometry'].apply(lambda pos: pos.centroid.x)
    self[lat_col] = self['geometry'].apply(lambda pos: pos.centroid.y)
    # Pair the columns explicitly as (latitude, longitude): no dependency
    # on DataFrame.as_matrix() (removed in pandas 1.0) nor on geopy
    # unwrapping a nested one-element tuple.
    self[common_cfg.positions_col] = [
        geopy.Point(lat, lon)
        for lat, lon in zip(self[lat_col], self[long_col])
    ]

    if b_duplicates_check:
        # check no location is repeated - takes a while
        assert not any(self[common_cfg.positions_col].duplicated()), \
            'Repeated position found'

    # cache ages frame and mapped positions for quicker access
    age_multi_index = [
        self[common_cfg.id_quartiere_col_name],
        self[common_cfg.positions_col].apply(tuple),
    ]
    self.ages_frame = self[AgeGroup.all()].set_index(age_multi_index)

    self.mapped_positions = MappedPositionsFrame(
        long=self[long_col],
        lat=self[lat_col],
        geopy_pos=self[common_cfg.positions_col].tolist(),
        id_quartiere=self[common_cfg.id_quartiere_col_name].tolist())
def compute_kpi_for_istat_values(self):
    """Aggregate the demand by quartiere and derive the ISTAT indicators.

    The demand frame is summed by quartiere, the AgeGroup columns and the
    configured excluded columns are dropped, and the remainder is passed
    to the ``istat_kpi`` helpers. Results are stored on the instance.

    Returns:
        tuple: ``(self.istatKPI, self.istatVitality)``.
    """
    quartiere_sums = self.demand.groupby(
        common_cfg.IdQuartiereColName).sum()

    # drop only the unwanted columns that actually exist in the frame
    unwanted = AgeGroup.all() + common_cfg.excludedColumns
    to_drop = [col for col in unwanted if col in quartiere_sums.columns]
    istat_input = quartiere_sums.drop(to_drop, axis=1)

    self.istatKPI = istat_kpi.wrangle_istat_cpa2011(istat_input, self.city)
    self.istatVitality = istat_kpi.compute_vitality_cpa2011(istat_input)
    return self.istatKPI, self.istatVitality
def load(self, meanRadius):
    """Build one ServiceUnit per row of the extracted library data.

    The library type (column 'tipologia-funzionale') selects which age
    groups the unit serves and is stored in the unit attributes under
    'level'.

    Args:
        meanRadius: reference radius, passed to every unit as its scale.

    Returns:
        list: the created ServiceUnit objects, grouped by library type.

    Raises:
        AssertionError: if ``meanRadius`` is falsy or the data contains a
            library type with no age mapping.
    """
    assert meanRadius, 'Please provide a reference radius for the mean library size'
    (propertData, locations) = super().extract_locations()

    nameCol = 'denominazioni.ufficiale'
    typeCol = 'tipologia-funzionale'

    # Edit here to specify the diffusion values per age group
    typeAgeDict = {
        'Specializzata': {group: 1 for group in AgeGroup.all()},
        'Importante non specializzata': {group: 1 for group in AgeGroup.all()},
        'Pubblica': {group: 1 for group in AgeGroup.all()},
        'NON SPECIFICATA': {AgeGroup.ChildPrimary: 1},
        'Scolastica': {AgeGroup.ChildPrimary: 1},
        'Istituto di insegnamento superiore': {AgeGroup.ChildPrimary: 1},
        'Nazionale': {AgeGroup.ChildPrimary: 1},
    }

    libraryTypes = propertData[typeCol].unique()
    assert set(libraryTypes) <= set(typeAgeDict.keys()), 'Unrecognized types in input'

    unitList = []
    for libType in libraryTypes:
        # positional boolean mask: does not depend on the frame's index
        # labels being 0..n-1
        bThisGroup = (propertData[typeCol] == libType).values
        typeData = propertData[bThisGroup]
        typeLocations = [
            loc for loc, keep in zip(locations, bThisGroup) if keep]

        for iUnit in range(typeData.shape[0]):
            rowData = typeData.iloc[iUnit, :]
            thisUnit = ServiceUnit(
                self.servicetype,
                name=rowData[nameCol],
                position=typeLocations[iUnit],
                # each unit gets its own copy of the diffusion mapping
                ageDiffusionIn=dict(typeAgeDict[libType]),
                # the reference radius was required but never forwarded
                scaleIn=meanRadius,
                attributesIn={'level': libType})
            unitList.append(thisUnit)

    return unitList
def evaluate_services_at(self, mappedPositions):
    """Evaluate every output service at the given positions.

    For each service type that has at least one unit, and for each
    AgeGroup, the per-unit evaluations are stacked along a new last axis
    and reduced with ``aggregate_units`` of the service type. Service
    types without units keep their initial values.

    Args:
        mappedPositions: MappedPositionsFrame with the target positions.

    Returns:
        ServiceValues: the filled container.
    """
    assert isinstance(mappedPositions, MappedPositionsFrame), 'Expected MappedPositionsFrame'

    age_groups = AgeGroup.all()  # all age groups are reported
    results = ServiceValues(mappedPositions)

    for service_type in self.outputServices:
        matching_units = [
            unit for unit in self.units if unit.service == service_type]
        if not matching_units:
            continue

        for age_group in age_groups:
            per_unit = [
                unit.evaluate(results.positions, age_group)
                for unit in matching_units
            ]
            stacked = np.stack(per_unit, axis=-1)
            # aggregate unit contributions according to the service norm
            results[service_type][age_group] = \
                service_type.aggregate_units(stacked)

    return results
def load(self, meanRadius):
    """Build one ServiceUnit per row of the extracted pharmacy data.

    Every unit serves all age groups with diffusion 1 and uses
    ``meanRadius`` as its scale. The description and VAT-number columns
    are copied into the unit attributes.

    Args:
        meanRadius: reference radius used as the scale of every unit.

    Returns:
        list: the created ServiceUnit objects, in row order.

    Raises:
        AssertionError: if ``meanRadius`` is falsy.
    """
    # the message used to mention "urban green" (copy-paste leftover)
    assert meanRadius, 'Please provide a reference radius for pharmacies'
    (propertData, locations) = super().extract_locations()

    nameCol = 'CODICEIDENTIFICATIVOFARMACIA'
    colAttributes = {
        'Descrizione': 'DESCRIZIONEFARMACIA',
        'PartitaIva': 'PARTITAIVA',
    }

    unitList = []
    for iUnit in range(propertData.shape[0]):
        rowData = propertData.iloc[iUnit, :]
        attrDict = {
            name: rowData[col] for name, col in colAttributes.items()}
        thisUnit = ServiceUnit(
            self.servicetype,
            name=rowData[nameCol].astype(str),
            position=locations[iUnit],
            ageDiffusionIn={g: 1 for g in AgeGroup.all()},
            scaleIn=meanRadius,
            attributesIn=attrDict)
        unitList.append(thisUnit)

    return unitList
def make_serviceareas_output(self, precision=4):
    """Collect the ISTAT, vitality and per-area layers as record lists.

    Args:
        precision: number of decimals every frame is rounded to.

    Returns:
        dict: the ISTAT and vitality layer names map to the records of
        the corresponding frame; every ``area.value`` in
        ``self.areas_tree`` maps to records holding, for each service of
        that area, the list of its AgeGroup values.
    """
    out = dict()

    # tool to format frame data that does not depend on age
    def prepare_frame_data(frame_in):
        frame_in = frame_in.round(precision)
        orig_type = frame_in.index.dtype.type
        data_dict = frame_in.reset_index().to_dict(orient='records')
        # restore type as pandas has a bug and casts to float if int;
        # the dtype test is loop-invariant, so decide once
        if orig_type in (np.int32, np.int64, int):
            id_col = common_cfg.id_quartiere_col_name
            for quartiere_data in data_dict:
                quartiere_data[id_col] = int(quartiere_data[id_col])
        return data_dict

    # make istat layer
    out[common_cfg.istat_layer_name] = prepare_frame_data(self.istat_data)

    # make vitality layer
    out[common_cfg.vitality_layer_name] = prepare_frame_data(
        self.vitality_data)

    # make layers
    for area, layers in self.areas_tree.items():
        layer_list = []
        for service in layers:
            data = self.layers_data[service].round(precision)
            # .values replaces DataFrame.as_matrix(), removed in pandas 1.0
            layer_list.append(
                pd.Series(data[AgeGroup.all()].values.tolist(),
                          index=data.index, name=service.name))
        area_data = pd.concat(layer_list, axis=1).reset_index()
        out[area.value] = area_data.to_dict(orient='records')

    return out
def __init__(self, demandFrame, serviceUnits, cityName):
    """Set up the evaluator and the empty result containers for a city.

    Args:
        demandFrame: DemandFrame holding the demand data.
        serviceUnits: iterable of ServiceUnit objects.
        cityName: name listed in ``city_settings.cityNamesList``.

    Raises:
        AssertionError: if any argument fails the checks below.
    """
    assert cityName in city_settings.cityNamesList, 'Unrecognized city name %s' % cityName
    assert isinstance(demandFrame, DemandFrame),'Demand frame expected'
    units_ok = all(isinstance(unit, ServiceUnit) for unit in serviceUnits)
    assert units_ok,'Service units list expected'

    self.city = cityName
    self.demand = demandFrame
    self.sources = serviceUnits

    # the evaluator also exposes the positions of the services
    self.evaluator = ServiceEvaluator(serviceUnits)
    self.servicePositions = self.evaluator.servicePositions

    # empty outputs, filled by the evaluation and KPI methods
    self.serviceValues = ServiceValues(self.demand.mappedPositions)
    self.bEvaluated = False
    self.weightedValues = ServiceValues(self.demand.mappedPositions)
    self.quartiereKPI = {}
    self.istatKPI = pd.DataFrame()

    # AgeGroup columns indexed by (quartiere id, position tuple),
    # plus their totals per quartiere
    quartiere_ids = demandFrame[common_cfg.IdQuartiereColName]
    position_tuples = demandFrame[common_cfg.positionsCol].apply(tuple)
    self.agesFrame = demandFrame[AgeGroup.all()].set_index(
        [quartiere_ids, position_tuples])
    self.agesTotals = self.agesFrame.groupby(level=0).sum()
def load(self, mean_radius=None):
    """Build one ServiceUnit per row of the extracted pharmacy data.

    All units share ``mean_radius`` as scale, so the kernel thresholds
    computed by the first unit are reused for the following ones.

    Args:
        mean_radius: reference radius used as the scale of every unit.

    Returns:
        list: the created ServiceUnit objects, in row order.

    Raises:
        AssertionError: if ``mean_radius`` is falsy.
    """
    assert mean_radius, 'Please provide a reference radius for pharmacies'
    propert_data, locations = super().extract_locations()

    # attribute name -> source column
    col_attributes = {
        'Descrizione': 'DESCRIZIONEFARMACIA',
        'PartitaIva': 'PARTITAIVA'
    }

    # thresholds of the first unit, reused once available
    shared_thresholds = None
    units = []
    for row_number in range(len(propert_data)):
        row = propert_data.iloc[row_number, :]
        extra = {}
        for attr_name, column in col_attributes.items():
            extra[attr_name] = row[column]

        unit = ServiceUnit(
            self.servicetype,
            name=row[self.name_col].astype(str),
            unit_id=row[self.id_col],
            position=locations[row_number],
            scale=mean_radius,
            age_diffusion={g: 1 for g in AgeGroup.all()},
            kernel_thresholds=shared_thresholds,
            attributes=extra)
        units.append(unit)

        # keep the computed thresholds when none were cached yet
        shared_thresholds = shared_thresholds or unit.ker_thresholds

    return units
def __init__(self, service, name, unit_id, position, scale,
             age_diffusion=None, kernel_thresholds=None, attributes=None):
    """Create a service unit sited at ``position``.

    Args:
        service: ServiceType member provided by this unit.
        name: unit name, must be a string.
        unit_id: identifier stored as ``self.id``.
        position: geopy.Point where the unit is located.
        scale: positive scalar multiplying every kernel length scale.
        age_diffusion: mapping AgeGroup -> diffusion multiplier. ``None``
            (the default) is treated as an empty mapping, i.e. no kernel
            is defined; it used to crash on ``None.keys()``.
        kernel_thresholds: optional mapping AgeGroup -> positive
            threshold covering at least the ``age_diffusion`` keys; when
            missing or empty the thresholds are computed by
            ``_compute_kernel_thresholds``.
        attributes: optional dict of extra attributes.

    Raises:
        AssertionError: if any argument fails the checks below.
    """
    assert isinstance(position, geopy.Point), 'Position must be a geopy Point'
    assert isinstance(service, ServiceType), 'Service must belong to the Eum'
    assert isinstance(name, str), 'Name must be a string'
    # `and` short-circuits: a non-scalar never reaches the comparison
    assert np.isscalar(scale) and scale > 0, \
        'Scale must be a positive scalar'
    if age_diffusion is None:
        # make the documented default usable
        age_diffusion = {}
    assert set(age_diffusion.keys()) <= set(
        AgeGroup.all()), 'Diffusion keys should be AgeGroups'
    if not attributes:
        attributes = {}
    assert isinstance(attributes, dict), \
        'Attributes have to be provided in a dict'

    b_thresholds_input = bool(kernel_thresholds)
    if b_thresholds_input:
        assert set(kernel_thresholds.keys()) >= set(age_diffusion.keys()), \
            'Kernel thresholds if provided must' \
            ' be defined for every age diffusion key'

    self.name = name
    self.id = unit_id
    self.service = service

    # A ServiceType can have many sites, so each unit has its own.
    # Moreover, a site is not uniquely assigned to a service
    self.site = position
    self.coord_tuple = (position.latitude, position.longitude)

    self.scale = scale  # store scale info
    self.attributes = attributes  # dictionary

    # how the service availability area varies for different age groups
    self.age_diffusion = age_diffusion

    # define kernel taking scale into account
    self.kernel = {
        g: gaussKern(length_scale=l * self.scale)
        for g, l in self.age_diffusion.items()
    }

    # precompute kernel threshold per AgeGroup, initialised to infinity
    # (np.inf: the np.Inf alias was removed in NumPy 2.0)
    self.ker_thresholds = {g: np.inf for g in AgeGroup.all()}
    if b_thresholds_input:
        assert all(isinstance(kern, gaussKern)
                   for kern in self.kernel.values()), \
            'Unexpected kernel type in ServiceUnit'
        assert all(val > 0 for val in kernel_thresholds.values()), \
            'Thresholds must be positive'
        self.ker_thresholds.update(kernel_thresholds)
    else:
        self._compute_kernel_thresholds()

    # initialise attendance
    self.attendance = np.nan
class DemandFrame(pd.DataFrame):
    """A class to store demand units in row and make them available for
    aggregation"""

    # age groups reported as columns by the constructor
    OUTPUT_AGES = AgeGroup.all()
    # extra instance attributes registered through pandas' _metadata hook
    _metadata = ['ages_frame', 'mapped_positions']

    def __init__(self, df_input, b_duplicates_check=True):
        """Wrap ``df_input`` and append AgeGroup totals and positions.

        Besides the added columns, two cached helpers are stored on the
        instance: ``ages_frame`` (AgeGroup columns indexed by quartiere id
        and position tuple) and ``mapped_positions``.

        Args:
            df_input: pandas DataFrame; it needs a 'geometry' column whose
                items expose ``centroid.x`` and ``centroid.y``.
            b_duplicates_check: when true, assert that no position is
                repeated (this check takes a while).

        Raises:
            AssertionError: on a non-DataFrame input or repeated position.
        """
        assert isinstance(df_input, pd.DataFrame), 'Input DataFrame expected'

        # initialise and assign base DataFrame properties
        # FIXME: this is not nice at all. Refactor to properly inherit from df
        super().__init__()
        self.__dict__.update(df_input.copy().__dict__)

        # prepare the AgeGroups cardinalities
        groups_col = 'ageGroup'
        people_by_sample_age = \
            common_cfg.fill_sample_ages_in_cpa_columns(self)
        data_by_group = people_by_sample_age.rename(
            AgeGroup.find_age_group, axis='columns').T
        # index is now given by AgeGroup items
        data_by_group.index.name = groups_col
        # extract to convert to categorical and groupby
        data_by_group = data_by_group.reset_index()
        data_by_group[groups_col] = \
            data_by_group[groups_col].astype('category')
        ages_by_section = data_by_group.groupby(groups_col).sum().T
        self['PeopleTot'] = ages_by_section.sum(axis=1)

        # report all ages, with zeros for groups missing from the input
        for col in self.OUTPUT_AGES:
            self[col] = ages_by_section.get(
                col, np.zeros_like(self.iloc[:, 0]))

        # extract long and lat and build geopy locations
        long_col = common_cfg.coord_col_names[0]
        lat_col = common_cfg.coord_col_names[1]
        self[long_col] = self['geometry'].apply(lambda pos: pos.centroid.x)
        self[lat_col] = self['geometry'].apply(lambda pos: pos.centroid.y)
        # Pair the columns explicitly as (latitude, longitude): no
        # dependency on DataFrame.as_matrix() (removed in pandas 1.0) nor
        # on geopy unwrapping a nested one-element tuple.
        self[common_cfg.positions_col] = [
            geopy.Point(lat, lon)
            for lat, lon in zip(self[lat_col], self[long_col])
        ]

        if b_duplicates_check:
            # check no location is repeated - takes a while
            assert not any(self[common_cfg.positions_col].duplicated()), \
                'Repeated position found'

        # cache ages frame and mapped positions for quicker access
        age_multi_index = [
            self[common_cfg.id_quartiere_col_name],
            self[common_cfg.positions_col].apply(tuple),
        ]
        self.ages_frame = self[AgeGroup.all()].set_index(age_multi_index)

        self.mapped_positions = MappedPositionsFrame(
            long=self[long_col],
            lat=self[lat_col],
            geopy_pos=self[common_cfg.positions_col].tolist(),
            id_quartiere=self[common_cfg.id_quartiere_col_name].tolist())

    def get_age_sample(self, age_group=None, n_sample=1000):
        """Draw coordinates at random, weighted by the age counts.

        Every mapped position is repeated as many times as the count of
        ``age_group`` (or of all age groups together when ``None``) and
        ``n_sample`` rows are sampled from the repeated set.

        Args:
            age_group: AgeGroup column to weight by, or ``None`` for the
                sum over all age groups.
            n_sample: number of rows to draw (cast to int).

        Returns:
            tuple: two arrays holding the first and the second column of
            ``common_cfg.coord_col_names`` for the sampled rows.
        """
        if age_group is not None:
            counts = self.ages_frame[age_group]
        else:
            counts = self.ages_frame.sum(axis=1)
        coord, n_repeat = self.mapped_positions.align(counts, axis=0)

        idx = np.repeat(np.arange(coord.shape[0]), n_repeat)
        coord = coord[common_cfg.coord_col_names].iloc[idx]
        # .values replaces DataFrame.as_matrix(), removed in pandas 1.0
        sample = coord.sample(int(n_sample)).values
        return sample[:, 0], sample[:, 1]

    @classmethod
    def create_from_istat_cpa(cls, city_name):
        """Constructor caller for DemandFrame"""
        city_config = city_settings.get_city_config(city_name)
        return cls(city_config.istat_cpa_data, b_duplicates_check=False)
def agesFrame(self):
    """Return the AgeGroup columns indexed by quartiere id and position.

    The second index level holds each position converted to a tuple.
    """
    quartiere_ids = self[common_cfg.IdQuartiereColName]
    position_tuples = self[common_cfg.positionsCol].apply(tuple)
    age_columns = self[AgeGroup.all()]
    return age_columns.set_index([quartiere_ids, position_tuples])