def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    .. math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{t=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None (the default), the categories found in `distribution` are
        used as classes.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
        The outer container is a `collections.defaultdict(dict)`, so looking
        up a class that is absent inserts an empty row instead of raising.
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class
    # (the per-unit totals and the grand total are not needed here)
    _, N_class, _ = compute_totals(distribution, classes)

    ## Compute the dissimilarity matrix
    # Each unordered pair is computed once and stored in both positions
    # (the matrix is symmetric); this avoids a second pass over the dict
    # and the Python-2-only `iterkeys` it relied on.
    matrix = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        value = _pair_dissimilarity(distribution, N_class, alpha, beta)
        matrix[alpha][beta] = value
        matrix[beta][alpha] = value

    return matrix
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` are used
        as classes.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values, variance of the null model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the representation and the null-model variance for every class
    # in every areal unit. `.items()` (rather than the Python-2-only
    # `.iteritems()`) keeps this working on both Python 2 and Python 3.
    values = {}
    for au, dist_au in distribution.items():
        values[au] = {
            cl: (single_representation(dist_au[cl],
                                       N_unit[au],
                                       N_class[cl],
                                       N_tot),
                 single_variance(N_unit[au],
                                 N_class[cl],
                                 N_tot))
            for cl in classes
        }

    return values
def representation(distribution, classes=None):
    """ Compute the representation of the different classes in all areal units

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` are used
        as classes.

    Returns
    -------

    representation: nested dictionaries
        Representation of each category in each areal unit.
        > {areal_id: {class_id: (representation_values, variance of the null model)}}
    """
    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    # Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    # Compute the representation and the null-model variance for every class
    # in every areal unit. `.items()` (rather than the Python-2-only
    # `.iteritems()`) keeps this working on both Python 2 and Python 3.
    values = {}
    for au, dist_au in distribution.items():
        values[au] = {
            cl: (single_representation(dist_au[cl],
                                       N_unit[au],
                                       N_class[cl],
                                       N_tot),
                 single_variance(N_unit[au],
                                 N_class[cl],
                                 N_tot))
            for cl in classes
        }

    return values
def dissimilarity(distribution, classes=None):
    r""" Compute the inter-class dissimilarity index

    The dissimilarity index between two categories `\alpha` and `\beta` is
    defined as

    .. math::
        D_{\alpha \beta} = \frac{1}{2} \sum_{t=1}^{T} \left|
        \frac{n_\alpha(t)}{N_\alpha} - \frac{n_\beta(t)}{N_\beta} \right|

    Its value ranges from 0 to 1.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None (the default), the categories found in `distribution` are
        used as classes.

    Returns
    -------

    dissimilarity: nested dictionary
        Classes matrix with dissimilarity as values
        > {alpha: {beta: D_{\alpha \beta}}}
        The outer container is a `collections.defaultdict(dict)`, so looking
        up a class that is absent inserts an empty row instead of raising.
    """
    ## Regroup into classes if specified
    if classes is not None:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute total numbers of individuals per class
    # (the per-unit totals and the grand total are not needed here)
    _, N_class, _ = compute_totals(distribution, classes)

    ## Compute the dissimilarity matrix
    # Each unordered pair is computed once and stored in both positions
    # (the matrix is symmetric); this avoids a second pass over the dict
    # and the Python-2-only `iterkeys` it relied on.
    matrix = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        value = _pair_dissimilarity(distribution, N_class, alpha, beta)
        matrix[alpha][beta] = value
        matrix[beta][alpha] = value

    return matrix
def exposure(distribution, classes=None):
    r""" Compute the exposure between classes

    The exposure between two categories `\alpha` and `\beta` is defined as

    .. math::
        E_{\alpha \beta} = \frac{1}{N} \sum_{t=1}^{T} n(t) r_\alpha(t) r_\beta(t)

    where `r_\alpha(t)` is the representation of the class `\alpha` in the
    areal unit `t`, `n(t)` the total population of `t`, and `N` the total
    population in the considered system.

    The exposure of a class to itself `E_{\alpha \alpha}` measures the
    **isolation** of this class.

    The variance is computed on the null model which corresponds to the
    unsegregated configuration, that is when the spatial repartition of people
    of different income classes is no different from that which would be
    obtained if they scattered at random across the city.

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes.
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.
        When None or empty, the categories found in `distribution` are used
        as classes.

    Returns
    -------

    exposure: nested dictionaries
        Matrix of exposures between categories.
        > {class_id0: {class_id1: (exposure_01, variance null model)}}
        The outer container is a `collections.defaultdict(dict)`, so looking
        up a class that is absent inserts an empty row instead of raising.
    """
    ## Regroup into classes if specified.
    if classes:
        distribution = regroup_per_class(distribution, classes)
    else:
        classes = return_categories(distribution)

    ## Compute the total numbers per class and per areal unit
    N_unit, N_class, N_tot = compute_totals(distribution, classes)

    ## Compute representation for all areal units
    # `classes` is deliberately not forwarded: any requested regrouping has
    # already been applied to `distribution` above.
    unit_representation = mb.representation(distribution)

    ## Compute the exposure matrix
    # Each unordered pair is computed once and stored in both positions
    # (the matrix is symmetric); this avoids a second pass over the dict
    # and the Python-2-only `iterkeys` it relied on.
    matrix = collections.defaultdict(dict)
    for alpha, beta in itertools.combinations_with_replacement(classes, 2):
        entry = (pair_exposure(unit_representation,
                               N_unit,
                               N_tot,
                               alpha,
                               beta),
                 pair_variance(unit_representation,
                               N_unit,
                               N_class,
                               N_tot,
                               alpha,
                               beta))
        matrix[alpha][beta] = entry
        matrix[beta][alpha] = entry

    return matrix