Exemplo n.º 1
0
def spacetime2(reg, sReg, df, ko, omega_age_smooth, lambda_time_smooth,
               lambda_time_smooth_nodata, zeta_space_smooth, zeta_space_smooth_nodata):
    """
    Compute the combined space-time weight matrix for one region.

    The "full" subset (rows of df whose region equals reg) identifies the
    observations that need weights; the "train" subset (rows of df whose
    super_region equals sReg and for which the ko mask is set) holds the
    residuals that are being weighted.

    Parameters:
        reg: region identifier used to select the full subset.
        sReg: super region identifier used to select the training subset.
        df: data frame with at least region, super_region and year columns.
        ko: boolean mask over df, combined with the super region filter to
            pick training rows.
        omega_age_smooth, lambda_time_smooth, lambda_time_smooth_nodata:
            smoothing parameters passed through unchanged to timeW.
        zeta_space_smooth, zeta_space_smooth_nodata: smoothing parameters
            passed through unchanged to calculate_xi_matrix.

    Returns:
        float32 matrix equal to the sum of the five xi-weighted location
        matrices, with every column divided by its column sum (columns whose
        sum is zero are divided by 1 and therefore stay all-zero).

    NOTE(review): EV is presumably numexpr.evaluate (or an equivalent) that
    resolves the names inside the expression string from this function's local
    variables -- confirm before renaming any local used in an EV string.
    """
    full_sub = df[(df.region == reg)]
    train_sub = df[(df.super_region == sReg) & (ko)]
    # The year range is taken from the whole data frame, not from the subsets.
    year_start = np.min(df.year)
    year_end = np.max(df.year)
    # Age/time weights; float32 is used for every large matrix in this function.
    Wat = timeW(full_sub, train_sub, omega_age_smooth, lambda_time_smooth,
        lambda_time_smooth_nodata, year_start, year_end).astype("float32")
    # Five position matrices from matCRS; presumably non-representative,
    # subnational, country, region and super region -- TODO confirm in matCRS.
    NR, SN, C, R, SR = matCRS(full_sub, train_sub)
    xi_mat = calculate_xi_matrix(full_sub, train_sub, zeta_space_smooth,
                                 zeta_space_smooth_nodata).astype("float32")
    # Column k of xi_mat supplies the xi weights for the k-th position matrix.
    NR = weight_matrix(NR, xi_mat[:,0], Wat).astype("float32")
    SN = weight_matrix(SN, xi_mat[:,1], Wat).astype("float32")
    C = weight_matrix(C, xi_mat[:,2], Wat).astype("float32")
    R = weight_matrix(R, xi_mat[:,3], Wat).astype("float32")
    SR = weight_matrix(SR, xi_mat[:,4], Wat).astype("float32")
    # The expression string refers to the local names above; keep them in sync.
    final = EV("NR + SN + C + R + SR").astype("float32")
    # Drop the per-level matrices now that they have been summed.
    del NR, SN, C, R, SR
    # Renormalise each column; a zero column sum is replaced by 1 so the
    # division below cannot divide by zero.
    account_missing = final.sum(0)
    account_missing[account_missing == .0] = 1.
    return EV("final / account_missing").astype("float32")
Exemplo n.º 2
0
def weight_matrix(valid_positions, xi_vector, weight_matrix):
    """
    (matrix, vector, matrix)

    Given a matrix of valid positions for an analytic region (valid_positions),
    a vector of appropriate xi weights to use for each column in that matrix
    (xi_vector), and an age year weighted matrix generated by timeW
    (weight_matrix), return a matrix re-weighted so that each column adds up
    to the corresponding xi value in the xi_vector.

    Columns with no valid weight at all (column sum of zero) are left as all
    zeros rather than being rescaled.

    Note that the weight_matrix parameter shadows this function's own name
    inside the body.

    NOTE(review): EV is presumably numexpr.evaluate (or an equivalent) that
    looks up the names in the expression string among this function's local
    variables and parameters -- confirm before renaming any of them.
    """
    # Mask the age/time weights down to the positions that are valid.
    weights = EV("valid_positions * weight_matrix")
    # Column totals; a zero total is replaced by 1 to avoid dividing by zero.
    sum_of_weights = weights.sum(0)
    sum_of_weights[sum_of_weights == .0] = 1.
    # Normalise each column to sum to 1, then scale it by its xi weight.
    return EV("(weights / sum_of_weights) * xi_vector")
Exemplo n.º 3
0
def calculate_xi_matrix(full, train, zeta_space_smooth,
                        zeta_space_smooth_nodata):
    """
    (data frame, data frame, float, float) -> array

    Build the matrix of xi (spatial) weights for the observations in "full".

    "train" is the subset of the data used to train the model. The result has
    one row per observation in "full" (the non-representative weights are
    computed from full.location_id and reshaped to the row count of the
    matrix) and one more column than calc_xi_vec returns. Column 0 is the
    weight given to data at the most specific level for that observation
    that is not representative; the remaining columns are the calc_xi_vec
    weights. spacetime2 in this file reads columns 0-4 as the weights for
    the NR, SN, C, R and SR matrices respectively. Each row is intended to
    sum to 1.

    Parameters:
        full: data frame with a location_id column; one output row per row.
        train: data frame with location_id and national columns.
        zeta_space_smooth: xi parameter passed to calc_xi_vec and used to
            size the non-representative weight.
        zeta_space_smooth_nodata: alternative xi parameter passed to
            calc_xi_vec.

    Returns:
        2-D numpy array of xi weights as described above.

    NOTE(review): EV is presumably numexpr.evaluate (or an equivalent) that
    resolves zeta_space_smooth and non_rep from the local scope -- keep those
    names in sync with the expression string.
    """
    depths = location_depth(full, train)

    # Materialise a list before calling np.array: on Python 3 `map` returns a
    # lazy iterator, which np.array would wrap in a useless 0-d object array.
    base = np.array([
        calc_xi_vec(depth, zeta_space_smooth, zeta_space_smooth_nodata)
        for depth in depths
    ])

    # Locations that have at least one training row with national != 1.
    non_rep_loc = train[train.national != 1].location_id.unique()
    non_rep = full.location_id.isin(non_rep_loc)
    non_rep_vec = EV(
        "zeta_space_smooth * (non_rep - non_rep * zeta_space_smooth)")

    # keep track of the places that only have non-representative data so we
    # can give them the full location weight
    only_non_rep_loc = np.setdiff1d(
        non_rep_loc, train[train.national == 1].location_id.unique())
    only_non_rep = full.location_id.isin(only_non_rep_loc).values

    # The non-representative weight is taken out of the first calc_xi_vec
    # column when that column is non-zero, otherwise out of the second one.
    has_first_level = base[:, 0] != 0
    base[:, 0] = base[:, 0] - non_rep_vec * has_first_level.astype(int)
    base[:, 1] = base[:, 1] - non_rep_vec * (~has_first_level).astype(int)

    # Prepend the non-representative weights as the new column 0.
    base = np.append(non_rep_vec.reshape(len(base), 1), base, 1)

    # Where a location has only non-representative data, fold the weight of
    # its own level into column 0 and zero that level out: column 2 for
    # depth 3, column 1 for depth 4.
    depth3 = only_non_rep & (depths == 3)
    base[depth3, 0] = base[depth3][:, [0, 2]].sum(axis=1)
    base[depth3, 2] = 0

    depth4 = only_non_rep & (depths == 4)
    base[depth4, 0] = base[depth4][:, [0, 1]].sum(axis=1)
    base[depth4, 1] = 0
    return base