示例#1
0
def calculate_inblock_semivariance(points_within_area, semivariance_model):
    """
    Function calculates the average semivariance between all pairs of points inside each given areal block.

    For a block with P points the returned value is:

        inblock semivariance = 1 / (P * P) * SUM(s=1, P) SUM(s'=1, P) semivariance(distance(u_s, u_s'))

    :param points_within_area: (numpy array / list of lists) [area_id, array of points within area and their values],
        the last column of the points array is skipped, the remaining columns are used as coordinates,
    :param semivariance_model: (TheoreticalSemivariogram) Theoretical Semivariogram object, its predict() method
        is called once for every ordered pair of points (a point paired with itself is included),
    :return areas_inblock_semivariance: (list) [[area_id, inblock_semivariance], ...], one record per block in the
        input order.
    """

    areas_inblock_semivariance = []
    for block in points_within_area:

        # Get area id
        area_id = block[0]

        # Coordinates only - the last column is not used in the distance calculation
        coordinates = block[1][:, :-1]
        number_of_points_within_block = len(block[1])
        squared_no_points = number_of_points_within_block * number_of_points_within_block

        # Sum (do not average) the semivariances per point: the only normalisation is the division by P * P below.
        # Averaging here as well would divide the result by P one time too many.
        semivars = []
        for point in coordinates:
            point_semivariances = []
            for point2 in coordinates:
                dist = calc_point_to_point_distance([point], [point2])
                point_semivariances.append(semivariance_model.predict(dist))
            semivars.append(np.sum(point_semivariances))
        avg_inblock_semivariance = np.sum(semivars) / squared_no_points

        areas_inblock_semivariance.append([area_id, avg_inblock_semivariance])
    return areas_inblock_semivariance
示例#2
0
def block_pair_semivariance(block_a, block_b, semivariogram_model):
    """
    Function calculates the mean semivariance over all point pairs formed between two blocks.

    Distances between every point of block A and every point of block B are calculated, the semivariogram model
    is evaluated for each distance separately and the results are averaged.

    :param block_a: block A points in the form of array [[point x1A, point y1A, value v1A],
                                                         [point x2A, point y2A, value v2A],
                                                         [...]
                                                         [point xnA, point ynA, value vnA]]
        All coordinates from the array must be placed inside the block!
        NOTE(review): the array is passed unchanged to calc_point_to_point_distance - confirm whether callers
        strip the value column beforehand.
    :param block_b: block B points in the form of array [[point x1B, point y1B, value v1B],
                                                         [point x2B, point y2B, value v2B],
                                                         [...]
                                                         [point xnB, point ynB, value vnB]]
        All coordinates from the array must be placed inside the block!
    :param semivariogram_model: (TheoreticalSemivariogram) theoretical semivariance model from TheoreticalSemivariance
        class. Model must be fitted and calculated.
    :return semivariance_mean: (float) Sum of the pairwise semivariances divided by the number of pairs.
    """
    pair_distances = calc_point_to_point_distance(block_a, block_b).flatten()

    # The model is evaluated distance by distance, one call per pair
    pair_semivariances = [
        semivariogram_model.predict(distance) for distance in pair_distances
    ]

    return np.sum(pair_semivariances) / len(pair_semivariances)
def prepare_distances_list_unknown_area(unknown_area_points):
    """
    Function builds the list of value pairs and distances between the points of a single (unknown) area.

    :param unknown_area_points: (numpy array) [pt x, pt y, val], the last column holds the values, the other
        columns are passed to the distance calculation,
    :return: (numpy array) output of _merge_vals_and_distances converted to an array:
        [point value 1, point value 2,  distance between points]
    """
    point_values = unknown_area_points[:, -1]
    pairwise_distances = calc_point_to_point_distance(unknown_area_points[:, :-1])

    # The area is compared with itself, so the same values are used for both sides of each pair
    return np.array(
        _merge_vals_and_distances(point_values, point_values, pairwise_distances))
示例#4
0
def interpolate_raster(data,
                       dim=1000,
                       number_of_neighbors=4,
                       semivariogram_model=None):
    """
    Function interpolates raster from data points using ordinary kriging.

    :param data: (numpy array / list / any array-like) [coordinate x, coordinate y, value]. Input which is not
        a numpy array is converted with np.array(),
    :param dim: (int) number of pixels (points) of a larger dimension (it could be width or height),
    :param number_of_neighbors: (int) default=4, number of points used to interpolate data. When the semivariogram
        is estimated here the same number is also forwarded as the second positional argument of
        TheoreticalSemivariogram.find_optimal_model()
        (NOTE(review): confirm that this argument is meant to receive the number of neighbours),
    :param semivariogram_model: (TheoreticalSemivariance) default=None, Theoretical Semivariogram model,
        if not provided then it is estimated from a given dataset with 100 lags spanning the largest distance
        between points.
    :return: (tuple) ([numpy array of interpolated values, numpy array of interpolation errors],
        list of properties [pixel size, min x, max x, min y, max y])
    """

    # Accept every array-like input (list, tuple, ...); numpy arrays and their subclasses are left untouched
    if not isinstance(data, np.ndarray):
        data = np.array(data)

    # Set dimension
    cols_coords, rows_coords, props = _set_dims(data[:, 0], data[:, 1], dim)

    # Calculate semivariance if not provided

    if semivariogram_model is None:
        distances = calc_point_to_point_distance(data[:, :-1])

        # Lags cover the range from 0 to the largest distance between points in 100 equal steps
        maximum_range = np.max(distances)
        number_of_divisions = 100
        step_size = maximum_range / number_of_divisions
        lags = np.arange(0, maximum_range, step_size)

        semivariance = calculate_semivariance(data, lags, step_size)

        ts = TheoreticalSemivariogram(data, semivariance, False)
        ts.find_optimal_model(False, number_of_neighbors)

    else:
        ts = semivariogram_model

    # Interpolate data point by point

    k = Krige(ts, data)

    kriged_matrix, kriged_errors = update_interpolation_matrix(
        rows_coords, cols_coords, k, number_of_neighbors, True)

    return [kriged_matrix, kriged_errors], props
示例#5
0
def calculate_covariance(data, lags, step_size):
    """Function calculates covariance of a given set of points.
        Equation for calculation is:

            covariance = 1 / (N) * SUM(i=1, N) [z(x_i + h) * z(x_i)] - u^2

        where:
            N - number of observation pairs,
            h - distance (lag),
            z(x_i) - value at location z_i,
            (x_i + h) - location at a distance h from x_i,
            u - mean observation at a given lag distance.

        INPUT:
        :param data: array of coordinates and their values, all columns except the last one are used as coordinates,
            values are read from the column with index 2,
        :param lags: array of lags between points,
        :param step_size: distance which should be included in the gamma parameter which enhances range of interest.

        OUTPUT:
        :return: covariance: numpy array with one row per lag:
                 [lag, covariance, number of point pairs selected for the lag].
                 If a lag has no pairs, or its covariance is not greater than zero, then the row is [lag, 0, 0].
    """

    distances = calc_point_to_point_distance(data[:, :-1])

    covariance = []

    for h in lags:
        distances_in_range = select_values_in_range(distances, h, step_size)
        first_values = data[distances_in_range[0], 2]
        second_values = data[distances_in_range[1], 2]
        number_of_pairs = len(first_values)

        # Empty lag: store zeros explicitly instead of relying on NaN produced by a division by zero
        if number_of_pairs == 0:
            covariance.append([h, 0, 0])
            continue

        # u - mean of all values which take part in the pairs of this lag
        u_mean = np.sum((first_values + second_values) / (2 * number_of_pairs))
        cov_value = np.sum(first_values * second_values) / number_of_pairs - u_mean**2

        # Only positive covariances are stored, other lags are reset to zero together with their pairs count
        if cov_value > 0:
            covariance.append([h, cov_value, number_of_pairs])
        else:
            covariance.append([h, 0, 0])

    covariance = np.vstack(covariance)

    return covariance
def calculate_semivariance(data, lags, step_size):
    """Function calculates experimental semivariance of a given set of points.
        Equation for calculation is:

            semivariance = 1 / (2 * N) * SUM(i=1, N) [z(x_i + h) - z(x_i)]^2

        where:
            N - number of observation pairs,
            h - distance (lag),
            z(x_i) - value at location z_i,
            (x_i + h) - location at a distance h from x_i.

        INPUT:
        :param data: array of coordinates and their values, all columns except the last one are used as coordinates,
            values are read from the column with index 2,
        :param lags: array of lags between points,
        :param step_size: distance which should be included in the gamma parameter which enhances range of interest.

        OUTPUT:
        :return: semivariance: numpy array with one row per lag:
                 [lag, semivariance, number of point pairs selected for the lag],
                 semivariance is set to 0 for a lag without any pairs.
    """

    distances = calc_point_to_point_distance(data[:, :-1])

    rows = []
    for h in lags:
        selected = select_values_in_range(distances, h, step_size)
        squared_differences = (data[selected[0], 2] - data[selected[1], 2])**2
        pairs_count = len(squared_differences)

        # Guard against the division by zero for lags without pairs
        if pairs_count == 0:
            lag_semivariance = 0
        else:
            lag_semivariance = np.sum(squared_differences) / (2 * pairs_count)

        rows.append([h, lag_semivariance, pairs_count])

    return np.vstack(rows)
def prepare_ata_known_areas(list_of_points_of_known_areas):
    """
    Function prepares known areas data for prediction.

    Every area is paired with every area (including itself) and for each pair the distances between their points
    are merged with the points' values.

    :param list_of_points_of_known_areas: (numpy array) list of all areas' points and their values used for
        the prediction. Each element is indexed as element[0][0] (area id) and element[0][1] (array of points,
        the last column holds values),
    :return: (numpy array, dtype=object) list of arrays with areas and distances between them:
        [id base,
            [
                id other,
                [base point value, other point value,  distance between points]
            ]
        ]
    """
    all_distances_list = []
    for pt1 in list_of_points_of_known_areas:

        id_base = pt1[0][0]
        points_in_base_area = pt1[0][1][:, :-1]
        vals_in_base_area = pt1[0][1][:, -1]

        distances_from_base = []
        for pt2 in list_of_points_of_known_areas:

            id_other = pt2[0][0]
            points_in_other_area = pt2[0][1][:, :-1]
            vals_in_other_area = pt2[0][1][:, -1]

            distances_array = calc_point_to_point_distance(
                points_in_base_area, points_in_other_area)
            merged = _merge_vals_and_distances(vals_in_base_area,
                                               vals_in_other_area,
                                               distances_array)

            distances_from_base.append([id_other, merged])
        all_distances_list.append([id_base, distances_from_base])

    # Records are ragged ([id, list]); recent NumPy versions refuse to build such an array
    # unless the object dtype is requested explicitly
    return np.array(all_distances_list, dtype=object)
def prepare_kriging_data(unknown_position,
                         data_array,
                         number_of_neighbours=10):
    """
    Function selects the known points which are the closest to the unknown position.

    :param unknown_position: array with position of unknown value,
    :param data_array: array with known positions and their values, the last column holds values and the
        remaining columns are used as coordinates,
    :param number_of_neighbours: number of the closest locations to the unknown position
    :return output_data: prepared dataset sorted by the distance in ascending order and limited to the first
        number_of_neighbours rows:
    [[known_position_x, known_position_y, value, distance_to_unknown_position], [...]]
    """

    # Distances from the unknown point to every known position
    target = np.array([unknown_position])
    distances_to_target = calc_point_to_point_distance(target, data_array[:, :-1])

    # Distance becomes the last column, then rows are ordered by it
    with_distances = np.c_[data_array, distances_to_target.T]
    order = np.argsort(with_distances[:, -1])
    nearest_first = with_distances[order]

    return nearest_first[:number_of_neighbours]
示例#9
0
    def predict(self,
                unknown_location,
                unknown_location_points,
                number_of_neighbours,
                max_search_radius,
                weighted,
                test_anomalies=True):
        """
        Function predicts areal value in a unknown location based on the centroid-based Poisson Kriging.

        The prepared dataset is stored in self.prepared_data as a side effect.

        :param unknown_location: (numpy array) array of unknown area in the form:
            [area_id, areal_polygon, centroid coordinate x, centroid coordinate y]
        :param unknown_location_points: (numpy array) array of points within an unknown area in the form:
            [area_id, [point_position_x, point_position_y, value]]
        :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
        :param max_search_radius: (float) maximum search radius (if number of neighbours within this search radius is
            smaller than number_of_neighbours parameter then additional neighbours are included up to number of
            neighbors).
        :param weighted: (bool) distances weighted by population (True) or not (False),
        :param test_anomalies: (bool) if True then ValueError is raised when the predicted value is below zero,
        :return: (tuple) zhat, sigma, w[-1], w:
            zhat - value predicted in the unknown location,
            sigma - square root of the kriging variance (dot product of the solution vector and the right-hand side
                of the kriging system), 0 if the variance is negative,
            w[-1] - last element of the solution, it belongs to the row of ones appended to the kriging system
                (described as the estimated mean in the original documentation),
            w - whole solution vector (weights followed by the last element described above).
        :raises ValueError: if test_anomalies is True and the predicted value is below zero.
        """

        self.prepared_data = prepare_poisson_kriging_data(
            unknown_area=unknown_location,
            points_within_unknown_area=unknown_location_points,
            known_areas=self.known_areas,
            points_within_known_areas=self.known_areas_points,
            number_of_neighbours=number_of_neighbours,
            max_search_radius=max_search_radius,
            weighted=weighted
        )  # [id (known), coo_x, coo_y, val, dist_to_unknown, total population]

        n = self.prepared_data.shape[0]
        unknown_distances = self.prepared_data[:, -2]
        k = self.model.predict(
            unknown_distances)  # predicted values from distances to unknown
        k = k.T
        # Right-hand side is extended by 1 - it matches the row of ones appended to the matrix below
        k_ones = np.ones(1)[0]
        k = np.r_[k, k_ones]

        data_for_distance = self.prepared_data[:, 1:3]
        dists = calc_point_to_point_distance(data_for_distance)

        predicted_weights = self.model.predict(dists.ravel())
        predicted = np.array(predicted_weights.reshape(n, n))

        # Add weights to predicted values (diagonal)

        weights_mtx = self.calculate_weight_arr()
        predicted = predicted + weights_mtx

        # Prepare weights matrix: semivariances bordered by a column and a row of ones with 0 in the corner

        p_ones = np.ones((predicted.shape[0], 1))
        predicted_with_ones_col = np.c_[predicted, p_ones]
        p_ones_row = np.ones((1, predicted_with_ones_col.shape[1]))
        p_ones_row[0][-1] = 0.
        weights = np.r_[predicted_with_ones_col, p_ones_row]

        # Solve Kriging system
        try:
            w = np.linalg.solve(weights, k)
        except TypeError:
            # Arrays of a non-numeric dtype are converted to floats and the system is solved again.
            # The builtin float is used because the np.float alias is not available in recent NumPy versions.
            weights = weights.astype(float)
            k = k.astype(float)
            w = np.linalg.solve(weights, k)

        zhat = self.prepared_data[:, 3].dot(w[:-1])

        # Test for anomalies (the message below is kept exactly as it was, including its whitespaces)
        if test_anomalies:
            if zhat < 0:
                user_input_message = 'Estimated value is below zero and it is: {}. \n'.format(
                    zhat)
                text_error = user_input_message + 'Program is terminated. Try different semivariogram model. ' \
                                                  '(Did you use gaussian model? \
                            If so then try to use other models like linear or exponential) and/or analyze your data \
                            for any clusters which may affect the final estimation'

                raise ValueError(text_error)

        # Kriging variance is the dot product of the whole solution and the right-hand side.
        # Element-wise multiplication followed by [0] would use only the first neighbour.
        sigmasq = w.dot(k)
        if sigmasq < 0:
            sigma = 0
        else:
            sigma = np.sqrt(sigmasq)
        return zhat, sigma, w[-1], w
示例#10
0
    def simple_kriging(self,
                       unknown_location,
                       number_of_neighbours,
                       mu=None,
                       test_anomalies=True):
        """
        Function predicts value at unknown location with the simple kriging.

        :param unknown_location: (tuple) position of unknown location,
        :param number_of_neighbours: (int) number of the closest locations to the unknown position which should be
            included in the modeling. If the dataset has fewer points then all of them are used,
        :param mu: (float) global mean which should be known before processing. If not given then it is calculated
            from the sample but then it may cause a relative large errors (this mean is expectation of the random field,
            so without knowledge of the ongoing processes it is unknown).
        :param test_anomalies: (bool) if True then ValueError is raised when the predicted value is below zero,
        :return model_output: (tuple) zhat, sigma, mu, w:
            zhat - value predicted in the unknown location,
            sigma - square root of the dot product of the weights and the right-hand side of the kriging system,
                0 if this product is negative,
            mu - mean used in the prediction (given or calculated from the whole dataset),
            w - weights of the neighbours.
        :raises ValueError: if test_anomalies is True and the predicted value is below zero.
        """

        prepared_data = prepare_kriging_data(
            unknown_position=unknown_location,
            data_array=self.dataset,
            number_of_neighbours=number_of_neighbours)
        # Number of rows actually returned - it is smaller than number_of_neighbours for a small dataset
        n = prepared_data.shape[0]

        if mu is None:
            vals = self.dataset[:, -1]
            mu = np.sum(vals)
            mu = mu / len(vals)

        # Last column: distance to the unknown location
        unknown_distances = prepared_data[:, -1]
        k = self.model.predict(unknown_distances)
        k = k.T

        # All columns except the last two (value, distance) are coordinates of the neighbours
        dists = calc_point_to_point_distance(prepared_data[:, :-2])
        predicted_weights = self.model.predict(dists.ravel())
        predicted = np.array(predicted_weights.reshape(n, n))

        w = np.linalg.solve(predicted, k)
        # Weights are applied to residuals from the mean, then the mean is added back
        r = prepared_data[:, -2] - mu
        zhat = r.dot(w)
        zhat = zhat + mu

        # Test for anomalies (the message below is kept exactly as it was, including its whitespaces)
        if test_anomalies:
            if zhat < 0:
                user_input_message = 'Estimated value is below zero and it is: {}. \n'.format(
                    zhat)
                text_error = user_input_message + 'Program is terminated. Try different semivariogram model. ' \
                                                  '(Did you use gaussian model? \
                            If so then try to use other models like linear or exponential) and/or analyze your data \
                            for any clusters which may affect the final estimation'

                raise ValueError(text_error)

        # Dot product over all neighbours. Element-wise multiplication followed by [0] would use
        # only the first neighbour.
        sigmasq = w.dot(k)
        if sigmasq < 0:
            sigma = 0
        else:
            sigma = np.sqrt(sigmasq)
        return zhat, sigma, mu, w
示例#11
0
    def ordinary_kriging(self,
                         unknown_location,
                         number_of_neighbours,
                         test_anomalies=True):
        """
        Function predicts value at unknown location with the ordinary kriging.

        :param unknown_location: (tuple) position of unknown location,
        :param number_of_neighbours: (int) number of the closest locations to the unknown position which should be
            included in the modeling. If the dataset has fewer points then all of them are used,
        :param test_anomalies: (bool) if True then ValueError is raised when the predicted value is below zero,
        :return model_output: (tuple) zhat, sigma, w[-1], w:
            zhat - value predicted in the unknown location,
            sigma - square root of the kriging variance (dot product of the solution vector and the right-hand side
                of the kriging system), 0 if the variance is negative,
            w[-1] - last element of the solution, it belongs to the row of ones appended to the kriging system
                (described as the estimated mean in the original documentation),
            w - whole solution vector (weights followed by the last element described above).
        :raises ValueError: if test_anomalies is True and the predicted value is below zero.
        """

        prepared_data = prepare_kriging_data(
            unknown_position=unknown_location,
            data_array=self.dataset,
            number_of_neighbours=number_of_neighbours)
        # Number of rows actually returned - it is smaller than number_of_neighbours for a small dataset
        n = prepared_data.shape[0]

        # Last column: distance to the unknown location
        unknown_distances = prepared_data[:, -1]
        k = self.model.predict(unknown_distances)
        k = k.T
        # Right-hand side is extended by 1 - it matches the row of ones appended to the matrix below
        k_ones = np.ones(1)[0]
        k = np.r_[k, k_ones]

        # All columns except the last two (value, distance) are coordinates of the neighbours
        dists = calc_point_to_point_distance(prepared_data[:, :-2])

        predicted_weights = self.model.predict(dists.ravel())
        predicted = np.array(predicted_weights.reshape(n, n))

        # Kriging matrix: semivariances bordered by a column and a row of ones with 0 in the corner
        p_ones = np.ones((predicted.shape[0], 1))
        predicted_with_ones_col = np.c_[predicted, p_ones]
        p_ones_row = np.ones((1, predicted_with_ones_col.shape[1]))
        p_ones_row[0][-1] = 0.
        weights = np.r_[predicted_with_ones_col, p_ones_row]

        w = np.linalg.solve(weights, k)
        zhat = prepared_data[:, -2].dot(w[:-1])

        # Test for anomalies (the message below is kept exactly as it was, including its whitespaces)
        if test_anomalies:
            if zhat < 0:
                user_input_message = 'Estimated value is below zero and it is: {}. \n'.format(
                    zhat)
                text_error = user_input_message + 'Program is terminated. Try different semivariogram model. ' \
                                                  '(Did you use gaussian model? \
                            If so then try to use other models like linear or exponential) and/or analyze your data \
                            for any clusters which may affect the final estimation'

                raise ValueError(text_error)

        # Kriging variance is the dot product of the whole solution and the right-hand side.
        # Element-wise multiplication followed by [0] would use only the first neighbour.
        sigmasq = w.dot(k)
        if sigmasq < 0:
            sigma = 0
        else:
            sigma = np.sqrt(sigmasq)
        return zhat, sigma, w[-1], w
def prepare_poisson_kriging_data(unknown_area,
                                 points_within_unknown_area,
                                 known_areas,
                                 points_within_known_areas,
                                 number_of_neighbours,
                                 max_search_radius,
                                 weighted=False):
    """
    Function prepares data for centroid based Poisson Kriging.

    :param unknown_area: (numpy array) unknown area in the form:
        [area_id, areal_polygon, centroid coordinate x, centroid coordinate y]
        NOTE(review): centroid is read as unknown_area[2:-1], so the last element is always skipped. With exactly
        four elements only the x coordinate would be read - confirm that callers pass one more trailing element
        (e.g. value) as it is done for known_areas.
    :param points_within_unknown_area: (numpy array) array of points and their values within the given area:
        [area_id, [point_position_x, point_position_y, value]]
    :param known_areas: (numpy array) array of known areas in the form:
        [area_id, areal_polygon, centroid coordinate x, centroid coordinate y, value at specific location]
    :param points_within_known_areas: (numpy array) array of points and their values within the given area:
        [[area_id, [point_position_x, point_position_y, value]], ...]
        If weighted is True then distances are calculated in the order of this array and joined with known_areas
        row by row (NOTE(review): both arrays presumably must be in the same order - verify against the caller).
    :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
    :param max_search_radius: (float) maximum search radius (if number of neighbours within this search radius is
        smaller than number_of_neighbours parameter then additional neighbours are included up to number of neighbors).
    :param weighted: (bool) distances weighted by population (True) or not (False),
    :return output_data: (numpy array) array of distances from known locations to the unknown location sorted by
        the distance in ascending order:
        [id (known), coo_x, coo_y, val, dist_to_unknown, sum_of_vals_within_area]
    """

    # Prepare data
    known_centroids = known_areas.copy()
    kc_ids = known_centroids[:, 0]
    kc_vals = known_centroids[:, -1]
    kc_pos = known_centroids[:, 2:-1]

    # Build set for Poisson Kriging

    if weighted:
        known_areas_pts = points_within_known_areas.copy()

        dists = []  # distance from each known area to the unknown area

        for pt in known_areas_pts:
            d = calc_block_to_block_distance([pt, points_within_unknown_area])
            dists.append([d[0][0][1]])
        s = np.ravel(np.array(dists)).T
    else:
        # Centroid of the unknown area is needed only for centroid to centroid distances
        r = np.array(unknown_area[2:-1])
        dists = calc_point_to_point_distance(kc_pos, [r])
        s = dists.ravel().T

    kriging_data = np.c_[kc_ids, kc_pos, kc_vals,
                         s]  # [id, coo_x, coo_y, val, dist_to_unkn]

    # sort by distance
    kriging_data = kriging_data[kriging_data[:, -1].argsort()]

    # Get distances in max search radius.
    # np.argmax returns 0 for a mask without any True value, so the case where every area is within
    # the search radius must be handled separately - then all areas are taken.
    beyond_radius = kriging_data[:, -1] > max_search_radius
    if np.any(beyond_radius):
        max_search_pos = np.argmax(beyond_radius)
    else:
        max_search_pos = len(kriging_data)
    output_data = kriging_data[:max_search_pos]

    # check number of observations

    if len(output_data) < number_of_neighbours:
        output_data = kriging_data[:number_of_neighbours]

    # get total points' value in each id from prepared datasets and append it to the array

    points_vals = []
    for rec in output_data:
        areal_id = rec[0]
        points_in_area = points_within_known_areas[
            points_within_known_areas[:, 0] == areal_id]
        total_val = get_total_value_of_area(points_in_area[0])
        points_vals.append(total_val)

    output_data = np.c_[output_data, np.array(points_vals)]
    return output_data
def prepare_atp_data(points_within_unknown_area, known_areas,
                     points_within_known_areas, number_of_neighbours,
                     max_search_radius):
    """
    Function prepares data for Area to Point Poisson Kriging.

    :param points_within_unknown_area: (numpy array) array of points and their values within the given area:
        [area_id, [point_position_x, point_position_y, value of point]]
    :param known_areas: (numpy array) array of known areas in the form:
        [area_id, areal_polygon, centroid coordinate x, centroid coordinate y, value at specific location]
    :param points_within_known_areas: (numpy array) array of points and their values within the given area:
        [[area_id, [point_position_x, point_position_y, value of point]], ...]
        Distances are calculated in the order of this array and joined with known_areas row by row
        (NOTE(review): both arrays presumably must be in the same order - verify against the caller).
    :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
    :param max_search_radius: (float) maximum search radius (if number of neighbours within this search radius is
        smaller than number_of_neighbours parameter then additional neighbours are included up to number of neighbors).
    :return: (list) [output_data, coordinates of points within the unknown area] where output_data is
        a numpy array (dtype=object) with one record per selected known area, sorted by the distance to
        the unknown area:
        [id (known), areal value - count, [known_point_1 value, unknown_point_1 value, distance_1], total point value]
    """

    # Initialize set

    kriging_areas_ids = known_areas[:, 0]
    kriging_areal_values = known_areas[:, -1]

    # Build set for Area to Area Poisson Kriging - sort areas with distance

    known_areas_pts = points_within_known_areas.copy()

    dists = []  # distance from each known area to the unknown area

    for pt in known_areas_pts:
        d = calc_block_to_block_distance([pt, points_within_unknown_area])
        dists.append([d[0][0][1]])
    s = np.ravel(np.array(dists)).T
    kriging_data = np.c_[kriging_areas_ids, kriging_areal_values,
                         s]  # [id, areal val, dist_to_unkn]

    # sort by distance
    kriging_data = kriging_data[kriging_data[:, -1].argsort()]

    # Get distances in max search radius.
    # np.argmax returns 0 for a mask without any True value, so the case where every area is within
    # the search radius must be handled separately - then all areas are taken.
    beyond_radius = kriging_data[:, -1] > max_search_radius
    if np.any(beyond_radius):
        max_search_pos = np.argmax(beyond_radius)
    else:
        max_search_pos = len(kriging_data)
    output_data = kriging_data[:max_search_pos]

    # check number of observations

    if len(output_data) < number_of_neighbours:
        output_data = kriging_data[:number_of_neighbours]

    # for each of prepared id prepare distances list with points' weights for semivariogram calculation

    points_vals = []
    points_and_vals_in_unknown_area = points_within_unknown_area[1]
    for rec in output_data:
        areal_id = rec[0]
        areal_value = rec[1]
        known_area = points_within_known_areas[points_within_known_areas[:, 0]
                                               == areal_id]
        known_area = known_area[0]
        points_in_known_area = known_area[1][:, :-1]
        vals_in_known_area = known_area[1][:, -1]

        # Set distances array from each point of unknown area
        merged_points_array = []
        for u_point in points_and_vals_in_unknown_area:
            u_point_dists = calc_point_to_point_distance(
                points_in_known_area, [u_point[:-1]])
            u_point_val = u_point[-1]
            merged = _merge_point_val_and_distances(u_point_val,
                                                    vals_in_known_area,
                                                    u_point_dists)
            merged_points_array.append(merged)

        total_val = np.sum(known_area[1][:, 2])
        generated_array = [
            areal_id, areal_value, merged_points_array, total_val
        ]  # [[id, value, [
        # [unknown point value,
        #     [known points values,
        #      distances between points]],
        # ...],
        #  total known points value],
        # [list of unknown point coords]]
        points_vals.append(generated_array)

    # Records mix scalars with nested lists; recent NumPy versions refuse to build such a ragged array
    # unless the object dtype is requested explicitly
    output_data = np.array(points_vals, dtype=object)
    return [output_data, points_within_unknown_area[1][:, :-1]]
def calculate_weighted_semivariance(data, lags, step_size):
    """Function calculates weighted semivariance following Monestiez et al.:

        A. Monestiez P, Dubroca L, Bonnin E, Durbec JP, Guinet C: Comparison of model based geostatistical methods
        in ecology: application to fin whale spatial distribution in northwestern Mediterranean Sea.
        In Geostatistics Banff 2004 Volume 2. Edited by: Leuangthong O, Deutsch CV. Dordrecht, The Netherlands,
        Kluwer Academic Publishers; 2005:777-786.

        B. Monestiez P, Dubroca L, Bonnin E, Durbec JP, Guinet C: Geostatistical modelling of spatial distribution
        of Balenoptera physalus in the northwestern Mediterranean Sea from sparse count data and heterogeneous
        observation efforts. Ecological Modelling 2006 in press.

        Equation for calculation is:

        s(h) = [1 / (2 * SUM(a=1, N(h)) (n(u_a) * n(u_a + h)) /...
                                      / (n(u_a) + n(u_a + h))
                    )
               ] * SUM(a=1, N(h)) {

               [(n(u_a) * n(u_a + h)) / (n(u_a) + n(u_a + h))] *...
               * [(z(u_a) - z(u_a + h))^2] - m'

               }

        where:

        s(h) - Semivariogram of the risk,
        n(u_a) - size of the population at risk in the unit a,
        z(u_a) - mortality rate at the unit a,
        u_a + h - area at the distance (h) from the analyzed area,
        m' - population weighted mean of rates.

        NOTE(review): the m' term is implemented as the sum over pairs of
        (n(u_a) * z(u_a) + n(u_a + h) * z(u_a + h)) divided by the sum of pair weights - confirm it against
        the cited papers.

        INPUT:
        :param data: (numpy array) [coordinate x, coordinate y, value, weight],
        :param lags: (array) of lags [lag1, lag2, lag...]
        :param step_size: step size of search radius.


        OUTPUT:
        :return: semivariance: numpy array with one row per lag:
                 [lag, semivariance, number of point pairs selected for the lag].
                 If a lag has no pairs, or its semivariance is not greater than zero, then the row is [lag, 0, 0].
    """

    # TEST: test if any 0-weight is inside the dataset

    _test_weights(data[:, -1])

    # Calculate distance

    distances = calc_point_to_point_distance(data[:, :-2])

    # Prepare semivariance array
    semivariance = []

    # Calculate semivariances
    for h in lags:
        distances_in_range = select_values_in_range(distances, h, step_size)

        # Weights
        weight1 = data[distances_in_range[0], 3]
        weight2 = data[distances_in_range[1], 3]

        # Values
        val1 = data[distances_in_range[0], 2]
        val2 = data[distances_in_range[1], 2]

        # Empty lag: store zeros explicitly instead of relying on NaN produced by a division by zero
        number_of_pairs = len(val1)
        if number_of_pairs == 0:
            semivariance.append([h, 0, 0])
            continue

        weights = (weight1 * weight2) / (weight1 + weight2)
        weights_sum = np.sum(weights)

        # Weighted mean of values
        weighted_mean = ((weight1 * val1) + (weight2 * val2)) / weights_sum

        sem = weights * (val1 - val2)**2
        sem_value = (np.sum(sem) - np.sum(weighted_mean)) / (2 * weights_sum)

        # Only positive semivariances are stored, other lags are reset to zero together with their pairs count
        if sem_value > 0:
            semivariance.append([h, sem_value, number_of_pairs])
        else:
            semivariance.append([h, 0, 0])

    semivariance = np.vstack(semivariance)
    return semivariance