Python RipleysKEstimator示例，astropy.stats.RipleysKEstimator Python示例

示例#1

0

显示文件

def ripley_ci(locs_path, num_simulations=100, max_dist=200, method='ripley'):
    """
    Use Monte Carlo to estimate the confidence interval of Ripley's H function.

    Uniformly distributed random point sets, each with as many points as the
    stored localisations, are drawn inside the bounding box of the data. The
    H function of every simulated set is evaluated at 100 radii evenly spaced
    over ``[0, max_dist]``.

    Parameters
    ----------
    locs_path
        Passed to ``ClustersHDFStore`` to open the localisation store.
    num_simulations : int
        Number of random point sets to simulate.
    max_dist : float
        Largest radius at which the H function is evaluated.
    method : str
        Passed as ``mode`` to ``RipleysKEstimator.Hfunction``.

    Returns
    -------
    tuple
        ``(meanl, ci_plus, ci_minus)``: the per-radius mean over all
        simulations and that mean plus / minus two standard deviations.
    """
    with ClustersHDFStore(locs_path) as ct:
        _, xy = ct.get_points_for_clustering()
        n_locs = xy.shape[0]
        x_min = np.min(xy[:, 0])
        x_max = np.max(xy[:, 0])
        y_min = np.min(xy[:, 1])
        # Upper edge of the bounding box must be the maximum; using the
        # minimum here collapses the box to zero height and zero area.
        y_max = np.max(xy[:, 1])
        area = (x_max - x_min) * (y_max - y_min)
        dist_scale = np.linspace(0, max_dist, 100)
        # One column of H values per simulation.
        lrand = np.zeros((dist_scale.shape[0], num_simulations))
        kest = RipleysKEstimator(area,
                                 x_min=x_min,
                                 x_max=x_max,
                                 y_min=y_min,
                                 y_max=y_max)
        for s in range(num_simulations):
            rand_datax = np.random.uniform(x_min, x_max, n_locs)
            rand_datay = np.random.uniform(y_min, y_max, n_locs)
            rand_xy = np.stack((rand_datax, rand_datay), axis=-1)
            lrand[:, s] = kest.Hfunction(rand_xy, dist_scale, mode=method)

        meanl = np.mean(lrand, axis=1)
        stdl = np.std(lrand, axis=1)
        ci_plus = meanl + 2 * stdl
        ci_minus = meanl - 2 * stdl

    return (meanl, ci_plus, ci_minus)

示例#2

0

显示文件

文件： OptimalProjection.py 项目： jairoruizsaenz/scattertext

def ripley_poisson_difference(points):
    """
    Return the summed absolute gap between Ripley's K of ``points`` and the
    Poisson reference curve.

    The estimator window is the unit square, so ``points`` is presumably an
    (n, 2) array of coordinates already scaled to [0, 1] -- TODO confirm
    against callers. K is evaluated at 100 radii evenly spaced over
    ``[0, sqrt(2)]`` with ``mode='ripley'``.

    Raises
    ------
    ImportError
        If ``astropy`` cannot be imported.
    """
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError as err:
        # Only a failed import means "not installed"; a bare ``except`` would
        # also mask unrelated errors (and KeyboardInterrupt).
        raise ImportError("Please install astropy") from err

    r = np.linspace(0, np.sqrt(2), 100)
    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)

    return np.sum(np.abs(ripley(points, r, mode='ripley') - ripley.poisson(r)))

示例#3

0

显示文件

文件： CategoryProjectorEvaluator.py 项目： JasonKessler/scattertext

    def evaluate(self, category_projection):
        """
        Score a category projection by its deviation from a Poisson pattern.

        The two displayed dimensions (``x_dim`` and ``y_dim``) of the
        projection are each passed through ``stretch_0_to_1`` (presumably a
        rescale to [0, 1] -- defined elsewhere), Ripley's K is estimated on
        the unit square at 1000 radii in ``[0, self.max_distance]``, and the
        absolute difference to the Poisson curve is integrated over the radii
        with the trapezoidal rule.

        Raises
        ------
        ImportError
            If ``astropy`` cannot be imported.
        """
        assert isinstance(category_projection, CategoryProjection)
        try:
            from astropy.stats import RipleysKEstimator
        except ImportError as err:
            # Narrowed from a bare ``except`` so unrelated failures surface.
            raise ImportError("Please install astropy") from err

        ripley_estimator = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
        proj = category_projection.projection[:, [category_projection.x_dim, category_projection.y_dim]]
        scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
        radii = np.linspace(0, self.max_distance, 1000)
        deviances = np.abs(ripley_estimator(scaled_proj, radii, mode='ripley') - ripley_estimator.poisson(radii))
        # NumPy 2.0 renamed ``trapz`` to ``trapezoid``; prefer the new name
        # when it exists and fall back on older NumPy.
        integrate = getattr(np, "trapezoid", None) or np.trapz
        return integrate(deviances, x=radii)

示例#4

0

显示文件

文件： OptimalProjection.py 项目： yangyang0477/scattertext

def get_optimal_category_projection(
        corpus,
        n_dims=3,
        n_steps=10,
        projector=lambda n_terms, n_dims: CategoryProjector(
            AssociationCompactor(n_terms, scorer=RankDifference),
            projector=PCA(n_dims)),
        verbose=False):
    """
    Pick the term count and dimension pair whose projection is closest to a
    Poisson point pattern, and return that projection.

    ``n_steps`` candidate term counts are spaced evenly on a log2 scale
    between ``corpus.get_num_categories()`` and ``corpus.get_num_terms()``.
    For every candidate and every pair of projection dimensions, the two
    dimensions are passed through ``stretch_0_to_1`` (presumably a rescale to
    [0, 1] -- defined elsewhere) and scored by the summed absolute difference
    between Ripley's K and the Poisson curve at 100 radii in ``[0, sqrt(2)]``.

    Parameters
    ----------
    corpus
        Object exposing ``get_num_categories()`` and ``get_num_terms()``;
        handed to the projector's ``project`` method.
    n_dims : int
        Number of projection dimensions to consider.
    n_steps : int
        Number of candidate term counts.
    projector : callable
        ``(n_terms, n_dims) -> projector`` factory.
    verbose : bool
        Print every candidate's score and the running best.

    Returns
    -------
    The result of ``best_projector.project(corpus, best_x, best_y)``.

    Raises
    ------
    ImportError
        If ``astropy`` cannot be imported.
    ValueError
        If no dimension pair was evaluated (e.g. ``n_dims < 2``).
    """
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError as err:
        # Narrowed from a bare ``except`` so unrelated failures surface.
        raise ImportError("Please install astropy") from err

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    # The radii and the Poisson reference do not depend on the candidate.
    r = np.linspace(0, np.sqrt(2), 100)
    poisson_k = ripley.poisson(r)

    min_dev = None
    best_k = None
    best_x = None
    best_y = None
    best_projector = None
    for k in np.power(
            2,
            np.linspace(
                np.log(corpus.get_num_categories()) / np.log(2),
                np.log(corpus.get_num_terms()) / np.log(2),
                n_steps)).astype(int):
        category_projector = projector(k, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array(
                    [stretch_0_to_1(proj.T[0]),
                     stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(
                    np.abs(ripley(scaled_proj, r, mode='ripley') - poisson_k))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_k = k
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print(k, dim_1, dim_2, dev, best_k, best_x, best_y,
                          min_dev)
    if verbose:
        print(best_k, best_x, best_y)
    if best_projector is None:
        raise ValueError(
            "no pair of projection dimensions was evaluated "
            "(need n_dims >= 2 and n_steps >= 1)")
    return best_projector.project(corpus, best_x, best_y)

示例#5

0

显示文件

文件： stationsim_validation_with_statmodels.py 项目： Guonalll/dust

    def ripleysKE(self, data):
        """
        Evaluate Ripley's K for every point set in ``data``.

        The estimator window is the ``self.width`` by ``self.height``
        rectangle with its lower-left corner at the origin. Ten radii are
        spaced evenly between 0 and ``sqrt(area / 2)`` and each item of
        ``data`` is passed to the estimator with ``mode='none'``. A progress
        percentage is printed to stdout after every item.

        Returns
        -------
        rkes : list
            One estimator result per item of ``data``.
        r : numpy.ndarray
            The radii at which K was evaluated.
        """
        region_w, region_h = self.width, self.height
        region_area = region_w * region_h
        estimator = RipleysKEstimator(area=region_area,
                                      x_min=0,
                                      y_min=0,
                                      x_max=region_w,
                                      y_max=region_h)
        radii = np.linspace(0, np.sqrt(region_area / 2), 10)

        estimates = []
        for done, points in enumerate(data, start=1):
            estimates.append(estimator(points, radii=radii, mode='none'))
            # Carriage return keeps the progress read-out on a single line.
            percent = done / len(data) * 100
            print("\r" + str(percent) + "% complete                    ",
                  end="")
        print("")
        return estimates, radii

示例#6

0

显示文件

文件： OptimalProjection.py 项目： yangyang0477/scattertext

def get_optimal_category_projection_by_rank(
        corpus,
        n_dims=2,
        n_steps=20,
        projector=lambda rank, n_dims: CategoryProjector(
            AssociationCompactorByRank(rank), projector=PCA(n_dims)),
        verbose=False):
    """
    Pick the rank cut-off and dimension pair whose projection is closest to a
    Poisson point pattern, and return that projection.

    ``n_steps`` candidate ranks are spaced evenly between 1 and
    ``TermCategoryRanker().get_max_rank(corpus)``. For every candidate and
    every pair of projection dimensions, the two dimensions are passed
    through ``stretch_0_to_1`` (presumably a rescale to [0, 1] -- defined
    elsewhere) and scored by the summed absolute difference between Ripley's
    K and the Poisson curve at 100 radii in ``[0, sqrt(2)]``.

    Parameters
    ----------
    corpus
        Handed to ``TermCategoryRanker().get_max_rank`` and to the
        projector's ``project`` method.
    n_dims : int
        Number of projection dimensions to consider.
    n_steps : int
        Number of candidate ranks.
    projector : callable
        ``(rank, n_dims) -> projector`` factory.
    verbose : bool
        Print every candidate's score and the running best.

    Returns
    -------
    The result of ``best_projector.project(corpus, best_x, best_y)``.

    Raises
    ------
    ImportError
        If ``astropy`` cannot be imported.
    ValueError
        If no dimension pair was evaluated (e.g. ``n_dims < 2``).
    """
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError as err:
        # Narrowed from a bare ``except`` so unrelated failures surface.
        raise ImportError("Please install astropy") from err

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    # The radii and the Poisson reference do not depend on the candidate.
    r = np.linspace(0, np.sqrt(2), 100)
    poisson_k = ripley.poisson(r)

    min_dev = None
    best_rank = None
    best_x = None
    best_y = None
    best_projector = None
    for rank in np.linspace(1,
                            TermCategoryRanker().get_max_rank(corpus),
                            n_steps):
        category_projector = projector(rank, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array(
                    [stretch_0_to_1(proj.T[0]),
                     stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(
                    np.abs(ripley(scaled_proj, r, mode='ripley') - poisson_k))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_rank = rank
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print('rank', rank, 'dims', dim_1, dim_2, 'K', dev)
                    print('     best rank', best_rank, 'dims', best_x, best_y,
                          'K', min_dev)
    if verbose:
        print(best_rank, best_x, best_y)
    if best_projector is None:
        raise ValueError(
            "no pair of projection dimensions was evaluated "
            "(need n_dims >= 2 and n_steps >= 1)")
    return best_projector.project(corpus, best_x, best_y)

示例#7

0

显示文件

def ripley_function(locs_path, max_dist=200, method='ripley'):
    """
    Wrapper around astropy RipleysKEstimator class.

    Loads the points from the store at ``locs_path``, uses their bounding box
    as the estimator window, and evaluates Ripley's H function at 100 radii
    evenly spaced over ``[0, max_dist]``.

    Parameters
    ----------
    locs_path
        Passed to ``ClustersHDFStore`` to open the localisation store.
    max_dist : float
        Largest radius at which the H function is evaluated.
    method : str
        Passed as ``mode`` to ``RipleysKEstimator.Hfunction``.

    Returns
    -------
    The H function values returned by the estimator, one per radius.
    """
    with ClustersHDFStore(locs_path) as ct:
        _, xy = ct.get_points_for_clustering()
        x_min = np.min(xy[:, 0])
        x_max = np.max(xy[:, 0])
        y_min = np.min(xy[:, 1])
        # Upper edge of the bounding box must be the maximum; using the
        # minimum here collapses the box to zero height and zero area.
        y_max = np.max(xy[:, 1])
        area = (x_max - x_min) * (y_max - y_min)
        kest = RipleysKEstimator(area,
                                 x_min=x_min,
                                 x_max=x_max,
                                 y_min=y_min,
                                 y_max=y_max)
        radii = np.linspace(0, max_dist, 100)
        rip = kest.Hfunction(xy, radii, mode=method)
    return rip

示例#8

0

显示文件

文件： OptimalProjection.py 项目： JasonKessler/scattertext

def get_optimal_category_projection_by_rank(
        corpus,
        n_dims=2,
        n_steps=20,
        projector=lambda rank, n_dims: CategoryProjector(AssociationCompactorByRank(rank),
                                                         projector=PCA(n_dims)),
        verbose=False
):
    """
    Search rank cut-offs and dimension pairs for the projection that deviates
    least from a Poisson point pattern.

    Candidate ranks are ``n_steps`` evenly spaced values from 1 to
    ``TermCategoryRanker().get_max_rank(corpus)``. Each candidate projection
    is scored, for every pair of its dimensions, by the summed absolute
    difference between Ripley's K (``mode='ripley'``, unit-square window) and
    the Poisson curve at 100 radii in ``[0, sqrt(2)]``. The coordinates are
    first passed through ``stretch_0_to_1`` (presumably a rescale to [0, 1]
    -- defined elsewhere).

    Parameters
    ----------
    corpus
        Handed to ``TermCategoryRanker().get_max_rank`` and to the
        projector's ``project`` method.
    n_dims : int
        Number of projection dimensions to consider.
    n_steps : int
        Number of candidate ranks.
    projector : callable
        ``(rank, n_dims) -> projector`` factory.
    verbose : bool
        Print every candidate's score and the running best.

    Returns
    -------
    The result of ``best_projector.project(corpus, best_x, best_y)``.

    Raises
    ------
    ImportError
        If ``astropy`` cannot be imported.
    ValueError
        If no dimension pair was evaluated (e.g. ``n_dims < 2``).
    """
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError as err:
        # Narrowed from a bare ``except`` so unrelated failures surface.
        raise ImportError("Please install astropy") from err

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    # Loop-invariant: same radii and Poisson reference for every candidate.
    r = np.linspace(0, np.sqrt(2), 100)
    poisson_k = ripley.poisson(r)

    min_dev = None
    best_rank = None
    best_x = None
    best_y = None
    best_projector = None
    for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps):
        category_projector = projector(rank, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - poisson_k))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_rank = rank
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print('rank', rank, 'dims', dim_1, dim_2, 'K', dev)
                    print('     best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev)
    if verbose:
        print(best_rank, best_x, best_y)
    if best_projector is None:
        raise ValueError(
            "no pair of projection dimensions was evaluated "
            "(need n_dims >= 2 and n_steps >= 1)")
    return best_projector.project(corpus, best_x, best_y)

示例#9

0

显示文件

文件： OptimalProjection.py 项目： JasonKessler/scattertext

def get_optimal_category_projection(
        corpus,
        n_dims=3,
        n_steps=10,
        projector=lambda n_terms, n_dims: CategoryProjector(AssociationCompactor(n_terms, scorer=RankDifference),
                                                            projector=PCA(n_dims)),
        verbose=False
):
    """
    Search term counts and dimension pairs for the projection that deviates
    least from a Poisson point pattern.

    Candidate term counts are ``n_steps`` values spaced evenly on a log2
    scale from ``corpus.get_num_categories()`` to ``corpus.get_num_terms()``,
    truncated to integers. Each candidate projection is scored, for every
    pair of its dimensions, by the summed absolute difference between
    Ripley's K (``mode='ripley'``, unit-square window) and the Poisson curve
    at 100 radii in ``[0, sqrt(2)]``. The coordinates are first passed
    through ``stretch_0_to_1`` (presumably a rescale to [0, 1] -- defined
    elsewhere).

    Parameters
    ----------
    corpus
        Object exposing ``get_num_categories()`` and ``get_num_terms()``;
        handed to the projector's ``project`` method.
    n_dims : int
        Number of projection dimensions to consider.
    n_steps : int
        Number of candidate term counts.
    projector : callable
        ``(n_terms, n_dims) -> projector`` factory.
    verbose : bool
        Print every candidate's score and the running best.

    Returns
    -------
    The result of ``best_projector.project(corpus, best_x, best_y)``.

    Raises
    ------
    ImportError
        If ``astropy`` cannot be imported.
    ValueError
        If no dimension pair was evaluated (e.g. ``n_dims < 2``).
    """
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError as err:
        # Narrowed from a bare ``except`` so unrelated failures surface.
        raise ImportError("Please install astropy") from err

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    # Loop-invariant: same radii and Poisson reference for every candidate.
    r = np.linspace(0, np.sqrt(2), 100)
    poisson_k = ripley.poisson(r)

    min_dev = None
    best_k = None
    best_x = None
    best_y = None
    best_projector = None
    for k in np.power(2, np.linspace(np.log(corpus.get_num_categories()) / np.log(2),
                                     np.log(corpus.get_num_terms()) / np.log(2), n_steps)).astype(int):
        category_projector = projector(k, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - poisson_k))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_k = k
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print(k, dim_1, dim_2, dev, best_k, best_x, best_y, min_dev)
    if verbose:
        print(best_k, best_x, best_y)
    if best_projector is None:
        raise ValueError(
            "no pair of projection dimensions was evaluated "
            "(need n_dims >= 2 and n_steps >= 1)")
    return best_projector.project(corpus, best_x, best_y)

示例#10

0

显示文件

文件： CategoryProjectorEvaluator.py 项目： pdkyll/scattertext

    def evaluate(self, category_projection):
        """
        Return the integrated absolute deviation of a projection's Ripley's K
        from the Poisson reference curve.

        Uses the projection's ``x_dim`` and ``y_dim`` columns, each passed
        through ``stretch_0_to_1`` (presumably a rescale to [0, 1] -- defined
        elsewhere). K is estimated on the unit square with ``mode='ripley'``
        at 1000 radii in ``[0, self.max_distance]`` and the absolute gap to
        the Poisson curve is integrated with the trapezoidal rule.

        Raises
        ------
        ImportError
            If ``astropy`` cannot be imported.
        """
        assert isinstance(category_projection, CategoryProjection)
        try:
            from astropy.stats import RipleysKEstimator
        except ImportError as err:
            # Narrowed from a bare ``except`` so unrelated failures surface.
            raise ImportError("Please install astropy") from err

        ripley_estimator = RipleysKEstimator(area=1.,
                                             x_max=1.,
                                             y_max=1.,
                                             x_min=0.,
                                             y_min=0.)
        proj = category_projection.projection[:, [
            category_projection.x_dim, category_projection.y_dim
        ]]
        scaled_proj = np.array(
            [stretch_0_to_1(proj.T[0]),
             stretch_0_to_1(proj.T[1])]).T
        radii = np.linspace(0, self.max_distance, 1000)
        deviances = np.abs(
            ripley_estimator(scaled_proj, radii, mode='ripley') -
            ripley_estimator.poisson(radii))
        # NumPy 2.0 renamed ``trapz`` to ``trapezoid``; prefer the new name
        # when it exists and fall back on older NumPy.
        integrate = getattr(np, "trapezoid", None) or np.trapz
        return integrate(deviances, x=radii)

示例#11

0

显示文件

文件： stationsim_validation.py 项目： Urban-Analytics/dust

    def ripleysKE(self, collisions, width, height):
        """Compute a Ripley's K (RK) curve for each set of StationSim collisions.

        For more info on RK see:
        https://docs.astropy.org/en/stable/stats/ripley.html
        https://wiki.landscapetoolbox.org/doku.php/spatial_analysis_methods:ripley_s_k_and_pair_correlation_function

        Parameters
        ----------
        collisions : list
            list of model `collisions`; each entry is handed to the
            estimator as one point set
        width, height : float
            `width` and `height` of the stationsim model

        Returns
        -------
        rkes : list
            one Ripley's K estimate per entry of `collisions`
        rs : list
            the radii array, repeated once per entry of `collisions`
        """
        # The estimator window is the whole StationSim rectangle, anchored
        # at the origin.
        region_area = width * height
        estimator = RipleysKEstimator(area=region_area,
                                      x_min=0,
                                      y_min=0,
                                      x_max=width,
                                      y_max=height)

        # Ten radii between 0 and the root of half the total area. More radii
        # would give a higher resolution but increase the computation time.
        # See the landscapetoolbox page linked in the docstring for details.
        radii = np.linspace(0, np.sqrt(region_area / 2), 10)

        # The same radii repeated per model, for the data frame built later.
        rs = [radii] * len(collisions)

        rkes = []
        for done, collision in enumerate(collisions, start=1):
            # The mode argument selects how the common edge-effect problem is
            # handled. The original author notes the choice does not seem to
            # matter much here and recommends 'none' or 'translation'.
            rkes.append(estimator(collision, radii=radii, mode='translation'))

            # This can take a long time, so show progress on a single line.
            percent = done / len(collisions) * 100
            print("\r" + str(percent) + "% complete                  ",
                  end="")

        return rkes, rs

示例#12

0

显示文件

文件： _ppatterns.py 项目： sophial05/squidpy

def ripley_k(
    adata: AnnData,
    cluster_key: str,
    spatial_key: str = Key.obsm.spatial,
    mode: str = "ripley",
    support: int = 100,
    copy: bool = False,
) -> Optional[pd.DataFrame]:
    r"""
    Calculate `Ripley's K <https://en.wikipedia.org/wiki/Spatial_descriptive_statistics#Ripley's_K_and_L_functions>`_
    statistics for each cluster in the tissue coordinates.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    %(spatial_key)s
    mode
        Keyword which indicates the method for edge effects correction.
        See :class:`astropy.stats.RipleysKEstimator` for valid options.
    support
        Number of points where Ripley's K is evaluated between a fixed radii with :math:`min=0`,
        :math:`max=\sqrt{{area \over 2}}`.
    %(copy)s

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys:

        - `'ripley_k'` - the Ripley's K statistic.
        - `'distance'` - set of distances where the estimator was evaluated.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['ripley_k_{{cluster_key}}']`` - the above mentioned dataframe.
    """  # noqa: D205, D400
    try:
        # from pointpats import ripley, hull
        from astropy.stats import RipleysKEstimator
    except ImportError:
        raise ImportError(
            "Please install `astropy` as `pip install astropy`.") from None

    _assert_spatial_basis(adata, key=spatial_key)
    coord = adata.obsm[spatial_key]

    # Study window: bounding box of the coordinates, truncated to integers.
    # NOTE(review): int() truncates toward zero, so with non-integer
    # coordinates the extreme points can lie outside the window -- confirm
    # this is intended.
    xs, ys = coord[:, 0], coord[:, 1]
    x_min, x_max = int(xs.min()), int(xs.max())
    y_min, y_max = int(ys.min()), int(ys.max())
    area = int((x_max - x_min) * (y_max - y_min))
    r = np.linspace(0, (area / 2)**0.5, support)

    Kest = RipleysKEstimator(area=area,
                             x_min=x_min,
                             y_min=y_min,
                             x_max=x_max,
                             y_max=y_max)

    # TODO: how long does this take (i.e. does it make sense to measure the elapse time?)
    logg.info("Calculating Ripley's K")
    labels = adata.obs[cluster_key]
    per_cluster = []
    for c in labels.unique():
        in_cluster = labels.values == c
        k_hat = Kest(data=coord[in_cluster, :], radii=r, mode=mode)
        frame = pd.DataFrame(np.stack([k_hat, r], axis=1),
                             columns=["ripley_k", "distance"])
        frame[cluster_key] = c
        per_cluster.append(frame)

    df = pd.concat(per_cluster, axis=0)
    # Keep only rows below the smallest per-cluster maximum of K.
    minmax_dist = df.groupby(cluster_key)["ripley_k"].max().min()
    df = df[df["ripley_k"] < minmax_dist].copy()

    if not copy:
        # NOTE(review): the frame is stored directly and again through
        # _save_data; this looks redundant if both resolve to the same key
        # -- confirm.
        adata.uns[f"ripley_k_{cluster_key}"] = df
        _save_data(adata, attr="uns", key=Key.uns.ripley_k(cluster_key), data=df)
        return None

    return df

示例#13

0

显示文件

文件： Spat_Feature.py 项目： husun0822/spat_feature_flare_pred

# SHARP keyword attributes copied from every frame into its feature row.
_SHARP_KEYS = (
    'USFLUX', 'MEANGAM', 'MEANGBT', 'MEANGBH', 'MEANGBZ', 'MEANJZD',
    'TOTUSJZ', 'MEANALP', 'TOTUSJH', 'SAVNCPP', 'MEANPOT', 'MEANSHR',
)
# |Br| thresholds at which points are sampled and features computed.
_THRESHOLDS = (200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000)
_N_RIPLEY = 100  # radii at which Ripley's K is evaluated per threshold
_N_VARIO = 2  # variogram parameters kept: effective range and sill


def _threshold_features(br, PIL, thres, rho):
    """Return ``(values, n_points)`` for one threshold of one frame.

    ``values`` always has ``_N_RIPLEY + _N_VARIO`` entries; ``-1`` fills
    every entry that could not be computed. ``n_points`` is the number of
    sampled points that fall on the PIL mask (0 when ``ptsgen`` returned an
    int instead of a point collection).
    """
    n_feature = _N_RIPLEY + _N_VARIO
    coordinate = ptsgen(br, PIL, threshold=thres, rho=rho)
    # An int return (rather than a collection of points) is treated as
    # "nothing sampled".
    if type(coordinate) == int:
        return [-1] * n_feature, 0

    # Keep only the sampled points that lie on the PIL mask.
    final_coordinate = []
    z = []
    for point in coordinate:
        x = point[0]
        y = point[1]
        if PIL[x, y] != 0:
            final_coordinate.append([x, y])
            z.append(br[x, y])
    n_points = len(final_coordinate)

    if np.sum(PIL) == 0 or n_points <= 10:
        return [-1] * n_feature, n_points

    points = np.array(final_coordinate)
    z = np.array(z) / 5000
    # Ripley's K is computed on jittered coordinates, while the variogram
    # below uses the original (un-jittered) ones.
    jittered = points + np.random.normal(loc=0, scale=1, size=points.shape)
    pils = np.nonzero(PIL)
    # Built-in int(): the ``np.int`` alias was removed in NumPy 1.24.
    Kest = RipleysKEstimator(area=int(np.sum(PIL != 0)),
                             x_max=int(pils[0].max()),
                             x_min=int(pils[0].min()),
                             y_max=int(pils[1].max()),
                             y_min=int(pils[1].min()))
    r = np.linspace(0, 100, _N_RIPLEY)
    values = list(Kest(data=jittered, radii=r, mode='ripley'))

    try:
        V = Variogram(coordinates=points, values=z, model="exponential")
        param = V.describe()
        vario = [param['effective_range'], param['sill']]
    except RuntimeError:
        vario = [-1] * _N_VARIO
    return values + vario, n_points


def spat_feature_gen(flare_type, preceding_hour, seed=1):
    """Build the spatial feature table for one flare type / lead time and
    write it to CSV.

    Reads ``<flare_type>flare_data/<flare_type>_flare_<preceding_hour>h.hdf5``
    and ``GOES_dataset.csv``. For every frame of every HARP group it builds
    one row: flare intensity and class, HARP, frame time, PIL statistics,
    frame size, the SHARP attributes, then for each threshold 100 Ripley's K
    values plus 2 variogram parameters (``-1`` where unavailable), and
    finally the number of points used per threshold. The table is written to
    ``./spat_feature/<flare_type><preceding_hour>_<seed>.csv``.

    Parameters
    ----------
    flare_type : str
        Prefix used to build the input and output file names.
    preceding_hour
        Lead time, inserted into the file names via ``str()``.
    seed : int
        Seed for Python's ``random`` module; also part of the output name.
    """
    # NOTE(review): only Python's ``random`` is seeded here, while the jitter
    # is drawn from ``np.random`` -- confirm whether NumPy should be seeded
    # too for reproducible output.
    random.seed(seed)

    filename = flare_type + "flare_data/" + flare_type + "_flare_" + str(
        preceding_hour) + "h.hdf5"

    allfeature = []
    # Context manager so the HDF5 file is closed even if a frame fails.
    with h5py.File(filename, "r") as f:
        du = pd.read_csv("GOES_dataset.csv")
        du.loc[:, 'peak_time'] = pd.to_datetime(du['peak_time'])
        du['intensity'] = du['class'].map(get_intensity)
        intensity = du['intensity'].values

        # Peak times formatted like the frame keys, plus the class letter of
        # each flare, aligned by position with ``intensity``.
        timeline = []
        flareclass = []
        for _, row in du.iterrows():
            t = np64_datetime(row['peak_time'])
            timeline.append(datetime.strftime(t, "%Y.%m.%d_%H:%M:%S"))
            flareclass.append(row['class'][0])

        for HARP in list(f.keys()):
            v = f[HARP]
            for frametime in list(v.keys()):
                flare_obj = v[frametime]
                br = np.array(flare_obj['Br'])

                # clip br to [-5000, 5000]
                br[br > 5000] = 5000
                br[br < -5000] = -5000

                PIL = np.array(flare_obj['PIL_MASK'])
                width = br.shape[1]
                height = br.shape[0]
                rho = 500 / (width * height)  # sample at most 500 points
                # The frame key carries the timestamp after a 6-char prefix.
                index = timeline.index(frametime[6:])

                feature = [
                    intensity[index], flareclass[index], HARP, frametime,
                    int(np.sum(PIL != 0)),
                    np.sum(PIL), width, height
                ]
                feature.extend(flare_obj.attrs[key] for key in _SHARP_KEYS)

                N_coor = []
                for thres in _THRESHOLDS:
                    values, n_points = _threshold_features(br, PIL, thres, rho)
                    feature.extend(values)
                    N_coor.append(n_points)

                feature.extend(N_coor)
                allfeature.append(feature)

    colname = [
        'intensity', 'class', 'HARP', 'Time', 'NPIL', 'areaPIL', 'width',
        'height'
    ]
    colname.extend("SHARP_" + ch for ch in _SHARP_KEYS)
    for k in range(len(_THRESHOLDS)):
        for i in range(_N_RIPLEY):
            colname.append("Ripley" + str(k) + "_" + str(i + 1))
        for i in range(_N_VARIO):
            colname.append("Vario" + str(k) + "_" + str(i + 1))
    for k in range(len(_THRESHOLDS)):
        colname.append("Npts" + str(k))

    result = pd.DataFrame(data=np.array(allfeature), columns=colname)
    result.to_csv("./spat_feature/" + flare_type + str(preceding_hour) + "_" +
                  str(seed) + ".csv")

示例#14

0

显示文件

文件： qcDriver.py 项目： hubmapconsortium/spatial-transcriptomics-pipeline

def getSpatialDensity(spots, imgsize, steps=10, doMonte=False):
    """
    Estimate Ripley's K for the spots of every (round, channel, z-plane).

    Parameters
    ----------
    spots : dict
        Maps ``(r, ch)`` tuples to objects whose ``spot_attrs.data`` can be
        indexed with ``["x", "y", "z"]``.
    imgsize : sequence
        ``imgsize[0]`` and ``imgsize[1]`` are the x and y extent of the
        estimator window; ``imgsize[2]`` is the number of z planes.
    steps : int
        Number of radii, evenly spaced between 0 and ``sqrt(area / 2)``.
    doMonte : bool
        If true, also attach a ``monteCarloEnvelope`` result to every key.
        An envelope already computed for a point count within 5% of the
        current one is reused.

    Returns
    -------
    dict or tuple
        ``results`` maps ``(r, ch, z)`` to ``(kvals, csr, radii)``. With
        ``doMonte`` the return value is ``(results, monte)``, where ``monte``
        maps the same keys to their envelope.
    """
    allSpots = {}
    for i in range(imgsize[2]):
        for k in spots.keys():
            r, ch = k
            allSpots[(r, ch, i)] = pd.DataFrame(columns=["x", "y"])

    for k, v in spots.items():
        tempSpots = v.spot_attrs.data[["x", "y", "z"]]
        r, ch = k
        for i in range(imgsize[2]):
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent operation.
            allSpots[(r, ch, i)] = pd.concat(
                [allSpots[(r, ch, i)], tempSpots[tempSpots["z"] == i]])

    print("Sorted all relevant spots")

    results = {}
    savedKest = None
    for i in allSpots.keys():
        print("looking at " + str(i) + "\nsize: " + str(allSpots[i].shape))
        allSpots[i].drop_duplicates(inplace=True)
        print("dropped dupes, size is now " + str(allSpots[i].shape))
        allSpots[i].drop(columns="z", inplace=True)
        print("removed z column, size is " + str(allSpots[i].shape))

        ymin = 0
        xmin = 0
        ymax = imgsize[1]
        xmax = imgsize[0]
        area = abs(ymax - ymin) * abs(xmax - xmin)
        Kest = RipleysKEstimator(area=area,
                                 x_max=xmax,
                                 y_max=ymax,
                                 x_min=xmin,
                                 y_min=ymin)
        r = np.linspace(0, ((area) / 2)**0.5, steps)

        print("finding Kest")
        data = allSpots[i][["x", "y"]].to_numpy()
        kvals = Kest(data=data, radii=r, mode="ripley")
        print("found Kest\n")
        csr = Kest.poisson(r)

        if doMonte:
            if savedKest is not None and savedKest.area != Kest.area:
                print(
                    "Note! Area different between Kest, monte estimation may be wrong.\n{} old, {} new"
                    .format(savedKest.area, Kest.area))
            savedKest = Kest

        del Kest
        gc.collect()

        results[i] = (kvals, csr, r)

    savedSims = {}
    savedSimsSearchable = np.array([])
    numsim = 100
    if doMonte:
        monte = {}
        for k in allSpots.keys():
            simSize = allSpots[k].shape[0]
            searchResults = np.where(
                np.logical_and(savedSimsSearchable >= simSize * 0.95,
                               savedSimsSearchable <= simSize * 1.05))
            # np.where on a 1-D array returns a 1-tuple; the hit indices are
            # its only element.
            hits = searchResults[0]
            if hits.size > 0:
                print("Similar simulation found, {} hits near size {}".format(
                    np.shape(searchResults)[1], simSize))

                # In the event of multiple matches in this range, go with the
                # closest. The loop used to run over ``searchResults[1:]``,
                # which is always empty, so the first hit always won.
                closest = savedSimsSearchable[hits[0]]
                for ind in hits[1:]:
                    it = savedSimsSearchable[ind]
                    if abs(it - simSize) < abs(closest - simSize):
                        closest = it
                print("Using sim of size {}".format(closest))
                monte[k] = savedSims[closest]
            else:
                print(
                    "No close simulation saved for {}, running new sim with sample count {}"
                    .format(k, allSpots[k].shape[0]))
                newSim = monteCarloEnvelope(savedKest, r, 0.95, simSize,
                                            numsim)
                monte[k] = newSim
                savedSims[simSize] = newSim
                savedSimsSearchable = np.append(savedSimsSearchable, simSize)

        return results, monte

    return results