Example #1
def vwap(candles: np.ndarray,
         source_type: str = "hlc3",
         anchor: str = "D",
         sequential: bool = False) -> Union[float, np.ndarray]:
    """
    VWAP

    :param candles: np.ndarray
    :param source_type: str - default: "hlc3"
    :param anchor: str - default: "D"
    :param sequential: bool - default: False

    :return: float | np.ndarray
    """
    warmup_candles_num = get_config('env.data.warmup_candles_num', 240)
    if not sequential and len(candles) > warmup_candles_num:
        candles = candles[-warmup_candles_num:]

    source = get_candle_source(candles, source_type=source_type)

    group_idx = candles[:, 0].astype('datetime64[ms]').astype(
        'datetime64[{}]'.format(anchor)).astype('int')
    vwap = aggregate(group_idx, candles[:, 5] * source, func='cumsum')
    vwap /= aggregate(group_idx, candles[:, 5], func='cumsum')

    if sequential:
        return vwap
    else:
        return None if np.isnan(vwap[-1]) else vwap[-1]
Example #2
def vwap(candles: np.ndarray,
         source_type: str = "hlc3",
         anchor: str = "D",
         sequential: bool = False) -> Union[float, np.ndarray]:
    """
    VWAP

    :param candles: np.ndarray
    :param source_type: str - default: "hlc3"
    :param anchor: str - default: "D"
    :param sequential: bool - default: False

    :return: float | np.ndarray
    """
    candles = slice_candles(candles, sequential)

    source = get_candle_source(candles, source_type=source_type)

    group_idx = candles[:, 0].astype('datetime64[ms]').astype(
        f'datetime64[{anchor}]').astype('int')
    vwap_values = aggregate(group_idx, candles[:, 5] * source, func='cumsum')
    vwap_values /= aggregate(group_idx, candles[:, 5], func='cumsum')

    if sequential:
        return vwap_values
    else:
        return None if np.isnan(vwap_values[-1]) else vwap_values[-1]
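Both versions rely on the same anchored grouping trick: truncating the millisecond timestamps to the anchor unit yields one integer label per session, and func='cumsum' then accumulates price*volume and volume within each session. A minimal sketch with synthetic arrays (hourly timestamps and made-up prices/volumes standing in for the candle columns):

import numpy as np
from numpy_groupies import aggregate

# Two days of hourly timestamps in milliseconds, plus synthetic price/volume.
ts = np.arange(48) * 3_600_000
price = 100.0 + np.cumsum(np.random.randn(48))   # stand-in for the hlc3 source
volume = 1.0 + np.random.rand(48)

# Truncate ms timestamps to days ("D" anchor) -> one integer label per session.
group_idx = ts.astype('datetime64[ms]').astype('datetime64[D]').astype(int)

# Per-session cumulative sums give the running VWAP, which resets each day.
vwap = aggregate(group_idx, volume * price, func='cumsum')
vwap /= aggregate(group_idx, volume, func='cumsum')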
Example #3
def plot_binned_ch(x0, ch, n_bin=9, **kw):
    ix, x = np2.quantilize(x0, n_quantile=n_bin, return_summary=True)
    p = npg.aggregate(ix, ch, func='mean')
    sd = npg.aggregate(ix, ch, func='std')
    n = npg.aggregate(ix, 1, func='sum')
    se = sd / np.sqrt(n)

    h = plt.errorbar(x, p, yerr=se, **kw)

    return h, x, p, se
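The same binned-statistics pattern in isolation, using np.digitize for the quantile bins instead of the np2.quantilize helper above: one aggregate call each for the per-bin mean, standard deviation, and count gives the standard error of the mean.

import numpy as np
import numpy_groupies as npg

x0 = np.random.randn(1000)
ch = (x0 + np.random.randn(1000) > 0).astype(float)

# Assign each point to one of 9 quantile bins (labels 0..8).
edges = np.quantile(x0, np.linspace(0, 1, 10)[1:-1])
ix = np.digitize(x0, edges)

p = npg.aggregate(ix, ch, func='mean')   # per-bin mean choice
sd = npg.aggregate(ix, ch, func='std')   # per-bin standard deviation
n = npg.aggregate(ix, 1, func='sum')     # per-bin count
se = sd / np.sqrt(n)                     # standard error of the mean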
Example #4
def createDatasetWeek(dataset,
                      look_back=60,
                      look_ahead=3,
                      sample_per=60,
                      sample=.2):
    idx, dataX, dataY = [], [], []

    #Static re-sampling index
    ix = np.floor(np.linspace(0, look_back,
                              look_back * sample_per + 1)[0:-1]).astype('int')

    #For each bar, extract re-sampled history and target
    for i in range(dataset.shape[0] - (look_back + look_ahead) * sample_per -
                   1):

        #Print Progress
        if i % 100000 == 0:
            print(i, 'rows processed')

        if np.isnan(dataset[(i + look_back * sample_per) - 1]):
            continue

        #Randomly pick a sample
        if np.random.random() > sample:
            continue

        #Resample price history
        p = dataset[i:(i + look_back * sample_per)]
        h = npg.aggregate(ix, p, 'nanmax', fill_value=np.nan)
        l = npg.aggregate(ix, p, 'nanmin', fill_value=np.nan)
        a = np.array([h, l])
        a = (a - p[-1]) / (a.max() - a.min())
        a = a.transpose()
        a = a.reshape((1, look_back, 2))

        #Determine target result - Skip if no signal at 20s
        try:
            #   Find next non-null value
            st = i + look_back * sample_per + np.where(
                ~np.isnan(dataset[i + look_back * sample_per:][:20]))[0][0]
            #   Find last non-null value until period end
            ed = i + look_back * sample_per + np.where(~np.isnan(
                dataset[i + look_back * sample_per:i + look_back * sample_per +
                        look_ahead * sample_per]))[0][-1]
        except:
            a = np.nan

        if not np.isnan(a).any():
            dataX.append(a[0])
            dataY.append(dataset[st] < dataset[ed])
            idx.append(i)

    return np.array(idx), np.array(dataX), np.array(dataY)
Example #5
def strategy():
    if np.isnan(Bprice).any():
        return False

    #Calculate h,l
    ix = np.round((Bepoch[-1] - Bepoch) / 60).astype('int')
    if np.diff(npg.aggregate(ix, ix, 'max')[[-HistDepth, -1]])[0] != HistDepth - 1:
        print('Incomplete time series')
        return 0
    h = npg.aggregate(-ix + max(ix), Bprice, 'max', fill_value=np.nan)[-HistDepth:]
    l = npg.aggregate(-ix + max(ix), Bprice, 'min', fill_value=np.nan)[-HistDepth:]
    a = np.array([h[-HistDepth:], l[-HistDepth:]])
    a = (a - Bprice[-1]) / (a.max() - a.min())

    return (a[:, 0] > a[:, 1]).all()
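The bar-building idiom used here (and again in Example #19): rounding elapsed seconds to minutes gives one group label per tick, flipping the label so index 0 is the oldest bar, and 'max'/'min' per group yield one high/low per minute. A sketch with a synthetic tick stream in place of the global Bepoch/Bprice:

import numpy as np
import numpy_groupies as npg

# Synthetic tick stream: epoch seconds and prices (stand-ins for Bepoch/Bprice).
Bepoch = np.arange(0, 300, 7)                             # a tick every 7 s over ~5 min
Bprice = 100.0 + np.cumsum(np.random.randn(Bepoch.size))

# Minutes back from the latest tick, flipped so label 0 is the oldest minute.
ix = np.round((Bepoch[-1] - Bepoch) / 60).astype(int)
h = npg.aggregate(-ix + max(ix), Bprice, 'max', fill_value=np.nan)  # per-minute high
l = npg.aggregate(-ix + max(ix), Bprice, 'min', fill_value=np.nan)  # per-minute low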
Example #6
    def geneCount(self, spots):
        '''
        Produces a matrix numCells-by-numGenes where the element at position (c, g) keeps the expected
        number of gene g in cell c.
        :param spots: spot data carrying gene ids, neighbouring cells, and cell probabilities
        :return: xr.DataArray of expected counts with dims (cell_id, gene_name)
        '''
        start = time.time()
        nC = self.yx_coords.shape[0] + 1
        nG = spots.gene_panel.shape[0]
        # cell_id = self.cell_id
        # _id = np.append(cell_id, cell_id.max()+1)
        _id = self.ds.index.tolist()
        nN = spots.call.neighbors.shape[1]
        CellGeneCount = np.zeros([nC, nG])

        name = spots.gene_panel.index.values
        ispot = spots.data.gene_id.values
        for n in range(nN - 1):
            c = spots.call.neighbors.loc[:, n].values
            # c = spots.neighboring_cells['id'].sel(neighbor=n).values
            group_idx = np.vstack((c[None, :], ispot[None, :]))
            a = spots.call.cell_prob.loc[:, n]
            accumarray = npg.aggregate(group_idx, a, func="sum", size=(nC, nG))
            CellGeneCount = CellGeneCount + accumarray
        end = time.time()
        print('time in geneCount: ', end - start)
        CellGeneCount = xr.DataArray(CellGeneCount,
                                     coords=[_id, name],
                                     dims=['cell_id', 'gene_name'])
        # self.CellGeneCount = CellGeneCount
        return CellGeneCount
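A stripped-down version of the accumarray call above: a two-row group_idx maps every spot to a (cell, gene) pair, and size=(nC, nG) fixes the output shape so cells or genes with no spots still get a row or column of zeros.

import numpy as np
import numpy_groupies as npg

nC, nG = 4, 3
cell_of_spot = np.array([0, 0, 1, 2, 2, 3])    # hypothetical cell assignment per spot
gene_of_spot = np.array([0, 2, 1, 0, 0, 2])    # hypothetical gene id per spot
prob = np.array([0.9, 0.5, 1.0, 0.3, 0.7, 0.8])

# Rows index cells, columns index genes; probabilities are summed per (cell, gene).
group_idx = np.vstack((cell_of_spot, gene_of_spot))
counts = npg.aggregate(group_idx, prob, func='sum', size=(nC, nG))
# counts.shape == (4, 3)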
Example #7
def group(array: np.ndarray,
          groupby_cols: list,
          compute_functions: list,
          calcs_cols: list,
          display=True,
          length=None) -> np.ndarray:
    """
    Group the array according to a unique mapping over multiple columns (groupby_cols), applying various calculations (compute_functions)
    over selected columns (calcs_cols).

    :param array: np.ndarray, input array to be grouped.
    :param groupby_cols: list, columns used to do the grouping.
    :param compute_functions: list, names of the aggregation functions to apply.
    :param calcs_cols: list, columns over which the computations will be done.
    :param display: bool, whether or not to display a printed HTML data frame.
    :param length: int, how many rows of the displayed HTML table to print.
    :return group_array: np.ndarray, grouped array.
    """

    args_dict = {}
    for a in calcs_cols:
        for f in compute_functions:
            args_dict[a + "_" + f] = npg.aggregate(
                np.unique(array[groupby_cols], return_inverse=True)[1],
                array[a], f)

    struct_gb = rfn.unstructured_to_structured(np.c_[list(
        args_dict.values())].T,
                                               names=list(args_dict.keys()))
    grouped = np.unique(array[groupby_cols], return_inverse=True)[0]
    group_array = rfn.merge_arrays([grouped, struct_gb], flatten=True)
    if display:
        table(group_array, length)
    return group_array
Example #8
def actionLikelihoods(data,
                      policies,
                      aggregateStates=True,
                      logIn=False,
                      logOut=False):
    """
    Computes the action likelihoods of a demonstration data set for a given set of stochastic policies.

    :param data: [D x 2] array containing D state-action pairs
    :param policies: [S x A x P] array containing P stochastic policies
    :param aggregateStates: flag to indicate if the likelihoods should be computed per demonstration pair or if they
            should be aggregated per state
    :param logIn: flag to indicate if the policies are provided in the log domain
    :param logOut: flag to indicate if the result should be returned in the log domain

    :return: [D x P] or [S x P] array (depending on the aggregateStates flag) containing the action likelihoods
    """
    # transform policies to log domain
    logGoalPolicies = policies if logIn else np.log(policies)

    # evaluate likelihoods for each demonstration pair
    L = logGoalPolicies[data[:, 0], data[:, 1], :]

    # if desired, aggregate all action likelihoods per state
    if aggregateStates:
        L = aggregate(data[:, 0], L, axis=0, size=policies.shape[0])

    # convert back to linear domain
    if not logOut:
        L = np.exp(L)

    return L
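The per-state aggregation above uses aggregate's axis keyword with the default func='sum': it collapses the first axis of the [D x P] log-likelihood matrix group-wise, leaving one row per state (and size guarantees a row even for states that never appear). A small sketch with made-up values:

import numpy as np
from numpy_groupies import aggregate

states = np.array([0, 0, 1, 2, 2])     # state of each of D = 5 demonstration pairs
logL = np.log(np.random.rand(5, 4))    # hypothetical [D x P] log-likelihoods

# Sum the log-likelihoods of all pairs sharing a state -> [S x P]
perState = aggregate(states, logL, axis=0, size=3)
# perState.shape == (3, 4)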
Example #9
    def geneCount_upd(self):
        """
        Produces a matrix numCells-by-numGenes where the element at position (c, g) keeps the expected
        counts of gene g in cell c.
        """
        # make an array nS-by-nN and fill it with the spots id
        gene_ids = np.tile(self.spots.gene_id, (self.nN, 1)).T

        # flatten it
        gene_ids = gene_ids.ravel()

        # make corresponding arrays for cell_id and probs
        cell_ids = self.spots.parent_cell_id.ravel()
        probs = self.spots.parent_cell_prob.ravel()

        # make the array to be used as index in the group-by operation
        group_idx = np.vstack((cell_ids, gene_ids))

        # For each cell aggregate the number of spots from the same gene.
        # It will produce an array of size nC-by-nG where the entry at (c,g)
        # is the gene counts of gene g within cell c
        N_cg = npg.aggregate(group_idx, probs, size=(self.nC, self.nG))

        # assert N_cg.sum() == self.spots.data.shape[0], \
        #     "The sum of the background spots and the cell gene counts should be equal to the total number of spots"

        # make output. This part needs to be rewritten
        out = np.zeros([self.nC, self.nG])
        out[1:, :] = N_cg[1:, :]

        # cell at position zero is the background
        self.cells.background_counts = N_cg[0, :]
        # Actual cells are on non-zero positions
        self.cells.geneCount = out
Example #10
def plot_ch_vs_coh_by_dur(ch, coh, dur):
    cohs, i_coh = np.unique(coh, return_inverse=True)
    durs, i_dur = np.unique(dur, return_inverse=True)
    ch_by_coh_dur = npg.aggregate(np.stack([i_dur, i_coh]),
                                  ch.astype(np.double), 'mean')
    return plt2.plotmulti(cohs, ch_by_coh_dur,
                          cmap='coolwarm'), ch_by_coh_dur, cohs, durs
Example #11
def _binned_agg(
    array: np.ndarray,
    indices: np.ndarray,
    num_bins: int,
    *,
    func,
    fill_value,
    dtype,
) -> np.ndarray:
    """NumPy helper function for aggregating over bins."""

    try:
        import numpy_groupies
    except ImportError:
        raise ImportError(
            "This function requires the `numpy_groupies` package to be installed. Please install it with pip or conda."
        )

    mask = np.logical_not(np.isnan(indices))
    int_indices = indices[mask].astype(int)
    shape = array.shape[:-indices.ndim] + (num_bins, )
    result = numpy_groupies.aggregate(
        int_indices,
        array[..., mask],
        func=func,
        size=num_bins,
        fill_value=fill_value,
        dtype=dtype,
        axis=-1,
    )
    return result
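Assuming indices holds (possibly NaN) bin labels aligned with the last axis of array, the helper can be exercised like this; NaN-labelled samples are dropped by the mask before grouping:

import numpy as np

# Two series of six samples; one sample has no bin assigned (NaN label).
array = np.arange(12, dtype=float).reshape(2, 6)
indices = np.array([0, 0, 1, 2, np.nan, 2])

out = _binned_agg(array, indices, num_bins=3,
                  func='mean', fill_value=np.nan, dtype=float)
# out.shape == (2, 3): per-bin means along the last axis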
Example #12
    def hist_ch_rt(
            self, ch, rt,
            to_plot=True,
            normalize='density',
    ):
        """

        @param ch: ch[trial]
        @param rt: rt[trial]
        @param n_ch:
        @param nt:
        @return:
        """
        n = npg.aggregate(
            np2.cat([ch, np.round(rt / self.dt).astype('long')]),
            1., 'sum', [self.n_ch, self.nt])

        if normalize == 'density':
            n = n / np.sum(n) / self.dt
        elif normalize == 'None':
            pass
        else:
            raise ValueError('Unsupported normalize=%s' % normalize)

        if to_plot:
            h = plt.plot(self.t, n.T)
        else:
            h = None
        return n, h
Example #13
def Overlapc(frames, Nx, Ny, mapid):  #check
    # overlap frames onto an image
    time0 = timer()
    accum = np.reshape(numpy_groupies.aggregate(mapid.ravel(), frames.ravel()),
                       (Ny, Nx))
    timers['Overlap'] += timer() - time0
    return accum
Example #14
def aggregate_rt_ch(
    cond1,
    rt1,
    ch1,
    n_cond,
    nt=consts.NT,
    n_ch=consts.N_CH,
):
    """
    @param rt_frame: [trial]
    @type rt_frame: torch.LongTensor
    @param ch: [trial]
    @type ch: torch.LongTensor
    @param cond: [trial]
    @type cond: torch.LongTensor
    @param nt: in frames
    @type nt: int
    @return: p_rt_ch[cond, rt_frame, ch]
    @rtype: torch.FloatTensor
    """
    # Use torch.index_add(dim, index, tensor) along with ravel()
    # see https://pytorch.org/docs/stable/tensors.html#torch.Tensor.index_add
    return torch.tensor(
        npg.aggregate(np.stack([cond1, rt1, ch1]), 1., 'sum',
                      [n_cond, nt, n_ch]))
Example #15
def pseudobulk_from_label(ds, agg_labels, norm_total=10000):
    label_marker_counts = npg.aggregate(agg_labels.encoded,
                                        ds.vals[:, agg_labels.is_labelled].A,
                                        func='sum',
                                        axis=1)
    label_total_counts = npg.aggregate(
        agg_labels.encoded,
        ds.vals[:, agg_labels.is_labelled].sum(0).A.ravel(),
        func='sum')

    label_norm_counts = ((label_marker_counts / label_total_counts) *
                         norm_total).T
    label_norm_counts = pd.DataFrame(label_norm_counts,
                                     columns=ds.ra.Gene,
                                     index=agg_labels.le.classes_)
    return label_norm_counts
Example #16
def proj_to_grid(
    points,
    xoff,
    yoff,
    xresolution,
    yresolution,
    xsize,
    ysize,
    fill_small_holes,
):
    row = np.floor((yoff - points[:, 1]) / xresolution).astype(dtype=int)
    col = np.floor((points[:, 0] - xoff) / yresolution).astype(dtype=int)
    points_group_idx = row * xsize + col
    points_val = points[:, 2]

    # remove points that lie out of the dsm boundary
    mask = ((row >= 0) * (col >= 0) * (row < ysize) * (col < xsize)) > 0
    points_group_idx = points_group_idx[mask]
    points_val = points_val[mask]

    # create a placeholder for all pixels in the dsm
    group_idx = np.arange(xsize * ysize).astype(dtype=int)
    group_val = np.empty(xsize * ysize)
    group_val.fill(np.nan)

    # concatenate placeholders with the real values, then aggregate
    group_idx = np.concatenate((group_idx, points_group_idx))
    group_val = np.concatenate((group_val, points_val))

    dsm = npg.aggregate(group_idx, group_val, func="nanmax", fill_value=np.nan)
    dsm = dsm.reshape((ysize, xsize))

    ###########################################################################
    # try to fill very small holes
    if fill_small_holes:
        dsm_new = dsm.copy()
        nan_places = np.argwhere(np.isnan(dsm_new))
        for i in range(nan_places.shape[0]):
            row = nan_places[i, 0]
            col = nan_places[i, 1]
            neighbors = []
            for j in range(row - 1, row + 2):
                for k in range(col - 1, col + 2):
                    if (
                        j >= 0
                        and j < dsm_new.shape[0]
                        and k >= 0
                        and k < dsm_new.shape[1]
                    ):
                        val = dsm_new[j, k]
                        if not np.isnan(val):
                            neighbors.append(val)

            if neighbors:
                dsm[row, col] = np.median(neighbors)
    ###########################################################################

    return dsm
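The placeholder concatenation above is the key trick: one NaN entry per pixel guarantees every flat index occurs at least once, so the aggregated raster keeps its full xsize*ysize length, while 'nanmax' ignores the placeholder wherever real points exist. Reduced to its core with toy values:

import numpy as np
import numpy_groupies as npg

xsize, ysize = 4, 3
pix = np.array([0, 0, 5, 11])             # flat pixel index of each point
val = np.array([2.0, 3.0, 1.5, 7.0])      # point heights

group_idx = np.concatenate((np.arange(xsize * ysize), pix))
group_val = np.concatenate((np.full(xsize * ysize, np.nan), val))

dsm = npg.aggregate(group_idx, group_val, func='nanmax', fill_value=np.nan)
dsm = dsm.reshape((ysize, xsize))
# pixel 0 -> 3.0, pixel 5 -> 1.5, pixel 11 -> 7.0, all others stay NaN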
Example #17
def compute_max_yval_boundary_rc(means, val=0.9):
    #rows,cols = np.where(np.isclose(means,val))
    rows, cols = np.where(means > val)
    order = np.argsort(cols)
    cols = cols[order]
    rows = rows[order]
    rows = npg.aggregate(cols, rows, func='max')
    cols = np.unique(cols)
    return rows, cols
Example #18
    def pseudobulk_counts(self, vals, level=10000, gene_names=None):

        label_marker_counts = npg.aggregate(self.encoded,
                                            vals[:, :].A,
                                            func='sum', axis=1)
        label_total_counts = npg.aggregate(self.encoded,
                                           vals.sum(0).A.ravel(),
                                           func='sum')
        label_norm_counts = ((label_marker_counts / label_total_counts) * level).T

        original_grp_names = self.le.inverse_transform(np.arange(label_norm_counts.shape[0]))

        label_norm_counts = pd.DataFrame(label_norm_counts,
                                         index=original_grp_names,
                                         columns=gene_names)

        return label_norm_counts
Example #19
def strategy():
    global TtradesConf

    #Only trade if profitable
    if (Amount - Bask[-1]) / Bask[-1] < .87:
        return 0

    #Only trade if the previous contract is over
    if Bepoch[-1] - Tepoch[-1] < MinTradePer:
        return 0

    #Check if no missing history point
    if np.isnan(Bprice).any():
        return False

    #Calculate h,l
    ix = np.round((Bepoch[-1] - Bepoch) / 60).astype('int')

    #   Check if we have collected enough history
    if max(ix) - min(ix) < HistDepth - 1:
        return 0

    #Check if we have a value for all bars
    if np.diff(npg.aggregate(ix, ix, 'max')[[-HistDepth, -1]])[0] != HistDepth - 1:
        print(Bepoch[-1], ': Incomplete time series')
        return 0

    h = npg.aggregate(-ix + max(ix), Bprice, 'max',
                      fill_value=np.nan)[-HistDepth:]
    l = npg.aggregate(-ix + max(ix), Bprice, 'min',
                      fill_value=np.nan)[-HistDepth:]
    a = np.array([h[-HistDepth:], l[-HistDepth:]])
    a = (a - Bprice[-1]) / (a.max() - a.min())

    a = a.reshape((1, HistDepth, 2))
    y = model.predict_on_batch([a])[0]

    if y > .535:
        TtradesConf.append(y[0])
        print(Bepoch[-1], ':Order. Conf=', y)
    return y > .535
Example #20
    def trajs2dems(self, states, actions):
        """
        Converts state and action trajectories into a demonstration data set.

        :param states: [M x N] array containing M state trajectories of length N
        :param actions: [M x N] array containing M action trajectories of length N
        :return: [S x A] array representing S histograms over actions observed at the different states of the MDP
        """
        return aggregate([states.ravel(), actions.ravel()],
                         1,
                         size=(self.nStates, self.nActions))
Example #21
def det2stoch(x, nCats):
    """
    Converts a collection of deterministic category assignments into a stochastic representation with all mass placed at
    the indicated categories.

    :param x: 1d array of integers
    :param nCats: integer indicating total number of categories available (must be greater than maximum value in x)
    :return: [L x nCats] array providing the stochastic representation, where L is the length of x
    """
    l = len(x)
    return aggregate(np.vstack((range(l), x)), 1, size=(l, nCats))
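Concretely, pairing each row index with its category and aggregating ones yields a one-hot matrix:

import numpy as np
from numpy_groupies import aggregate

x = np.array([2, 0, 1])
onehot = aggregate(np.vstack((range(len(x)), x)), 1, size=(len(x), 4))
# [[0, 0, 1, 0],
#  [1, 0, 0, 0],
#  [0, 1, 0, 0]]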
Example #22
def segmentation_adjacency(segmentation, connectivity=4):
    """Generate an adjacency matrix out of a given segmentation."""

    assert connectivity == 4 or connectivity == 8

    # Get centroids.
    idx = np.indices(segmentation.shape)
    ys = npg.aggregate(segmentation.flatten(), idx[0].flatten(), func='mean')
    xs = npg.aggregate(segmentation.flatten(), idx[1].flatten(), func='mean')
    ys = np.reshape(ys, (-1, 1))
    xs = np.reshape(xs, (-1, 1))
    points = np.concatenate((ys, xs), axis=1)

    # Get mass.
    nums, mass = np.unique(segmentation, return_counts=True)
    n = nums.shape[0]

    # Get adjacency (https://goo.gl/y1xFMq).
    tmp = np.zeros((n, n), bool)

    # Get vertically adjacency.
    a, b = segmentation[:-1, :], segmentation[1:, :]
    tmp[a[a != b], b[a != b]] = True

    # Get horizontally adjacency.
    a, b = segmentation[:, :-1], segmentation[:, 1:]
    tmp[a[a != b], b[a != b]] = True

    # Get diagonal adjacency.
    if connectivity == 8:
        a, b = segmentation[:-1, :-1], segmentation[1:, 1:]
        tmp[a[a != b], b[a != b]] = True

        a, b = segmentation[:-1, 1:], segmentation[1:, :-1]
        tmp[a[a != b], b[a != b]] = True

    result = tmp | tmp.T
    result = result.astype(np.uint8)
    adj = sp.coo_matrix(result)

    return adj, points, mass
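The centroid computation at the top of this function, in isolation: grouping the row and column index grids by segment label and taking the mean gives one (y, x) centroid per segment.

import numpy as np
import numpy_groupies as npg

segmentation = np.array([[0, 0, 1],
                         [0, 2, 1],
                         [2, 2, 1]])

idx = np.indices(segmentation.shape)
ys = npg.aggregate(segmentation.flatten(), idx[0].flatten(), func='mean')
xs = npg.aggregate(segmentation.flatten(), idx[1].flatten(), func='mean')
points = np.stack((ys, xs), axis=1)   # one (row, col) centroid per segment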
Example #23
	def fit(self, ds: loompy.LoomConnection, plot: str = None) -> np.ndarray:
		"""
		Fit a classifier and use it to determine cluster predictive power

		Args:
			ds		Dataset
			plot	Filename for optional plot

		Returns:
			Matrix of classification probabilities, shape (n_cells, n_labels)
		"""
		logging.info("Feature selection")
		nnz = ds.map([np.count_nonzero], axis=0)[0]
		valid_genes = np.logical_and(nnz > 5, nnz < ds.shape[1] * 0.5).astype("int")
		ds.ra._Valid = valid_genes

		logging.info("Normalization")
		normalizer = cg.Normalizer(False)
		normalizer.fit(ds)

		logging.info("Feature selection")
		(_, enrichment, _) = cg.MarkerSelection(findq=False, labels_attr="Clusters").fit(ds)
		genes = np.zeros_like(ds.ra.Gene, dtype=bool)
		for ix in range(enrichment.shape[1]):
			genes[np.argsort(-enrichment[:, ix])[:25]] = True

		logging.info("PCA projection")
		pca = cg.PCAProjection(genes, max_n_components=50)
		transformed = pca.fit_transform(ds, normalizer)

		le = LabelEncoder().fit(ds.ca.ClusterName)
		self.le = le
		labels = le.transform(ds.ca.ClusterName)

		train_X, test_X, train_Y, test_Y = train_test_split(transformed, labels, test_size=0.2)
		classifier = RandomForestClassifier(max_depth=30)
		classifier.fit(train_X, train_Y)
		self.report = classification_report(test_Y, classifier.predict(test_X), target_names=le.classes_)
		self.proba = classifier.predict_proba(transformed)

		if plot:
			agg = npg.aggregate(labels, self.proba, axis=0, func="mean")
			plt.imshow(agg, cmap="viridis")
			plt.xticks(np.arange(le.classes_.shape[0]), le.classes_, rotation="vertical", fontsize=7)
			plt.yticks(np.arange(le.classes_.shape[0]), le.classes_, rotation="horizontal", fontsize=7)
			plt.xlabel("Predicted cell type")
			plt.ylabel("Observed cell type")
			plt.title("Predictive power of cluster identities")
			cbar = plt.colorbar()
			cbar.set_label('Average classification probability', rotation=90)
			plt.savefig(plot, bbox_inches="tight")

		return self.proba
Example #24
def aggregate(subs, val=1., *args, **kwargs):
    """
    :param subs: [dim, element]
    :type subs: torch.LongTensor, (*torch.LongTensor)
    :type size: torch.LongTensor
    """

    if type(subs) is tuple or type(subs) is list:
        subs = np.stack(subs)
        # subs = np.concatenate(npys(*(sub.reshape(1,-1) for sub in subs)), 0)
    elif torch.is_tensor(subs):
        subs = npy(subs)
    return tensor(npg.aggregate(subs, val, *args, **kwargs))
Example #25
def extract_features(segmentation, image, form_features=None):
    features = FormFeatureExtraction(segmentation).get_features(form_features)

    group_idx = segmentation.flatten()

    # Prepend mean color to form features.
    if image.shape[2] == 1:
        mean = npg.aggregate(group_idx, image.flatten(), func='mean')
        mean = np.reshape(mean, (-1, 1))
        features = np.concatenate((mean, features), axis=1)
    elif image.shape[2] == 3:
        r = npg.aggregate(group_idx, image[:, :, 0:1].flatten(), func='mean')
        r = np.reshape(r, (-1, 1))
        g = npg.aggregate(group_idx, image[:, :, 1:2].flatten(), func='mean')
        g = np.reshape(g, (-1, 1))
        b = npg.aggregate(group_idx, image[:, :, 2:3].flatten(), func='mean')
        b = np.reshape(b, (-1, 1))
        features = np.concatenate((r, g, b, features), axis=1)
    else:
        raise ValueError("image must have 1 or 3 channels")

    return features.astype(np.float32)
Example #26
def get_coefs(dim, dif_other, dur, ch, cond, t_RDK_dur, correct_only=True):
    """

    :param dim:
    :param dif_other:
    :param dur: [tr]
    :param ch: [tr, dim]
    :param cond: [tr, dim]
    :param t_RDK_dur:
    :param correct_only:
    :return: glmres.params, glmres.bse, glmres, glmmodel
    """
    id_dif = np.empty_like(cond)
    for dim1 in range(consts.N_DIM):
        out = np.unique(np.abs(cond[:, dim1]), return_inverse=True)
        _, id_dif[:, dim1] = out

    odim = consts.N_DIM - 1 - dim
    incl = ((t_RDK_dur == dur) & (np.isin(id_dif[:, odim], dif_other)))
    if correct_only:
        incl = (incl & (np.sign(ch[:, odim] - 0.5) == np.sign(cond[:, odim])))
    ch1 = ch[incl, dim]
    coh1 = cond[incl, dim]

    cohs, id_cohs = np.unique(coh1, return_inverse=True)
    if np.issubdtype(ch1.dtype, np.floating):
        # p_ch=1 is given
        ch11 = np.stack(
            [npg.aggregate(id_cohs, ch1),
             npg.aggregate(id_cohs, 1 - ch1)], -1)
    else:
        ch11 = npg.aggregate(np.vstack((id_cohs, 1 - ch1)), 1)

    glmmodel = sm.GLM(ch11,
                      sm.add_constant(cohs),
                      family=sm.families.Binomial())
    glmres = glmmodel.fit()
    return glmres.params, glmres.bse, glmres, glmmodel
Example #27
def expressed_fraction_from_label(ds, agg_labels, frac_of_max=0.01):

    expr_threshold = ds.vals.max(1).A * frac_of_max
    expr_threshold[expr_threshold < 1] = 0

    detected_frac = npg.aggregate(
        agg_labels.encoded,
        ds.vals[:, agg_labels.is_labelled].A > expr_threshold,
        func='mean',
        axis=1)

    detected_frac = pd.DataFrame(detected_frac.T,
                                 columns=ds.ra.Gene,
                                 index=agg_labels.le.classes_)
    return detected_frac
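The same axis-wise trick with a boolean matrix: 'mean' over the cells within each label gives the fraction of cells, per label, whose expression exceeds the threshold. A toy sketch with a small dense matrix standing in for ds.vals:

import numpy as np
import numpy_groupies as npg

vals = np.array([[0, 3, 0, 5],       # genes x cells expression matrix (toy values)
                 [2, 0, 1, 0]])
labels = np.array([0, 0, 1, 1])      # cluster label of each cell
threshold = np.array([[1.0], [0.5]]) # per-gene detection threshold

detected_frac = npg.aggregate(labels, vals > threshold, func='mean', axis=1)
# detected_frac[g, k]: fraction of cells in label k with gene g above threshold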
Example #28
    def dat2p_dat(
        self, ch_tr_dim: np.ndarray, dur_tr: np.ndarray, ev_tr_dim: np.ndarray
    ) -> (torch.Tensor, torch.Tensor, np.ndarray, np.ndarray, np.ndarray,
          np.ndarray):
        """
        :param ch_tr_dim: [tr, dim]
        :param dur_tr: [tr]
        :param ev_tr_dim: [tr, dim]
        :return: n_cond_dur_ch[cond, dur, ch],
        ev_cond_fr_dim_meanvar[dcond, fr, dim, (mean, var)],
        ev_cond_dim[dcond, dim], dcond_tr[tr],
        durs[dur], ddur_tr[tr]
        """
        nt0 = self.nt0
        dt0 = self.dt0
        n_ch_flat = self.n_ch
        subsample_factor = self.subsample_factor

        nt = int(nt0 // subsample_factor)

        durs, ddur_tr = np.unique(dur_tr, return_inverse=True)
        ddur_tr = ddur_tr.astype(int)
        n_dur = len(durs)
        durs = torch.tensor(durs)
        ddur_tr = torch.tensor(ddur_tr, dtype=torch.long)

        ch_tr_flat = consts.ch_by_dim2ch_flat(ch_tr_dim)

        ev_cond_dim, dcond_tr = np.unique(ev_tr_dim,
                                          return_inverse=True,
                                          axis=0)
        n_cond_flat = len(ev_cond_dim)
        ev_cond_fr_dim = torch.tensor(ev_cond_dim)[:, None, :].expand(
            [-1, nt, -1])

        ev_cond_fr_dim_meanvar = torch.stack(
            [ev_cond_fr_dim, torch.zeros_like(ev_cond_fr_dim)], -1)

        n_cond_dur_ch = npt.tensor(
            npg.aggregate(np.stack([dcond_tr,
                                    npy(ddur_tr), ch_tr_flat]), 1., 'sum',
                          [n_cond_flat, n_dur, n_ch_flat]))

        return n_cond_dur_ch, ev_cond_fr_dim_meanvar, ev_cond_dim, dcond_tr, \
            durs, ddur_tr
Example #29
def aggregate(xda, idx_or_size, func=np.nanmean, fill_value=np.nan):
    """Aggregates a 2D array using an index array or block size

    Parameters
    ----------
    xda: xarray.DataArray
        the array with the data to aggregate
    idx_or_size: xarray.DataArray, int, or tuple
        either an array with each pixel indexed based on the value in the
        source array or the size in pixels of the desired grid
    func: callable (optional)
        the numpy function used to aggregate each block
    fill_value: int or float (optional)
        the value to use for missing values when cells do not fit
        perfectly into the original array

    Returns
    -------
    numpy.array
        array containing the aggregated values
    """

    if isinstance(idx_or_size, int):
        idx_or_size = (idx_or_size, idx_or_size)

    # Coerce array to a numpy array
    arr = to_numpy_array(xda)

    # Use scipy to split array into grid if block size given
    if isinstance(idx_or_size, (list, tuple)):
        return block_reduce(arr, idx_or_size, func=func, cval=fill_value)

    # Use numpy_groupies to group on an index array
    idx = np.ravel(to_numpy_array(idx_or_size))

    # Use nodata from index if set
    nodata = fill_value
    if isinstance(idx_or_size, xr.DataArray):
        nodata = idx_or_size.rio.nodata

    # Set data to fill_value where nodata in index
    vals = np.ravel(arr)
    vals[idx == nodata] = fill_value

    return npg.aggregate(idx, vals, func=func, fill_value=fill_value)
Example #30
def quantilize(v, n_quantile=5, return_summary=False, fallback_to_unique=True):
    """Quantile starting from 0. Array is flattened first."""

    v = np.array(v)

    if fallback_to_unique:
        x, ix = uniquetol(v, return_inverse=True)
    
    if (not fallback_to_unique) or len(x) > n_quantile:
        n = v.size
        ix = np.int32(np.ceil((stats.rankdata(v, method='ordinal') + 0.) \
                              / n * n_quantile) - 1)
    
    if return_summary:
        x = npg.aggregate(ix, v, func='mean')
        return ix, x
    else:   
        return ix
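For example (assuming the uniquetol helper referenced above is available), return_summary=True yields both the per-element bin index and the per-bin mean, which plot_binned_ch in Example #3 uses as x-coordinates:

import numpy as np

v = np.random.randn(100)
ix, x = quantilize(v, n_quantile=5, return_summary=True)
# ix: bin label 0..4 for each element of v; x: mean of v within each bin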
Example #31
def gridder(grid, time, lon, lat, depth, data, dt, title='ROMS Observations'):
    """
    Construct an observations set from raw observations by placing them
    onto a grid.

    Parameters
    ----------
    grid : seapy.model.grid or filename string,
        Grid to place the raw observations onto
    time : ndarray,
        Time of the observations. This can be a scalar and all values
        will be assigned to the single time; otherwise, there must be a
        corresponding time to each value in the data.
    lon : ndarray,
        longitude of the observations. This can be a scalar and all values
        will be assigned to the single location; otherwise, there must be a
        corresponding longitude to each value in the data.
    lat : ndarray,
        latitude of the observations. This can be a scalar and all values
        will be assigned to the single location; otherwise, there must be a
        corresponding latitude to each value in the data.
    depth : ndarray or None,
        depth of the observations. If None, then all values are placed on
        the surface; otherwise, must be a corresponding depth for each
        value in the data.
    data : list of named tuples of seapy.roms.obs.raw_data,
        This list is comprised of each set of observation data types that
        are to be gridded together. If there is only one type (e.g.,
        SSH observations), there is only one item. An Argo float would have
        two items in the list (temperature and salinity observations).
        The list is comprised of named tuples of the raw observations
        with the following fields:
            "type" : string (or integer) of the type from
                     seapy.roms.obs.obs_types
             "provenance"  : string (or integer) of the type from
                             seapy.roms.obs.obs_provenance
            "values" : ndarray of actual observed values in units
                       for type
            "error" : ndarray (or None) of individual observational
                      uncertainty (same units of values). If not known,
                      use None
            "min_error" : float of the minimum error that should be
                          prescribed to the observations (typically,
                          the instrument error) in the same units of
                          values.
    dt : float
        The bin size of time for observations to be considered at the
        same time. The units must be the same as the provided time.
    title : string, optional,
        Title to assign the observations structure for output

    Returns
    -------
    obs : seapy.obs class
        Resulting observations from the raw data as placed onto grid.

    Examples
    --------
    A profile of temp and salt observations at a given lat/lon:

    >>> obs = seapy.obs.gridder(grid, times, lon, lat, depth,
            [ seapy.roms.obs.raw_data("TEMP", "CTD_ARGO", temp, None, 0.1),
              seapy.roms.obs.raw_data("SALT", "CTD_ARGO", salt, None, 0.05)],
            dt = 1/24, title="Argo")

    Satellite Data from a number of lat/lons at a single time

    >>> obs = seapy.obs.gridder(grid, time, lon, lat, None,
            [seapy.roms.obs.raw_data("ZETA", "SSH_AVISO", sla, sla_err, 0.05)],
            dt = 2/24, title="SSH")

    These will generate new observation structures from the raw data.
    """
    from numpy_groupies import aggregate

    # Make sure the input is of the proper form
    grid = seapy.model.asgrid(grid)
    time = np.atleast_1d(time)
    lon = np.atleast_1d(lon)
    lat = np.atleast_1d(lat)

    # First, before relying on gridding, extract only the data that are
    # encompassed by the grid
    region_list = np.where(np.logical_and.reduce((
        lat >= np.min(grid.lat_rho), lat <= np.max(grid.lat_rho),
        lon >= np.min(grid.lon_rho), lon <= np.max(grid.lon_rho))))
    if not np.any(region_list):
        warn("No observations were located within grid region_list")
        return None
    lat = lat[region_list]
    lon = lon[region_list]

    # Get the appropriate k-dimension depending on whether the data
    # are 2-D or 3-D
    if depth is None:
        # Get the grid locations from the data locations
        subsurface_values = False
        (j, i) = grid.ij((lon, lat))
        depth = np.zeros(i.size)
        k = np.ma.array(np.resize(grid.n, i.size))
    else:
        # Get the grid locations from the data locations
        subsurface_values = True
        depth = np.atleast_1d(depth)[region_list]
        (k, j, i) = grid.ijk((lon, lat, depth))

    # Sub-select only the points that lie on our grid
    valid_list = np.where((~i.mask * ~j.mask * ~k.mask) == True)
    i = i.compressed()
    j = j.compressed()
    k = k[valid_list]
    depth = depth[valid_list]

    # Make sure the times are consistent and in dt-space
    if time.size == 1:
        time = np.resize(time, valid_list[0].size)
    else:
        time = time[region_list][valid_list]
    dtime = np.floor(time / dt)

    # Loop over all time intervals putting everything together. NOTE: The
    # preference is to use aggregate over the time-dimension just as we do
    # in the spatial-dimension; however, this led to crashing.
    ot = list()
    ox = list()
    oy = list()
    oz = list()
    odep = list()
    olon = list()
    olat = list()
    oval = list()
    oerr = list()
    oprov = list()
    otype = list()
    for t in seapy.progressbar.progress(np.unique(dtime)):
        time_list = np.where(dtime == t)
        mtime = np.nanmean(time[time_list])

        for v in data:
            valid_data = np.s_[:]
            if isinstance(v.values, np.ma.core.MaskedArray):
                valid_data = \
                    (v.values[region_list][valid_list][time_list].nonzero())[0]
                if not valid_data.size:
                    continue

            # Put together the indices based on the type of data we have
            if subsurface_values:
                idx = (k[time_list][valid_data],
                       j[time_list][valid_data],
                       i[time_list][valid_data])
            else:
                idx = (j[time_list][valid_data],
                       i[time_list][valid_data])
            indices = np.floor(idx).astype(int)

            # Grid the data onto our grid and compute the mean and variance
            ii = aggregate(indices, i[time_list][valid_data], func='mean')
            jj = aggregate(indices, j[time_list][valid_data], func='mean')
            binned = np.where(ii * jj > 0)
            ii = ii[binned].ravel()
            jj = jj[binned].ravel()
            (latl, lonl) = grid.latlon((ii, jj))
            Nd = ii.size

            # Put the co-located values together
            nvalues = aggregate(indices,
                                v.values[region_list][valid_list][
                                    time_list][valid_data],
                                func='mean')

            # Get their variance
            vari = aggregate(indices,
                             v.values[region_list][valid_list][
                                 time_list][valid_data],
                             func='var')

            # Put together the known observation values
            if v.error is not None:
                errs = aggregate(indices,
                                 v.error[region_list][valid_list][
                                     time_list][valid_data]**2,
                                 func='mean')
                errs = errs[binned].flatten()
            else:
                errs = 0.0

            # Build the depth vectors
            if subsurface_values:
                dd = aggregate(indices, depth[time_list][valid_data],
                               func='mean')
                kk = aggregate(indices, k[time_list][valid_data],
                               func='mean')
                dd = dd[binned].ravel()
                # ROMS counts from 1 for depth layers
                kk = kk[binned].ravel() + 1
            else:
                kk = np.resize(grid.n, Nd)
                dd = np.zeros(ii.shape)

            # Put all of the data from this time into our lists
            ot.append(np.resize(mtime, Nd))
            ox.append(ii)
            oy.append(jj)
            oz.append(kk)
            odep.append(dd)
            olon.append(lonl)
            olat.append(latl)
            oval.append(nvalues[binned].flatten())
            otype.append(np.resize(seapy.roms.obs.astype(v.type), Nd))
            oprov.append(np.resize(
                seapy.roms.obs.asprovenance(v.provenance), Nd))
            oerr.append(np.maximum(v.min_error**2,
                                   np.maximum(vari[binned].flatten(),
                                              errs)))

    # Make sure that we have something relevant
    if not oval:
        return None

    # Put everything together and create an observation class
    return seapy.roms.obs.obs(time=np.hstack(ot).ravel(),
                              x=np.hstack(ox).ravel(),
                              y=np.hstack(oy).ravel(),
                              z=np.hstack(odep).ravel(),
                              lat=np.hstack(olat).ravel(),
                              lon=np.hstack(olon).ravel(),
                              depth=np.hstack(oz).ravel(),
                              value=np.hstack(oval).ravel(),
                              error=np.hstack(oerr).ravel(),
                              type=np.hstack(otype).ravel(),
                              provenance=np.hstack(oprov).ravel(),
                              title=title)