Exemplo n.º 1
0
    def determine_ref_occupancy(self,
                                test_set: pd.Index,
                                block_size=7500,
                                n_jobs=1):
        """ Function to determine the number of test cells occupying each
        reference sphere

        The reference cells are processed in consecutive chunks of at most
        ``block_size`` cells, so only one chunk-by-test distance matrix is
        built per iteration.

        :param pd.Index test_set: Pandas index of test set observation names
        :param int block_size: Maximum number of reference cells per chunk
        :param int n_jobs: Forwarded to ``pairwise_distances``
        :return: Array with one entry per element of ``self.ref_set`` holding
            the number of test cells whose distance to that reference cell is
            strictly below the matching entry of ``self.sigmas``
        :raises ValueError: If some test cells are missing from
            ``self.adata.obs_names`` or if ``block_size`` is not positive
        """
        if not test_set.isin(self.adata.obs_names).all():
            raise ValueError(
                'Some of the cells in the test set are not in the AnnData object. '
                'Ensure that all the test cells are in the AnnData object')

        block_size = int(block_size)
        if block_size < 1:
            raise ValueError('block_size must be a positive integer')

        # Test data
        test_data = self.adata[_find_cell_indices(self.adata, test_set), ].X

        # Compute counts in fixed-size chunks. Stepping with ``range`` (rather
        # than ``np.linspace`` boundaries) guarantees that a reference set
        # smaller than ``block_size`` is still processed as one chunk, and
        # avoids the ``np.int`` alias that recent NumPy releases removed.
        n_ref = len(self.ref_set)
        counts = np.zeros(n_ref)
        for start in range(0, n_ref, block_size):
            stop = min(start + block_size, n_ref)
            ref_data = self.adata[self.ref_set[start:stop], ].X
            dists = pairwise_distances(ref_data, test_data, n_jobs=n_jobs)
            # Broadcast each reference cell's radius across its row of distances
            counts[start:stop] = (
                dists < self.sigmas[start:stop, np.newaxis]).sum(axis=1)

        return counts
Exemplo n.º 2
0
def find_overlap_in_dataset(data_in: np.ndarray, list_1: pd.Index, list_2: list):
    """Select the columns of ``data_in.X`` whose labels occur in both lists.

    :param data_in: object exposing a two-dimensional ``X`` attribute whose
        columns are indexed with the boolean mask computed from ``list_1``
    :param list_1: pandas Index of labels
    :param list_2: labels to intersect with ``list_1``
    :return: tuple ``(mask, data)`` where ``mask`` is a boolean array marking
        the entries of ``list_1`` that also appear in ``list_2`` and ``data``
        is ``data_in.X[:, mask]``
    :raises TypeError: if ``list_1`` is not a ``pd.Index``
    """
    # Guard clause: everything below relies on the pd.Index API
    if not isinstance(list_1, pd.Index):
        raise TypeError("{} is supposed to be of type [pd.Index]. "
                        "Wrong format instead: {}".format(list_1, type(list_1)))

    labels_in_index = set(list_1.tolist())
    shared_labels = list(labels_in_index.intersection(list_2))
    column_mask = list_1.isin(shared_labels)
    selected_columns = data_in.X[:, column_mask]
    return column_mask, selected_columns
Exemplo n.º 3
0
def filter_genes(genes: pd.Index, species: str = 'human') -> pd.Index:
    """Drop ribosomal and mitochondrial genes from ``genes``.

    The genes to exclude are the concatenation of what
    ``get_ribosomal_genes(species)`` and ``get_mitochondrial_genes(species)``
    return; every entry of ``genes`` not found in that collection is kept.
    """
    blacklist = get_ribosomal_genes(species) + get_mitochondrial_genes(species)
    keep_mask = ~genes.isin(blacklist)
    return genes[keep_mask]
Exemplo n.º 4
0
 def determine_distance(self, test_set: pd.Index):
     """ Function to determine the fraction of unoccupied reference
     spheres in the test set

     :param pd.Index test_set: Pandas index of test set observation names
     :return: Tuple ``(fraction, counts)`` where ``counts`` is the result of
         ``self.determine_ref_occupancy(test_set)`` and ``fraction`` is the
         share of its entries that are equal to zero
     :raises ValueError: If some test cells are missing from
         ``self.adata.obs_names``
     """
     not_in_adata = ~test_set.isin(self.adata.obs_names)
     if not_in_adata.any():
         raise ValueError(
             'Some of the cells in the test set are not in the AnnData object. '
             'Ensure that all the test cells are in the AnnData object')

     occupancy = self.determine_ref_occupancy(test_set)
     unoccupied_fraction = np.sum(occupancy == 0) / len(occupancy)
     return unoccupied_fraction, occupancy
Exemplo n.º 5
0
def replace_multi_index_level(
    df: "classes.BeliefsDataFrame",
    level: str,
    index: pd.Index,
    intersection: bool = False,
) -> "classes.BeliefsDataFrame":
    """Replace one of the index levels of the multi-indexed DataFrame. Returns a new DataFrame object.

    Neither ``df`` nor ``index`` is modified: the frame is deep-copied and an unnamed ``index`` is renamed on a copy.

    :param df: a BeliefsDataFrame (or just a multi-indexed DataFrame).
    :param level: the name of the index level to replace.
    :param index: the new index. If it has no name, it is treated as if it were named after ``level``.
    :param intersection: policy for replacing the index level.
    If intersection is False then simply replace (note that the new index should have the same length as the old index).
    If intersection is True then add indices not contained in the old index and delete indices not contained in the new
    index. New rows have nan columns values and copies of the first row for other index levels (note that the resulting
    index is usually longer and contains values that were both in the old and new index, i.e. the intersection).
    :returns: a copy of ``df`` carrying the new MultiIndex, sorted by index.
    :raises ValueError: if ``intersection`` is falsy and ``index`` differs in length from ``df.index``.
    """
    # Todo: check whether timezone information is copied over correctly

    # Check input
    if not intersection and len(index) != len(df.index):
        raise ValueError(
            "Cannot simply replace multi-index level with an index of different length than the original. "
            "Use intersection instead?")
    if index.name is None:
        # Rename a copy so that the caller's Index object keeps its (missing) name
        index = index.rename(level)

    if intersection:
        old_level_values = df.index.get_level_values(level)
        # Values of the new index that require brand-new rows
        new_index_not_in_old = index[~index.isin(old_level_values)]
        # Mask over the existing rows that survive the replacement
        contained_in_new = old_level_values.isin(index)

    new_index_values = []
    new_index_names = []
    for name in df.index.names:
        values = df.index.get_level_values(name)
        is_target = name == level
        if intersection:
            if is_target:
                # Add new values that the old index does not contain
                extra = new_index_not_in_old
            else:
                # New rows get the first value of every other index level
                extra = pd.Index([values[0]] * len(new_index_not_in_old))
            # Copy old values that the new index contains, then the additions
            values = values[contained_in_new].append(extra)
        elif is_target:
            # Plain replacement: swap in the new index as-is
            values = index
        new_index_values.append(values)
        new_index_names.append(index.name if is_target else name)

    # Construct new MultiIndex
    mux = pd.MultiIndex.from_arrays(new_index_values, names=new_index_names)

    df = df.copy(deep=True)
    # Apply new MultiIndex
    if intersection:
        # Reindex such that new rows get nan column values
        df = df.reindex(mux)
    else:
        # Replace the index
        df.index = mux
    return df.sort_index()
Exemplo n.º 6
0
 def _get_indexer(self, index: pd.Index) -> np.ndarray:
     return index.isin(self.values)