Exemplo n.º 1
0
    def test_auto_distancing_scrambled(self):
        """Check pairwise genomic distances computed on the scrambled genome.

        Maps ``regions1A`` against ``regions1B`` (both built on
        ``self.scrambled_genome``) with ``get_genomic_distance`` as the
        distance function and a slop distance of 75, then compares the
        integer-cast dense result against ``self.auto_distancing_truth``.
        """
        query_set = genome_tools.RegionSet(self.regions1A, self.scrambled_genome)
        target_set = genome_tools.RegionSet(self.regions1B, self.scrambled_genome)

        distance_matrix = query_set.map_intersects(
            target_set,
            lambda x, y: x.get_genomic_distance(y),
            slop_distance=75,
        )

        # Densify and cast to int so the comparison is element-wise on
        # plain integer arrays.
        observed = np.array(distance_matrix.todense()).astype(int)
        self.assertTrue(np.all(observed == self.auto_distancing_truth))
Exemplo n.º 2
0
    def test_genome_bin_mapping(self):
        """Check the region-to-genomic-window mapping of ``regions1A``.

        Calls ``map_genomic_windows`` with a minimum window overlap
        proportion of 0.0 and ``regions_to_bins=False`` and expects every
        element of the result to equal ``self.m2m_map_truth``.
        """
        region_set = genome_tools.RegionSet(self.regions1A, self.genome)
        m2m_map = region_set.map_genomic_windows(
            min_window_overlap_proportion=0.0,
            regions_to_bins=False,
        )

        all_equal = np.all(m2m_map == self.m2m_map_truth)
        self.assertTrue(all_equal)
Exemplo n.º 3
0
    def load_genes(self):
        """Load gene annotations and derive the per-gene TSS region set.

        Populates three attributes:
            genes: ``GeneSet`` read from the RefSeq path configured under
                ``[genome] genes`` (formatted with the package path and
                ``self.species``).
            gene_loc_set: ``RegionSet`` of each gene's TSS region on
                ``self.genome``.
            rp_map_locs: numpy array with the location of the annotation
                attached to each region in ``gene_loc_set``.
        """
        self.log.append('Loading gene info ...')

        refseq_path = self._config.get('genome', 'genes').format(
            package_path=PACKAGE_PATH, species=self.species)
        self.genes = gene_selection.GeneSet.from_refseq(refseq_path,
                                                        self.genome)

        tss_regions = [gene.get_tss_region() for gene in self.genes]
        self.gene_loc_set = genome_tools.RegionSet(tss_regions, self.genome)

        # Read locations from the region set rather than from self.genes,
        # so they follow the order of gene_loc_set.regions.
        locations = [
            region.annotation.get_location()
            for region in self.gene_loc_set.regions
        ]
        self.rp_map_locs = np.array(locations)
Exemplo n.º 4
0
    def build_binned_rp_map(self, style, rp_decay):
        """Build a regulatory-potential (RP) map over the genome's windows.

        Parameters:
            style ({"basic", "enhanced"}):
                Which RP map builder to use.
            rp_decay:
                Decay parameter forwarded unchanged to the selected builder.

        Returns:
            Whatever the selected builder (``_make_basic_rp_map`` or
            ``_make_enhanced_rp_map``) returns for the gene TSS set and the
            set of all genome windows.

        Raises:
            NotImplementedError: if ``style`` is not a supported map style.
        '''
        """
        # Reject unknown styles up front. The original constructed the
        # exception without raising it, so an unsupported style silently
        # returned None after paying for the full window set below.
        if style not in ('basic', 'enhanced'):
            raise NotImplementedError(
                'RP map style "{}" is not implemented, choose "basic" or '
                '"enhanced"'.format(style))

        region_set = genome_tools.RegionSet(list(self.genome.list_windows()),
                                            self.genome)

        if style == 'basic':
            return self._make_basic_rp_map(self.gene_loc_set, region_set,
                                           rp_decay)

        return self._make_enhanced_rp_map(self.gene_loc_set, region_set,
                                          rp_decay)
Exemplo n.º 5
0
    def _make_enhanced_rp_map(self, gene_loc_set, region_set, decay):
        """Build the "enhanced" RP map from the basic map plus exon overlaps.

        A region that overlaps an exon of any gene (at least 40% overlap, as
        judged by ``overlaps``) has its basic-map row zeroed and replaced by
        a boolean indicator of the genes whose exons it overlaps. All other
        regions keep their basic-map values.

        Parameters:
            gene_loc_set: region set whose regions carry gene annotations
                exposing ``get_exon_regions()``.
            region_set: region set to score against the genes.
            decay: decay parameter forwarded to ``_make_basic_rp_map``.

        Returns:
            The transposed combination of the basic map and the region/gene
            exon-overlap map (presumably genes x regions, matching the basic
            map's orientation -- confirm against ``_make_basic_rp_map``).

        Any exception raised while building the map propagates to the
        caller. The previous catch-all handler printed the error and returned
        a tuple of intermediates, which hid failures and could itself raise
        ``UnboundLocalError``.
        """
        # Build the exons x genes indicator matrix directly in CSC form:
        # column j (gene j) has ones on the rows of that gene's exons.
        indptr, indices, exons = [0], [], []
        for locus in gene_loc_set.regions:
            new_exons = locus.annotation.get_exon_regions()
            exons.extend(new_exons)
            indices.extend(range(indptr[-1], indptr[-1] + len(new_exons)))
            indptr.append(indptr[-1] + len(new_exons))

        exon_gene_map = sparse.csc_matrix(
            (np.ones(len(exons)), indices, indptr),
            shape=(len(exons), len(gene_loc_set.regions)))

        exons = genome_tools.RegionSet(exons, self.genome)
        region_exon_map = region_set.map_intersects(
            exons,
            distance_function=lambda x, y: x.overlaps(
                y, min_overlap_proportion=0.4),
            slop_distance=0)  # REGIONS X EXONS

        # Collapse exons onto genes: regions x genes, True where the region
        # overlaps any exon of the gene. The builtin ``bool`` is used because
        # the ``np.bool`` alias was removed in NumPy 1.24.
        region_exon_map = region_exon_map.dot(exon_gene_map).astype(bool)

        # 1 for regions that overlap no exon at all, 0 otherwise.
        not_exon_promoter = 1 - region_exon_map.sum(axis=1).astype(bool)

        basic_rp_map = self._make_basic_rp_map(gene_loc_set, region_set,
                                               decay)

        # Zero the basic-map rows of exon-overlapping regions, then add the
        # exon indicator for those regions.
        enhanced_rp_map = basic_rp_map.transpose().multiply(
            not_exon_promoter) + region_exon_map

        return enhanced_rp_map.transpose()
Exemplo n.º 6
0
    def __init__(self,
                 species,
                 regions,
                 rp_map='enhanced',
                 rp_decay=10000,
                 isd_method='chipseq',
                 verbose=4,
                 log=None):
        '''
*class*
**lisa.FromRegions** (species, regions, rp_map = 'enhanced', rp_decay = 10000, isd_method = 'chipseq', verbose = 4, log = None)**

    Initialize the LISA test using user-defined regions.

    Parameters:
        species: {'hg38', 'mm10'}

        regions (list of lists/tuples with format [('chr', start, end), ... ]):
            User-defined regions. 
        rp_map ({"basic", "enhanced"}, scipy.sparse_matrix):
            RP map type, currently supports "basic" and "enhanced". User may also pass their own RP map as scipy.sparse_matrix in the shape (genes x regions)
        rp_decay (float, int):
            Decay rate of region influence on gene based on distance from TSS. Increase to prioritize distal regions, decrease to prioritize promoters. Default of 10000 bp is balanced.
        isd_method {"chipseq", "motifs"}:
            Use ChIP-seq data or motifs to mark TF binding locations.
        verbose (int):
            Number of levels of log messages to print to stderr
    
    Returns:
        lisa object
        '''

        super().__init__(species,
                         _config,
                         100,
                         isd_method=isd_method,
                         verbose=verbose,
                         log=log)

        if isinstance(rp_map, str):
            rp_map_styles = self._config.get('lisa_params',
                                             'rp_map_styles').split(',')
            assert (
                rp_map in rp_map_styles
            ), 'RP map must be numpy/scipy.sparse array, or be one of provided maps: {}'.format(
                ','.join(rp_map_styles))
        else:
            assert (
                isinstance(rp_map, np.ndarry)
                or isinstance(rp_map, scipy.sparse)
            ), 'RP map must be either numpy ndarry or scipy.sparse matrix'
        self.rp_map = rp_map

        #self.genome = genome_tools.Genome.from_file(self._config.get('paths','genomes').format(package_path = PACKAGE_PATH, species = self.species), window_size=100)

        assert (
            isinstance(regions, (list, tuple))
        ), '"regions" parameter must be list of region tuples in format [ (chr,start,end [,score]), (chr,start,end [,score]) ... ] or name of bed file.'

        self.log.append('Validation user-provided regions ...')

        self.num_regions_supplied = len(regions)

        regions = self._check_region_specification(regions)

        self.region_set = genome_tools.RegionSet(regions,
                                                 self.data_interface.genome)
        self.region_score_map = np.array(
            [r.annotation for r in self.region_set.regions])

        assert (isinstance(rp_decay, (int, float)) and
                rp_decay > 0), 'RP decay parameter must be positive int/float'
        self.rp_decay = rp_decay

        assert (
            len(regions) >= 1000 and len(regions) < 1000000
        ), 'User must provide atleast 1000 reigons, and less than 1 million.'