示例#1
0
 def _combine_beds(beds, combine_mode, merge=False):
     """Aggregate a list of BEDs according to ``combine_mode``.

     ``"all"`` delegates to ``BedOperations.intersect`` and ``"any"`` to
     ``BedOperations.union``; ``merge`` is forwarded to the chosen operation
     and the operation's ``result`` is returned.

     Raises:
         InvalidCombineOperationException: for any other ``combine_mode``.
     """
     # Reject unsupported modes up front so the happy path stays flat.
     if combine_mode not in ("all", "any"):
         raise InvalidCombineOperationException

     operation = BedOperations.intersect if combine_mode == "all" else BedOperations.union
     return operation(beds, merge=merge).result
示例#2
0
    def to_remove_get_tss_fantom5_bed1(genes, tissues, genome, combine_mode, upstream, downstream, *args):
        """Collect FANTOM5 TSS regions that overlap the flanked promoters of ``genes``.

        ``upstream`` and ``downstream`` are forwarded unchanged to
        ``_get_gene_promoter_sites``; extra positional ``args`` are ignored.

        Returns:
            A one-element list holding the intersected BED, or an empty list
            when either the tissue BEDs or the flanked genes are empty.
        """
        BedsProcessor.logger().info("Extracting F5 TSS for genes (%s): %s; tissues: %s; and combine_mode: %s" % (genome, genes, tissues, combine_mode))

        flanked_genes = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream, downstream)

        source_key = RegulatoryRegionsFilesRegistry.FANTOM5_PROMOTERS_KEY
        # Tissue BEDs are restricted to the sorted, merged promoter regions.
        query_regions = flanked_genes.sort().merge()
        beds = BedsProcessor._get_regulatory_regions_bed(genome, tissues, source_key, query_regions)

        BedsProcessor.log_count("Flanked genes' promoters BED", flanked_genes)
        BedsProcessor.log_bed(flanked_genes)
        BedsProcessor.logger().info("F5 TSS BEDs list:\n%s" % str(beds))

        # Nothing to combine or intersect: bail out early.
        if not (beds and flanked_genes):
            BedsProcessor.logger().info("Returning empty TSS list")
            return []

        combined_tss = BedsProcessor._combine_beds(beds, combine_mode)
        BedsProcessor.log_count("Combined F5 TSS BED", combined_tss)
        BedsProcessor.log_bed(combined_tss)

        overlapping = BedOperations.intersect([combined_tss, flanked_genes], u=True).result
        BedsProcessor.log_count("F5 TSS BED intersected with genes' promoters", overlapping)
        BedsProcessor.log_bed(overlapping)

        return [overlapping]
示例#3
0
    def _generic_get_promoters_bed(source, genes, tissues, genome, combine_mode, upstream, downstream, *args):
        """Collect regions from ``source`` that overlap the flanked promoters of ``genes``.

        ``source`` is the regulatory-regions key handed to
        ``_get_regulatory_regions_bed``; ``upstream`` and ``downstream`` are
        forwarded to ``_get_gene_promoter_sites``. Extra positional ``args``
        are ignored.

        Returns:
            A one-element list holding the intersected BED, or an empty list
            when either the tissue BEDs or the flanked genes are empty.
        """
        BedsProcessor.logger().info(f"Extracting {source} for genes ({genome}): {genes}; tissues: {tissues}; and combine_mode: {combine_mode}")

        flanked_genes = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream, downstream)

        # Tissue BEDs are restricted to the sorted, merged promoter regions.
        query_regions = flanked_genes.sort().merge()
        beds = BedsProcessor._get_regulatory_regions_bed(genome, tissues, source, query_regions)

        BedsProcessor.log_count("Flanked genes' promoters BED", flanked_genes)
        BedsProcessor.log_bed(flanked_genes)
        BedsProcessor.logger().info(f"{source} BEDs list:\n{beds}")

        # Nothing to combine or intersect: bail out early.
        if not (beds and flanked_genes):
            BedsProcessor.logger().info("Returning empty promoters list")
            return []

        combined = BedsProcessor._combine_beds(beds, combine_mode)
        BedsProcessor.log_count(f"Combined {source} BED", combined)
        BedsProcessor.log_bed(combined)

        overlapping = BedOperations.intersect([combined, flanked_genes], u=True).result
        BedsProcessor.log_count(f"{source} BED intersected with genes' promoters", overlapping)
        BedsProcessor.log_bed(overlapping)

        return [overlapping]
示例#4
0
    def to_remove_get_enhancers_encode_bed(genes, tissues, genome, combine_mode, upstream, downstream, *args):
        """Collect ENCODE enhancers that overlap the flanked regions around ``genes``.

        ``upstream`` and ``downstream`` are parsed as floats and multiplied by
        1000 before flanking (presumably they arrive in kilobases; confirm
        with the caller). Extra positional ``args`` are ignored.

        Returns:
            A one-element list holding the intersected BED, or an empty list
            when either the tissue BEDs or the flanked genes are empty.
        """
        BedsProcessor.logger().info("Extracting ENCODE enhancers for genes (%s): %s; tissues: %s; and combine_mode: %s" % (genome, genes, tissues, combine_mode))

        upstream_scaled = int(float(upstream) * 1000)
        downstream_scaled = int(float(downstream) * 1000)
        flanked_genes = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream_scaled, downstream_scaled)

        source_key = RegulatoryRegionsFilesRegistry.ENCODE_ENHANCERS_KEY
        # Tissue BEDs are restricted to the sorted, merged flanked regions.
        query_regions = flanked_genes.sort().merge()
        beds = BedsProcessor._get_regulatory_regions_bed(genome, tissues, source_key, query_regions)

        BedsProcessor.log_count("Flanked genes' promoters BED", flanked_genes)
        BedsProcessor.log_bed(flanked_genes)
        BedsProcessor.logger().info("ENCODE enhancer BEDs list:\n%s" % str(beds))

        # Nothing to combine or intersect: bail out early.
        if not (beds and flanked_genes):
            BedsProcessor.logger().info("Returning empty enhancers list")
            return []

        combined_enhancers = BedsProcessor._combine_beds(beds, combine_mode)
        BedsProcessor.log_count("Combined ENCODE enhancers BED", combined_enhancers)
        BedsProcessor.log_bed(combined_enhancers)

        overlapping = BedOperations.intersect([combined_enhancers, flanked_genes], u=True).result
        BedsProcessor.log_count("ENCODE enhancers BED intersected with genes' promoters", overlapping)
        BedsProcessor.log_bed(overlapping)

        return [overlapping]
示例#5
0
文件: registry.py 项目: seru71/Remus
    def get_bed_fragment(self, tissue, source_symbol, regions):
        """
        Get a slice of the BED registered for ``tissue`` under ``source_symbol``.

        Filtering on non-tabixed BED files is not supported.

        Args:
            tissue: key into the registry's available tissues.
            source_symbol: key of the data source within that tissue; it is
                also used as the prefix of the track name added to the result.
            regions: iterable of regions to filter the BED by. If None, the
                entire BED is returned.

        Returns:
            The (optionally filtered) BED with the track name added, or None
            when the tissue has no file for ``source_symbol`` or when no
            region yields a non-empty BED.

        Raises:
            InvalidTissueNameException: if ``tissue`` is not an available
                tissue key.
        """

        # Wrap `regions` in a 1-tuple: a bare tuple on the right of `%` is
        # unpacked as the argument list and raises TypeError unless it has
        # exactly one element.
        fragment_prefix = "fragment [%s] from " % (regions,) if regions else ""
        self.logger.info('Requested {}tissue [{}] from source [{}]'.format(
            fragment_prefix, tissue, source_symbol))

        if tissue not in self._available_tissues:
            raise InvalidTissueNameException(
                "Querried tissue [%s] was not among available tissue keys:\n%s"
                % (tissue, str(self._available_tissues.keys())))

        # Only the registry lookup is guarded: a KeyError raised while loading
        # or filtering the BED must not be reported as a missing source.
        try:
            bed_path = self._available_tissues[tissue][source_symbol]
        except KeyError:
            self.logger.info('No tissue [%s] in source [%s]' %
                             (tissue, source_symbol))
            return None

        # Track name is "<source>(<tissue>)", using the tissue text before any
        # "(" with surrounding whitespace stripped and spaces as underscores.
        track_name = source_symbol + "(" + tissue.split(
            '(')[0].strip().replace(" ", "_") + ")"
        self.logger.info('Found %s. Adding name %s' % (bed_path, track_name))

        full_bed = BedLoader(bed_path)

        if regions is None:
            return BedOperations.add_name(full_bed.bed, track_name)

        beds = [full_bed.filter_by(region) for region in regions]
        non_empty_beds = [bed for bed in beds if bed]
        if not non_empty_beds:
            return None

        filtered_bed = BedOperations.union(non_empty_beds, merge=False).result
        return BedOperations.add_name(filtered_bed, track_name)
示例#6
0
    def get_genes_bed(genes, genome, *args):
        """Look up ``genes`` in the genes database for ``genome``.

        The BED obtained from ``GenesDBRegistry`` is wrapped in a list, passed
        through ``BedOperations.union`` and logged before and after. Extra
        positional ``args`` are ignored.

        Returns:
            A one-element list holding the union result.
        """
        BedsProcessor.logger().info("Querying gene database for %s" % genes)

        gene_beds = [GenesDBRegistry.get_instance().get_bed(genome, genes)]
        BedsProcessor.log_count("Result BED file", gene_beds)
        BedsProcessor.log_bed(gene_beds)

        united = BedOperations.union(gene_beds).result
        BedsProcessor.log_count("Union of the BED files", united)
        BedsProcessor.log_bed(united)

        return [united]
示例#7
0
 def _get_accessible_mirnas(mirna_symbols, tissues, genome, combine_mode):
     """Intersect the miRNA genes' BED with ENCODE chromatin BEDs for ``tissues``.

     Returns:
         The intersection result, or None when the miRNA BED is None or none
         of the chromatin BEDs is truthy.
     """
     # get_genes_bed returns a one-element list
     mirna_beds = BedsProcessor.get_genes_bed(mirna_symbols, genome)
     mirna_bed = mirna_beds[0]
     if mirna_bed is None:
         return None

     # chromatin BEDs are fetched only for the sorted, merged miRNA regions
     chromatin_key = RegulatoryRegionsFilesRegistry.ENCODE_CHROMATIN_KEY
     chromatin_beds = BedsProcessor._get_regulatory_regions_bed(
         genome, tissues, chromatin_key, mirna_bed.sort().merge())
     if not any(chromatin_beds):
         return None

     chromatin_aggregate = BedsProcessor._combine_beds(chromatin_beds, combine_mode)
     return BedOperations.intersect(mirna_beds + [chromatin_aggregate], merge=False).result
示例#8
0
文件: remus.py 项目: seru71/Remus
def perform():
    """Collect the requested BEDs, combine them and return a summary.

    All collected BED lists are flattened into one list. With more than one
    BED the list goes through ``BedOperations.union`` (merge enabled, options
    ``c=4`` and ``o="distinct"``); otherwise the first BED is used as is. The
    result is saved to a temporary file whose name is stored in the session
    under ``"last_result"``.

    Returns:
        The value of ``return_summary`` for the result and the elapsed time,
        or the string ``"Error occurred"`` if any exception is raised. That
        includes the IndexError raised when no BEDs were collected.
    """
    try:
        started_at = time.time()
        params = get_perform_params()
        beds_by_key = BedsCollector(params).collect_bed_files()

        collected_beds = []
        for beds_list in beds_by_key.values():
            collected_beds.extend(beds_list)

        if len(collected_beds) > 1:
            final_result = BedOperations.union(
                collected_beds, merge=True, c=4, o="distinct").result
        else:
            final_result = collected_beds[0]

        tmp_file = save_as_tmp(final_result)
        session["last_result"] = tmp_file.name
        elapsed = time.time() - started_at
        return return_summary(final_result, elapsed)
    except Exception:
        app.logger.exception("Error occurred, details:")
        return "Error occurred"
示例#9
0
 def _get_gene_promoter_sites(genes, genome, upstream, downstream):
     """Return the promoter regions computed for ``genes`` in ``genome``.

     The first BED returned by ``get_genes_bed`` is passed to
     ``BedOperations.get_promoter_region`` together with ``upstream`` and
     ``downstream``, and that operation's ``result`` is returned.
     """
     gene_beds = BedsProcessor.get_genes_bed(genes, genome)
     return BedOperations.get_promoter_region(gene_beds[0], upstream, downstream).result