def _combine_beds(beds, combine_mode, merge=False):
    """Collapse a list of BEDs into a single BED according to ``combine_mode``.

    Args:
        beds: BEDs handed unchanged to the selected ``BedOperations`` call.
        combine_mode: ``"all"`` selects ``BedOperations.intersect``;
            ``"any"`` selects ``BedOperations.union``.
        merge: forwarded as the ``merge`` keyword of the selected operation.

    Returns:
        The ``.result`` attribute of the selected operation.

    Raises:
        InvalidCombineOperationException: for any other ``combine_mode``.
    """
    if combine_mode == "all":
        operation = BedOperations.intersect
    elif combine_mode == "any":
        operation = BedOperations.union
    else:
        raise InvalidCombineOperationException
    return operation(beds, merge=merge).result
def to_remove_get_tss_fantom5_bed1(genes, tissues, genome, combine_mode, upstream, downstream, *args):
    """Collect FANTOM5 promoter (TSS) regions overlapping the genes' promoter windows.

    The promoter windows come from ``_get_gene_promoter_sites``; their sorted,
    merged form is the region filter passed to ``_get_regulatory_regions_bed``
    under ``RegulatoryRegionsFilesRegistry.FANTOM5_PROMOTERS_KEY``.

    Args:
        genes, genome: forwarded to ``_get_gene_promoter_sites``.
        tissues: forwarded to ``_get_regulatory_regions_bed``.
        combine_mode: forwarded to ``_combine_beds``.
        upstream, downstream: forwarded unchanged to ``_get_gene_promoter_sites``.
        *args: accepted and ignored.

    Returns:
        A one-element list holding the intersected BED, or ``[]`` when either
        the per-tissue BED list or the promoter windows are falsy.
    """
    BedsProcessor.logger().info(
        "Extracting F5 TSS for genes (%s): %s; tissues: %s; and combine_mode: %s"
        % (genome, genes, tissues, combine_mode))

    promoter_windows = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream, downstream)
    tissue_beds = BedsProcessor._get_regulatory_regions_bed(
        genome,
        tissues,
        RegulatoryRegionsFilesRegistry.FANTOM5_PROMOTERS_KEY,
        promoter_windows.sort().merge(),
    )

    BedsProcessor.log_count("Flanked genes' promoters BED", promoter_windows)
    BedsProcessor.log_bed(promoter_windows)
    BedsProcessor.logger().info("F5 TSS BEDs list:\n%s" % str(tissue_beds))

    # Nothing to intersect when either side is empty.
    if not (tissue_beds and promoter_windows):
        BedsProcessor.logger().info("Returning empty TSS list")
        return []

    combined_tss = BedsProcessor._combine_beds(tissue_beds, combine_mode)
    BedsProcessor.log_count("Combined F5 TSS BED", combined_tss)
    BedsProcessor.log_bed(combined_tss)

    # u=True is passed through to BedOperations.intersect
    # (presumably bedtools' -u flag -- confirm against BedOperations).
    overlapping = BedOperations.intersect([combined_tss, promoter_windows], u=True).result
    BedsProcessor.log_count("F5 TSS BED intersected with genes' promoters", overlapping)
    BedsProcessor.log_bed(overlapping)
    return [overlapping]
def _generic_get_promoters_bed(source, genes, tissues, genome, combine_mode, upstream, downstream, *args):
    """Collect regions from ``source`` that overlap the genes' promoter windows.

    The promoter windows come from ``_get_gene_promoter_sites``; their sorted,
    merged form is the region filter passed to ``_get_regulatory_regions_bed``
    together with ``source``.

    Args:
        source: registry key passed to ``_get_regulatory_regions_bed``; also
            interpolated into the log messages.
        genes, genome: forwarded to ``_get_gene_promoter_sites``.
        tissues: forwarded to ``_get_regulatory_regions_bed``.
        combine_mode: forwarded to ``_combine_beds``.
        upstream, downstream: forwarded unchanged to ``_get_gene_promoter_sites``.
        *args: accepted and ignored.

    Returns:
        A one-element list holding the intersected BED, or ``[]`` when either
        the per-tissue BED list or the promoter windows are falsy.
    """
    BedsProcessor.logger().info(f"Extracting {source} for genes ({genome}): {genes}; tissues: {tissues}; and combine_mode: {combine_mode}")

    promoter_windows = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream, downstream)
    tissue_beds = BedsProcessor._get_regulatory_regions_bed(
        genome,
        tissues,
        source,
        promoter_windows.sort().merge(),
    )

    BedsProcessor.log_count("Flanked genes' promoters BED", promoter_windows)
    BedsProcessor.log_bed(promoter_windows)
    BedsProcessor.logger().info(f"{source} BEDs list:\n{tissue_beds}")

    # Nothing to intersect when either side is empty.
    if not (tissue_beds and promoter_windows):
        BedsProcessor.logger().info("Returning empty promoters list")
        return []

    combined = BedsProcessor._combine_beds(tissue_beds, combine_mode)
    BedsProcessor.log_count(f"Combined {source} BED", combined)
    BedsProcessor.log_bed(combined)

    # u=True is passed through to BedOperations.intersect
    # (presumably bedtools' -u flag -- confirm against BedOperations).
    overlapping = BedOperations.intersect([combined, promoter_windows], u=True).result
    BedsProcessor.log_count(f"{source} BED intersected with genes' promoters", overlapping)
    BedsProcessor.log_bed(overlapping)
    return [overlapping]
def to_remove_get_enhancers_encode_bed(genes, tissues, genome, combine_mode, upstream, downstream, *args):
    """Collect ENCODE enhancer regions overlapping the genes' flanking windows.

    ``upstream`` and ``downstream`` are converted with ``float``, multiplied
    by 1000 and truncated to ``int`` before being passed on (presumably the
    caller supplies kilobases -- confirm). The sorted, merged windows are the
    region filter passed to ``_get_regulatory_regions_bed`` under
    ``RegulatoryRegionsFilesRegistry.ENCODE_ENHANCERS_KEY``.

    Args:
        genes, genome: forwarded to ``_get_gene_promoter_sites``.
        tissues: forwarded to ``_get_regulatory_regions_bed``.
        combine_mode: forwarded to ``_combine_beds``.
        upstream, downstream: flank sizes, scaled by 1000 as described above.
        *args: accepted and ignored.

    Returns:
        A one-element list holding the intersected BED, or ``[]`` when either
        the per-tissue BED list or the flanking windows are falsy.
    """
    BedsProcessor.logger().info(
        "Extracting ENCODE enhancers for genes (%s): %s; tissues: %s; and combine_mode: %s"
        % (genome, genes, tissues, combine_mode))

    upstream_scaled = int(float(upstream) * 1000)
    downstream_scaled = int(float(downstream) * 1000)
    gene_windows = BedsProcessor._get_gene_promoter_sites(genes, genome, upstream_scaled, downstream_scaled)
    tissue_beds = BedsProcessor._get_regulatory_regions_bed(
        genome,
        tissues,
        RegulatoryRegionsFilesRegistry.ENCODE_ENHANCERS_KEY,
        gene_windows.sort().merge(),
    )

    BedsProcessor.log_count("Flanked genes' promoters BED", gene_windows)
    BedsProcessor.log_bed(gene_windows)
    BedsProcessor.logger().info("ENCODE enhancer BEDs list:\n%s" % str(tissue_beds))

    # Nothing to intersect when either side is empty.
    if not (tissue_beds and gene_windows):
        BedsProcessor.logger().info("Returning empty enhancers list")
        return []

    combined_enhancers = BedsProcessor._combine_beds(tissue_beds, combine_mode)
    BedsProcessor.log_count("Combined ENCODE enhancers BED", combined_enhancers)
    BedsProcessor.log_bed(combined_enhancers)

    # u=True is passed through to BedOperations.intersect
    # (presumably bedtools' -u flag -- confirm against BedOperations).
    overlapping = BedOperations.intersect([combined_enhancers, gene_windows], u=True).result
    BedsProcessor.log_count("ENCODE enhancers BED intersected with genes' promoters", overlapping)
    BedsProcessor.log_bed(overlapping)
    return [overlapping]
def get_bed_fragment(self, tissue, source_symbol, regions):
    """
    Get slice of a BED.

    Filtering on non-tabixed BED files is not supported.
    If regions is None, entire BED is returned.

    Args:
        tissue: key into ``self._available_tissues``.
        source_symbol: key into the per-tissue mapping; also used as the
            prefix of the track name attached to the returned BED.
        regions: iterable of region filters, each passed to
            ``BedLoader.filter_by``, or ``None`` for the whole BED.

    Returns:
        The BED returned by ``BedOperations.add_name``, or ``None`` when the
        tissue has no entry for ``source_symbol`` or when none of the
        ``regions`` yields a truthy BED.

    Raises:
        InvalidTissueNameException: if ``tissue`` is not a known tissue key.
    """
    fragment_prefix = "fragment [{}] from ".format(regions) if regions else ""
    self.logger.info('Requested {}tissue [{}] from source [{}]'.format(
        fragment_prefix, tissue, source_symbol))

    if tissue not in self._available_tissues:
        raise InvalidTissueNameException(
            "Querried tissue [%s] was not among available tissue keys:\n%s" %
            (tissue, str(self._available_tissues.keys())))

    # Only the registry lookup is guarded: a KeyError raised later by the
    # loader or by BED operations is a different failure and must not be
    # reported as "no tissue in source".
    try:
        bed_path = self._available_tissues[tissue][source_symbol]
    except KeyError:
        self.logger.info('No tissue [%s] in source [%s]' % (tissue, source_symbol))
        return None

    # Track name: "<source>(<tissue text before the first '(', spaces -> '_'>)".
    tissue_label = tissue.split('(')[0].strip().replace(" ", "_")
    track_name = source_symbol + "(" + tissue_label + ")"
    self.logger.info('Found %s. Adding name %s' % (bed_path, track_name))

    full_bed = BedLoader(bed_path)
    if regions is None:
        return BedOperations.add_name(full_bed.bed, track_name)

    # Keep only the truthy per-region results.
    fragments = [
        fragment
        for fragment in (full_bed.filter_by(region) for region in regions)
        if fragment
    ]
    if not fragments:
        return None

    filtered_bed = BedOperations.union(fragments, merge=False).result
    return BedOperations.add_name(filtered_bed, track_name)
def get_genes_bed(genes, genome, *args):
    """Look up ``genes`` in the genes database and return their BED.

    The BED comes from ``GenesDBRegistry.get_instance().get_bed(genome, genes)``,
    is wrapped in a one-element list and passed through
    ``BedOperations.union``.

    Args:
        genes: gene symbols to query; also interpolated into the log message.
        genome: genome identifier passed to the registry.
        *args: accepted and ignored.

    Returns:
        A one-element list holding the ``.result`` of the union.
    """
    BedsProcessor.logger().info("Querying gene database for %s" % genes)

    looked_up = [GenesDBRegistry.get_instance().get_bed(genome, genes)]
    BedsProcessor.log_count("Result BED file", looked_up)
    BedsProcessor.log_bed(looked_up)

    united = BedOperations.union(looked_up).result
    BedsProcessor.log_count("Union of the BED files", united)
    BedsProcessor.log_bed(united)
    return [united]
def _get_accessible_mirnas(mirna_symbols, tissues, genome, combine_mode):
    """Return the miRNA regions that lie in accessible chromatin of ``tissues``.

    Args:
        mirna_symbols, genome: forwarded to ``get_genes_bed``.
        tissues: forwarded to ``_get_regulatory_regions_bed`` under
            ``RegulatoryRegionsFilesRegistry.ENCODE_CHROMATIN_KEY``.
        combine_mode: forwarded to ``_combine_beds``.

    Returns:
        The ``.result`` of intersecting the miRNA BED with the combined
        chromatin BED, or ``None`` when the miRNA BED is ``None`` or no
        chromatin BED is truthy.
    """
    # get_genes_bed returns a one-element list.
    mirna_beds = BedsProcessor.get_genes_bed(mirna_symbols, genome)
    mirna_regions = mirna_beds[0]
    if mirna_regions is None:
        return None

    # Restrict the chromatin lookup to the (sorted, merged) miRNA regions.
    chromatin_beds = BedsProcessor._get_regulatory_regions_bed(
        genome,
        tissues,
        RegulatoryRegionsFilesRegistry.ENCODE_CHROMATIN_KEY,
        mirna_regions.sort().merge(),
    )
    if not any(chromatin_beds):
        return None

    open_chromatin = BedsProcessor._combine_beds(chromatin_beds, combine_mode)
    return BedOperations.intersect(mirna_beds + [open_chromatin], merge=False).result
def perform():
    """Collect all requested BEDs, combine them and return a summary.

    The BED lists produced by ``BedsCollector(...).collect_bed_files()`` are
    flattened into one list. More than one BED is combined with
    ``BedOperations.union`` (``merge=True``, ``c=4``, ``o="distinct"``);
    otherwise the first BED is used as-is. The result is saved through
    ``save_as_tmp`` and the temporary file's name is stored in
    ``session["last_result"]``.

    Returns:
        Whatever ``return_summary`` returns for the final BED and the elapsed
        wall-clock seconds, or the string ``"Error occurred"`` if anything
        raises. This includes the ``IndexError`` hit when no BED was
        collected at all.
    """
    try:
        started_at = time.time()
        params = get_perform_params()
        beds_by_source = BedsCollector(params).collect_bed_files()

        # Flatten the per-source lists into a single list of BEDs.
        all_beds = []
        for source_beds in beds_by_source.values():
            all_beds.extend(source_beds)

        if len(all_beds) > 1:
            final_result = BedOperations.union(all_beds, merge=True, c=4, o="distinct").result
        else:
            final_result = all_beds[0]

        tmp_file = save_as_tmp(final_result)
        session["last_result"] = tmp_file.name

        elapsed = time.time() - started_at
        return return_summary(final_result, elapsed)
    except Exception:
        # Top-level boundary: log the traceback, answer with a generic message.
        app.logger.exception("Error occurred, details:")
        return "Error occurred"
def _get_gene_promoter_sites(genes, genome, upstream, downstream):
    """Return the promoter regions of ``genes``.

    Args:
        genes, genome: forwarded to ``get_genes_bed``; only the first element
            of its returned list is used.
        upstream, downstream: forwarded to ``BedOperations.get_promoter_region``.

    Returns:
        The ``.result`` of ``BedOperations.get_promoter_region``.
    """
    gene_beds = BedsProcessor.get_genes_bed(genes, genome)
    return BedOperations.get_promoter_region(gene_beds[0], upstream, downstream).result