def getChr(self, chr):
    """
    Return the genes regions on the specified chr.

    The regions are loaded with loadModel() the first time a chromosome is
    requested, then stored in self.model and served from there on later calls.

    :param chr: The chromosome name.
    :type chr: str
    :return: Genes regions on the specified chr.
    :rtype: anacore.region.RegionList
    """
    loaded_by_chr = self.model
    # Cache hit: this chromosome has already been loaded.
    if chr in loaded_by_chr:
        return loaded_by_chr[chr]
    # Cache miss: load only the requested chromosome and remember it.
    loaded_by_chr[chr] = loadModel(self.filepath, "genes", chr)
    return loaded_by_chr[chr]
log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Load selected regions log.info("Load targeted regions.") selected_regions = getTargets(args.input_aln, args.input_targets) # Find shallow areas log.info("Find shallow areas.") shallow = shallowFromAlignment(args.input_aln, selected_regions, args.depth_mode, args.min_depth, log) # Annotate shallow areas if args.input_annotations is not None: log.info("Load annotations from {}.".format(args.input_annotations)) transcripts = loadModel(args.input_annotations, "transcripts") log.info("Annotate shallow areas.") setTranscriptsAnnotByOverlap(shallow, transcripts) # Retrieved known variants potentialy masked in shallow areas for curr_input in args.inputs_variants: log.info("Load variants from {}.".format(curr_input)) variant_regions = variantsRegionFromVCF( curr_input, args.known_min_count, args.known_symbol_field, args.known_hgvsc_field, args.known_hgvsp_field, args.known_count_field ) log.info("List potentialy masked mutations.")
group_input = parser.add_argument_group('Inputs') # Inputs group_input.add_argument('-a', '--input-annotations', help='Path to the genomic annotations file (format: GTF). It contains coordinates for transcripts, exon and cds.') group_input.add_argument('-d', '--input-domains', help="Path to the domains annotations file (format: TSV). It contains InterPro domains extracted from ensembl's biomart (mandatory fields: 'Transcript stable ID version', 'Protein stable ID version', 'Interpro ID', 'Interpro Short Description', 'Interpro Description', 'Interpro start', 'Interpro end').") group_output = parser.add_argument_group('Outputs') # Outputs group_output.add_argument('-o', '--output-annotations', required=True, help='Path to the domains annotations file (format: GFF).') args = parser.parse_args() # Logger logging.basicConfig(format='%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s') log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Load annotations log.info("Load model from {}.".format(args.input_annotations)) tr_by_id = {tr.annot["id"]: tr for tr in loadModel(args.input_annotations, "transcripts")} # Parse and convert domains data log.info("Parse and convert domains data from {}.".format(args.input_domains)) domains_by_tr_id = dict() with HashedSVIO(args.input_domains) as reader: for record in reader: if record['Interpro ID'] != "": record['Interpro start'] = int(record['Interpro start']) record['Interpro end'] = int(record['Interpro end']) tr_id = record['Transcript stable ID version'].split(".", 1)[0] if tr_id not in tr_by_id: log.warning("The transcript {} is missing in {}.".format(tr_id, args.input_annotations)) else: domain_id = record['Interpro ID'] # Get genomic coordinates
required=True, help='Path to the annotated file. (format: VCF).') args = parser.parse_args() # Logger logging.basicConfig( format= '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s' ) log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Load annotations log.info("Load model from {}.".format(args.input_annotations)) genes = loadModel(args.input_annotations, "genes") genes_by_chr = splittedByRef(genes) # Annot variants log.info("Annot variants in {}.".format(args.input_variants)) with BreakendVCFIO(args.output_variants, "w", args.annotation_field) as writer: with BreakendVCFIO(args.input_variants) as reader: # Header writer.copyHeader(reader) writer.ANN_titles = [ "SYMBOL", "Gene", "Feature", "Feature_type", "Protein", "STRAND", "RNA_ELT_TYPE", "RNA_ELT_POS", "CDS_position", "Protein_position", "GENE_SHARD", "IN_FRAME" ] writer.info[args.annotation_field] = HeaderInfoAttr(
def testLoadModelNCBI(self):
    """Check that the genes loaded from self.tmp_ncbi_in_gtf match self.ncbi_expected.

    Both sides are converted with toBracketTree() before being compared.
    """
    loaded = loadModel(self.tmp_ncbi_in_gtf, "genes")
    expected_tree = toBracketTree(self.ncbi_expected)
    loaded_tree = toBracketTree(loaded)
    self.assertEqual(expected_tree, loaded_tree)
def testLoadModelEnsembl(self):
    """Check that the genes loaded from self.tmp_ensembl_in_gtf match self.ensembl_expected.

    Both sides are converted with toBracketTree() before being compared.
    """
    loaded = loadModel(self.tmp_ensembl_in_gtf, "genes")
    expected_tree = toBracketTree(self.ensembl_expected)
    loaded_tree = toBracketTree(loaded)
    self.assertEqual(expected_tree, loaded_tree)