Пример #1
0
    def getChr(self, chr):
        """
        Return the genes regions of one chromosome, loading them on first request.

        The regions already stored in ``self.model`` for this chromosome are
        returned directly; otherwise they are loaded from ``self.filepath``
        and stored in ``self.model`` before being returned.

        :param chr: The chromosome name.
        :type chr: str
        :return: Genes regions on the specified chr.
        :rtype: anacore.region.RegionList
        """
        cache = self.model
        if chr in cache:
            return cache[chr]
        # First request for this chromosome: load its genes and keep them for later calls.
        cache[chr] = loadModel(self.filepath, "genes", chr)
        return cache[chr]
Пример #2
0
    # Logger named after the running script file, with its level set to INFO.
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    # Trace the full command line used to launch the script.
    log.info("Command: " + " ".join(sys.argv))

    # Load selected regions
    log.info("Load targeted regions.")
    selected_regions = getTargets(args.input_aln, args.input_targets)

    # Find shallow areas
    log.info("Find shallow areas.")
    shallow = shallowFromAlignment(args.input_aln, selected_regions, args.depth_mode, args.min_depth, log)

    # Annotate shallow areas (optional: skipped when no annotations file is provided)
    if args.input_annotations is not None:
        log.info("Load annotations from {}.".format(args.input_annotations))
        transcripts = loadModel(args.input_annotations, "transcripts")
        log.info("Annotate shallow areas.")
        # The return value is not used; presumably `shallow` is annotated in place - TODO confirm.
        setTranscriptsAnnotByOverlap(shallow, transcripts)

    # Retrieve known variants potentially masked in shallow areas (one pass per variants file)
    for curr_input in args.inputs_variants:
        log.info("Load variants from {}.".format(curr_input))
        # Filtering threshold and the names of the fields to read come from the command line.
        variant_regions = variantsRegionFromVCF(
            curr_input,
            args.known_min_count,
            args.known_symbol_field,
            args.known_hgvsc_field,
            args.known_hgvsp_field,
            args.known_count_field
        )
        # NOTE(review): "potentialy" below is a typo, but the message is runtime output and is left as is.
        log.info("List potentialy masked mutations.")
    # Command line arguments: input files
    # NOTE(review): neither -a nor -d is declared with required=True, yet both values are used
    # unconditionally below (loadModel and HashedSVIO) - confirm whether they should be mandatory.
    group_input = parser.add_argument_group('Inputs')  # Inputs
    group_input.add_argument('-a', '--input-annotations', help='Path to the genomic annotations file (format: GTF). It contains coordinates for transcripts, exon and cds.')
    group_input.add_argument('-d', '--input-domains', help="Path to the domains annotations file (format: TSV). It contains InterPro domains extracted from ensembl's biomart (mandatory fields: 'Transcript stable ID version', 'Protein stable ID version', 'Interpro ID', 'Interpro Short Description', 'Interpro Description', 'Interpro start', 'Interpro end').")
    # Command line arguments: output file
    group_output = parser.add_argument_group('Outputs')  # Outputs
    group_output.add_argument('-o', '--output-annotations', required=True, help='Path to the domains annotations file (format: GFF).')
    args = parser.parse_args()

    # Logger
    # Message layout: timestamp, file name, process ID and level before the message itself.
    logging.basicConfig(format='%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s')
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    # Trace the full command line used to launch the script.
    log.info("Command: " + " ".join(sys.argv))

    # Load annotations: index the transcripts by the "id" entry of their annotations
    log.info("Load model from {}.".format(args.input_annotations))
    tr_by_id = {tr.annot["id"]: tr for tr in loadModel(args.input_annotations, "transcripts")}

    # Parse and convert domains data
    log.info("Parse and convert domains data from {}.".format(args.input_domains))
    domains_by_tr_id = dict()
    with HashedSVIO(args.input_domains) as reader:
        for record in reader:
            # Records with an empty InterPro ID are ignored
            if record['Interpro ID'] != "":
                # Domain boundaries are read as text and converted to integers
                record['Interpro start'] = int(record['Interpro start'])
                record['Interpro end'] = int(record['Interpro end'])
                # Keep only the part of the transcript ID located before the first "." (version suffix dropped)
                tr_id = record['Transcript stable ID version'].split(".", 1)[0]
                if tr_id not in tr_by_id:
                    # Domain on a transcript absent from the loaded annotations: warn without stopping
                    log.warning("The transcript {} is missing in {}.".format(tr_id, args.input_annotations))
                else:
                    domain_id = record['Interpro ID']
                    # Get genomic coordinates
Пример #4
0
        required=True,
        help='Path to the annotated file. (format: VCF).')
    args = parser.parse_args()

    # Logger
    # Message layout: timestamp, file name, process ID and level before the message itself.
    logging.basicConfig(
        format=
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    # Trace the full command line used to launch the script.
    log.info("Command: " + " ".join(sys.argv))

    # Load annotations
    log.info("Load model from {}.".format(args.input_annotations))
    genes = loadModel(args.input_annotations, "genes")
    # Presumably groups the loaded genes by reference sequence name - verify against splittedByRef.
    genes_by_chr = splittedByRef(genes)

    # Annot variants
    log.info("Annot variants in {}.".format(args.input_variants))
    with BreakendVCFIO(args.output_variants, "w",
                       args.annotation_field) as writer:
        with BreakendVCFIO(args.input_variants) as reader:
            # Header
            writer.copyHeader(reader)
            writer.ANN_titles = [
                "SYMBOL", "Gene", "Feature", "Feature_type", "Protein",
                "STRAND", "RNA_ELT_TYPE", "RNA_ELT_POS", "CDS_position",
                "Protein_position", "GENE_SHARD", "IN_FRAME"
            ]
            writer.info[args.annotation_field] = HeaderInfoAttr(
Пример #5
0
 def testLoadModelNCBI(self):
     """Check that the genes loaded from the NCBI GTF fixture match the expected model."""
     loaded = loadModel(self.tmp_ncbi_in_gtf, "genes")
     # Both models are compared through their bracket tree representation.
     expected_tree = toBracketTree(self.ncbi_expected)
     observed_tree = toBracketTree(loaded)
     self.assertEqual(expected_tree, observed_tree)
Пример #6
0
 def testLoadModelEnsembl(self):
     """Check that the genes loaded from the Ensembl GTF fixture match the expected model."""
     loaded = loadModel(self.tmp_ensembl_in_gtf, "genes")
     # Both models are compared through their bracket tree representation.
     expected_tree = toBracketTree(self.ensembl_expected)
     observed_tree = toBracketTree(loaded)
     self.assertEqual(expected_tree, observed_tree)