Примеры использования MartsAdapter (Python)

Язык программирования: Python

Пространство имен/Пакет: Fred2.IO

Класс/Тип: MartsAdapter

Примеров на hotexamples.com: 6

Python MartsAdapter — найдено 6 примеров. Это лучшие примеры Python-кода для Fred2.IO.MartsAdapter, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MartsAdapter(5)

get_ensembl_ids_from_id(1)

get_product_sequence(1)

Пример #1

Показать файл

    def test_peptides_from_varaints(self):
        """Smoke-test peptide generation for two variants on NM_080751.

        Peptides of length 9 are produced twice: once directly from the
        variants and once step by step (variants -> transcripts -> proteins
        -> peptides).  The method only prints the sizes of the intermediate
        results; it makes no assertions.
        """
        coding = {
            'NM_080751': MutationSyntax('NM_080751', 2629, 876,
                                        'c.2630C>T', 'p.Pro877Leu'),
        }
        snp = Variant('line0', 0, 20, 2621905, 'C', 'T', coding, True, False)
        snp.gene = 'TMC2'
        ma = MartsAdapter(biomart="http://ensembl.org")

        insertion = Variant(
            "testInsertion", 2, 20, 2621899, "", "AAAAAA",
            {'NM_080751': MutationSyntax('NM_080751', 2625, 876,
                                         'c.2630C>T', 'p.Pro877Leu')},
            True, False)
        # Named `variants` rather than `vars` so the builtin is not shadowed.
        variants = [snp, insertion]

        # Direct route: variants -> peptides.
        direct_peptides = list(
            Generator.generate_peptides_from_variants(
                variants, 9, ma, id_type=EIdentifierTypes.REFSEQ,
                peptides=None))
        print(len(direct_peptides))

        # Using a tweaked generator that takes another sequence source if the
        # sequence is too short with respect to the given variants -- in this
        # case a newer/older sequence from the mart than the one used as
        # reference in the annotation process.
        transcripts = list(
            Generator.generate_transcripts_from_variants(
                variants, ma, id_type=EIdentifierTypes.REFSEQ))
        print(len(transcripts[0]))

        proteins = list(
            Generator.generate_proteins_from_transcripts(transcripts,
                                                         to_stop=True))
        stepwise_peptides = list(
            Generator.generate_peptides_from_proteins(proteins, 9))
        print(len(stepwise_peptides))

        # Function-call form prints the same value on Python 2 and parses on
        # Python 3, matching the other print calls in this method.
        print(len(variants))

Пример #2

Показать файл

    def setUp(self):
        """Build the variant fixtures and the mart adapter used by the tests.

        All three variants are annotated on transcript NM_001114377 and
        located on chromosome "X".
        """
        foxp3 = "NM_001114377"  # FOXP3
        self.trid = foxp3

        # Variant positional arguments, in order:
        #   id, type, chrom, genomePos, ref, obs, coding, isHomozygous,
        #   isSynonymous (metadata is left at its default).
        snp_coding = {foxp3: MutationSyntax(foxp3, 756, 217, "", "")}
        self.non_syn_hetero_snp = Variant(
            "COSM1122493", VariationType.SNP, "X", 49111949, "G", "T",
            snp_coding, False, False)

        del_coding = {foxp3: MutationSyntax(foxp3, 615, 205, "", "")}
        self.non_frame_shift_del = Variant(
            "COSM1122495", VariationType.DEL, "X", 49113232, "CTT", "",
            del_coding, True, False)

        # NOTE(review): going by the argument order above, this fixture is
        # created with isHomozygous=False although its name says "homo" --
        # confirm which of the two is intended.
        syn_coding = {foxp3: MutationSyntax(foxp3, 653, 217, "", "")}
        self.syn_homo_snp = Variant(
            "COSM1122494", VariationType.SNP, "X", 49112257, "C", "T",
            syn_coding, False, True)

        self.db_adapter = MartsAdapter(
            biomart="http://grch37.ensembl.org/biomart/martservice?query=")

Пример #3

Показать файл

Файл: TestGenerator.py Проект: horse90/Fred2

    def test_real_life_test(self):
        """Check that both peptide-generation routes agree on real data.

        Reads the ANNOVAR example file shipped in the Fred2 package directory
        and asserts that the 9-mers generated directly from the variants are
        the same set (compared by their string form) as the 9-mers generated
        via transcripts and proteins.
        """
        mart = MartsAdapter(
            biomart="http://grch37.ensembl.org/biomart/martservice?query=")

        package_dir = os.path.dirname(inspect.getfile(Fred2))
        ano_path = os.path.join(package_dir, "Data/examples/test_annovar.out")
        variants = read_annovar_exonic(ano_path)

        # Route 1: variants -> peptides.
        direct = Generator.generate_peptides_from_variants(
            variants, 9, mart, EIdentifierTypes.REFSEQ)
        peps = set(str(pep) for pep in direct)

        # Route 2: variants -> transcripts -> proteins -> peptides.
        transcripts = Generator.generate_transcripts_from_variants(
            variants, mart, EIdentifierTypes.REFSEQ)
        proteins = Generator.generate_proteins_from_transcripts(transcripts)
        stepwise = Generator.generate_peptides_from_proteins(proteins, 9)
        peps_from_prot = set(str(pep) for pep in stepwise)

        # Neither set may contain a peptide the other one lacks.
        only_direct = peps - peps_from_prot
        self.assertTrue(not only_direct)
        only_stepwise = peps_from_prot - peps
        self.assertTrue(not only_stepwise)

Пример #4

Показать файл

def main():
    """Run the neoepitope prediction command-line tool.

    Parses the command line and builds candidate peptides either from an
    annotated variant file (``--vcf``) or, when no variant file is given,
    from the gene IDs listed in ``--proteins``.  The selected prediction
    method is then run for the alleles read from ``--alleles`` and the
    scores are written as a tab-separated table to ``--output``.  With the
    hidden ``--etk`` flag a second table is written next to the output file
    using the suffix ``_etk.tsv``.

    Returns:
        int: 0 on success; -1 if neither ``--vcf`` nor ``--proteins`` was
        given, or if no variants are left after filtering.
    """
    model = argparse.ArgumentParser(
        description='Neoepitope prediction for TargetInsepctor.')

    model.add_argument(
        '-m',
        '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method')

    model.add_argument('-v',
                       '--vcf',
                       type=str,
                       default=None,
                       help='Path to the vcf input file')

    model.add_argument(
        '-t',
        '--type',
        type=str,
        choices=["VEP", "ANNOVAR", "SNPEFF"],
        default="VEP",
        help=
        'Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)'
    )

    model.add_argument('-p',
                       '--proteins',
                       type=str,
                       default=None,
                       help='Path to the protein ID input file (in HGNC-ID)')

    model.add_argument('-l',
                       '--length',
                       choices=range(8, 18),
                       type=int,
                       default=9,
                       help='The length of peptides')

    model.add_argument(
        '-a',
        '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)')

    model.add_argument(
        '-r',
        '--reference',
        type=str,
        default='GRCh38',
        help='The reference genome used for varinat annotation and calling.')

    model.add_argument(
        '-fINDEL',
        '--filterINDEL',
        action="store_true",
        help='Filter insertions and deletions (including frameshifts)')

    model.add_argument('-fFS',
                       '--filterFSINDEL',
                       action="store_true",
                       help='Filter frameshift INDELs')

    model.add_argument('-fSNP',
                       '--filterSNP',
                       action="store_true",
                       help='Filter SNPs')

    model.add_argument('-o',
                       '--output',
                       type=str,
                       required=True,
                       help='Path to the output file')
    model.add_argument('-etk',
                       '--etk',
                       action="store_true",
                       help=argparse.SUPPRESS)

    args = model.parse_args()

    martDB = MartsAdapter(biomart=MARTDBURL[args.reference.upper()])
    # transcript ID -> gene ID; fills the "Antigen ID" column of the output.
    transcript_to_genes = {}

    if args.vcf is None and args.proteins is None:
        sys.stderr.write(
            "At least a vcf file or a protein id file has to be provided.\n")
        return -1

    # If a vcf file is given: generate variants and filter them if HGNC IDs
    # are given.
    if args.vcf is not None:
        protein_ids = []
        if args.proteins is not None:
            with open(args.proteins, "r") as f:
                protein_ids = [l.strip() for l in f if l.strip()]

        if args.type == "VEP":
            variants = read_variant_effect_predictor(args.vcf,
                                                     gene_filter=protein_ids)
        elif args.type == "SNPEFF":
            # read_vcf is called without the gene filter.
            variants = read_vcf(args.vcf)[0]
        else:
            variants = read_annovar_exonic(args.vcf, gene_filter=protein_ids)

        # List comprehensions (rather than filter()) keep the emptiness check
        # below valid on both Python 2 and Python 3.
        variants = [v for v in variants if v.type != VariationType.UNKNOWN]

        if args.filterSNP:
            variants = [v for v in variants if v.type != VariationType.SNP]

        if args.filterINDEL:
            indel_types = [
                VariationType.INS, VariationType.DEL, VariationType.FSDEL,
                VariationType.FSINS
            ]
            variants = [v for v in variants if v.type not in indel_types]

        if args.filterFSINDEL:
            frameshift_types = [VariationType.FSDEL, VariationType.FSINS]
            variants = [v for v in variants if v.type not in frameshift_types]

        if not variants:
            sys.stderr.write(
                "No variants left after filtering. Please refine your filtering criteria.\n"
            )
            return -1

        # Keep only peptides that carry a variant in at least one protein.
        epitopes = [
            pep for pep in generate_peptides_from_variants(
                variants, int(args.length), martDB, EIdentifierTypes.ENSEMBL)
            if any(
                pep.get_variants_by_protein(tid)
                for tid in pep.proteins.iterkeys())
        ]

        for v in variants:
            for trans_id, coding in v.coding.iteritems():
                # A missing gene ID is stored as the string 'None' so it can
                # be joined into the output line later.
                if coding.geneID is not None:
                    transcript_to_genes[trans_id] = coding.geneID
                else:
                    transcript_to_genes[trans_id] = 'None'

    # Else: generate protein sequences from the given HGNC IDs and then
    # epitopes.
    else:
        proteins = []
        with open(args.proteins, "r") as f:
            for l in f:
                gene_id = l.strip()
                if not gene_id:
                    # Blank lines must not be sent to the mart as an ID.
                    continue
                hits = martDB.get_ensembl_ids_from_id(
                    gene_id, type=EIdentifierTypes.HGNC)
                if not hits:
                    # The first hit is indexed below; an empty or missing
                    # result would otherwise abort the whole run.
                    sys.stderr.write(
                        "No Ensembl IDs found for '%s'; skipping.\n" % gene_id)
                    continue
                ensembl_ids = hits[0]
                protein_seq = martDB.get_product_sequence(
                    ensembl_ids[EAdapterFields.PROTID])
                if protein_seq is not None:
                    transcript_id = ensembl_ids[EAdapterFields.TRANSID]
                    transcript_to_genes[transcript_id] = gene_id
                    proteins.append(
                        Protein(protein_seq,
                                gene_id=gene_id,
                                transcript_id=transcript_id))
        epitopes = generate_peptides_from_proteins(proteins, int(args.length))

    # Read in the allele list.
    alleles = read_lines(args.alleles, in_type=Allele)

    result = EpitopePredictorFactory(args.method).predict(epitopes,
                                                          alleles=alleles)

    def gene_ids(peptide):
        """Return the set of gene IDs of all proteins of *peptide*."""
        # Transcript IDs are looked up by the part before ":FRED2".
        return set(
            transcript_to_genes[prot.transcript_id.split(":FRED2")[0]]
            for prot in peptide.get_all_proteins())

    with open(args.output, "w") as f:
        alleles = result.columns
        var_column = " Variants" if args.vcf is not None else ""
        f.write("Sequence\tMethod\t" + "\t".join(a.name for a in alleles) +
                "\tAntigen ID\t" + var_column + "\n")
        # Each index entry is read as (peptide, method name).
        for index, row in result.iterrows():
            p = index[0]
            method = index[1]
            gene_column = ",".join(gene_ids(p))
            vars_str = ""

            if args.vcf is not None:
                # One "<protein id>:<variant reprs>" entry per protein that
                # actually carries variants, joined by "|".
                vars_str = "\t" + "|".join(
                    set(
                        prot_id.split(":FRED2")[0] + ":" + ",".join(
                            repr(v)
                            for v in set(p.get_variants_by_protein(prot_id)))
                        for prot_id in p.proteins.iterkeys()
                        if p.get_variants_by_protein(prot_id)))

            f.write(
                str(p) + "\t" + method + "\t" + "\t".join("%.3f" % row[a]
                                                          for a in alleles) +
                "\t" + gene_column + vars_str + "\n")

    if args.etk:
        # Same scores without the method column; gene IDs are separated by
        # spaces here.
        with open(args.output.rsplit(".", 1)[0] + "_etk.tsv", "w") as g:
            alleles = result.columns
            g.write("Alleles:\t" + "\t".join(a.name for a in alleles) + "\n")
            for index, row in result.iterrows():
                p = index[0]
                gene_column = " ".join(gene_ids(p))
                g.write(
                    str(p) + "\t" + "\t".join("%.3f" % row[a]
                                              for a in alleles) + "\t" +
                    gene_column + "\n")
    return 0

Пример #5

Показать файл

def main():
    """Write variant-carrying protein sequences from a variant file as FASTA.

    Parses the command line, reads the annotated variants from ``--vcf``
    (optionally restricted to the gene IDs listed in ``--proteins``), applies
    the requested variant-type filters, generates transcripts and proteins
    from the remaining variants and writes every protein that carries at
    least one variant to ``--output``.

    Returns:
        int: 0 on success; -1 if no vcf file was given or if no variants are
        left after filtering.
    """
    model = argparse.ArgumentParser(
        description='Neoepitope protein fasta generation from variant vcf')

    model.add_argument('-v',
                       '--vcf',
                       type=str,
                       default=None,
                       help='Path to the vcf input file')

    model.add_argument(
        '-t',
        '--type',
        type=str,
        choices=["VEP", "ANNOVAR", "SnpEff"],
        default="VEP",
        help=
        'Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)'
    )

    model.add_argument('-p',
                       '--proteins',
                       type=str,
                       default=None,
                       help='Path to the protein ID input file (in HGNC-ID)')

    model.add_argument(
        '-r',
        '--reference',
        type=str,
        default='GRCh38',
        help='The reference genome used for varinat annotation and calling.')

    model.add_argument(
        '-fINDEL',
        '--filterINDEL',
        action="store_true",
        help='Filter insertions and deletions (including frameshifts)')

    model.add_argument('-fFS',
                       '--filterFSINDEL',
                       action="store_true",
                       help='Filter frameshift INDELs')

    model.add_argument('-fSNP',
                       '--filterSNP',
                       action="store_true",
                       help='Filter SNPs')

    model.add_argument('-o',
                       '--output',
                       type=str,
                       required=True,
                       help='Path to the output file')

    args = model.parse_args()

    martDB = MartsAdapter(biomart=MARTDBURL[args.reference.upper()])

    # The variant file is the only supported input of this tool; the protein
    # ID file merely acts as a gene filter.  Returning here makes a second
    # "no input" branch further down unnecessary.
    if args.vcf is None:
        sys.stderr.write(
            "At least a vcf file or a protein id file has to be provided.\n")
        return -1

    # Optional gene filter: one ID per line, blank lines ignored.
    protein_ids = []
    if args.proteins is not None:
        with open(args.proteins, "r") as f:
            protein_ids = [l.strip() for l in f if l.strip()]

    # The parser offers the choice "SnpEff", so the comparison has to be
    # case-insensitive; an exact match against "SNPEFF" could never succeed
    # and SnpEff input would be handed to the ANNOVAR reader.
    annotation_type = args.type.upper()
    if annotation_type == "VEP":
        variants = read_variant_effect_predictor(args.vcf,
                                                 gene_filter=protein_ids)
    elif annotation_type == "SNPEFF":
        # read_vcf is called without the gene filter.
        variants = read_vcf(args.vcf)[0]
    else:
        variants = read_annovar_exonic(args.vcf, gene_filter=protein_ids)

    # Drop variants of unknown type before the emptiness check so that an
    # input consisting only of such variants is reported as "nothing left".
    # List comprehensions keep that check valid on Python 2 and Python 3.
    variants = [v for v in variants if v.type != VariationType.UNKNOWN]

    if args.filterSNP:
        variants = [v for v in variants if v.type != VariationType.SNP]

    if args.filterINDEL:
        indel_types = [
            VariationType.INS, VariationType.DEL, VariationType.FSDEL,
            VariationType.FSINS
        ]
        variants = [v for v in variants if v.type not in indel_types]

    if args.filterFSINDEL:
        frameshift_types = [VariationType.FSDEL, VariationType.FSINS]
        variants = [v for v in variants if v.type not in frameshift_types]

    if not variants:
        sys.stderr.write(
            "No variants left after filtering. Please refine your filtering criteria.\n"
        )
        return -1

    # Generate transcripts.
    transcripts = generate_transcripts_from_variants(
        variants, martDB, EIdentifierTypes.ENSEMBL)

    # Generate proteins, keeping only those that carry at least one variant.
    proteins = [
        prot for prot in generate_proteins_from_transcripts(transcripts)
        if any(
            prot.get_variants_by_protein(tid)
            for tid in prot.proteins.iterkeys())
    ]

    # Write the FASTA file to the path given on the command line (the bare
    # name `output` is not defined anywhere in this function).
    with open(args.output, "w") as f:
        for p in proteins:
            f.write('>' + str(p.transcript_id) + '|' + str(p.vars) +
                    '_var_' + '\n')
            f.write(str(p) + '\n')

    return 0

Пример #6

Показать файл

Файл: neoepitopeprediction.py Проект: FRED-2/ImmunoNodes

def main():
    """Command-line entry point of the neoepitope prediction node.

    Candidate peptides come either from an annotated variant file
    (``--vcf``) or, if none is given, from the gene IDs listed in
    ``--proteins``.  They are scored with the chosen prediction method for
    the alleles in ``--alleles`` and the result is written tab-separated to
    ``--output``.  The hidden ``--etk`` switch additionally writes a reduced
    table to ``<output without extension>_etk.tsv``.

    Returns:
        int: 0 on success; -1 if neither ``--vcf`` nor ``--proteins`` was
        given, or if no variants are left after filtering.
    """
    model = argparse.ArgumentParser(description='Neoepitope prediction for TargetInsepctor.')

    model.add_argument(
        '-m', '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method'
        )

    model.add_argument(
        '-v', '--vcf',
        type=str,
        default=None,
        help='Path to the vcf input file'
        )

    model.add_argument(
        '-t', '--type',
        type=str,
        choices=["VEP", "ANNOVAR", "SNPEFF"],
        default="VEP",
        help='Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)'
        )

    model.add_argument(
        '-p', '--proteins',
        type=str,
        default=None,
        help='Path to the protein ID input file (in HGNC-ID)'
        )

    model.add_argument(
        '-l', '--length',
        choices=range(8, 18),
        type=int,
        default=9,
        help='The length of peptides'
        )

    model.add_argument(
        '-a', '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)'
        )

    model.add_argument(
        '-r', '--reference',
        type=str,
        default='GRCh38',
        help='The reference genome used for varinat annotation and calling.'
        )

    model.add_argument(
        '-fINDEL', '--filterINDEL',
        action="store_true",
        help='Filter insertions and deletions (including frameshifts)'
        )

    model.add_argument(
        '-fFS', '--filterFSINDEL',
        action="store_true",
        help='Filter frameshift INDELs'
        )

    model.add_argument(
        '-fSNP', '--filterSNP',
        action="store_true",
        help='Filter SNPs'
        )

    model.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )
    model.add_argument(
        '-etk', '--etk',
        action="store_true",
        help=argparse.SUPPRESS
        )

    args = model.parse_args()

    martDB = MartsAdapter(biomart=MARTDBURL[args.reference.upper()])
    # transcript ID -> gene ID; used for the "Antigen ID" output column.
    transcript_to_genes = {}

    if args.vcf is None and args.proteins is None:
        sys.stderr.write("At least a vcf file or a protein id file has to be provided.\n")
        return -1

    # If a vcf file is given: generate variants and filter them if HGNC IDs
    # are given.
    if args.vcf is not None:
        protein_ids = []
        if args.proteins is not None:
            with open(args.proteins, "r") as f:
                protein_ids = [l.strip() for l in f if l.strip()]

        if args.type == "VEP":
            variants = read_variant_effect_predictor(args.vcf, gene_filter=protein_ids)
        elif args.type == "SNPEFF":
            # read_vcf is called without the gene filter.
            variants = read_vcf(args.vcf)[0]
        else:
            variants = read_annovar_exonic(args.vcf, gene_filter=protein_ids)

        # Collect the variant types to drop.  Building a real list (instead
        # of chaining filter()) keeps the emptiness check below valid on both
        # Python 2 and Python 3.
        excluded_types = [VariationType.UNKNOWN]
        if args.filterSNP:
            excluded_types.append(VariationType.SNP)
        if args.filterINDEL:
            excluded_types.extend([VariationType.INS,
                                   VariationType.DEL,
                                   VariationType.FSDEL,
                                   VariationType.FSINS])
        if args.filterFSINDEL:
            excluded_types.extend([VariationType.FSDEL, VariationType.FSINS])
        variants = [v for v in variants if v.type not in excluded_types]

        if not variants:
            sys.stderr.write("No variants left after filtering. Please refine your filtering criteria.\n")
            return -1

        # Keep only peptides that carry a variant in at least one protein.
        candidates = generate_peptides_from_variants(variants, int(args.length),
                                                     martDB, EIdentifierTypes.ENSEMBL)
        epitopes = [pep for pep in candidates
                    if any(pep.get_variants_by_protein(tid) for tid in pep.proteins.iterkeys())]

        for v in variants:
            for trans_id, coding in v.coding.iteritems():
                # A missing gene ID is stored as the string 'None' so it can
                # be joined into the output line later.
                if coding.geneID is not None:
                    transcript_to_genes[trans_id] = coding.geneID
                else:
                    transcript_to_genes[trans_id] = 'None'

    # Else: generate protein sequences from the given HGNC IDs and then
    # epitopes.
    else:
        proteins = []
        with open(args.proteins, "r") as f:
            for l in f:
                gene_id = l.strip()
                if not gene_id:
                    # Blank lines must not be sent to the mart as an ID.
                    continue
                hits = martDB.get_ensembl_ids_from_id(gene_id, type=EIdentifierTypes.HGNC)
                if not hits:
                    # The first hit is indexed below; an empty or missing
                    # result would otherwise abort the whole run.
                    sys.stderr.write("No Ensembl IDs found for '%s'; skipping.\n" % gene_id)
                    continue
                ensembl_ids = hits[0]
                protein_seq = martDB.get_product_sequence(ensembl_ids[EAdapterFields.PROTID])
                if protein_seq is not None:
                    transcript_id = ensembl_ids[EAdapterFields.TRANSID]
                    transcript_to_genes[transcript_id] = gene_id
                    proteins.append(Protein(protein_seq, gene_id=gene_id, transcript_id=transcript_id))
        epitopes = generate_peptides_from_proteins(proteins, int(args.length))

    # Read in the allele list.
    alleles = read_lines(args.alleles, in_type=Allele)

    result = EpitopePredictorFactory(args.method).predict(epitopes, alleles=alleles)

    def gene_ids(peptide):
        """Return the set of gene IDs of all proteins of *peptide*."""
        # Transcript IDs are looked up by the part before ":FRED2".
        return set(transcript_to_genes[prot.transcript_id.split(":FRED2")[0]]
                   for prot in peptide.get_all_proteins())

    with open(args.output, "w") as f:
        alleles = result.columns
        var_column = " Variants" if args.vcf is not None else ""
        f.write("Sequence\tMethod\t"+"\t".join(a.name for a in alleles)+"\tAntigen ID\t"+var_column+"\n")
        # Each index entry is read as (peptide, method name).
        for index, row in result.iterrows():
            p = index[0]
            method = index[1]
            gene_column = ",".join(gene_ids(p))
            vars_str = ""

            if args.vcf is not None:
                # One "<protein id>:<variant reprs>" entry per protein that
                # actually carries variants, joined by "|".
                vars_str = "\t"+"|".join(set(
                    prot_id.split(":FRED2")[0]+":"+",".join(repr(v) for v in set(p.get_variants_by_protein(prot_id)))
                    for prot_id in p.proteins.iterkeys()
                    if p.get_variants_by_protein(prot_id)))

            f.write(str(p)+"\t"+method+"\t"+"\t".join("%.3f" % row[a] for a in alleles)+"\t"+gene_column+vars_str+"\n")

    if args.etk:
        # Same scores without the method column; gene IDs are separated by
        # spaces here.
        with open(args.output.rsplit(".", 1)[0]+"_etk.tsv", "w") as g:
            alleles = result.columns
            g.write("Alleles:\t"+"\t".join(a.name for a in alleles)+"\n")
            for index, row in result.iterrows():
                p = index[0]
                gene_column = " ".join(gene_ids(p))
                g.write(str(p)+"\t"+"\t".join("%.3f" % row[a] for a in alleles)+"\t"+gene_column+"\n")
    return 0