Example #1
    def setUp(self):
        self.proteins = []
        self.alleles = [Allele("HLA-A*01:01"), Allele("HLA-B*07:02"), Allele("HLA-C*03:01")]
        self.peptides = [Peptide(p) for p in """SFSIFLLAL
GHRMAWDMM
VYEADDVIL
CFTPSPVVV
FLLLADARV
GPADGMVSK
YLYDHLAPM
GLRDLAVAV
GPTPLLYRL
TWVLVGGVL
IELGGKPAL
LAGGVLAAV
QYLAGLSTL
NFVSGIQYL
VLSDFKTWL
ARPDYNPPL
KLLPRLPGV
RHTPVNSWL
GLYLFNWAV
ALYDVVSTL
RRCRASGVL
WPLLLLLLA
VTYSLTGLW
YFVIFFVAA""".split()]
        self.result = EpitopePredictorFactory("BIMAS").predict(self.peptides, self.alleles)
        self.thresh = {"A*01:01": 10, "B*07:02": 10, "C*03:01": 10}
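A minimal sketch (import paths assumed from the Fred2 package layout) of how these fixtures are consumed: the BIMAS scores and the per-allele thresholds built in setUp feed directly into the OptiTope epitope selection used in later examples.

from Fred2.Core import Allele, Peptide
from Fred2.EpitopePrediction import EpitopePredictorFactory
from Fred2.EpitopeSelection.OptiTope import OptiTope  # assumed module path

alleles = [Allele("HLA-A*01:01"), Allele("HLA-B*07:02"), Allele("HLA-C*03:01")]
peptides = [Peptide("SFSIFLLAL"), Peptide("GHRMAWDMM")]
result = EpitopePredictorFactory("BIMAS").predict(peptides, alleles)
thresh = {"A*01:01": 10, "B*07:02": 10, "C*03:01": 10}

# select k epitopes subject to the per-allele binding thresholds
opt = OptiTope(result, thresh, k=1, solver="cbc", verbosity=0)
print(opt.solve())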
Example #2
 def test_single_peptide_input_mhcII(self):
     for m in EpitopePredictorFactory.available_methods():
         model = EpitopePredictorFactory(m)
         if not isinstance(model, AExternalEpitopePrediction):
             if all(a.name in model.supportedAlleles for a in self.mhcII):
                 res = model.predict(self.peptides_mhcII[0],
                                     alleles=self.mhcII[1])
 def test_single_allele_input(self):
     for m in EpitopePredictorFactory.available_methods():
         for v in EpitopePredictorFactory.available_methods()[m]:
             mo = EpitopePredictorFactory(m, version=v)
             if isinstance(mo, AExternalEpitopePrediction) and not (
                     mo.version == "0.1" and mo.name == "netmhc"):
                 print "Testing", mo.name, "version", mo.version
                 try:
                     if any(a.name in mo.supportedAlleles
                            for a in self.mhcII):
                         mo.predict(self.peptides_mhcII,
                                    alleles=self.mhcII[0])
                     if any(a.name in mo.supportedAlleles
                            for a in self.mhcII_combined_alleles):
                         mo.predict(self.peptides_mhcII,
                                    alleles=self.mhcII_combined_alleles[0])
                     if any(a.name in mo.supportedAlleles
                            for a in self.mhcI):
                         mo.predict(self.peptides_mhcI,
                                    alleles=self.mhcI[0])
                     print "Success"
                 except RuntimeError as e:  # catch only errors stemming from binary unavailability
                     if "could not be found in PATH" not in e.message:
                         raise e  # re-raise anything else
                     else:
                         print mo.name, "not available"
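For reference, EpitopePredictorFactory.available_methods() behaves like a mapping from method name to the list of supported versions, which is why the loop above indexes it with [m]; a quick sketch:

from Fred2.EpitopePrediction import EpitopePredictorFactory

# list every registered prediction method together with its versions
for name, versions in EpitopePredictorFactory.available_methods().items():
    print(name, versions)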
def predict_peptide_effects(peptides, alleles=None):
    """
    Predict the peptide effect for all the available methods on the machine

    Args:
        peptides (list of Peptides): Usually an output from read_fasta
        alleles (list of chars): Alleles for which to run the predictors

    Returns:
        pd.DataFrame: Tidy pd.DataFrame. Rows for which a method is unable
                      to predict a value are not present.

    Example:
    >>> peptides = [Peptide("SYFPEITHI"), Peptide("FIASNGVKL"), Peptide("LLGATCMFV")]
    >>> alleles = ['A*02:16', 'B*45:01']
    >>> predict_peptide_effects(peptides, alleles = alleles).head()
                               Seq    Method   allele       score
    0  (F, I, A, S, N, G, V, K, L)       arb  A*02:16  594.691144
    1  (F, I, A, S, N, G, V, K, L)       smm  A*02:16  159.768074
    2  (F, I, A, S, N, G, V, K, L)  smmpmbec  A*02:16  211.977614
    4  (F, I, A, S, N, G, V, K, L)   unitope  A*02:16    0.527849
    5  (L, L, G, A, T, C, M, F, V)       arb  A*02:16    6.784222
    """
    dt = valid_predictors()
    results = []
    for i in range(len(dt)):
        # subset to valid alleles
        if alleles is not None:
            valid_alleles = dt.iloc[i]["supportedAlleles"].intersection(
                alleles)

            if len(valid_alleles) == 0:
                continue
            valid_alleles = [Allele(al) for al in valid_alleles]
        else:
            valid_alleles = None
        method = dt.iloc[i]["name"]
        print("method: ", method)
        t0 = time.time()

        try:
            results.append(
                EpitopePredictorFactory(method).predict(peptides,
                                                        alleles=valid_alleles))
        except Exception:
            print("Error! Unable to run ", method, ": ", sys.exc_info())
        t1 = time.time()
        print("  - runtime: ", str(t1 - t0))

    if not results:
        raise ValueError("none of the predictors produced a result")
    df = results[0].merge_results(results[1:]).reset_index()
    dfm = pd.melt(df,
                  id_vars=["Seq", "Method"],
                  var_name="allele",
                  value_name="score")
    dfm = dfm[dfm["score"].notnull()]
    dfm.rename(columns={'Seq': 'peptide', 'Method': 'method'}, inplace=True)
    return dfm
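predict_peptide_effects relies on a valid_predictors() helper that is not shown here. A hypothetical minimal version, providing only the two columns the function reads ("name" and "supportedAlleles"):

import pandas as pd
from Fred2.EpitopePrediction import EpitopePredictorFactory

def valid_predictors():
    # one row per registered method; supportedAlleles as a set so that
    # .intersection(alleles) works as used above
    rows = []
    for name in EpitopePredictorFactory.available_methods():
        pred = EpitopePredictorFactory(name)
        rows.append({"name": name,
                     "supportedAlleles": set(pred.supportedAlleles)})
    return pd.DataFrame(rows)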
Example #5
 def test_epitope_conservation_constraint(self):
     import random
     self.result = EpitopePredictorFactory("BIMAS").predict(self.peptides, self.alleles)
     conservation = {}
     for e in self.result.index.levels[0]:
         conservation[str(e)] = random.random()
     pt = OptiTope(self.result, self.thresh, k=3, solver="cbc", verbosity=0)
     pt.activate_epitope_conservation_const(0.5, conservation=conservation)
     for e in pt.solve():
         print e, conservation[e]
 def test_path_and_optional_parameters_netctl(self):
     netctlpan = EpitopePredictorFactory("NetCTLpan")
     exe = netctlpan.command.split()[0]
     for try_path in os.environ["PATH"].split(os.pathsep):
         try_path = try_path.strip('"')
         exe_try = os.path.join(try_path, exe).strip()
         if os.path.isfile(exe_try) and os.access(exe_try, os.X_OK):
             print netctlpan.predict(self.peptides_mhcI, alleles=self.mhcI,
                                     command=exe_try,
                                     options="-wt 0.05 -wc 0.225 -ethr 0.5")
 def test_path_option_and_optional_parameters_netmhc(self):
     netmhc = EpitopePredictorFactory("NetMHC")
     exe = netmhc.command.split()[0]
     for try_path in os.environ["PATH"].split(os.pathsep):
         try_path = try_path.strip('"')
         exe_try = os.path.join(try_path, exe).strip()
         if os.path.isfile(exe_try) and os.access(exe_try, os.X_OK):
             r = netmhc.predict(self.peptides_mhcI, alleles=self.mhcI, command=exe_try, options="--sort", chunksize=1)
             self.assertTrue(len(r) == len(self.peptides_mhcI))
             self.assertAlmostEqual(r["A*02:01"]["SYFPEITHI"]["netmhc"], 0.150579105869, places=7, msg=None, delta=None)
             self.assertAlmostEqual(r["A*02:01"]["IHTIEPFYS"]["netmhc"], 0.0619540879359, places=7, msg=None, delta=None)
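The PATH scan in the two tests above re-implements the lookup that shutil.which performs on Python 3.3+; an equivalent helper:

import shutil

def find_executable(command):
    """Return the absolute path of the binary behind `command`, or None."""
    exe = command.split()[0]  # first token, e.g. of netmhc.command
    return shutil.which(exe)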
Example #8
    def test_pareto_front_assembly(self):
        cl_pred = CleavageSitePredictorFactory("PCM")
        ep_pred = EpitopePredictorFactory("SMM")
        allele = [Allele("HLA-A*02:01")]
        thresh = {a.name:10000 for a in allele}
        comp = lambda a,b: a <= b

        assembler = ParetoEpitopeAssembly(self.peptides,cl_pred, ep_pred, allele, thresh, comp, solver="cbc", verbosity=0)
        r = assembler.paretosolve()
        print(r)

        #print assembler.solve(eps=2.0)
Example #9
def run_predictor(pred, dataset):
    predictor = EpitopePredictorFactory(pred)
    results = ()
    try:
        results = predictor.predict(dataset,
                                    alleles=[Allele(a) for a in args.allele])
        print(results)
        print(results.describe())
    except ValueError:
        pass

    return (len(results), len(dataset))
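A hedged usage sketch for run_predictor, assuming the surrounding script defines args.allele via argparse and that Peptide comes from Fred2.Core:

from Fred2.Core import Peptide

dataset = [Peptide("SYFPEITHI"), Peptide("FIASNGVKL")]
n_rows, n_peptides = run_predictor("BIMAS", dataset)
print("%d result rows for %d peptides" % (n_rows, n_peptides))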
Example #10
    def test_standart_functions(self):
        """
        Tests default functions
        needs GLPK installed
        :return:
        """
        epi_pred =  EpitopePredictorFactory("Syfpeithi")
        cl_pred = CleavageSitePredictorFactory("PCM")

        sbws = EpitopeAssemblyWithSpacer(self.epis,cl_pred,epi_pred,self.alleles)
        sol = sbws.solve()
        print sol
        assert all(i == str(j) for i,j in zip(["GHRMAWDMM","HH","VYEADDVIL"],sol))
Example #11
    def test_pareto_assembly(self):
        cl_pred = CleavageSitePredictorFactory("PCM")
        ep_pred = EpitopePredictorFactory("SMM")
        allele = [Allele("HLA-A*02:01")]
        thresh = {a.name:10000 for a in allele}
        comp = lambda a,b: a <= b

        print(ep_pred.predict(self.peptides,alleles=allele))
        #cl_pred, ep_pred, alleles, threshold, comparator, length=9

        assembler = ParetoEpitopeAssembly(self.peptides,cl_pred, ep_pred, allele, thresh, comp, solver="cbc", verbosity=1)
        r = assembler.solve(eps=1e10, order=(1,0))
        print(r)
Example #12
    def test_allele_cov_constraint(self):
        """
        Tests the allele coverage constraints.

        :return:
        """
        #self.alleles.extend([Allele("HLA-A*02:01"),Allele("HLA-B*15:01")])
        #self.thresh.update({"A*02:01":0,"B*15:01":0})
        self.result= EpitopePredictorFactory("BIMAS").predict(self.peptides, self.alleles)
        opt = OptiTope(self.result, self.thresh, k=3, solver="cbc", verbosity=0)
        opt.activate_allele_coverage_const(0.99)
        r = opt.solve()

        self.assertTrue(len(set(str(p) for p in r) - set(["GPTPLLYRL", "QYLAGLSTL", "ALYDVVSTL"])) == 0 )
Example #13
 def test_unsupported_allele_length_combination_exception(self):
     """
     Tests default functions
     needs GLPK installed
     :return:
     """
     epi_pred = EpitopePredictorFactory("Syfpeithi")
     cl_pred = CleavageSitePredictorFactory("PCM")
     alleles = [Allele("HLA-A*26:01", prob=0.5)]
     sbws = EpitopeAssemblyWithSpacer(self.epis,
                                      cl_pred,
                                      epi_pred,
                                      alleles,
                                      solver="cbc")
     self.assertRaises(ValueError, sbws.solve)
Example #14
def predictor_info(method):
    """
    Get all the information about a particular predictor/method from Fred2
    """

    predictor = EpitopePredictorFactory(method)
    try:
        is_in_path = predictor.is_in_path()
    except Exception:
        is_in_path = None
    try:
        command = predictor.command
    except Exception:
        command = None

    method_hash = {
        "syfpeithi": "T-cell epitope",
        "bimas": "MHC-I binding",
        "svmhc": "MHC-I binding",
        "arb": "MHC-I binding",
        "smm": "MHC-I binding",
        "smmpmbec": "MHC-I binding",
        "epidemix": "MHC-I binding",
        "comblib": "MHC-I binding",
        "comblibsidney": "MHC-I binding",
        "pickpocket": "MHC-I binding",
        "netmhc": "MHC-I binding",
        "netmhcpan": "MHC-I binding",
        "hammer": "MHC-II binding",
        "tepitopepan": "MHC-II binding",
        "netmhcii": "MHC-II binding",
        "netmhciipan": "MHC-II binding",
        "unitope": "T-cell epitope",
        "netctlpan": "T-cell epitope",
    }

    retdict = {
        "is_in_path": is_in_path,
        "name": method,
        "supportedAlleles": predictor.supportedAlleles,
        "supportedLength": predictor.supportedLength,
        "command": command,
        "version": predictor.version,
        "type": method_hash.get(method)
    }
    return retdict
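Example use of predictor_info (method names are the lower-case keys used in method_hash above):

info = predictor_info("syfpeithi")
print(info["type"])                     # "T-cell epitope"
print(sorted(info["supportedLength"]))  # peptide lengths the method accepts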
Example #15
def __main__():
    parser = argparse.ArgumentParser(
        "Write out information about supported models by Fred2 for available prediction tool versions."
    )
    parser.add_argument('-v',
                        '--versions',
                        help='File with used software versions.',
                        required=True)
    args = parser.parse_args()

    # NOTE: this needs to be updated manually if other methods should be supported in the future
    available_methods = [
        'syfpeithi', 'mhcflurry', 'mhcnuggets-class-1', 'mhcnuggets-class-2'
    ]
    with open(args.versions, 'r') as versions_file:
        tool_version = [(row[0].split()[0], str(row[1]))
                        for row in csv.reader(versions_file, delimiter=":")]
        # NOTE: update this if a newer version becomes available via Fred2 and should be used in the future
        tool_version.append(('syfpeithi', '1.0'))
        # get for each method the corresponding tool version
        methods = {
            method.strip(): version.strip()
            for tool, version in tool_version for method in available_methods
            if tool.lower() in method.lower()
        }

    for method, version in methods.items():
        if (version
                not in EpitopePredictorFactory.available_methods()[method]):
            raise ValueError("The specified version " + version + " for " +
                             method + " is not supported by Fred2.")

        predictor = EpitopePredictorFactory(method, version=version)
        with open(method + ".v" + str(version) + ".supported_alleles.txt",
                  'w') as output:
            for a in sorted(predictor.supportedAlleles):
                output.write(convert_allele_back(a) + "\n")
        with open(method + ".v" + str(version) + ".supported_lengths.txt",
                  'w') as output:
            for l in sorted(predictor.supportedLength):
                output.write(str(l) + "\n")
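The versions file parsed above is expected to hold one colon-separated "tool: version" pair per line; a sketch of the parsing with hypothetical contents:

import csv
from io import StringIO

versions_file = StringIO(u"mhcflurry: 1.2.2\nmhcnuggets: 2.3.2\n")  # hypothetical contents
tool_version = [(row[0].split()[0], str(row[1]))
                for row in csv.reader(versions_file, delimiter=":")]
print(tool_version)  # version strings keep their leading space until .strip() is applied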
Example #16
 def test_unsupported_allele_length_combination(self):
     """
     Tests default functions
     needs GLPK installed
     :return:
     """
     epi_pred = EpitopePredictorFactory("Syfpeithi")
     cl_pred = CleavageSitePredictorFactory("PCM")
     alleles = [
         Allele("HLA-A*02:01", prob=0.5),
         Allele("HLA-A*26:01", prob=0.5)
     ]
     sbws = EpitopeAssemblyWithSpacer(self.epis,
                                      cl_pred,
                                      epi_pred,
                                      alleles,
                                      solver="cbc")
     sol = sbws.solve()
     print sol
     assert all(i == str(j)
                for i, j in zip(["GHRMAWDMM", "HH", "VYEADDVIL"], sol))
 def test_wrong_internal_to_external_version(self):
     with self.assertRaises(RuntimeError):
         EpitopePredictorFactory("NetMHC",
                                 version="0.1").predict(self.peptides_mhcI,
                                                        alleles=self.mhcI)
Example #18
 def test_epitope_prediction_no_version(self):
     print EpitopePredictorFactory("BIMAS").predict(self.peptides_mhcI,
                                                    self.mhcI)
Example #19
def main():
    parser = argparse.ArgumentParser(description="""The software is a novel approach to construct epitope-based string-of-beads
vaccines in optimal order and with sequence-optimized spacers of flexible length
such that the recovery of contained epitopes is maximized and immunogenicity of 
arising neo-epitopes is reduced. """)
    parser.add_argument("-i", "--input",
                        required=True,
                        help="File containing epitopes (one peptide per line)"
    )
    parser.add_argument("-a", "--alleles",
                        required=True,
                        help="Specifies file containing HLA alleles with corresponding HLA probabilities (one HLA per line)"
    )

    #parameters of the model
    parser.add_argument("-k","--max_length",
                        default=6,
                        type=int,
                        help="Specifies the max. length of the spacers (default 6)")
    parser.add_argument("-al","--alpha",
                        default=0.99,
                        type=float,
                        help="Specifies the first-order preference of the user in the model [0,1] (default 0.99)")
    parser.add_argument("-be","--beta",
                        default=0.0,
                        type=float,
                        help="Specifies the second-order preference of the user in the model [0,1] (default 0).")

    parser.add_argument("-cp","--cleavage_prediction",
                        default="PCM",
                        help="Specifies the used cleavage prediction method (default PCM) [available: PCM, PROTEASMM_C, PROTEASMM_S]"
    )
    parser.add_argument("-ep","--epitope_prediction",
                        default="Syfpeithi",
                        help="Specifies the used epitope prediction method (default Syfpeithi) [available: Syfpeithi, BIMAS, SMM, SMMPMBEC]"
    )
    parser.add_argument("-thr","--threshold",
                        default=20,
                        type=float,
                        help="Specifies epitope prediction threshold for SYFPEITHI (default 20).")

    parser.add_argument("-o", "--output",
                        required=True,
                        help="Specifies the output file.")
    parser.add_argument("-t", "--threads",
                        type=int,
                        default=None,
                        help="Specifies number of threads. If not specified all available logical cpus are used.")


    args = parser.parse_args()

    #parse input
    peptides = list(FileReader.read_lines(args.input, in_type=Peptide))
    #read in alleles
    alleles = generate_alleles(args.alleles)

    if args.cleavage_prediction.upper() not in ["PCM", "PROTEASMM_C", "PROTEASMM_S"]:
        print "Specified cleavage predictor is currently not supported. Please choose either PCM, PROTEASMM_C, or PROTEASMM_S"
        sys.exit(-1)

    if args.epitope_prediction.upper() not in ["SYFPEITHI", "BIMAS", "SMM", "SMMPMBEC"]:
        print "Specified cleavage predictor is currently not supported. Please choose either Syfpeithi, BIMAS, SMM, SMMPMBEC"
        sys.exit(-1)

    #set-up model
    cl_pred = CleavageSitePredictorFactory(args.cleavage_prediction)
    epi_pred = EpitopePredictorFactory(args.epitope_prediction)

    thr = {a.name:args.threshold for a in alleles}

    solver = EpitopeAssemblyWithSpacer(peptides,cl_pred,epi_pred,alleles,
                                       k=args.max_length,en=9,threshold=thr,
                                       solver="cplex", alpha=args.alpha, beta=args.beta,
                                       verbosity=0)

    #solve
    #pre-processing has to be disabled, otherwise many solvers will destroy the symmetry of the problem
    #how to do this depends on the solver used; for CPLEX it is preprocessing_presolve=n
    threads = mp.cpu_count() if args.threads is None else args.threads
    svbws = solver.approximate(threads=threads,options={"preprocessing_presolve":"n","threads":1})

    print
    print "Resulting String-of-Beads: ","-".join(map(str,svbws))
    print
    with open(args.output, "w") as f:
        f.write("-".join(map(str,svbws)))
Example #20
 def test_multiple_predictors_names_different_version(self):
     self.assertTrue(
         EpitopePredictorFactory("BIMAS", version="1.0").version == "1.0")
     self.assertTrue(
         EpitopePredictorFactory("BIMAS", version="2.0").version == "2.0")
Example #21
 def test_epitope_prediction_unsupported_version(self):
     print EpitopePredictorFactory("BIMAS", version="4.0").predict(
         self.peptides_mhcI, self.mhcI)
Example #22
def main():

    model = argparse.ArgumentParser(
        description='Neoepitope prediction for TargetInspector.')

    model.add_argument(
        '-m',
        '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method')

    model.add_argument('-v',
                       '--vcf',
                       type=str,
                       default=None,
                       help='Path to the vcf input file')

    model.add_argument(
        '-t',
        '--type',
        type=str,
        choices=["VEP", "ANNOVAR", "SNPEFF"],
        default="VEP",
        help=
        'Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)'
    )

    model.add_argument('-p',
                       '--proteins',
                       type=str,
                       default=None,
                       help='Path to the protein ID input file (in HGNC-ID)')

    model.add_argument('-l',
                       '--length',
                       choices=range(8, 18),
                       type=int,
                       default=9,
                       help='The length of peptides')

    model.add_argument(
        '-a',
        '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)')

    model.add_argument(
        '-r',
        '--reference',
        type=str,
        default='GRCh38',
        help='The reference genome used for variant annotation and calling.')

    model.add_argument(
        '-fINDEL',
        '--filterINDEL',
        action="store_true",
        help='Filter insertions and deletions (including frameshifts)')

    model.add_argument('-fFS',
                       '--filterFSINDEL',
                       action="store_true",
                       help='Filter frameshift INDELs')

    model.add_argument('-fSNP',
                       '--filterSNP',
                       action="store_true",
                       help='Filter SNPs')

    model.add_argument('-o',
                       '--output',
                       type=str,
                       required=True,
                       help='Path to the output file')
    model.add_argument('-etk',
                       '--etk',
                       action="store_true",
                       help=argparse.SUPPRESS)

    args = model.parse_args()

    martDB = MartsAdapter(biomart=MARTDBURL[args.reference.upper()])
    transcript_to_genes = {}

    if args.vcf is None and args.proteins is None:
        sys.stderr.write(
            "At least a vcf file or a protein id file has to be provided.\n")
        return -1

    # if vcf file is given: generate variants and filter them if HGNC IDs are given
    if args.vcf is not None:
        protein_ids = []
        if args.proteins is not None:
            with open(args.proteins, "r") as f:
                for l in f:
                    l = l.strip()
                    if l != "":
                        protein_ids.append(l)
        if args.type == "VEP":
            variants = read_variant_effect_predictor(args.vcf,
                                                     gene_filter=protein_ids)
        elif args.type == "SNPEFF":
            variants = read_vcf(args.vcf)[0]
        else:
            variants = read_annovar_exonic(args.vcf, gene_filter=protein_ids)

        variants = filter(lambda x: x.type != VariationType.UNKNOWN, variants)

        if args.filterSNP:
            variants = filter(lambda x: x.type != VariationType.SNP, variants)

        if args.filterINDEL:
            variants = filter(
                lambda x: x.type not in [
                    VariationType.INS, VariationType.DEL, VariationType.FSDEL,
                    VariationType.FSINS
                ], variants)

        if args.filterFSINDEL:
            variants = filter(
                lambda x: x.type not in
                [VariationType.FSDEL, VariationType.FSINS], variants)

        if not variants:
            sys.stderr.write(
                "No variants left after filtering. Please refine your filtering criteria.\n"
            )
            return -1

        epitopes = filter(
            lambda x: any(
                x.get_variants_by_protein(tid)
                for tid in x.proteins.iterkeys()),
            generate_peptides_from_variants(variants, int(args.length), martDB,
                                            EIdentifierTypes.ENSEMBL))

        for v in variants:
            for trans_id, coding in v.coding.iteritems():
                if coding.geneID != None:
                    transcript_to_genes[trans_id] = coding.geneID
                else:
                    transcript_to_genes[trans_id] = 'None'

    #else: generate protein sequences from given HGNC IDs and then epitopes
    else:
        proteins = []
        with open(args.proteins, "r") as f:
            for l in f:
                ensembl_ids = martDB.get_ensembl_ids_from_id(
                    l.strip(), type=EIdentifierTypes.HGNC)[0]
                protein_seq = martDB.get_product_sequence(
                    ensembl_ids[EAdapterFields.PROTID])
                if protein_seq is not None:
                    transcript_to_genes[ensembl_ids[
                        EAdapterFields.TRANSID]] = l.strip()
                    proteins.append(
                        Protein(
                            protein_seq,
                            gene_id=l.strip(),
                            transcript_id=ensembl_ids[EAdapterFields.TRANSID]))
        epitopes = generate_peptides_from_proteins(proteins, int(args.length))

    #read in allele list
    alleles = read_lines(args.alleles, in_type=Allele)

    result = EpitopePredictorFactory(args.method).predict(epitopes,
                                                          alleles=alleles)

    with open(args.output, "w") as f:
        alleles = result.columns
        var_column = "\tVariants" if args.vcf is not None else ""
        f.write("Sequence\tMethod\t" + "\t".join(a.name for a in alleles) +
                "\tAntigen ID" + var_column + "\n")
        for index, row in result.iterrows():
            p = index[0]
            method = index[1]
            proteins = ",".join(
                set([
                    transcript_to_genes[prot.transcript_id.split(":FRED2")[0]]
                    for prot in p.get_all_proteins()
                ]))
            vars_str = ""

            if args.vcf is not None:
                vars_str = "\t" + "|".join(
                    set(
                        prot_id.split(":FRED2")[0] + ":" + ",".join(
                            repr(v)
                            for v in set(p.get_variants_by_protein(prot_id)))
                        for prot_id in p.proteins.iterkeys()
                        if p.get_variants_by_protein(prot_id)))

            f.write(
                str(p) + "\t" + method + "\t" + "\t".join("%.3f" % row[a]
                                                          for a in alleles) +
                "\t" + proteins + vars_str + "\n")

    if args.etk:
        with open(args.output.rsplit(".", 1)[0] + "_etk.tsv", "w") as g:
            alleles = result.columns
            g.write("Alleles:\t" + "\t".join(a.name for a in alleles) + "\n")
            for index, row in result.iterrows():
                p = index[0]
                proteins = " ".join(
                    set([
                        transcript_to_genes[prot.transcript_id.split(
                            ":FRED2")[0]] for prot in p.get_all_proteins()
                    ]))
                g.write(
                    str(p) + "\t" + "\t".join("%.3f" % row[a]
                                              for a in alleles) + "\t" +
                    proteins + "\n")
    return 0
def __main__():
    parser = argparse.ArgumentParser(
        "Write out information about supported models by Fred2 for installed predictor tool versions."
    )
    parser.add_argument('-p',
                        "--peptides",
                        help="File with one peptide per line")
    parser.add_argument('-c',
                        "--mhcclass",
                        default=1,
                        help="MHC class I or II")
    parser.add_argument('-l',
                        "--max_length",
                        help="Maximum peptide length",
                        type=int)
    parser.add_argument('-ml',
                        "--min_length",
                        help="Minimum peptide length",
                        type=int)
    parser.add_argument('-a',
                        "--alleles",
                        help="<Required> MHC Alleles",
                        required=True,
                        type=str)
    parser.add_argument('-t',
                        '--tools',
                        help='Tools requested for peptide predictions',
                        required=True,
                        type=str)
    parser.add_argument('-v',
                        '--versions',
                        help='<Required> File with used software versions.',
                        required=True)
    args = parser.parse_args()
    selected_methods = [item for item in args.tools.split(',')]
    with open(args.versions, 'r') as versions_file:
        tool_version = [(row[0].split()[0], str(row[1]))
                        for row in csv.reader(versions_file, delimiter=":")]
        # NOTE: update this if a newer version becomes available via Fred2 and should be used in the future
        tool_version.append(('syfpeithi', '1.0'))  # how to handle this?
        # get for each method the corresponding tool version
        methods = {
            method.strip(): version.strip()
            for tool, version in tool_version for method in selected_methods
            if tool.lower() in method.lower()
        }

    # get the alleles
    alleles = [Allele(a) for a in args.alleles.split(";")]

    peptide_lengths = []
    if (args.peptides):
        peptides = read_peptide_input(args.peptides)
        peptide_lengths = set([len(pep) for pep in peptides])
    else:
        peptide_lengths = range(args.min_length, args.max_length + 1)

    with open("model_report.txt", 'w') as output:
        # check if requested tool versions are supported
        for method, version in methods.items():
            if version not in EpitopePredictorFactory.available_methods()[
                    method.lower()]:
                raise ValueError("The specified version " + version + " for " +
                                 method + " is not supported by Fred2.")

        # check if requested alleles are supported
        support_all_alleles = True
        no_allele_support = True
        for a in alleles:
            supported = False
            for method, version in methods.items():
                predictor = EpitopePredictorFactory(method, version=version)

                if a not in sorted(predictor.supportedAlleles):
                    output.write("Allele " + convert_allele_back(a) +
                                 " is not supported by " + method + " " +
                                 version + ".\n")
                else:
                    supported = True

            if not supported:
                output.write(
                    "Allele " + convert_allele_back(a) +
                    " is not supported by any of the requested tools.\n")
                logger.warning(
                    "Allele " + convert_allele_back(a) +
                    " is not supported by any of the requested tools.")
                support_all_alleles = False
            else:
                no_allele_support = False
        if support_all_alleles:
            output.write(
                "All selected alleles are supported by at least one of the requested tools.\n"
            )
        if no_allele_support:
            output.write(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )

        output.write("\n")
        # check if requested lengths are supported
        support_all_lengths = True
        no_length_support = True
        for l in peptide_lengths:
            supported = False
            for method, version in methods.items():
                predictor = EpitopePredictorFactory(method, version=version)

                if l not in sorted(predictor.supportedLength):
                    output.write("Peptide length " + str(l) +
                                 " is not supported by " + method + " " +
                                 version + ".\n")
                else:
                    supported = True

            if not supported:
                output.write(
                    "Peptide length " + str(l) +
                    " is not supported by any of the requested tools.\n")
                logger.warning(
                    "Peptide length " + str(l) +
                    " is not supported by any of the requested tools.")
                support_all_lengths = False
            else:
                no_length_support = False
        if support_all_lengths:
            output.write(
                "All selected or provided peptide lengths are supported by at least one of the requested tools.\n"
            )
        if no_length_support:
            output.write(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )
Example #24
def make_predictions_from_peptides(peptides, methods, alleles, protein_db,
                                   identifier, metadata):
    # dictionaries for syfpeithi matrices max values and allele mapping
    max_values_matrices = {}
    allele_string_map = {}

    # list to hold dataframes for all predictions
    pred_dataframes = []

    # filter out self peptides if specified
    selfies = [str(p) for p in peptides if protein_db.exists(str(p))]
    peptides_filtered = [p for p in peptides if str(p) not in selfies]

    # sort peptides by length (for predictions)
    sorted_peptides = {}

    for p in peptides_filtered:
        length = len(str(p))
        if length in sorted_peptides:
            sorted_peptides[length].append(p)
        else:
            sorted_peptides[length] = [p]

    for peplen in sorted_peptides:
        all_peptides_filtered = sorted_peptides[peplen]
        results = []
        for m in methods:
            try:
                results.extend([
                    EpitopePredictorFactory(m.split('-')[0],
                                            version=m.split('-')[1]).predict(
                                                all_peptides_filtered,
                                                alleles=alleles)
                ])
            except Exception:
                logging.warning(
                    "Prediction for length {length} and allele {allele} not possible with {method}. No model available."
                    .format(length=peplen,
                            allele=','.join([str(a) for a in alleles]),
                            method=m))

        # merge dataframes of the performed predictions
        if (len(results) == 0):
            continue
        df = results[0].merge_results(results[1:])

        df.insert(0, 'length', df.index.map(create_length_column_value))

        for a in alleles:
            conv_allele = "%s_%s%s" % (a.locus, a.supertype, a.subtype)
            allele_string_map['%s_%s' %
                              (a, peplen)] = '%s_%i' % (conv_allele, peplen)
            max_values_matrices['%s_%i' %
                                (conv_allele, peplen)] = get_matrix_max_score(
                                    conv_allele, peplen)

        # reset index to have index as columns
        df.reset_index(inplace=True)

        mandatory_columns = [
            'chr', 'pos', 'gene', 'transcripts', 'proteins', 'variant type',
            'synonymous', 'homozygous', 'variant details (genomic)',
            'variant details (protein)'
        ]

        for header in mandatory_columns:
            if header not in metadata:
                df[header] = np.nan
            else:
                df[header] = df.apply(
                    lambda row: row[0].get_metadata(header)[0], axis=1)

        for c in list(set(metadata) - set(mandatory_columns)):
            df[c] = df.apply(lambda row: row[0].get_metadata(c)[0], axis=1)

        for c in df.columns:
            if '*' in str(c):
                idx = df.columns.get_loc(c)
                df.insert(
                    idx + 1, '%s affinity' % c,
                    df.apply(lambda x: create_affinity_values(
                        str(c), int(x['length']), float(x[c]), x['Method'],
                        max_values_matrices, allele_string_map),
                             axis=1))
                df.insert(
                    idx + 2, '%s binder' % c,
                    df.apply(lambda x: create_binder_values(
                        float(x['%s affinity' % c]), x['Method']),
                             axis=1))
                df = df.rename(columns={c: '%s score' % c})

        df = df.rename(columns={'Seq': 'sequence'})
        df = df.rename(columns={'Method': 'method'})
        pred_dataframes.append(df)

    # write prediction statistics
    statistics = {
        'prediction_methods': methods,
        'number_of_variants': '-',
        'number_of_peptides': len(peptides),
        'number_of_peptides_after_filtering': len(peptides_filtered)
    }

    return pred_dataframes, statistics
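The methods argument is expected to hold "name-version" strings, which the loop above splits into a factory name and a version; hypothetical values:

methods = ["syfpeithi-1.0", "netmhcpan-4.0"]  # hypothetical method identifiers
for m in methods:
    name, version = m.split('-')[0], m.split('-')[1]
    print(name, version)  # e.g. "syfpeithi" "1.0"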
Example #25
def make_predictions_from_variants(variants_all, methods, alleles, minlength,
                                   maxlength, martsadapter, protein_db,
                                   identifier, metadata, transcriptProteinMap):
    # list for all peptides and filtered peptides
    all_peptides = []
    all_peptides_filtered = []

    # dictionaries for syfpeithi matrices max values and allele mapping
    max_values_matrices = {}
    allele_string_map = {}

    # list to hold dataframes for all predictions
    pred_dataframes = []

    prots = [
        p for p in generator.generate_proteins_from_transcripts(
            generator.generate_transcripts_from_variants(
                variants_all, martsadapter, ID_SYSTEM_USED))
    ]

    for peplen in range(minlength, maxlength):
        peptide_gen = generator.generate_peptides_from_proteins(prots, peplen)

        peptides_var = [x for x in peptide_gen]

        # remove peptides which are not 'variant relevant'
        peptides = [
            x for x in peptides_var if any(
                x.get_variants_by_protein(y) for y in x.proteins.keys())
        ]

        # filter out self peptides
        selfies = [str(p) for p in peptides if protein_db.exists(str(p))]
        filtered_peptides = [p for p in peptides if str(p) not in selfies]

        all_peptides = all_peptides + peptides
        all_peptides_filtered = all_peptides_filtered + filtered_peptides

        results = []

        if len(filtered_peptides) > 0:
            for m in methods:
                try:
                    results.extend([
                        EpitopePredictorFactory(
                            m.split('-')[0],
                            version=m.split('-')[1]).predict(filtered_peptides,
                                                             alleles=alleles)
                    ])
                except Exception:
                    logging.warning(
                        "Prediction for length {length} and allele {allele} not possible with {method}."
                        .format(length=peplen,
                                allele=','.join([str(a) for a in alleles]),
                                method=m))

        if (len(results) == 0):
            continue

        df = results[0].merge_results(results[1:])

        for a in alleles:
            conv_allele = "%s_%s%s" % (a.locus, a.supertype, a.subtype)
            allele_string_map['%s_%s' %
                              (a, peplen)] = '%s_%i' % (conv_allele, peplen)
            max_values_matrices['%s_%i' %
                                (conv_allele, peplen)] = get_matrix_max_score(
                                    conv_allele, peplen)

        df.insert(0, 'length', df.index.map(create_length_column_value))
        df['chr'] = df.index.map(create_variant_chr_column_value)
        df['pos'] = df.index.map(create_variant_pos_column_value)
        df['gene'] = df.index.map(create_gene_column_value)
        df['transcripts'] = df.index.map(create_transcript_column_value)
        df['proteins'] = df.index.map(create_protein_column_value)
        df['variant type'] = df.index.map(create_variant_type_column_value)
        df['synonymous'] = df.index.map(create_variant_syn_column_value)
        df['homozygous'] = df.index.map(create_variant_hom_column_value)
        df['variant details (genomic)'] = df.index.map(
            create_mutationsyntax_genome_column_value)
        df['variant details (protein)'] = df.index.map(
            create_mutationsyntax_column_value)

        # reset index to have index as columns
        df.reset_index(inplace=True)

        for c in df.columns:
            if '*' in str(c):
                idx = df.columns.get_loc(c)
                df.insert(
                    idx + 1, '%s affinity' % c,
                    df.apply(lambda x: create_affinity_values(
                        str(c), int(x['length']), float(x[c]), x['Method'],
                        max_values_matrices, allele_string_map),
                             axis=1))
                df.insert(
                    idx + 2, '%s binder' % c,
                    df.apply(lambda x: create_binder_values(
                        float(x['%s affinity' % c]), x['Method']),
                             axis=1))
                df = df.rename(columns={c: '%s score' % c})
                df['%s score' % c] = df['%s score' %
                                        c].map(lambda x: round(x, 4))

        for c in metadata:
            df[c] = df.apply(lambda row: create_metadata_column_value(row, c),
                             axis=1)

        df = df.rename(columns={'Seq': 'sequence'})
        df = df.rename(columns={'Method': 'method'})
        pred_dataframes.append(df)

    statistics = {
        'prediction_methods': methods,
        'number_of_variants': len(variants_all),
        'number_of_peptides': len(all_peptides),
        'number_of_peptides_after_filtering': len(all_peptides_filtered)
    }

    return pred_dataframes, statistics, all_peptides_filtered
Example #26
def main():
    parser = argparse.ArgumentParser(
        description=
        'The software is a novel approach to construct epitope-based string-of-beads \
vaccines in optimal order and with sequence-optimized spacers of flexible length \
such that the recovery of contained epitopes is maximized and immunogenicity of \
arising neo-epitopes is reduced.', )

    parser.add_argument('-i',
                        "--input",
                        required=True,
                        help="File containing epitopes (one peptide per line)",
                        type=str)

    parser.add_argument(
        '-a',
        "--alleles",
        required=True,
        help=
        "Specifies file containing HLA alleles with corresponding HLA probabilities (one HLA per line)",
        type=str)

    #parameters of the model
    parser.add_argument(
        '-l',
        "--max_length",
        default=6,
        type=int,
        help="Specifies the max. length of the spacers (default 6)",
    )

    parser.add_argument(
        '-al',
        "--alpha",
        default=0.99,
        type=float,
        help=
        "Specifies the first-order preference of the user in the model [0,1] (default 0.99)",
    )

    parser.add_argument(
        '-be',
        "--beta",
        default=0.0,
        type=float,
        help=
        "Specifies the second-order preference of the user in the model [0,1] (default 0).",
    )

    parser.add_argument(
        '-cp',
        "--cleavage_prediction",
        default="pcm",
        choices=["pcm", "proteasmm_c", "proteasmm_i"],
        help=
        "Specifies the used cleavage prediction method (default PCM) [available: PCM, PROTEASMM_C, PROTEASMM_I]",
        type=str)

    parser.add_argument(
        '-ep',
        "--epitope_prediction",
        default="syfpeithi",
        choices=["syfpeithi", "smm", "smmpmbec", "bimas"],
        help=
        "Specifies the used epitope prediction method (default Syfpeithi) [available: Syfpeithi, BIMAS, SMM, SMMPMBEC]",
        type=str)
    parser.add_argument(
        '-t',
        "--threshold",
        default=20,
        type=float,
        help=
        "Specifies epitope prediction threshold for SYFPEITHI (default 20).",
    )

    parser.add_argument(
        '-o',
        "--output",
        required=True,
        type=str,
        help="Specifies the output file.",
    )

    parser.add_argument(
        '-p',
        "--threads",
        type=int,
        default=None,
        help=
        "Specifies number of threads. If not specified all available logical cpus are used.",
    )
    parser.add_argument(
        '-apx',
        "--approximate",
        action="store_true",
        help=
        "If set, an approximation algorithm is used first; the script falls back to exact solving if approximation fails.",
    )

    args = parser.parse_args()

    #parse input
    peptides = read_lines(args.input)
    #read in alleles
    alleles = generate_alleles(args.alleles)

    if args.cleavage_prediction.upper() not in [
            "PCM", "PROTEASMM_C", "PROTEASMM_I"
    ]:
        sys.stderr.write(
            "Specified cleavage predictor is currently not supported. \
                         Please choose either PCM, PROTEASMM_C, or PROTEASMM_I"
        )
        sys.exit(-1)

    if args.epitope_prediction.upper() not in [
            "SYFPEITHI", "BIMAS", "SMM", "SMMPMBEC"
    ]:
        sys.stderr.write(
            "Specified cleavage predictor is currently not supported. \
                         Please choose either Syfpeithi, BIMAS, SMM, SMMPMBEC")
        sys.exit(-1)

    #set-up model
    cl_pred = CleavageSitePredictorFactory(args.cleavage_prediction)
    epi_pred = EpitopePredictorFactory(args.epitope_prediction)

    thr = {a.name: args.threshold for a in alleles}

    solver = EpitopeAssemblyWithSpacer(peptides,
                                       cl_pred,
                                       epi_pred,
                                       alleles,
                                       k=args.max_length,
                                       en=9,
                                       threshold=thr,
                                       solver="cbc",
                                       alpha=args.alpha,
                                       beta=args.beta,
                                       verbosity=1)
    #solve
    #pre-processing has to be disabled, otherwise many solvers will destroy the symmetry of the problem
    #how to do this depends on the solver used; for CPLEX it is preprocessing_presolve=n
    #TODO:CBC should be shipped with the node
    #TODO: has to be tested with CBC
    #TODO: LHK has to be shipped as well -> only academic license!
    #"preprocess":"off", "threads":1}
    threads = mp.cpu_count() if args.threads is None else args.threads
    if args.approximate:
        svbws = solver.approximate(threads=threads,
                                   options={
                                       "preprocess": "off",
                                       "threads": 1
                                   })
        if not svbws:
            svbws = solver.solve(threads=threads,
                                 options={
                                     "preprocess": "off",
                                     "threads": 1
                                 })
    else:
        svbws = solver.solve(threads=threads,
                             options={
                                 "preprocess": "off",
                                 "threads": 1
                             })

    with open(args.output, "w") as f:
        f.write(">assembled_spacer_design\n")
        f.write("".join(map(str, svbws)))
    return 0
Example #27
def __main__():
    parser = argparse.ArgumentParser(version=VERSION)
    parser.add_argument('-V',
                        '--variations',
                        dest="var_file",
                        help='<Required> full path to the input variations',
                        required=True)
    parser.add_argument('-o',
                        "--outfile",
                        dest="outfile_path",
                        help="Created fasta file",
                        required=True)
    parser.add_argument(
        '-d',
        "--digest",
        dest="digest",
        type=int,
        help="Length of peptides for predigestion and prediction, default 9.")
    parser.add_argument('-a',
                        "--alleles",
                        dest="alleles",
                        help="Input alleles for prediction")
    parser.add_argument(
        '-p',
        "--predict",
        dest="predict_with",
        help="Method of prediction, needs alleles & length, allowed:[{m}]".
        format(m=PRED_METH))
    parser.add_argument(
        '-f',
        "--filter",
        dest="filter",
        type=float,
        help=
        "Only include sequences with predictions above the given threshold (e.g. 0.4256 for at least weak binder), needs predict"
    )
    parser.add_argument('-P',
                        "--Proteins",
                        dest="only_proteins",
                        action='store_true',
                        help="Will write only proteins.")
    parser.add_argument(
        '-b',
        "--base",
        dest="basefasta_path",
        help="If given, entries are replaced by the variation.")

    options = parser.parse_args()
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    if options.filter and not options.predict_with:
        parser.print_help()
        print "Need alleles with predict option, aborting!"
        sys.exit(1)

    if options.predict_with and not options.alleles:
        parser.print_help()
        print "Need alleles with predict option, aborting!"
        sys.exit(1)

    temp_dir = "/tmp/"

    logging.basicConfig(
        filename=os.path.splitext(options.outfile_path)[0] +
        "_{:%d-%m-%Y_%H-%M-%S}".format(datetime.datetime.now()) + '.log',
        filemode='w+',
        level=logging.DEBUG)  #, format='%(levelname)s:%(message)s'
    logging.info("Starting variant fasta creation " + options.outfile_path +
                 " at " + str(datetime.datetime.now()))
    logging.warning("verbosity turned on")

    #... look at Theo's filter, ligandoqc, fasta-distributions, lica and the morgenstellen server content scripts
    # complete proteins?
    # only containing binders?
    # k-mers?
    # binders only?
    # FastaSlicer.py?
    # remove original if homozygous (needs fasta input)?
    # add germline variant option? or expect all to be in one vcf?

    # MyObject = type('MyObject', (object,), {})
    # options = MyObject()
    # setattr(options,"var_file","/home/walzer/immuno-tools/Fred2/Fred2/Data/examples/vcftestfile3.vcf")
    #
    # vt = os.path.splitext(options.var_file)[-1]
    # if ".vcf" == vt:
    #     vcfvars, accessions = FileReader.read_vcf(options.var_file)
    #
    # mart_db = MartsAdapter(biomart="http://grch37.ensembl.org")
    #
    # transcript_gen = g.generate_transcripts_from_variants(vcfvars, mart_db, id_type=EIdentifierTypes.REFSEQ)
    # transcripts = [x for x in transcript_gen if x.vars]
    # transcript_gen = g.generate_transcripts_from_variants(vcfvars, mart_db, id_type=EIdentifierTypes.REFSEQ)
    # protein_gen = g.generate_proteins_from_transcripts(transcript_gen)
    # proteins = [x for x in protein_gen if x.vars]
    # for p in proteins:
    #     p.gene_id = p.vars.values()[0][0].gene
    #
    #
    # for t in transcripts:
    #     t.gene_id = t.vars.values()[0].gene
    #

    vt = os.path.splitext(options.var_file)[-1]
    if ".vcf" == vt:
        vcfvars, accessions = FileReader.read_vcf(options.var_file)
    elif ".GSvar" == vt:
        pass
        # vcfvars = FileReader.read_GSvar(options.var_file)
    else:
        m = "Could not read variants {f}, aborting.".format(f=options.var_file)
        logging.error(m)
        print m
        sys.exit(1)

    mart_db = MartsAdapter(biomart="http://grch37.ensembl.org"
                           )  # TODO guess id_type for mart_db from accessions

    transcript_gen = g.generate_transcripts_from_variants(
        vcfvars, mart_db, id_type=EIdentifierTypes.REFSEQ)

    protein_gen = g.generate_proteins_from_transcripts(transcript_gen)
    proteins = [x for x in protein_gen if x.vars]  # removing unvaried

    for p in proteins:
        p.gene_id = p.vars.values(
        )[0][0].gene  # assume gene name from first variant

    proteins = [p for p in proteins
                if not is_stop_gain(p)]  # kick out stop gains

    # First exit option
    if not (options.predict_with or options.filter) and options.only_proteins:
        if options.basefasta_path:
            # TODO - replace from base fasta
            print "N/A"
            sys.exit(0)
        else:
            e = proteins_to_fasta(proteins)
            with open(options.outfile_path, 'w') as f:
                f.write(e)
            sys.exit(0)

    # From now on, digestion must be set somehow
    if not options.digest:
        digest = 9
    else:
        digest = options.digest
    peptide_gen = g.generate_peptides_from_proteins(proteins, digest)
    peptides = [x for x in peptide_gen]
    peptides_var = [
        x for x in peptides if any(
            x.get_variants_by_protein(y) for y in x.proteins.keys())
    ]  # removing unvaried

    # Second exit option
    if not (options.predict_with or options.filter):
        e = peptides_to_fasta(peptides_var)
        with open(options.outfile_path, 'w') as f:
            f.write(e)
        sys.exit(0)

    # From now on, predictions are needed
    try:
        target_alleles_set = set(
            FileReader.read_lines(options.alleles, in_type=Allele))
    except Exception as e:
        m = "Could not read alleles file {f}, aborting.".format(
            f=options.alleles)
        logging.error(m)
        print m, "what:", str(e)
        sys.exit(1)

    try:
        ttn = EpitopePredictorFactory(options.predict_with)
    except Exception as e:
        m = "Could not initialize prediction method {f}, aborting.".format(
            f=options.predict_with)
        logging.error(m)
        print m
        sys.exit(1)

    try:
        preds = ttn.predict(peptides_var, alleles=target_alleles_set)
    except Exception as e:
        print "something went wrong with the prediction", options.inf, options.predict_with, "what:", str(
            e)
        sys.exit(1)

    # punch prediction results in peptide metadata (inside pandas dataframe)
    #PRED_METH = set()
    for i, row in preds.iterrows():
        for j in i[1:]:
            i[0].log_metadata(j, dict(zip(row.index, row.values)))
            #PRED_METH.add(j)  # need that later

    # Third exit option
    if not options.filter:
        if options.only_proteins:
            if options.basefasta_path:
                # TODO - replace from base fasta plus prediction annotation
                print "N/A"
                sys.exit(0)
            else:
                prs = annotate_protein_from_peptides(preds)
                e = proteins_to_fasta(prs)
                with open(options.outfile_path, 'w') as f:
                    f.write(e)
                sys.exit(0)
        else:
            e = peptides_to_fasta(preds)
            with open(options.outfile_path, 'w') as f:
                f.write(e)
            sys.exit(0)

    # kick out nonbinder
    preds_f = preds[(preds > options.filter).any(axis=1)]

    # Fourth exit option
    if options.only_proteins:
        if options.basefasta_path:
            # TODO - replace from base fasta binders only plus prediction annotation
            print "N/A"
            sys.exit(0)
        else:
            prs = annotate_protein_from_peptides(preds_f)
            e = proteins_to_fasta(prs)
            with open(options.outfile_path, 'w') as f:
                f.write(e)
            sys.exit(0)
    else:
        e = peptides_to_fasta(preds_f)
        with open(options.outfile_path, 'w') as f:
            f.write(e)
        sys.exit(0)
Example #28
def main():
    #Specify CTD interface
    # Every CTD Model has to have at least a name and a version, plus any of the optional attributes below them.
    model = argparse.ArgumentParser(description='Process some integers.')

    model.add_argument('-m',
        '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method'
        )

    model.add_argument('-v',
        '--version',
        type=str,
        default="",
        help='The version of the prediction method'
        )

    model.add_argument('-i',
        '--input',
        type=str,
        required=True,
        help='Path to the input file'
        )

    model.add_argument('-t',
        '--type',
        choices=["fasta","peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)'
        )

    model.add_argument('-l',
        '--length',
        choices=range(8, 18),
        type=int,
        default=9,
        help='The length of the generated peptides (8-17)'
        )

    model.add_argument('-a',
        '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)'
        )

    model.add_argument('-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool"
    )

    model.add_argument('-o',
        '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )

    args = model.parse_args()


    # FASTA protein input: detect '|'-separated headers to pick the ID position
    if args.type == "fasta":
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if first_line.count("|") else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        sys.stderr.write('Input type not known\n')
        return -1

    #read in alleles
    alleles = read_lines(args.alleles, in_type=Allele)
    if args.version == "":
        result = EpitopePredictorFactory(args.method).predict(peptides, alleles, options=args.options)
    else:
        result = EpitopePredictorFactory(args.method, version=args.version).predict(peptides, alleles,
                                                                 options=args.options)

    # write TSV: sequence, method, one score column per allele and, for FASTA
    # input, the protein/transcript IDs of the source antigen
    with open(args.output, "w") as f:
        protein_col = "\tAntigen ID" if args.type == "fasta" else ""
        allele_cols = result.columns
        f.write("Sequence\tMethod\t" + "\t".join(a.name for a in allele_cols) + protein_col + "\n")
        for index, row in result.iterrows():
            p = index[0]
            method = index[1]
            protein_ids = "\t" + ",".join(prot.transcript_id for prot in p.get_all_proteins()) if args.type == "fasta" else ""
            f.write(str(p) + "\t" + method + "\t" + "\t".join("%.3f" % row[a] for a in allele_cols) + protein_ids + "\n")

    return 0
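
Stripped of argument parsing and TSV writing, the core of this example is a factory lookup plus a single predict call. A minimal sketch, assuming Fred2 is installed; the peptide sequences and allele below are placeholders, and Syfpeithi is just one of the available method names:

# Minimal sketch of the prediction core; sequences and allele are
# placeholders. The result index holds (peptide, method) tuples.
from Fred2.Core import Allele, Peptide
from Fred2.EpitopePrediction import EpitopePredictorFactory

peptides = [Peptide("SYFPEITHI"), Peptide("FIASNGVKL")]
alleles = [Allele("HLA-A*02:01")]
result = EpitopePredictorFactory("Syfpeithi").predict(peptides, alleles)
for (pep, method), row in result.iterrows():
    print str(pep), method, ["%.3f" % row[a] for a in result.columns]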
Example #29
def main():
    parser = argparse.ArgumentParser(
        description=
        """Constructs epitope-based string-of-beads vaccines in optimal order and
with sequence-optimized spacers of flexible length, such that the recovery of
contained epitopes is maximized and the immunogenicity of arising neo-epitopes
is reduced.""")
    parser.add_argument("-i",
                        "--input",
                        required=True,
                        help="File containing epitopes (one peptide per line)")
    parser.add_argument(
        "-a",
        "--alleles",
        required=True,
        help=
        "Specifies file containing HLA alleles with corresponding HLA probabilities (one HLA per line)"
    )

    #parameters of the model
    parser.add_argument(
        "-k",
        "--max_length",
        default=6,
        type=int,
        help="Specifies the max. length of the spacers (default 6)")
    parser.add_argument(
        "-al",
        "--alpha",
        default=0.99,
        type=float,
        help=
        "Specifies the first-order preference of the user in the model [0,1] (default 0.99)"
    )
    parser.add_argument(
        "-be",
        "--beta",
        default=0.0,
        type=float,
        help=
        "Specifies the second-order preference of the user in the model [0,1] (default 0)."
    )

    parser.add_argument(
        "-cp",
        "--cleavage_prediction",
        default="PCM",
        help=
        "Specifies the used cleavage prediction method (default PCM) [available: PCM, PROTEASMM_C, PROTEASMM_S]"
    )
    parser.add_argument(
        "-ep",
        "--epitope_prediction",
        default="Syfpeithi",
        help=
        "Specifies the used epitope prediction method (default Syfpeithi) [available: Syfpeithi, BIMAS, SMM, SMMPMBEC]"
    )
    parser.add_argument(
        "-thr",
        "--threshold",
        default=20,
        type=float,
        help=
        "Specifies epitope prediction threshold for SYFPEITHI (default 20).")

    parser.add_argument("-o",
                        "--output",
                        required=True,
                        help="Specifies the output file.")
    parser.add_argument(
        "-t",
        "--threads",
        type=int,
        default=None,
        help=
        "Specifies number of threads. If not specified all available logical cpus are used."
    )

    parser.add_argument(
        "--ips-solver",
        default="cplex",
        choices=["cplex", "cbc"],
        help=
        "Executable name of the IPS solver. Executable needs to be available in PATH."
    )

    parser.add_argument("--tsp-solution",
                        default="approximate",
                        choices=["approximate", "optimal"],
                        help="Type of solution of the TSP")

    parser.add_argument(
        "--random-order",
        action="store_true",
        help=
        "Indicate whether to generate a random ordered string-of-beads polypeptide"
    )

    parser.add_argument(
        "--seed",
        type=int,
        default=1,
        help="Seed for random ordering of string-of-beads polypeptide")

    args = parser.parse_args()

    #parse input
    peptides = list(FileReader.read_lines(args.input, in_type=Peptide))
    #read in alleles
    alleles = generate_alleles(args.alleles)

    if args.cleavage_prediction.upper() not in [
            "PCM", "PROTEASMM_C", "PROTEASMM_S"
    ]:
        print "Specified cleavage predictor is currently not supported. Please choose either PCM, PROTEASMM_C, or PROTEASMM_S"
        sys.exit(-1)

    if args.epitope_prediction.upper() not in [
            "SYFPEITHI", "BIMAS", "SMM", "SMMPMBEC"
    ]:
        print "Specified cleavage predictor is currently not supported. Please choose either Syfpeithi, BIMAS, SMM, SMMPMBEC"
        sys.exit(-1)

    #set-up model
    cl_pred = CleavageSitePredictorFactory(args.cleavage_prediction)
    epi_pred = EpitopePredictorFactory(args.epitope_prediction)

    thr = {a.name: args.threshold for a in alleles}

    solver = EpitopeAssemblyWithSpacer(peptides,
                                       cl_pred,
                                       epi_pred,
                                       alleles,
                                       k=args.max_length,
                                       en=9,
                                       threshold=thr,
                                       solver=args.ips_solver,
                                       alpha=args.alpha,
                                       beta=args.beta,
                                       verbosity=0)

    #solve
    # Pre-processing has to be disabled, otherwise many solvers will destroy
    # the symmetry of the problem. How to do this depends on the solver used;
    # for CPLEX the option is preprocessing_presolve=n.
    threads = mp.cpu_count() if args.threads is None else args.threads

    if args.tsp_solution == "approximate":
        svbws = solver.approximate(threads=threads,
                                   options={
                                       "preprocessing_presolve": "n",
                                       "threads": 1
                                   })
    else:
        svbws = solver.solve(threads=threads,
                             options={
                                 "preprocessing_presolve": "n",
                                 "threads": 1
                             })

    # Generate a randomly ordered string-of-beads, but still use the optimal
    # spacers determined by the solve step above.
    if args.random_order:
        print "Generating a randomly ordered polypeptide"
        random.seed(args.seed)
        random_order_sob = []
        random.shuffle(peptides)
        for i in range(len(peptides)):

            # Break from loop once we hit the last peptide
            if i == len(peptides) - 1:
                random_order_sob.extend([Peptide(str(peptides[i]))])
                break

            left_peptide = str(peptides[i])
            right_peptide = str(peptides[i + 1])
            opt_spacer = solver.spacer[(left_peptide, right_peptide)]

            # Right peptide gets added in the next iteration
            random_order_sob.extend(
                [Peptide(left_peptide),
                 Peptide(opt_spacer)])

        svbws = random_order_sob

    print
    print "Resulting String-of-Beads: ", "-".join(map(str, svbws))
    print
    with open(args.output, "w") as f:
        f.write("-".join(map(str, svbws)))
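
The --random-order branch above keeps the solver's pairwise spacers but shuffles the epitope order; the interleaving reduces to the following toy sketch (the spacer lookup table here is made up for illustration):

# Toy sketch of the peptide/spacer interleaving: each peptide is
# followed by the spacer optimized for its specific right neighbour.
peptides = ["AAA", "BBB", "CCC"]
spacer = {("AAA", "BBB"): "xx", ("BBB", "CCC"): "yy"}
sob = []
for i in range(len(peptides)):
    if i == len(peptides) - 1:
        sob.append(peptides[i])  # last peptide, no trailing spacer
        break
    sob.extend([peptides[i], spacer[(peptides[i], peptides[i + 1])]])
print "-".join(sob)  # prints AAA-xx-BBB-yy-CCC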
Example #30
 def test_wrong_allele_input(self):
     with self.assertRaises(ValueError):
         EpitopePredictorFactory("NetMHC").predict(self.mhcI,
                                                   alleles=self.transcript)