Пример #1
0
    def setUp(self):
        self.proteins=[]
        self.alleles = [Allele("HLA-A*01:01"),Allele("HLA-B*07:02"), Allele("HLA-C*03:01")]
        self.peptides = [Peptide(p) for p in """SFSIFLLAL
GHRMAWDMM
VYEADDVIL
CFTPSPVVV
FLLLADARV
GPADGMVSK
YLYDHLAPM
GLRDLAVAV
GPTPLLYRL
TWVLVGGVL
IELGGKPAL
LAGGVLAAV
QYLAGLSTL
NFVSGIQYL
VLSDFKTWL
ARPDYNPPL
KLLPRLPGV
RHTPVNSWL
GLYLFNWAV
ALYDVVSTL
RRCRASGVL
WPLLLLLLA
VTYSLTGLW
YFVIFFVAA""".split()]
        self.result= EpitopePredictorFactory("BIMAS").predict(self.peptides, self.alleles)
        self.thresh = {"A*01:01":10,"B*07:02":10,"C*03:01":10}
Пример #2
0
 def test_unsupported_allele_length_combination(self):
     """
     Assemble ``self.epis`` with spacers for two alleles and check the result.

     Uses the "Syfpeithi" epitope predictor, the "PCM" cleavage site
     predictor and the "cbc" solver backend, so that solver must be
     installed for the test to run.

     :return:
     """
     epi_pred = EpitopePredictorFactory("Syfpeithi")
     cl_pred = CleavageSitePredictorFactory("PCM")
     alleles = [
         Allele("HLA-A*02:01", prob=0.5),
         Allele("HLA-A*26:01", prob=0.5)
     ]
     sbws = EpitopeAssemblyWithSpacer(self.epis,
                                      cl_pred,
                                      epi_pred,
                                      alleles,
                                      solver="cbc")
     sol = sbws.solve()
     # single-argument print() behaves the same on Python 2 and Python 3
     print(sol)
     expected = ["GHRMAWDMM", "HH", "VYEADDVIL"]
     # Compare the complete sequences: zip() stops at the shorter input, so
     # an empty or truncated solution used to pass the check vacuously.
     assert [str(part) for part in sol] == expected
Пример #3
0
 def test_unsupported_allele_length_combination_exception(self):
     """
     Solving the spacer assembly for HLA-A*26:01 alone must raise ValueError.

     Uses the "Syfpeithi" epitope predictor, the "PCM" cleavage site
     predictor and the "cbc" solver backend.

     :return:
     """
     epitope_predictor = EpitopePredictorFactory("Syfpeithi")
     cleavage_predictor = CleavageSitePredictorFactory("PCM")
     single_allele = [Allele("HLA-A*26:01", prob=0.5)]
     assembler = EpitopeAssemblyWithSpacer(self.epis,
                                           cleavage_predictor,
                                           epitope_predictor,
                                           single_allele,
                                           solver="cbc")
     with self.assertRaises(ValueError):
         assembler.solve()
Пример #4
0
 def revert_allele_repr(self, name):
     """
     Turn a tool-specific allele name back into an allele object.

     Names starting with "H-2-" are wrapped in a MouseAllele unchanged.
     Other names are split on '-'; in the second (and, if present, third)
     part a '*' is inserted after the first four characters. Names with
     more than two parts yield a CombinedAllele, all others an Allele.

     :param str name: allele name in the tool's representation
     :return: the corresponding MouseAllele, CombinedAllele or Allele
     """
     if name.startswith("H-2-"):
         return MouseAllele(name)

     def with_star(chunk):
         # re-insert the '*' after the first four characters of the part
         return chunk[:4] + '*' + chunk[4:]

     parts = name.split('-')
     # both single and double MHC-II alleles have to be supported
     if len(parts) > 2:
         return CombinedAllele('-'.join(
             [parts[0], with_star(parts[1]), with_star(parts[2])]))
     return Allele('-'.join([parts[0], with_star(parts[1])]))
Пример #5
0
    def setUp(self):
        """
        Build the fixture: two epitopes as Peptide objects and one allele
        (HLA-A*02:01 with ``prob=0.5``).
        """
        epis = ("GHRMAWDMM", "VYEADDVIL")

        # Build a real list: on Python 3 map() returns a one-shot iterator,
        # which would be exhausted after the first pass over self.epis.
        self.epis = [Peptide(seq) for seq in epis]
        self.alleles = [Allele("HLA-A*02:01", prob=0.5)]
Пример #6
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all supported alleles are made.

        Peptides whose length is not in ``self.supportedLength`` and allele/length combinations
        without an importable matrix are skipped with a warning.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made at all
        """
        def _load_allele_model(allele, length):
            # The matrix lives in a module "<allele>_<length>" that exposes
            # an attribute of the same name.
            allele_model = "%s_%i" % (allele, length)
            module = __import__("Fred2.Data.pssms." + self.name + ".mat." +
                                allele_model,
                                fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
        # converted (tool-specific) allele name -> original Allele object
        alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby() only merges adjacent items, so the sequences have to be
        # sorted by length first; otherwise one length can show up in
        # several groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len),
                                              key=len):
            # dynamically import prediction PSSMs for alleles and predict
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            peps = list(peps)
            for a, allele in alleles_string.items():
                try:
                    pssm = _load_allele_model(a, length)
                except ImportError:
                    warnings.warn("No model found for %s with length %i" %
                                  (allele, length))
                    continue

                # Extend the scores of this allele instead of replacing
                # them, so results of other peptide lengths are kept.
                scores = result.setdefault(allele, {})
                constant = pssm.get(-1, {}).get("con", 0)
                for p in peps:
                    # Sum of the per-position matrix entries plus the
                    # optional constant, rescaled with the matrix'
                    # intercept and slope and used as exponent of 10.
                    score = sum(pssm[i].get(p[i], 0.0)
                                for i in range(length)) + constant
                    score /= -length
                    score -= pssm[-1]["intercept"]
                    score /= pssm[-1]["slope"]
                    score = math.pow(10, score)
                    # clamp to the interval [0.0001, 1e6]
                    if score < 0.0001:
                        score = 0.0001
                    elif score > 1e6:
                        score = 1e6
                    scores[pep_seqs[p]] = score

        if not result:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [tuple((i, self.name)) for i in df_result.index],
            names=['Seq', 'Method'])
        return df_result
Пример #7
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all supported alleles are made.

        The score of a peptide is the sum over its positions of the residue's log enrichment
        times the positional importance; positions contained in the loaded allele model are left out.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`pandas.DataFrame` object with the prediction results
        :rtype: :class:`pandas.DataFrame`
        :raises ValueError: if an input has the wrong type or no prediction could be made at all
        """
        def _load_allele_model(allele, length):
            # The module is named after the allele only; ``length`` is not
            # part of the module name here.
            allele_model = "%s" % allele
            module = __import__("Fred2.Data.pssms." + self.name + ".mat." +
                                allele_model,
                                fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
        # converted (tool-specific) allele name -> original Allele object
        alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby() needs its input sorted by the grouping key
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len),
                                              key=len):

            if self.supportedLength is not None and length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            peps = list(peps)
            # The importance vector depends on the length only: for peptides
            # longer than 9, (length - 9) entries of 0.30 are inserted after
            # the fifth position.
            importance = self.__importance  if length <= 9 else \
                         self.__importance[:5] + ((length - 9) * [0.30]) + self.__importance[5:]

            for a, allele in alleles_string.items():

                if allele not in result:
                    result[allele] = {}

                # load matrix; without a model no position is left out
                try:
                    pssm = _load_allele_model(a, length)
                except ImportError:
                    pssm = []

                for p in peps:
                    score = sum(
                        self.__log_enrichment.get(p[i], 0.0) * importance[i]
                        for i in range(length) if i not in pssm)
                    result[allele][pep_seqs[p]] = score

        if not result:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [tuple((i, self.name)) for i in df_result.index],
            names=['Seq', 'Method'])
        return df_result
def __main__():
    """
    Check whether the requested tool versions, alleles and peptide lengths are
    supported by Fred2 and write the findings to ``model_report.txt``.

    The peptide lengths are taken from the peptide file if one is given,
    otherwise from the range ``--min_length`` .. ``--max_length`` (inclusive).

    :raises ValueError: if a requested tool version is not supported, or if
        none of the alleles / none of the peptide lengths is supported by
        any of the requested tools
    """
    # NOTE(review): the text is passed positionally, so argparse uses it as
    # the program name (``prog``), not as the description - confirm intent.
    parser = argparse.ArgumentParser(
        "Write out information about supported models by Fred2 for installed predictor tool versions."
    )
    parser.add_argument('-p',
                        "--peptides",
                        help="File with one peptide per line")
    parser.add_argument('-c',
                        "--mhcclass",
                        default=1,
                        help="MHC class I or II")
    parser.add_argument('-l',
                        "--max_length",
                        help="Maximum peptide length",
                        type=int)
    parser.add_argument('-ml',
                        "--min_length",
                        help="Minimum peptide length",
                        type=int)
    parser.add_argument('-a',
                        "--alleles",
                        help="<Required> MHC Alleles",
                        required=True,
                        type=str)
    parser.add_argument('-t',
                        '--tools',
                        help='Tools requested for peptide predictions',
                        required=True,
                        type=str)
    parser.add_argument('-v',
                        '--versions',
                        help='<Required> File with used software versions.',
                        required=True)
    args = parser.parse_args()
    selected_methods = args.tools.split(',')
    with open(args.versions, 'r') as versions_file:
        # One "<tool> ...: <version>" entry per line; rows without a tool
        # name or version column (e.g. blank lines) are skipped.
        tool_version = [(row[0].split()[0], str(row[1]))
                        for row in csv.reader(versions_file, delimiter=":")
                        if len(row) > 1 and row[0].strip()]
    # NOTE this needs to be updated, if a newer version will be available via Fred2 and should be used in the future
    tool_version.append(('syfpeithi', '1.0'))  # how to handle this?
    # get for each method the corresponding tool version
    methods = {
        method.strip(): version.strip()
        for tool, version in tool_version for method in selected_methods
        if tool.lower() in method.lower()
    }

    # get the alleles
    alleles = [Allele(a) for a in args.alleles.split(";")]

    if args.peptides:
        peptides = read_peptide_input(args.peptides)
        peptide_lengths = {len(pep) for pep in peptides}
    else:
        if args.min_length is None or args.max_length is None:
            # without this check range() fails with an opaque TypeError
            parser.error(
                "Either --peptides or both --min_length and --max_length have to be specified."
            )
        peptide_lengths = range(args.min_length, args.max_length + 1)

    with open("model_report.txt", 'w') as output:
        # check if requested tool versions are supported
        for method, version in methods.items():
            if version not in EpitopePredictorFactory.available_methods()[
                    method.lower()]:
                raise ValueError("The specified version " + version + " for " +
                                 method + " is not supported by Fred2.")

        # create every predictor once instead of once per allele and length
        predictors = [(method, version,
                       EpitopePredictorFactory(method, version=version))
                      for method, version in methods.items()]

        # check if requested alleles are supported
        # NOTE(review): the supported alleles are kept as a list, so the
        # membership test below compares by equality; a set lookup would go
        # through hashing instead - confirm before changing this.
        allele_support = [(method, version,
                           sorted(predictor.supportedAlleles))
                          for method, version, predictor in predictors]
        support_all_alleles = True
        no_allele_support = True
        for a in alleles:
            supported = False
            for method, version, supported_alleles in allele_support:
                if a not in supported_alleles:
                    output.write("Allele " + convert_allele_back(a) +
                                 " is not supported by " + method + " " +
                                 version + ".\n")
                else:
                    supported = True

            if not supported:
                output.write(
                    "Allele " + convert_allele_back(a) +
                    " is not supported by any of the requested tools.\n")
                logger.warning(
                    "Allele " + convert_allele_back(a) +
                    " is not supported by any of the requested tools.")
                support_all_alleles = False
            else:
                no_allele_support = False
        if support_all_alleles:
            output.write(
                "All selected alleles are supported by at least one of the requested tools.\n"
            )
        if no_allele_support:
            output.write(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )

        output.write("\n")
        # check if requested lengths are supported
        length_support = [(method, version,
                           sorted(predictor.supportedLength))
                          for method, version, predictor in predictors]
        support_all_lengths = True
        no_length_support = True
        for l in peptide_lengths:
            supported = False
            for method, version, supported_lengths in length_support:
                if l not in supported_lengths:
                    output.write("Peptide length " + str(l) +
                                 " is not supported by " + method + " " +
                                 version + ".\n")
                else:
                    supported = True

            if not supported:
                output.write(
                    "Peptide length " + str(l) +
                    " is not supported by any of the requested tools.\n")
                logger.warning(
                    "Peptide length " + str(l) +
                    " is not supported by any of the requested tools.")
                support_all_lengths = False
            else:
                no_length_support = False
        if support_all_lengths:
            output.write(
                "All selected or provided peptide lengths are supported by at least one of the requested tools.\n"
            )
        if no_length_support:
            output.write(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )
Пример #9
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and alleles. If no alleles are given, predictions for all
        supported alleles are made.

        Only peptides of length 9 are scored; other lengths are skipped with a warning. Alleles whose
        name is not in ``self.supportedAlleles`` are skipped silently.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made at all
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
        # converted (tool-specific) allele name -> original Allele object
        alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}

        # load svm model
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        model = svmlight.read_model(model_path)

        # groupby() only merges adjacent items, so the sequences have to be
        # sorted by length first; otherwise the 9-mers can be split over
        # several groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            peps = list(peps)
            for a, allele in alleles_string.items():
                if allele.name in self.supportedAlleles:
                    encoding = self.encode(peps, a)
                    pred = svmlight.classify(model, list(encoding.values()))
                    # extend the scores of this allele rather than replacing them
                    scores = result.setdefault(allele, {})
                    for pep, score in zip(encoding.keys(), pred):
                        scores[pep_seqs[pep]] = score

        if not result:
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Пример #10
0
 def setUp(self):
     """
     Build the fixture: two 9-residue peptides, one 11-residue peptide,
     two HLA class I alleles and two HLA-DRB1 alleles.
     """
     nine_mers = ("SYFPEITHI", "IHTIEPFYS")
     class_one = ("HLA-B*15:01", "HLA-A*02:01")
     class_two = ("HLA-DRB1*07:01", "HLA-DRB1*15:01")

     self.peptides_mhcI = [Peptide(seq) for seq in nine_mers]
     self.peptides_fragment = [Peptide("IHTIEPFYSAA")]
     self.mhcI = [Allele(name) for name in class_one]
     self.mhcII = [Allele(name) for name in class_two]
Пример #11
0
 def revert_allele_repr(self, name):
     """
     Turn a tool-specific allele name back into an allele object.

     Names starting with "H-2-" are wrapped in a MouseAllele unchanged.
     For all other names a '*' is inserted after the fifth character and
     a ':' after the seventh before the Allele is created.

     :param str name: allele name in the tool's representation
     :return: the corresponding MouseAllele or Allele
     """
     if name.startswith("H-2-"):
         return MouseAllele(name)
     prefix, first_field, remainder = name[:5], name[5:7], name[7:]
     return Allele("%s*%s:%s" % (prefix, first_field, remainder))
Пример #12
0
 def revert_allele_repr(self, name):
     """
     Turn an allele name back into an allele object.

     Names starting with "H-2-" become a MouseAllele, everything else an
     Allele; the name itself is passed on unchanged.

     :param str name: allele name
     :return: the corresponding MouseAllele or Allele
     """
     allele_type = MouseAllele if name.startswith("H-2-") else Allele
     return allele_type(name)
def __main__():
    """
    Annotate the peptide hits of an idXML file with MHC binding predictions.

    Reads the target alleles from an allele file, collects the unmodified
    sequences of all peptide hits within the length bounds of the chosen MHC
    class, runs the predictor on them, and stores for every hit with a
    categorized prediction the best-scoring allele ('binder') and the score
    under the category name. The result is written to the output idXML file.

    Exits with status 1 if arguments or alleles are missing or the
    prediction fails, and with status 0 if '-allele in' is used on an input
    whose name does not contain "_W_".
    """
    parser = argparse.ArgumentParser()
    # explicit version action instead of the deprecated ``version=`` keyword
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=VERSION)
    parser.add_argument('-c',
                        dest="mhcclass",
                        help='<Required> MHC class',
                        required=True)
    parser.add_argument('-in',
                        dest="inf",
                        help='<Required> full path to the input file',
                        required=True)
    parser.add_argument('-out',
                        dest="out",
                        help="<Required> full path to the output file",
                        required=True)
    parser.add_argument(
        '-allele',
        dest="allele",
        help=
        "<Required> full path to an allele file, if 'in', allele file will be deduced from in file name",
        required=True)
    parser.add_argument(
        '-dirallele',
        dest="dirallele",
        help=
        "for use with '-allele in', describes full base path to the allele files"
    )

    # Has to happen before parse_args(): with required arguments missing,
    # parse_args() exits on its own and the help would never be shown.
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    options = parser.parse_args()

    # every one of the three values is needed, not just any of them
    if not (options.inf and options.out and options.allele):
        parser.print_help()
        sys.exit(1)

    target_alleles_set = set()
    #Fred2.FileReader.read_lines is broken
    #alleles = FileReader.read_lines(options.allele, type=Allele)
    if options.allele == "in" and options.dirallele:
        if "_W_" not in options.inf:
            print("No class 1 type run detected.")
            sys.exit(0)
        # The allele file is named after the part behind the '-' of the
        # "BD..." segment of the input file name.
        allele_path = None
        for sp in options.inf.split("_"):
            if sp.startswith("BD"):
                allele_path = join(options.dirallele,
                                   sp.split("-")[1] + ".allele")
        if allele_path is None:
            print("No allele file could be deduced from the input file name.")
            sys.exit(1)
    else:
        allele_path = options.allele

    with open(allele_path, 'r') as handle:
        for line in handle:
            allele_name = line.strip().upper()
            # skip blank lines instead of creating an allele from ""
            if allele_name:
                target_alleles_set.add(Allele(allele_name))

    if not target_alleles_set:
        parser.print_help()
        sys.exit(1)

    if options.mhcclass == "I":
        ttn = EpitopePredictorFactory('netmhcpan', version='3.0')
        lowerBound = 8
        upperBound = 12
    elif options.mhcclass == "II":
        ttn = EpitopePredictorFactory('netmhcIIpan', version='3.1')
        lowerBound = 15
        upperBound = 25
    else:
        # previously this ended in a NameError further down
        parser.error("MHC class has to be 'I' or 'II'.")

    pros = list()
    peps = list()
    f = oms.IdXMLFile()
    f.load(options.inf, pros, peps)

    pepstr = set()
    for pep in peps:
        for h in pep.getHits():
            #if "decoy" not in h.getMetaValue("target_decoy"):
            unmod = h.getSequence().toUnmodifiedString()
            # keep sequences within the length bounds that contain none of
            # the letters U, B, X and Z
            if lowerBound <= len(unmod) <= upperBound \
                    and not any(letter in unmod for letter in 'UBXZ'):
                pepstr.add(unmod)

    es = [Peptide(x) for x in pepstr]

    try:
        preds_n = ttn.predict(es, alleles=target_alleles_set)
    except Exception as e:
        print("something went wrong with the netMHC prediction %s what: %s" %
              (options.inf, str(e)))
        sys.exit(1)

    #only max
    preds = dict()
    for index, row in preds_n.iterrows():
        score = row.max()  #bigger_is_better
        allele = str(row.idxmax())
        categ = categorize(score)
        seq = row.name[0].tostring()
        # predictions without a (truthy) category are dropped
        if categ:
            preds[seq] = (allele, categ, score)

    for pep in peps:
        nhits = list()
        for h in pep.getHits():
            annotation = preds.get(h.getSequence().toUnmodifiedString())
            if annotation is not None:
                allele, categ, score = annotation
                h.setMetaValue('binder', allele)
                h.setMetaValue(str(categ), score)
            # every hit is kept, annotated or not
            nhits.append(h)
        pep.setHits(nhits)

    f.store(options.out, pros, peps)
Пример #14
0
    def setUp(self):
        """
        Build the fixture: the two epitopes of the text block below as
        Peptide objects and one allele (HLA-A*02:01 with ``prob=0.5``).
        """
        raw_block = """GHRMAWDMM
                 VYEADDVIL"""

        # one epitope per line; surrounding whitespace is not part of it
        self.epis = [Peptide(line.strip()) for line in raw_block.splitlines()]
        self.alleles = [Allele("HLA-A*02:01", prob=0.5)]