Пример #1
0
        def predict(self, peptides, alleles=None, binary=False, **kwargs):
            """
            Predict MHC class I binding affinities with mhcnuggets.

            The peptides are written to a temporary file, mhcnuggets is invoked
            once per allele, and the file it writes is parsed back into a
            result table.

            :param peptides: a single peptide or a list of peptides; each one is
                             converted with ``str()`` before prediction
            :param alleles: alleles to predict for; entries that are not in
                            ``self.supportedAlleles`` are dropped. ``None``
                            selects all supported alleles
            :param bool binary: if True, report 1.0 for affinities <= 500 and
                                0.0 otherwise; if False, report the predicted
                                affinity itself as a float
            :param kwargs: accepted for interface compatibility; not used here
            :return: ``EpitopePredictionResult`` whose index is a
                     (Seq, Method) MultiIndex and whose columns are the alleles
            """
            # function-scope imports: only needed for temp-file housekeeping
            import os
            import shutil

            # NOTE(review): presumably an IC50 cutoff in nM -- confirm against
            # the mhcnuggets documentation
            binder_threshold = 500

            # test whether one peptide or a list
            if not isinstance(peptides, list):
                peptides = [peptides]

            # if no alleles are specified do predictions for all supported alleles
            if alleles is None:
                alleles = self.supportedAlleles
            else:
                # build a real list rather than a lazy filter object
                alleles = [a for a in alleles if a in self.supportedAlleles]

            # fetch peptides as strings
            peptides = [str(peptide) for peptide in peptides]

            result = {}
            # A private scratch directory avoids the name-reuse race of
            # NamedTemporaryFile().name and lets us remove every file in one go.
            work_dir = tempfile.mkdtemp()
            try:
                # write peptides temporarily, new line separated
                tmp_input_file = os.path.join(work_dir, 'peptides.txt')
                with open(tmp_input_file, 'w') as handle:
                    handle.writelines(peptide + "\n" for peptide in peptides)

                # predict binding affinities
                for idx, a in enumerate(self.convert_alleles(alleles)):
                    allele_repr = self.revert_allele_repr(a)
                    result[allele_repr] = {}
                    # index-based name: allele names may contain characters
                    # that are awkward in file names
                    tmp_output_file = os.path.join(
                        work_dir, 'predictions_%d.csv' % idx)
                    mhcnuggets_predict(class_='I',
                                       peptides_path=tmp_input_file,
                                       mhc=a,
                                       output=tmp_output_file)

                    # read predicted binding affinities back
                    with open(tmp_output_file, 'r') as csvfile:
                        reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
                        # skip header; tolerate an empty output file
                        next(reader, None)

                        # assign binding affinities
                        for row in reader:
                            if not row:
                                # blank line, nothing to parse
                                continue
                            content = row[0].split(',')
                            peptide = content[0]
                            # Convert before comparing: the CSV field is a
                            # string, and comparing a string with 500 never
                            # gives a meaningful result.
                            binding_affinity = float(content[1])
                            if binary:
                                if binding_affinity <= binder_threshold:
                                    result[allele_repr][peptide] = 1.0
                                else:
                                    result[allele_repr][peptide] = 0.0
                            else:
                                result[allele_repr][peptide] = binding_affinity
            finally:
                # always clean up the input file and all per-allele outputs
                shutil.rmtree(work_dir, ignore_errors=True)

            # create EpitopePredictionResult object. This is a multi-indexed DataFrame
            # with Peptide and Method as multi-index and alleles as columns
            df_result = EpitopePredictionResult.from_dict(result)
            df_result.index = pandas.MultiIndex.from_tuples(
                [(i, self.name) for i in df_result.index],
                names=['Seq', 'Method'])
            return df_result
Пример #2
0
        def predict(self, peptides, alleles=None, binary=False, **kwargs):
            """
            Predict MHC class II binding affinities with mhcnuggets.

            Peptides are grouped by length; groups whose length is not in
            ``self.supportedLength`` are skipped with a warning. For every
            remaining group, mhcnuggets is invoked once per allele and its
            captured standard output is parsed for ``peptide,ic50`` records.

            :param peptides: a single peptide or a list of peptides; results are
                             keyed by the original peptide objects
            :param alleles: alleles to predict for; entries that are not in
                            ``self.supportedAlleles`` are dropped. ``None``
                            selects all supported alleles
            :param bool binary: if True, report 1.0 for affinities <= 500 and
                                0.0 otherwise; if False, report the predicted
                                affinity itself as a float
            :param kwargs: accepted for interface compatibility; not used here
            :return: ``EpitopePredictionResult`` whose index is a
                     (Seq, Method) MultiIndex and whose columns are the alleles
            :raises ValueError: if no prediction could be made for the input
            """
            # function-scope imports: only needed for temp-file housekeeping
            import os
            import shutil

            # NOTE(review): presumably an IC50 cutoff in nM -- confirm against
            # the mhcnuggets documentation
            binder_threshold = 500

            # test whether one peptide or a list
            if not isinstance(peptides, list):
                peptides = [peptides]

            # if no alleles are specified do predictions for all supported alleles
            if alleles is None:
                alleles = self.supportedAlleles
            else:
                # filter for supported alleles
                alleles = [a for a in alleles if a in self.supportedAlleles]
            # materialise: the alleles are iterated once per length group below
            alleles = list(self.convert_alleles(alleles))

            # prepare results dictionary
            result = defaultdict(dict)

            # keep input peptide objects for later use
            peptide_objects = {}
            for peptide in peptides:
                peptide_objects[str(peptide)] = peptide

            # groupby only merges adjacent items, so sort by length first
            pep_groups = sorted(peptide_objects, key=len)

            # A private scratch directory avoids the name-reuse race of
            # NamedTemporaryFile().name and lets us remove every file in one go.
            work_dir = tempfile.mkdtemp()
            try:
                for length, peps in itertools.groupby(pep_groups, key=len):
                    if length not in self.supportedLength:
                        logging.warning(
                            "Peptide length must be at least %i or at most %i for %s but is %i",
                            min(self.supportedLength),
                            max(self.supportedLength), self.name, length)
                        continue

                    # write peptides temporarily, new line separated
                    tmp_input_file = os.path.join(
                        work_dir, 'peptides_%d.txt' % length)
                    with open(tmp_input_file, 'w') as handle:
                        handle.writelines(pep + "\n" for pep in peps)

                    # predict bindings
                    for a in alleles:
                        allele_repr = self.revert_allele_repr(a)

                        # workaround for mhcnuggets file i/o buffer bug
                        mhcnuggets_output = cStringIO.StringIO()
                        with capture_stdout(mhcnuggets_output):
                            mhcnuggets_predict(class_='II',
                                               peptides_path=tmp_input_file,
                                               mhc=a)

                        # read predicted binding affinities back
                        mhcnuggets_output.seek(0)
                        reader = csv.reader(mhcnuggets_output,
                                            delimiter=' ',
                                            quotechar='|')
                        # skip log statements from mhcnuggets and header,
                        # echoing the log statements to stdout
                        for row in reader:
                            if row and row[0] == 'peptide,ic50':
                                break
                            print(' '.join(row))

                        # the same reader continues after the header line
                        for row in reader:
                            if not row:
                                # blank line, nothing to parse
                                continue
                            content = row[0].split(',')
                            # get original peptide object
                            peptide = peptide_objects[content[0]]
                            # Convert before comparing: the field is a string,
                            # and comparing a string with 500 never gives a
                            # meaningful result.
                            binding_affinity = float(content[1])
                            if binary:
                                if binding_affinity <= binder_threshold:
                                    result[allele_repr][peptide] = 1.0
                                else:
                                    result[allele_repr][peptide] = 0.0
                            else:
                                result[allele_repr][peptide] = binding_affinity
            finally:
                # always clean up the per-length input files
                shutil.rmtree(work_dir, ignore_errors=True)

            if not result:
                raise ValueError(
                    "No predictions could be made with " + self.name +
                    " for given input. Check your epitope length and HLA allele combination."
                )

            # create EpitopePredictionResult object. This is a multi-indexed DataFrame
            # with Peptide and Method as multi-index and alleles as columns
            df_result = EpitopePredictionResult.from_dict(result)
            df_result.index = pandas.MultiIndex.from_tuples(
                [(i, self.name) for i in df_result.index],
                names=['Seq', 'Method'])
            return df_result