def predict(self, peptides, alleles=None, binary=False, **kwargs):
    """
    Predict binding affinities for the given peptides with mhcnuggets (class I).

    The peptides are written to a temporary file, ``mhcnuggets_predict`` is
    run once per allele with ``class_='I'``, and the values it writes are
    read back from a per-allele output file.

    :param peptides: a single peptide or a list of peptides; every entry is
                     converted with ``str()`` before prediction
    :param alleles: alleles to predict for; entries not contained in
                    ``self.supportedAlleles`` are dropped silently. If None,
                    all of ``self.supportedAlleles`` are used
    :param bool binary: if True, report 1.0 for predicted values <= 500 and
                        0.0 otherwise instead of the predicted value itself
    :param kwargs: accepted for interface compatibility; not used here
    :return: the object built by ``EpitopePredictionResult.from_dict`` with its
             index replaced by a ('Seq', 'Method') multi-index; alleles are
             the columns
    :raises ValueError: if a predicted value cannot be parsed as a float
    """
    # Function-scope imports: only needed for temporary file housekeeping.
    import os
    import shutil

    # accept a single peptide as well as a list of peptides
    if not isinstance(peptides, list):
        peptides = [peptides]

    # without explicit alleles predict for everything that is supported,
    # otherwise keep only the supported ones
    if alleles is None:
        alleles = self.supportedAlleles
    else:
        alleles = [a for a in alleles if a in self.supportedAlleles]

    # mhcnuggets works on plain peptide strings
    peptides = [str(peptide) for peptide in peptides]

    alleles = self.convert_alleles(alleles)
    result = {}

    # A private directory replaces NamedTemporaryFile().name: that idiom
    # discards the file object right away and leaves only a name that another
    # process could claim. The directory also makes cleanup a single call.
    tmp_dir = tempfile.mkdtemp()
    try:
        # write peptides temporarily, new line separated
        tmp_input_file = os.path.join(tmp_dir, 'peptides.txt')
        with open(tmp_input_file, 'w') as handle:
            handle.writelines(peptide + "\n" for peptide in peptides)

        # predict binding affinities, one mhcnuggets run per allele
        for index, a in enumerate(alleles):
            allele_repr = self.revert_allele_repr(a)
            result[allele_repr] = {}

            tmp_output_file = os.path.join(tmp_dir, 'prediction_%i.csv' % index)
            mhcnuggets_predict(class_='I',
                               peptides_path=tmp_input_file,
                               mhc=a,
                               output=tmp_output_file)

            # read predicted binding affinities back
            with open(tmp_output_file, 'r') as csvfile:
                reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
                # skip header (tolerates an empty output file)
                next(reader, None)

                for row in reader:
                    if not row:
                        # blank line, nothing to parse
                        continue
                    content = row[0].split(',')
                    peptide = content[0]
                    # Convert before comparing: the raw csv field is a string
                    # and a str <= int comparison never yields a binder.
                    binding_affinity = float(content[1])
                    if binary:
                        result[allele_repr][peptide] = 1.0 if binding_affinity <= 500 else 0.0
                    else:
                        result[allele_repr][peptide] = binding_affinity
    finally:
        # remove the input file and all per-allele output files
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # create EpitopePredictionResult object. This is a multi-indexed DataFrame
    # with Peptide and Method as multi-index and alleles as columns
    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [(i, self.name) for i in df_result.index],
        names=['Seq', 'Method'])
    return df_result
def predict(self, peptides, alleles=None, binary=False, **kwargs):
    """
    Predict binding affinities for the given peptides with mhcnuggets (class II).

    Peptides are grouped by length; for every supported length the group is
    written to a temporary file and ``mhcnuggets_predict`` is run once per
    allele with ``class_='II'``. The prediction table is parsed from the
    output collected through ``capture_stdout``.

    :param peptides: a single peptide or a list of peptides; the original
                     objects are kept and used as keys of the result
    :param alleles: alleles to predict for; entries not contained in
                    ``self.supportedAlleles`` are dropped silently. If None,
                    all of ``self.supportedAlleles`` are used
    :param bool binary: if True, report 1.0 for predicted IC50 values <= 500
                        and 0.0 otherwise instead of the predicted value
    :param kwargs: accepted for interface compatibility; not used here
    :return: the object built by ``EpitopePredictionResult.from_dict`` with its
             index replaced by a ('Seq', 'Method') multi-index; alleles are
             the columns
    :raises ValueError: if no prediction could be made at all, or if a
                        predicted value cannot be parsed as a float
    """
    # Function-scope import: only needed for temporary file housekeeping.
    import os

    # accept a single peptide as well as a list of peptides
    if not isinstance(peptides, list):
        peptides = [peptides]

    # without explicit alleles predict for everything that is supported,
    # otherwise keep only the supported ones
    if alleles is None:
        alleles = self.supportedAlleles
    else:
        alleles = [a for a in alleles if a in self.supportedAlleles]

    # materialised because the alleles are iterated once per length group
    alleles = list(self.convert_alleles(alleles))

    # prepare results dictionary
    result = defaultdict(defaultdict)

    # keep input peptide objects for later use, keyed by their sequence string
    peptide_objects = {}
    for peptide in peptides:
        peptide_objects[str(peptide)] = peptide

    # group peptides by length (groupby needs its input sorted by the same key)
    pep_groups = sorted(peptide_objects, key=len)
    for length, peps in itertools.groupby(pep_groups, key=len):
        if length not in self.supportedLength:
            logging.warning(
                "Peptide length must be at least %i or at most %i for %s but is %i",
                min(self.supportedLength), max(self.supportedLength),
                self.name, length)
            continue
        peps = list(peps)

        # mkstemp creates the file atomically and keeps it until it is removed
        # below; NamedTemporaryFile().name only yielded a reusable name and
        # the file written afterwards was never deleted.
        fd, tmp_input_file = tempfile.mkstemp()
        try:
            # write peptides temporarily, new line separated
            with os.fdopen(fd, 'w') as handle:
                handle.writelines(peptide + "\n" for peptide in peps)

            # predict bindings
            for a in alleles:
                allele_repr = self.revert_allele_repr(a)

                # workaround for mhcnuggets file i/o buffer bug: collect the
                # printed output instead of letting mhcnuggets write a file
                mhcnuggets_output = cStringIO.StringIO()
                with capture_stdout(mhcnuggets_output):
                    mhcnuggets_predict(class_='II',
                                       peptides_path=tmp_input_file,
                                       mhc=a)

                # read predicted binding affinities back
                mhcnuggets_output.seek(0)
                reader = csv.reader(mhcnuggets_output, delimiter=' ', quotechar='|')

                # skip log statements from mhcnuggets and the header, echoing
                # the log statements so they are not lost by the capture
                for row in reader:
                    if row and row[0] == 'peptide,ic50':
                        break
                    # single-argument call form prints identically whether or
                    # not print is a function
                    print(' '.join(row))

                # the same reader continues right after the header line
                for row in reader:
                    if not row:
                        # blank line, nothing to parse
                        continue
                    content = row[0].split(',')
                    # get original peptide object
                    peptide = peptide_objects[content[0]]
                    # Convert before comparing: the raw field is a string and
                    # a str <= int comparison never yields a binder.
                    binding_affinity = float(content[1])
                    if binary:
                        result[allele_repr][peptide] = 1.0 if binding_affinity <= 500 else 0.0
                    else:
                        result[allele_repr][peptide] = binding_affinity
        finally:
            # one input file per length group; remove it once all alleles ran
            os.remove(tmp_input_file)

    if not result:
        raise ValueError(
            "No predictions could be made with " + self.name +
            " for given input. Check your epitope length and HLA allele combination."
        )

    # create EpitopePredictionResult object. This is a multi-indexed DataFrame
    # with Peptide and Method as multi-index and alleles as columns
    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [(i, self.name) for i in df_result.index],
        names=['Seq', 'Method'])
    return df_result