def setUp(self):
    """Create the sequence fixtures used by the tests of this case."""
    # A mixed list: one peptide and one protein (sequence, id, gene name as
    # positional arguments).
    peptide = Peptide("SYFPEISYFP")
    protein = Protein("IHTIEPFYSIHTIEPFYSIHTIEPFYSIHTIEPFYSIHTIEPFYS",
                      "ID-01", "FOXP3")
    self.seqs = [peptide, protein]

    # Two 11-residue peptide fragments.
    self.fragments = [Peptide(s) for s in ("FSYFPEITHIR", "FIHTIEPFYSR")]
def test_smmtap_abitrary_peptide_length(self):
    """Run the "smmtap" TAP predictor on peptides of length 9, 10 and 11.

    The prediction is only printed; nothing is asserted, so the test fails
    only if the predictor raises.
    """
    predictor = TAPPredictorFactory("smmtap")
    sequences = ("SYFPEITHI", "IHTIEPFYSA", "IHTIEPFYSAA")
    prediction = predictor.predict([Peptide(s) for s in sequences])
    print(prediction)
def generate_peptides_from_proteins(proteins, window_size, peptides=None):
    """
    Slides a window of ``window_size`` residues over each given
    :class:`~Fred2.Core.Protein.Protein` and collects one
    :class:`~Fred2.Core.Peptide.Peptide` per distinct window sequence.

    Windows containing a character that is not in ``_allowed_aas`` (tested on
    the upper-cased window) are skipped. For every kept window the peptide
    records the source protein and the window's start offset under the
    protein's ``transcript_id``.

    :param proteins: (Iterable of) protein(s) from which the unique peptides
                     should be generated
    :type proteins: list(:class:`~Fred2.Core.Protein.Protein`) or
                    :class:`~Fred2.Core.Protein.Protein`
    :param int window_size: Size of peptide fragments
    :param peptides: Existing peptides to update during peptide generation
                     (use case: adding and updating peptides of newly
                     generated proteins)
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`)
    :return: An iterator over the unique peptides
    :rtype: Generator(:class:`~Fred2.Core.Peptide.Peptide`)
    :raises ValueError: If ``peptides`` contains a non-peptide object or
                        ``proteins`` contains a non-protein object
    """
    # Accept a bare Peptide / Protein as a one-element collection.
    if isinstance(peptides, Peptide):
        peptides = [peptides]

    # sequence string -> Peptide instance, pre-seeded with the given peptides
    by_sequence = {}
    if peptides:
        for known in peptides:
            if not isinstance(known, Peptide):
                raise ValueError("Specified list of Peptides contain non peptide objects")
            by_sequence[str(known)] = known

    if isinstance(proteins, Protein):
        proteins = [proteins]

    for protein in proteins:
        if not isinstance(protein, Protein):
            raise ValueError("Input does contain non protein objects.")

        residues = str(protein)
        for offset in xrange(len(protein) + 1 - window_size):
            window = residues[offset:offset + window_size]
            # Skip windows with characters outside the allowed alphabet.
            if any(aa not in _allowed_aas for aa in window.upper()):
                continue

            transcript = protein.transcript_id
            peptide = by_sequence.get(window)
            if peptide is None:
                peptide = by_sequence[window] = Peptide(window)
            peptide.proteins[transcript] = protein
            peptide.proteinPos[transcript].append(offset)

    return by_sequence.itervalues()
def test_simple_assembly(self):
    """
    Smoke test: assemble ``self.peptides`` with the "PCM" cleavage predictor
    and the glpk solver, and compare against the expected ordering (per the
    original author, that solution was checked for optimality by hand).

    :return:
    """
    expected_order = [Peptide(s) for s in ("YLYDHLAPM", "ALYDVVSTL", "KLLPRLPGV")]
    cleavage_predictor = CleavageSitePredictorFactory("PCM")
    assembly = EpitopeAssembly(self.peptides, cleavage_predictor,
                               solver="glpk", verbosity=0)
    self.assertEqual(assembly.solve(), expected_order)
def generate_peptides_from_protein(proteins, window_size, peptides=None):
    """
    Creates all peptides of a given window size from the given protein(s).

    Each distinct window sequence is represented by exactly one Peptide; for
    every occurrence the peptide records the source protein and the window's
    start offset under the protein's ``transcript_id``.

    :param proteins: (iterable of) protein(s) from which the unique peptides
                     should be generated
    :param int window_size: size of peptide fragments
    :param list(Peptide) peptides: peptides to update during peptide
                                   generation (use case: adding and updating
                                   peptides of newly generated proteins)
    :return: the values of the internal sequence -> Peptide dictionary
    :raises ValueError: if ``peptides`` contains a non-peptide object or
                        ``proteins`` contains a non-protein object
    """
    # Accept a bare Peptide / Protein as a one-element collection.
    if isinstance(peptides, Peptide):
        peptides = [peptides]

    if peptides:
        for candidate in peptides:
            if not isinstance(candidate, Peptide):
                raise ValueError("Specified list of Peptides contain non peptide objects")

    # sequence string -> Peptide instance
    if peptides is None:
        by_sequence = {}
    else:
        by_sequence = dict((str(known), known) for known in peptides)

    if isinstance(proteins, Protein):
        proteins = [proteins]

    for protein in proteins:
        if not isinstance(protein, Protein):
            raise ValueError("Input does contain non protein objects.")

        residues = str(protein)
        for offset in xrange(len(protein) + 1 - window_size):
            window = residues[offset:offset + window_size]
            transcript = protein.transcript_id
            peptide = by_sequence.get(window)
            if peptide is None:
                peptide = by_sequence[window] = Peptide(window)
            peptide.proteins[transcript] = protein
            peptide.proteinPos[transcript].append(offset)

    return by_sequence.values()
def setUp(self):
    """Create alleles, 9-mer peptides, a BIMAS prediction and thresholds."""
    self.proteins = []

    allele_names = ("HLA-A*01:01", "HLA-B*07:02", "HLA-C*03:01")
    self.alleles = [Allele(name) for name in allele_names]

    # Whitespace-separated peptide sequences.
    sequences = """SFSIFLLAL GHRMAWDMM VYEADDVIL CFTPSPVVV FLLLADARV GPADGMVSK YLYDHLAPM GLRDLAVAV GPTPLLYRL TWVLVGGVL IELGGKPAL LAGGVLAAV QYLAGLSTL NFVSGIQYL VLSDFKTWL ARPDYNPPL KLLPRLPGV RHTPVNSWL GLYLFNWAV ALYDVVSTL RRCRASGVL WPLLLLLLA VTYSLTGLW YFVIFFVAA""".split()
    self.peptides = [Peptide(seq) for seq in sequences]

    predictor = EpitopePredictorFactory("BIMAS")
    self.result = predictor.predict(self.peptides, self.alleles)

    # Same threshold for every allele; keys carry no "HLA-" prefix.
    self.thresh = dict.fromkeys(("A*01:01", "B*07:02", "C*03:01"), 10)
def extractEpitopesAndConservationFromConsensus(consensus_info, epitope_length):
    """
    Enumerate all epitopes of ``epitope_length`` in the consensus sequences
    and compute a conservation score for each.

    For every value ``ci`` of ``consensus_info``, ``ci[0][0]`` is used as the
    consensus sequence and ``ci[0][1]`` as the per-position frequencies. The
    conservation of a window is the product of the frequencies it spans. When
    the same epitope occurs several times (within one consensus or across
    several), the largest conservation value is kept.

    :param dict consensus_info: mapping whose values hold
                                ``(consensus, frequencies)`` as first element
    :param int epitope_length: length of the epitopes to extract
    :return: tuple ``(error, conservation)``; ``error`` is always the empty
             string, ``conservation`` maps each Peptide to its score
    """
    error = ''
    conservation = {}

    for ci in consensus_info.values():
        consensus = str(ci[0][0])
        frequencies = ci[0][1]
        for i in xrange(len(consensus) - epitope_length + 1):
            epitope = Peptide(consensus[i:i + epitope_length])
            # numpy.prod is the supported spelling; the numpy.product alias
            # was removed in NumPy 2.0.
            co = numpy.prod(frequencies[i:i + epitope_length])
            # Keep the highest conservation seen for this epitope.
            if epitope not in conservation or conservation[epitope] < co:
                conservation[epitope] = co

    return (error, conservation)
def read_peptide_input(filename):
    """
    Read peptides and their per-peptide metadata from a tab-separated file.

    Expected columns (min required): id sequence

    Every column other than ``sequence`` is attached to the peptide through
    ``log_metadata(column, value)``.

    :param str filename: path to the tab-separated input file (with header)
    :return: tuple ``(peptides, metadata)`` - the list of peptides in file
             order and the set of metadata column names that were logged
    """
    peptides = []
    # Collect column names in a set right away instead of appending one entry
    # per row and column and de-duplicating afterwards.
    metadata = set()

    with open(filename, 'r') as peptide_input:
        reader = csv.DictReader(peptide_input, delimiter='\t')
        for row in reader:
            pep = Peptide(row['sequence'])
            for col, value in row.items():
                if col != 'sequence':
                    pep.log_metadata(col, value)
                    metadata.add(col)
            peptides.append(pep)

    return peptides, metadata
def read_peptide_input(filename):
    """
    Read the peptides listed in a tab-separated file.

    Expected columns (min required): id sequence

    :param str filename: path to the tab-separated input file (with header)
    :return: list of peptides built from the ``sequence`` column, in file
             order
    """
    with open(filename, 'r') as handle:
        # Raise the csv field size limit to enable listing of protein names
        # for each peptide.
        csv.field_size_limit(600000)
        records = csv.DictReader(handle, delimiter='\t')
        return [Peptide(record['sequence']) for record in records]
def setUp(self):
    """Create test peptides and two sets of 9-mer "self" peptides.

    ``self.selfpeptides`` comes from the bundled ``testSequences.fasta``,
    ``self.fewselfpeptides`` from two hard-coded proteins.
    """
    self.peptides = [Peptide(s) for s in ("SYFPEITHI", "IHTIEPFYS")]

    # 9-mers of every protein in the packaged example FASTA file.
    fasta_path = pkg_resources.resource_filename(
        'Fred2', path.join('Data', 'examples', 'testSequences.fasta'))
    with open(fasta_path, "rU") as handle:
        fasta_proteins = [Protein(str(record.seq))
                          for record in SeqIO.parse(handle, "fasta")]
    self.selfpeptides = [
        str(pep) for pep in generate_peptides_from_proteins(fasta_proteins, 9)
    ]

    # 9-mers of a small, fixed pair of proteins.
    small_sequences = (
        "MKERRIDMKEKNVKAKAPNKKVLGLTTKIFIALLAGAILGIVLCYLVPDSSFKKDVIVEGILYVIGQGFIRLMKMLVVPLVFCSLVCGSMAIGDTKKLGTVGVRTLAFYLATTALAVVALGVGNLINPGVGLDMSAIQSSAASVETMEATSLTDTILNIIPDNPINSLASGSMLQVIVFALIVGVILAKMGERAETVANFFSQFNDIMMEMTMMIMSLAPIGVFCLISRTFANIGFSAFIPLAKYMIGVLLALAIQCFGVYQILLKIFTGLNPIRFIKKFFPVMAFAFSTATSNATIPMSIDTLSKKVGVSKKISSFTIPLGATINMDGTSIMQGVAVVFAAQAFGIHLTPMDYVTVIGTATLASVGTAGVPSVGLVTLTMVFNSVGLPVEAIGLIMGIDRILDMTRTAVNITGDAVCTTIVAHQNGALDKKVFNETE",
        "MLKVWIAGASGQIGRALNDVLDPMQIEALNTDLDELDITDTDEVINFGTVNRPDVIINCTGITDTDECEANPEHAYRVNALGARNLSIVARKCGSKIVQLSTDDVFDGQSKKPYTEFDDTNPLTVYGRSKRAGENYVKEFTHKHFVIRSNWVYGHGGHNFVNRVLAAAEAGNGLSVASDQFGSPTSAKDLAKMIMYLISTNEYGTYHVTCRGVCSRYEFAQEILKLAGKDIELRAVPTEQSDLSAVRPPYAVLDNFILRIIEVYDMPDWKESLKEYMDERTED",
    )
    small_proteins = [Protein(seq) for seq in small_sequences]
    self.fewselfpeptides = [
        str(pep) for pep in generate_peptides_from_proteins(small_proteins, 9)
    ]
def setUp(self):
    """Create the two 9-mer peptides shared by the tests."""
    self.peptides = [Peptide(seq) for seq in ("SYFPEITHI", "IHTIEPFYS")]
def approximate(self, start=0, threads=1, options=None):
    """
    Approximates the Epitope Assembly problem by applying the Lin-Kernighan
    traveling salesman heuristic.

    LKH implementation must be downloaded, compiled, and globally executable.

    Source code can be found here:
    http://www.akira.ruc.dk/~keld/research/LKH/

    Spacers of length ``start`` .. ``k`` are designed for every ordered pair
    of distinct epitopes in a process pool; for each pair the spacer with the
    best (largest) minimum of the two reported cleavage scores is kept and
    the resulting cost matrix is handed to :class:`EpitopeAssembly`.

    :param int start: Start length for spacers (default 0).
    :param int threads: Number of processes used for spacer design
                        (``None`` uses all CPUs). Be careful, if options
                        contain solver threads it will allocate
                        threads*solver_threads cores!
    :param dict(str,str) options: Solver specific options (threads for example)
    :return: A list of ordered :class:`~Fred2.Core.Peptide.Peptide`
             (epitope, spacer, epitope, ...)
    :rtype: list(:class:`~Fred2.Core.Peptide.Peptide`)
    :raises ValueError: If no epitope prediction matrix entries could be
                        loaded for the selected alleles and epitope length
    """
    def load_model(name, model):
        # Import Fred2.Data.pssms.<name>.mat.<model> and return its matrix.
        return getattr(
            __import__("Fred2.Data.pssms." + name + ".mat." + model,
                       fromlist=[model]), model)

    options = dict() if options is None else options
    threads = mp.cpu_count() if threads is None else threads

    # prepare parameters
    cn = min(self.__clev_pred.supportedLength)
    cl_pssm = load_model(self.__clev_pred.name,
                         self.__clev_pred.name + "_" + str(cn))
    cleav_pos = self.__clev_pred.cleavagePos
    en = self.__en
    epi_pssms = {}
    allele_prob = {}

    # Matrices of these predictors are rescaled; the test does not depend on
    # the loop variables, so evaluate it once.
    rescale = self.__epi_pred.name in ["smm", "smmpmbec", "comblibsidney"]

    for a in self.__alleles:
        allele_prob[a.name] = a.prob
        pssm = load_model(
            self.__epi_pred.name,
            "%s_%i" % (self.__epi_pred.convert_alleles([a])[0], en))
        for j, v in pssm.iteritems():
            for aa, score in v.iteritems():
                if rescale:
                    epi_pssms[j, aa, a.name] = 1 / 10. - math.log(
                        math.pow(10, score), 50000)
                else:
                    epi_pssms[j, aa, a.name] = score

    if not epi_pssms:
        raise ValueError(
            "Selected alleles with epitope length are not supported by the prediction method."
        )

    # Rescale the thresholds exactly once and keep the result local.
    # Previously self.__thresh was overwritten inside the innermost loop,
    # i.e. re-transformed once per matrix entry (and again on every call).
    if rescale:
        thresh = dict(
            (k, 1 - math.log(v, 50000) if v != 0 else 0)
            for k, v in self.__thresh.iteritems())
    else:
        thresh = self.__thresh

    # run spacer designs in parallel using multiprocessing
    tasks = ((str(ei), str(ej), i, en, cn, cl_pssm, epi_pssms, cleav_pos,
              allele_prob, self.__alpha, thresh, self.__solver, self.__beta,
              options)
             for i in xrange(start, self.__k + 1)
             for ei, ej in itr.product(self.__peptides, repeat=2)
             if ei != ej)

    # The pool is created only after validation and always shut down, so
    # worker processes are not leaked when something raises.
    pool = mp.Pool(threads)
    try:
        res = pool.map(_runs_lexmin, tasks)
    finally:
        pool.close()
        pool.join()

    # find best scoring spacer for each epitope pair
    opt_spacer = {}
    adj_matrix = {}
    inf = float("inf")
    for ei, ej, score, epi, spacer, c1, c2, non_c in res:
        cost = -min(c1, c2)
        if adj_matrix.get((ei, ej), inf) > cost:
            adj_matrix[(ei, ej)] = cost
            opt_spacer[(ei, ej)] = spacer

    self.spacer = opt_spacer

    # solve assembly with generated adjacency matrix
    assembler = EpitopeAssembly(self.__peptides, self.__clev_pred,
                                solver=self.__solver, matrix=adj_matrix)
    res = assembler.approximate()

    # generate output: first epitope, then (spacer, epitope) for each step
    sob = []
    for i in xrange(len(res) - 1):
        ei = str(res[i])
        ej = str(res[i + 1])
        if not i:
            sob.append(Peptide(ei))
        sob.append(Peptide(opt_spacer[ei, ej]))
        sob.append(Peptide(ej))
    return sob
def solve(self, start=0, threads=None, options=None):
    """
    Solve the epitope assembly problem with spacers optimally using integer
    linear programming.

    .. note::

        This can take quite long and should not be done for more than 30
        epitopes max! Also, one has to disable pre-solving steps in order to
        use this model.

    Spacers of length ``start`` .. ``k`` are designed for every ordered pair
    of distinct epitopes in a process pool; for each pair the spacer with the
    best (largest) minimum of the two reported cleavage scores is kept and
    the resulting cost matrix is handed to :class:`EpitopeAssembly`.

    :param int start: Start length for spacers (default 0).
    :param int threads: Number of processes used for spacer design
                        (``None`` uses all CPUs). Be careful, if options
                        contain solver threads it will allocate
                        threads*solver_threads cores!
    :param dict(str,str) options: Solver specific options as keys and
                                  parameters as values
    :return: A list of ordered :class:`~Fred2.Core.Peptide.Peptide`
             (epitope, spacer, epitope, ...)
    :rtype: list(:class:`~Fred2.Core.Peptide.Peptide`)
    """
    def load_model(name, model):
        # Import Fred2.Data.pssms.<name>.mat.<model> and return its matrix.
        return getattr(
            __import__("Fred2.Data.pssms." + name + ".mat." + model,
                       fromlist=[model]), model)

    options = dict() if options is None else options
    threads = mp.cpu_count() if threads is None else threads

    # prepare parameters
    cn = min(self.__clev_pred.supportedLength)
    cl_pssm = load_model(self.__clev_pred.name,
                         self.__clev_pred.name + "_" + str(cn))
    cleav_pos = self.__clev_pred.cleavagePos
    en = self.__en
    epi_pssms = {}
    allele_prob = {}

    # Matrices of these predictors are rescaled; the test does not depend on
    # the loop variables, so evaluate it once.
    rescale = self.__epi_pred.name in ["smm", "smmpmbec", "comblibsidney"]

    for a in self.__alleles:
        allele_prob[a.name] = a.prob
        pssm = load_model(
            self.__epi_pred.name,
            "%s_%i" % (self.__epi_pred.convert_alleles([a])[0], en))
        for j, v in pssm.iteritems():
            for aa, score in v.iteritems():
                if rescale:
                    epi_pssms[j, aa, a.name] = 1 / 10. - math.log(
                        math.pow(10, score), 50000)
                else:
                    epi_pssms[j, aa, a.name] = score

    # Rescale the thresholds exactly once and keep the result local.
    # Previously self.__thresh was overwritten inside the innermost loop,
    # i.e. re-transformed once per matrix entry (and again on every call).
    if rescale and epi_pssms:
        thresh = dict(
            (k, 1 - math.log(v, 50000) if v != 0 else 0)
            for k, v in self.__thresh.iteritems())
    else:
        thresh = self.__thresh

    # run spacer designs in parallel using multiprocessing
    tasks = ((str(ei), str(ej), i, en, cn, cl_pssm, epi_pssms, cleav_pos,
              allele_prob, self.__alpha, thresh, self.__solver, self.__beta,
              options)
             for i in xrange(start, self.__k + 1)
             for ei, ej in itr.product(self.__peptides, repeat=2)
             if ei != ej)

    # Always shut the pool down, so worker processes are not leaked when a
    # spacer design raises.
    pool = mp.Pool(threads)
    try:
        res = pool.map(_runs_lexmin, tasks)
    finally:
        pool.close()
        pool.join()

    # find best scoring spacer for each epitope pair
    opt_spacer = {}
    adj_matrix = {}
    inf = float("inf")
    for ei, ej, score, epi, spacer, c1, c2, non_c in res:
        cost = -min(c1, c2)
        if adj_matrix.get((ei, ej), inf) > cost:
            adj_matrix[(ei, ej)] = cost
            opt_spacer[(ei, ej)] = spacer

    self.spacer = opt_spacer

    # solve assembly with generated adjacency matrix
    assembler = EpitopeAssembly(self.__peptides, self.__clev_pred,
                                solver=self.__solver, matrix=adj_matrix)
    res = assembler.solve(options=options)

    # generate output: first epitope, then (spacer, epitope) for each step
    sob = []
    for i in xrange(len(res) - 1):
        ei = str(res[i])
        ej = str(res[i + 1])
        if not i:
            sob.append(Peptide(ei))
        sob.append(Peptide(opt_spacer[ei, ej]))
        sob.append(Peptide(ej))
    return sob
def __init__(self, peptides, pred, solver="glpk", weight=0.0, matrix=None, verbosity=0):
    """
    Build the integer linear program that orders the given peptides.

    The peptides plus an artificial "Dummy" node form a complete directed
    graph. Edge weights are either derived from cleavage predictions of the
    concatenated peptide pairs (``matrix is None``) or taken from ``matrix``.
    The model selects exactly one outgoing and one incoming edge per node and
    minimizes the summed edge weight.

    :param peptides: peptides to assemble; copied with ``peptides[:]``
    :param pred: cleavage site predictor, must be an
                 ``ACleavageSitePrediction``
    :param str solver: solver name handed to ``SolverFactory``
    :param float weight: factor applied to the summed cleavage scores of the
                         positions around the junction when building edge
                         weights
    :param dict matrix: optional precomputed edge weights keyed by
                        ``(start_seq, stop_seq)``; when given, no cleavage
                        prediction is run
    :param int verbosity: values > 0 print the model instance
    :raises ValueError: if ``pred`` is not an ``ACleavageSitePrediction``
    """
    if not isinstance(pred, ACleavageSitePrediction):
        raise ValueError(
            "Cleave site predictor must be of type ACleavageSitePrediction"
        )

    if len(peptides) > 60:
        warnings.warn(
            "The peptide set exceeds 60. Above this level one has to expect " +
            "considerably long running times due to the complexity of the problem."
        )

    # Generate model
    # 1. Generate peptides for which cleave sites have to be predicted
    # 2. Generate graph with dummy element
    self.__verbosity = verbosity

    # Work on a copy so the caller's list does not receive the "Dummy" node.
    pep_tmp = peptides[:]
    pep_tmp.append("Dummy")
    edge_matrix = {}
    fragments = {}
    seq_to_pep = {}
    self.neo_cleavage = {}
    self.good_cleavage = {}

    if matrix is None:
        for start, stop in itr.combinations(pep_tmp, 2):
            if start == "Dummy" or stop == "Dummy":
                # Edges from/to the dummy node are free.
                seq_to_pep[str(start)] = start
                seq_to_pep[str(stop)] = stop
                edge_matrix[(str(start), str(stop))] = 0
                edge_matrix[(str(stop), str(start))] = 0
            else:
                # Register both concatenation orders of the pair.
                start_str = str(start)
                stop_str = str(stop)
                frag = Peptide(start_str + stop_str)
                garf = Peptide(stop_str + start_str)

                fragments[frag] = (start_str, stop_str)
                fragments[garf] = (stop_str, start_str)

        cleave_pred = pred.predict(fragments.keys())
        for i in set(cleave_pred.index.get_level_values(0)):
            # NOTE(review): `fragments` is keyed by Peptide objects but looked
            # up with a plain string here - presumably Peptide hashes/compares
            # like its sequence string; confirm. `.ix` is deprecated in
            # newer pandas releases.
            fragment = "".join(cleave_pred.ix[i]["Seq"])
            start, stop = fragments[fragment]

            # Index of the last residue of the first peptide (the junction).
            cleav_pos = len(str(start)) - 1
            # Edge weight: negated junction score, penalized by `weight`
            # times the scores at positions cleav_pos-1 .. cleav_pos+3
            # (junction itself excluded).
            edge_matrix[(start, stop)] = -1.0 * (
                cleave_pred.loc[(i, len(str(start)) - 1), pred.name] -
                weight * sum(cleave_pred.loc[(i, j), pred.name]
                             for j in xrange(cleav_pos - 1, cleav_pos + 4, 1)
                             if j != cleav_pos))

            self.neo_cleavage[(start, stop)] = sum(
                cleave_pred.loc[(i, j), pred.name]
                for j in xrange(cleav_pos - 1, cleav_pos + 4, 1)
                if j != cleav_pos)
            self.good_cleavage[(start, stop)] = cleave_pred.loc[(i, len(str(start)) - 1), pred.name]
    else:
        # NOTE(review): this aliases the caller's dict, so the dummy edges
        # added below are written into the `matrix` argument itself.
        edge_matrix = matrix
        seq_to_pep = {str(p): p for p in pep_tmp}
        for p in seq_to_pep.iterkeys():
            if p != "Dummy":
                edge_matrix[(p, "Dummy")] = 0
                edge_matrix[("Dummy", p)] = 0
    self.__seq_to_pep = seq_to_pep

    # 3. Initialize ILP
    self.__solver = SolverFactory(solver)
    model = ConcreteModel()

    # E: real peptides; E_prime: peptides plus the dummy node.
    E = filter(lambda x: x != "Dummy", seq_to_pep.keys())
    model.E = Set(initialize=E)
    model.E_prime = Set(initialize=seq_to_pep.keys())
    model.ExE = Set(initialize=itr.permutations(E, 2), dimen=2)

    model.w_ab = Param(model.E_prime, model.E_prime, initialize=edge_matrix)
    model.card = Param(initialize=len(model.E_prime))

    # x[a, b] == 1 selects edge a -> b; u[a] is an integer order variable.
    model.x = Var(model.E_prime, model.E_prime, within=Binary)
    model.u = Var(model.E, domain=PositiveIntegers, bounds=(2, model.card))

    # The lambda parameter is unused; the rule reads `model` from the
    # enclosing scope.
    model.obj = Objective(
        rule=lambda mode: sum(model.w_ab[a, b] * model.x[a, b]
                              for a in model.E_prime
                              for b in model.E_prime if a != b),
        sense=minimize)
    # Exactly one outgoing and one incoming edge per node.
    model.tour_constraint_1 = Constraint(
        model.E_prime,
        rule=lambda model, a: sum(model.x[a, b] for b in model.E_prime if a != b) == 1)
    model.tour_constraint_2 = Constraint(
        model.E_prime,
        rule=lambda model, a: sum(model.x[b, a] for b in model.E_prime if a != b) == 1)
    # Ordering constraint on u over pairs of real peptides; presumably a
    # Miller-Tucker-Zemlin style sub-tour elimination - confirm.
    model.cardinality_constraint = Constraint(
        model.ExE,
        rule=lambda model, a, b: model.u[a] - model.u[b] + 1 <= (model.card - 1) * (1 - model.x[a, b]))

    self.instance = model
    if self.__verbosity > 0:
        print "MODEL INSTANCE"
        self.instance.pprint()
def setUp(self):
    """Create peptide and allele fixtures for the prediction tests."""
    # Two 9-mers and one 11-mer.
    self.peptides_mhcI = [Peptide(s) for s in ("SYFPEITHI", "IHTIEPFYS")]
    self.peptides_fragment = [Peptide("IHTIEPFYSAA")]

    mhc1_names = ("HLA-B*15:01", "HLA-A*02:01")
    mhc2_names = ("HLA-DRB1*07:01", "HLA-DRB1*15:01")
    self.mhcI = [Allele(name) for name in mhc1_names]
    self.mhcII = [Allele(name) for name in mhc2_names]
def generate_peptides_from_protein(proteins, window_size):
    """
    Creates all peptides for a given window size, from the given protein(s),
    together with the variants affecting each peptide.

    Each distinct window sequence is represented by one Peptide. Per source
    protein (keyed by its ``transcript_id``) the peptide records the protein,
    the affecting variants and the protein's original transcript.

    :param proteins: (iterable of) protein(s) from which the unique peptides
                     should be generated
    :param int window_size: size of peptide fragments
    :return: list of the unique Peptide objects
    :raises ValueError: if the input contains a non-protein object
    """
    def frameshift_influences(tid, _vars, res, start):
        # Append to `res` the variants located before `start` that still
        # influence the window through an uncompensated frameshift.
        accu = []  # accumulator for relevant variants
        shift = 0  # running frame shift
        # Sort a copy so the caller's list is not reordered.
        for var in sorted(_vars, key=lambda v: v.genomePos):
            if var.get_protein_position(tid) >= start:
                # Variants from here on lie at or after the window start.
                break
            shift += var.get_shift()
            if shift:
                # The frame is (still) shifted: this variant matters.
                accu.append(var)
            else:
                # The frame is restored, earlier variants no longer reach
                # the window. (Was `accu = {}`, which broke later appends,
                # and left `shift` at its old value.)
                accu = []
        # Also reached when every variant lies before `start`; previously
        # the accumulated variants were dropped in that case.
        res += accu

    def gen_peptide_info(protein):
        # Generate peptide sequences and find the variants within each
        res = []
        seq = str(protein)
        for i in xrange(len(protein) + 1 - window_size):
            # generate peptide fragment
            end = i + window_size
            pep_seq = seq[i:end]

            # get the variants affecting the peptide:
            if protein.vars:
                # variants within the peptide
                # NOTE(review): `pos <= end` also admits the position right
                # after the window (the slice end is exclusive) - confirm
                # this is intended.
                pep_var = [var
                           for pos, var_list in protein.vars.iteritems()
                           for var in var_list if i <= pos <= end]

                # outside variants that affect the peptide via frameshift:
                frameshift_influences(protein.transcript_id,
                                      protein.orig_transcript.vars.values(),
                                      pep_var, i)
            else:
                pep_var = []

            res.append((pep_seq, pep_var))
        return res

    final_peptides = {}  # sequence : peptide-instance

    if isinstance(proteins, Protein):
        proteins = [proteins]

    for prot in proteins:
        # Validate while iterating: a separate any(...) pass would exhaust a
        # generator input before any peptide is generated.
        if not isinstance(prot, Protein):
            raise ValueError("Input does contain non protein objects.")

        # generate all peptide sequences per protein:
        for (seq, _vars) in gen_peptide_info(prot):
            t_id = prot.transcript_id
            if seq not in final_peptides:
                final_peptides[seq] = Peptide(seq)
            final_peptides[seq].proteins[t_id] = prot
            final_peptides[seq].vars[t_id] = _vars
            final_peptides[seq].transcripts[t_id] = prot.orig_transcript

    return final_peptides.values()
def __main__():
    """
    Annotate the peptide hits of an idXML file with MHC binding predictions.

    Reads the target alleles (from the file given with ``-allele``, or, for
    ``-allele in`` together with ``-dirallele``, from ``<dirallele>/<x>.allele``
    where ``x`` is taken from the ``BD...-x`` token of the input file name),
    predicts binding of all unmodified hit sequences within the length range
    of the chosen MHC class, and stores the best allele per sequence as meta
    values on the hits before writing the result to ``-out``.

    Exits with status 1 on missing arguments, missing alleles or a failed
    prediction, and with status 0 when ``-allele in`` is used on an input
    whose name does not contain ``_W_``.
    """
    parser = argparse.ArgumentParser(version=VERSION)
    parser.add_argument('-c',
                        dest="mhcclass",
                        help='<Required> MHC class',
                        required=True)
    parser.add_argument('-in',
                        dest="inf",
                        help='<Required> full path to the input file',
                        required=True)
    parser.add_argument('-out',
                        dest="out",
                        help="<Required> full path to the output file",
                        required=True)
    parser.add_argument(
        '-allele',
        dest="allele",
        help="<Required> full path to an allele file, if 'in', allele file will be deduced from in file name",
        required=True)
    parser.add_argument(
        '-dirallele',
        dest="dirallele",
        help="for use with '-allele in', describes full base path to the allele files")

    # Must run before parse_args(): with required options, parse_args()
    # itself exits on an empty command line, so the help was never shown.
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    options = parser.parse_args()

    # Bail out if ANY of the mandatory values is empty (the former
    # `not (a or b or c)` only triggered when all of them were).
    if not (options.inf and options.out and options.allele):
        parser.print_help()
        sys.exit(1)

    # Reject unknown classes up front instead of failing later with a
    # NameError on the predictor / length bounds.
    if options.mhcclass not in ("I", "II"):
        parser.error("unsupported MHC class %r (expected 'I' or 'II')"
                     % options.mhcclass)

    # Fred2.FileReader.read_lines is broken, so the allele file is parsed
    # by hand.
    if options.allele == "in" and options.dirallele:
        if "_W_" not in options.inf:
            print("No class 1 type run detected.")
            sys.exit(0)
        allele_file = None
        for sp in options.inf.split("_"):
            if sp.startswith("BD"):
                allele_file = join(options.dirallele,
                                   sp.split("-")[1] + ".allele")
        if allele_file is None:
            # No "BD" token in the file name: nothing to deduce the allele
            # file from (previously this ended in open(None)).
            parser.print_help()
            sys.exit(1)
    else:
        allele_file = options.allele

    target_alleles_set = set()
    with open(allele_file, 'r') as handle:
        for line in handle:
            name = line.strip().upper()
            if name:  # ignore blank lines
                target_alleles_set.add(Allele(name))

    if not target_alleles_set:
        parser.print_help()
        sys.exit(1)

    if options.mhcclass == "I":
        ttn = EpitopePredictorFactory('netmhcpan', version='3.0')
        lowerBound = 8
        upperBound = 12
    else:
        ttn = EpitopePredictorFactory('netmhcIIpan', version='3.1')
        lowerBound = 15
        upperBound = 25

    pros = list()
    peps = list()
    f = oms.IdXMLFile()
    f.load(options.inf, pros, peps)

    # Unique unmodified sequences within the length range and free of the
    # ambiguous/non-standard residue codes U, B, X and Z.
    pepstr = set()
    for pep in peps:
        for h in pep.getHits():
            unmod = h.getSequence().toUnmodifiedString()
            if lowerBound <= len(unmod) <= upperBound \
                    and not any(code in unmod for code in 'UBXZ'):
                pepstr.add(unmod)

    es = [Peptide(x) for x in pepstr]

    try:
        preds_n = ttn.predict(es, alleles=target_alleles_set)
    except Exception as e:
        print("something went wrong with the netMHC prediction %s what: %s"
              % (options.inf, str(e)))
        sys.exit(1)

    # Keep only the best-scoring allele per sequence (bigger is better).
    preds = dict()
    for index, row in preds_n.iterrows():
        score = row.max()
        allele = str(row.idxmax())
        categ = categorize(score)
        seq = row.name[0].tostring()
        if categ:
            preds[seq] = (allele, categ, score)

    # Annotate the hits and write them back (hits are re-set on the peptide
    # identification after modification, as before).
    for pep in peps:
        hits = pep.getHits()
        for h in hits:
            unmod = h.getSequence().toUnmodifiedString()
            if unmod in preds:
                allele, categ, score = preds[unmod]
                h.setMetaValue('binder', allele)
                h.setMetaValue(str(categ), score)
        pep.setHits(hits)

    f.store(options.out, pros, peps)
def run_sequential(input_epitopes, input_alleles, input_affinities,
                   output_vaccine, num_epitopes, min_alleles, min_proteins,
                   solver, **kwargs):
    """
    Design a vaccine in two sequential steps and write it to a CSV file.

    First ``num_epitopes`` epitopes are selected with OptiTope (optionally
    with allele / antigen coverage constraints), then the selected epitopes
    are assembled with spacers. The output file has one row with the columns
    ``immunogen``, ``vaccine``, ``spacers`` and ``cleavage``.

    :param input_epitopes: epitope source passed to ``utilities.load_epitopes``
    :param input_alleles: allele source passed to
                          ``utilities.get_alleles_and_thresholds``
    :param input_affinities: affinities source passed to
                             ``affinities_from_csv``
    :param output_vaccine: path of the CSV file to write
    :param num_epitopes: number of epitopes to select
    :param min_alleles: allele coverage constraint, skipped when ``None``
    :param min_proteins: antigen coverage constraint, skipped when ``None``
    :param solver: solver name used for both optimization steps
    :param kwargs: accepted and ignored
    """
    # Epitopes whose key contains an 'X' are discarded.
    epitope_data = {}
    for key, record in utilities.load_epitopes(input_epitopes).items():
        if 'X' not in key:
            epitope_data[key] = record
    LOGGER.info('Loaded %d epitopes', len(epitope_data))

    # we don't really need the actual protein sequence, just fill it with the
    # id to make it unique
    peptide_coverage = {}
    for record in epitope_data.values():
        peptide_coverage[Peptide(record['epitope'])] = set(
            Protein(gid, gene_id=gid) for gid in record['proteins'])

    allele_data = utilities.get_alleles_and_thresholds(input_alleles).to_dict(
        'index')
    alleles = []
    threshold = {}
    for full_name, info in allele_data.items():
        short_name = full_name.replace('HLA-', '')
        alleles.append(Allele(short_name, prob=info['frequency'] / 100))
        threshold[short_name] = info['threshold']
    LOGGER.info('Loaded %d alleles', len(threshold))

    affinities = affinities_from_csv(input_affinities, allele_data,
                                     peptide_coverage=peptide_coverage)
    LOGGER.info('Loaded %d affinities', len(affinities))

    LOGGER.info('Selecting epitopes...')
    selector = OptiTope(affinities, threshold, k=num_epitopes, solver=solver)
    if min_alleles is not None:
        selector.activate_allele_coverage_const(min_alleles)
    if min_proteins is not None:
        selector.activate_antigen_coverage_const(min_proteins)
    selected_epitopes = selector.solve()

    LOGGER.info('Creating spacers...')
    assembly = EpitopeAssemblyWithSpacer(selected_epitopes, PCM(), BIMAS(),
                                         alleles, threshold=threshold,
                                         solver=solver)
    vaccine = assembly.solve()

    # Even positions hold epitopes, odd positions hold spacers.
    epitopes = vaccine[::2]
    spacers = vaccine[1::2]
    immunogen = sum(epitope_data[str(e)]['immunogen'] for e in epitopes)
    sequence = ''.join(str(part) for part in vaccine)
    cleavage = pcm.DoennesKohlbacherPcm().cleavage_per_position(sequence)

    row = {
        'immunogen': immunogen,
        'vaccine': sequence,
        'spacers': ';'.join(str(s) for s in spacers),
        'cleavage': ';'.join('%.3f' % c for c in cleavage),
    }
    with open(output_vaccine, 'w') as f:
        writer = csv.DictWriter(
            f, ('immunogen', 'vaccine', 'spacers', 'cleavage'))
        writer.writeheader()
        writer.writerow(row)
def setUp(self):
    """Create the three 9-mer peptides used by the assembly tests."""
    sequences = ("KLLPRLPGV", "YLYDHLAPM", "ALYDVVSTL")
    self.peptides = [Peptide(seq) for seq in sequences]
def setUp(self):
    """Create two epitopes and one allele (probability 0.5) for the tests."""
    # Split on any whitespace: the literal separates the two epitopes with
    # whitespace, so splitting on "\n" only could yield a single bogus
    # 19-character "peptide". Whitespace splitting also makes strip()
    # unnecessary.
    epis = """GHRMAWDMM VYEADDVIL""".split()
    self.epis = [Peptide(x) for x in epis]
    self.alleles = [Allele("HLA-A*02:01", prob=0.5)]
def setUp(self):
    """Create two epitopes and one allele (probability 0.5) for the tests."""
    # Split on any whitespace: the literal separates the two epitopes with
    # whitespace, so splitting on "\n" only could yield a single bogus
    # 19-character "peptide". Whitespace splitting also makes strip()
    # unnecessary.
    epis = """GHRMAWDMM VYEADDVIL""".split()
    self.epis = [Peptide(x) for x in epis]
    self.alleles = [Allele("HLA-A*02:01", prob=0.5)]