def parse_external_result(self, _file):
    """
    Parses the line-based output of the external tool into a result object.

    Blank lines are skipped. Lines starting with ``-`` are counted as
    separators; once four of them have been seen, the next non-empty line
    advances the running sequence number and resets the counter. Lines
    starting with a digit are split into five whitespace-separated fields, of
    which the first (1-based position), second (residue) and fourth (score)
    are used.

    :param _file: An iterable yielding the lines of the tool's output
    :return: A CleavageSitePredictionResult indexed by (ID, Pos), where ID is
             ``seq_<n>`` and Pos is the 0-based position
    """
    result = {"Seq": {}, self.name: {}}
    residues = result["Seq"]
    scores = result[self.name]

    seq_number = 0
    separators_seen = 0

    for raw_line in _file:
        line = raw_line.strip()
        if not line:
            continue

        if separators_seen and separators_seen % 4 == 0:
            # NOTE(review): the line on which the counter rolls over is
            # consumed here without being parsed -- confirm the tool never
            # places a separator or data row at that spot.
            seq_number += 1
            separators_seen = 0
            continue

        first_char = line[0]
        if first_char == "-":
            separators_seen += 1
        elif first_char.isdigit():
            pos_field, residue, _, score_field, _ = line.split()
            key = ("seq_%i" % seq_number, int(pos_field) - 1)
            residues[key] = residue
            scores[key] = float(score_field)

    df_result = CleavageSitePredictionResult.from_dict(result)
    index_tuples = [tuple((seq_id, pos)) for seq_id, pos in df_result.index]
    df_result.index = pandas.MultiIndex.from_tuples(index_tuples,
                                                    names=['ID', 'Pos'])
    return df_result
def predict(self, _aa_seq, command=None, options=None, **kwargs):
    """
    Overwrites ACleavageSitePrediction.predict

    Writes the input sequences to a temporary file, runs the external tool
    through the shell with ``self.command`` as command template
    (placeholders ``peptides``, ``options`` and ``out``) and parses the
    temporary output file.

    :param list(Peptide/Protein)/Peptide/Protein _aa_seq: A list of or a single Peptide or Protein object
    :param str command: The path to an alternative binary (can be used if binary is not globally executable)
    :param str options: A string of additional options directly passed to the external tool.
    :return: CleavageSitePredictionResult - A CleavageSitePredictionResult object
    :raises RuntimeError: If the tool is not in PATH and no alternative was
                          given, if the external version differs from
                          ``self.version``, or if running the tool fails
    :raises ValueError: If an input element is neither Peptide nor Protein
    """
    # An explicitly given binary makes the PATH lookup irrelevant.
    if not self.is_in_path() and "path" not in kwargs and command is None:
        raise RuntimeError("{name} {version} could not be found in PATH".format(name=self.name,
                                                                                version=self.version))
    external_version = self.get_external_version(path=command)
    if self.version != external_version and external_version is not None:
        raise RuntimeError("Internal version {internal_version} does "
                           "not match external version {external_version}".format(internal_version=self.version,
                                                                                   external_version=external_version))

    if isinstance(_aa_seq, (Peptide, Protein)):
        pep_seqs = {str(_aa_seq): _aa_seq}
    else:
        # Validate and collect in a single pass so that one-shot iterables
        # (e.g. generators) are not exhausted by the validation alone.
        pep_seqs = {}
        for p in _aa_seq:
            if not isinstance(p, (Peptide, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    # allow customary executable specification
    if command is not None:
        exe = self.command.split()[0]
        _command = self.command.replace(exe, command)
    else:
        _command = self.command

    tmp_out = NamedTemporaryFile(delete=False)
    tmp_file = NamedTemporaryFile(delete=False)
    try:
        self.prepare_input(iter(pep_seqs), tmp_file)
        tmp_file.close()

        try:
            # NOTE: options is interpolated into a shell command line
            # (shell=True); only pass trusted values.
            cmd = _command.format(peptides=tmp_file.name,
                                  options="" if options is None else options,
                                  out=tmp_out.name)
            proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            _, stde = proc.communicate()
            if proc.returncode > 0:
                # On Python 3 the pipe yields bytes; decode so that building
                # the message cannot fail and hide the tool's own error.
                if not isinstance(stde, str):
                    stde = stde.decode("utf-8", "replace")
                raise RuntimeError("Unsuccessful execution of " + cmd + " (EXIT!=0) with error: " + stde)
        except Exception as e:
            raise RuntimeError(e)

        result = self.parse_external_result(tmp_out)
    finally:
        # Remove both temporary files even when the tool or the parser failed.
        tmp_file.close()
        tmp_out.close()
        os.remove(tmp_file.name)
        os.remove(tmp_out.name)

    df_result = CleavageSitePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, j)) for i, j in df_result.index],
                                                    names=['ID', 'Pos'])
    return df_result
def predict(self, aa_seq, length=None, **kwargs):
    """
    Returns cleavage site predictions for the given peptides or proteins.

    Every position of every sufficiently long sequence gets an entry with a
    default score of 0.0. For each window of ``length`` residues ending at
    position ``i`` the PSSM score (sum of the per-offset matrix entries plus
    the constant stored under ``pssm[-1]["con"]``) is written to position
    ``i - (length - self.cleavagePos)``. Sequences shorter than ``length``
    are skipped with a RuntimeWarning.

    :param list(Peptide/Protein)/Peptide/Protein aa_seq: A single Peptide or Protein or an iterable of them
    :param int length: Window length of the model; if None the smallest
                       value of ``self.supportedLength`` is used
    :param kwargs: optional parameter (not used yet)
    :return: Returns a CleavageSitePredictionResult indexed by (ID, Pos)
    :raises ValueError: If an input element is neither Peptide nor Protein,
                        if ``length`` is not supported, or if no prediction
                        could be made at all
    :raises KeyError: If the model module for ``length`` cannot be imported
    """
    def __load_model(length):
        model = "%s_%i" % (self.name, length)
        return getattr(__import__("Fred2.Data.pssms." + self.name + ".mat." + model, fromlist=[model]), model)

    if isinstance(aa_seq, (Peptide, Protein)):
        pep_seqs = {str(aa_seq): aa_seq}
    else:
        # Validate and collect in a single pass so that one-shot iterables
        # (e.g. generators) are not exhausted by the validation alone.
        pep_seqs = {}
        for p in aa_seq:
            if not isinstance(p, (Peptide, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    length = min(self.supportedLength) if length is None else length
    if length not in self.supportedLength:
        raise ValueError("Length %i is not supported by %s" % (length, self.name))

    result = {"Seq": {}, self.name: {}}

    try:
        pssm = __load_model(length)
    except ImportError:
        raise KeyError("No model found for %s with length %i" % (self.name, length))

    # offset between the last residue of a window and the reported position
    diff = length - self.cleavagePos
    for j, seq in enumerate(pep_seqs):
        # default identifier; replaced below when the object carries one
        seq_id = "seq_%i" % j
        p = pep_seqs[seq]

        if isinstance(p, Protein):
            if p.transcript_id:
                seq_id = p.transcript_id
        else:
            # use the first non-empty key of the peptide's protein mapping
            for t in p.proteins.keys():
                if t:
                    seq_id = t
                    break

        if len(seq) < length:
            warnings.warn("Sequence length of %i is to small for specified window of %i" % (len(seq), length),
                          RuntimeWarning)
            continue

        for i in range(len(seq)):
            result["Seq"][(seq_id, i)] = seq[i]
            result[self.name][(seq_id, i)] = 0.0
            if i >= length - 1:
                # score the window of `length` residues ending at position i
                window = seq[i - (length - 1):(i + 1)]
                score = sum(pssm.get(k, {}).get(aa, 0) for k, aa in enumerate(window)) \
                    + pssm.get(-1, {}).get("con", 0)
                result[self.name][(seq_id, i - diff)] = score

    if not result["Seq"]:
        raise ValueError("No predictions could be made for the given input.")
    df_result = CleavageSitePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, j)) for i, j in df_result.index],
                                                    names=['ID', 'Pos'])
    return df_result
def predict(self, aa_seq, command=None, options=None, **kwargs):
    """
    Overwrites ACleavageSitePrediction.predict

    The input is processed in chunks. For each chunk the sequences are
    written to a temporary file, the external tool is run through the shell
    with ``self.command`` as command template (placeholders ``input``,
    ``options`` and ``out``) and the temporary output file is parsed.

    :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or
                   :class:`~Fred2.Core.Protein.Protein` object
    :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`)
                  or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
    :param str command: The path to an alternative binary (can be used if binary is not globally executable)
    :param str options: A string of additional options directly passed to the external tool
    :param kwargs: ``chunks`` (int) sets the number of sequences per tool
                   invocation; by default all sequences go into one call
    :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
    :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
    :raises RuntimeError: If the tool is not in PATH and no alternative was
                          given, if the external version differs from
                          ``self.version``, or if running the tool fails
    :raises ValueError: If an input element is neither Peptide nor Protein
    """
    if not self.is_in_path() and "path" not in kwargs and command is None:
        raise RuntimeError(
            "{name} {version} could not be found in PATH".format(
                name=self.name, version=self.version))
    external_version = self.get_external_version(path=command)
    if self.version != external_version and external_version is not None:
        raise RuntimeError(
            "Internal version {internal_version} does "
            "not match external version {external_version}".format(
                internal_version=self.version,
                external_version=external_version))

    if isinstance(aa_seq, (Peptide, Protein)):
        pep_seqs = {str(aa_seq): aa_seq}
    else:
        pep_seqs = {}
        for p in aa_seq:
            if not isinstance(p, (Peptide, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    chunksize = kwargs.get('chunks', len(pep_seqs))

    # allow customary executable specification (same for every chunk)
    if command is not None:
        exe = self.command.split()[0]
        _command = self.command.replace(exe, command)
    else:
        _command = self.command

    result = {}
    peps = list(pep_seqs.values())
    for start in range(0, len(peps), chunksize):
        tmp_out = NamedTemporaryFile(delete=False)
        tmp_file = NamedTemporaryFile(delete=False)
        try:
            self.prepare_input(peps[start:start + chunksize], tmp_file)
            tmp_file.close()

            try:
                # NOTE: options is interpolated into a shell command line
                # (shell=True); only pass trusted values.
                cmd = _command.format(
                    input=tmp_file.name,
                    options="" if options is None else options,
                    out=tmp_out.name)
                proc = subprocess.Popen(cmd, shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                _, stde = proc.communicate()
                if proc.returncode > 0:
                    # On Python 3 the pipe yields bytes; decode so that
                    # building the message cannot fail and hide the tool's
                    # own error.
                    if not isinstance(stde, str):
                        stde = stde.decode("utf-8", "replace")
                    raise RuntimeError("Unsuccessful execution of " + cmd +
                                       " (EXIT!=0) with error: " + stde)
            except Exception as e:
                raise RuntimeError(e)

            # NOTE(review): dict.update replaces values of keys that already
            # exist; if parse_external_result returns the same top-level
            # keys for every chunk, earlier chunks are overwritten --
            # confirm against the parser of this class.
            result.update(self.parse_external_result(tmp_out))
        finally:
            # Remove both temporary files even when the tool or the parser
            # failed.
            tmp_file.close()
            tmp_out.close()
            os.remove(tmp_file.name)
            os.remove(tmp_out.name)

    df_result = CleavageSitePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [tuple((i, j)) for i, j in df_result.index], names=['ID', 'Pos'])
    return df_result
def predict(self, aa_seq, length=None, **kwargs):
    """
    Computes PSSM-based cleavage site scores for peptides or proteins.

    All positions of a sequence start with a score of 0.0. Each window of
    ``length`` residues ending at position ``i`` is scored with the loaded
    matrix (per-offset entries plus the constant under ``pssm[-1]["con"]``)
    and the score is stored at position ``i - (length - self.cleavagePos)``.
    Sequences shorter than ``length`` are skipped with a RuntimeWarning.

    :param aa_seq: A single :class:`~Fred2.Core.Peptide.Peptide` or
                   :class:`~Fred2.Core.Protein.Protein`, or an iterable of them
    :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein`)
                  or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
    :param int length: The window length of the cleavage site model. If None,
                       the smallest value of ``self.supportedLength`` is used.
    :return: The predictions, indexed by (ID, Pos)
    :rtype: :class:`~Fred2.Core.Result.CleavageSitePredictionResult`
    :raises ValueError: For inputs of the wrong type, unsupported lengths, or
                        when no prediction could be made
    :raises KeyError: If the model module cannot be imported
    """
    single_input = isinstance(aa_seq, Peptide) or isinstance(aa_seq, Protein)
    if single_input:
        pep_seqs = {str(aa_seq): aa_seq}
    else:
        pep_seqs = {}
        for candidate in aa_seq:
            if not (isinstance(candidate, Peptide) or isinstance(candidate, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(candidate)] = candidate

    if length is None:
        length = min(self.supportedLength)
    if length not in self.supportedLength:
        raise ValueError("Length %i is not supported by %s" % (length, self.name))

    result = {"Seq": {}, self.name: {}}
    residues = result["Seq"]
    scores = result[self.name]

    # import the matrix module for this predictor and window length
    try:
        model_name = "%s_%i" % (self.name, length)
        module = __import__(
            "Fred2.Data.pssms." + self.name + ".mat." + model_name,
            fromlist=[model_name])
        pssm = getattr(module, model_name)
    except ImportError:
        raise KeyError("No model found for %s with length %i" % (self.name, length))

    first_full_window = length - 1
    diff = length - self.cleavagePos

    for number, seq in enumerate(pep_seqs):
        entity = pep_seqs[seq]

        # pick an identifier: object-provided if available, else a counter
        seq_id = "seq_%i" % number
        if isinstance(entity, Protein):
            if entity.transcript_id:
                seq_id = entity.transcript_id
        else:
            for protein_key in entity.proteins.keys():
                if protein_key:
                    seq_id = protein_key
                    break

        if len(seq) < length:
            warnings.warn(
                "Sequence length of %i is to small for specified window of %i"
                % (len(seq), length), RuntimeWarning)
            continue

        for pos in range(len(seq)):
            residues[(seq_id, pos)] = seq[pos]
            scores[(seq_id, pos)] = 0.0
            if pos < first_full_window:
                continue
            window = seq[pos - first_full_window:pos + 1]
            window_score = sum(
                pssm.get(offset, {}).get(aa, 0)
                for offset, aa in enumerate(window))
            window_score = window_score + pssm.get(-1, {}).get("con", 0)
            scores[(seq_id, pos - diff)] = window_score

    if not result["Seq"]:
        raise ValueError(
            "No predictions could be made for the given input.")

    df_result = CleavageSitePredictionResult.from_dict(result)
    index_tuples = [tuple((seq_id, pos)) for seq_id, pos in df_result.index]
    df_result.index = pandas.MultiIndex.from_tuples(index_tuples,
                                                    names=['ID', 'Pos'])
    return df_result
def predict(self, aa_seq, command=None, options=None, **kwargs):
    """
    Overwrites ACleavageSitePrediction.predict

    The input is processed in chunks. For each chunk the sequences are
    written to a temporary file, the external tool is run through the shell
    with ``self.command`` as command template (placeholders ``input``,
    ``options`` and ``out``) and the temporary output file is parsed.

    :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or
                   :class:`~Fred2.Core.Protein.Protein` object
    :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`)
                  or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
    :param str command: The path to an alternative binary (can be used if binary is not globally executable)
    :param str options: A string of additional options directly passed to the external tool
    :param kwargs: ``chunks`` (int) sets the number of sequences per tool
                   invocation; by default all sequences go into one call
    :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
    :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
    :raises RuntimeError: If the tool is not in PATH and no alternative was
                          given, if the external version differs from
                          ``self.version``, or if running the tool fails
    :raises ValueError: If an input element is neither Peptide nor Protein
    """
    # An explicitly given binary makes the PATH lookup irrelevant.
    if not self.is_in_path() and "path" not in kwargs and command is None:
        raise RuntimeError("{name} {version} could not be found in PATH".format(name=self.name,
                                                                                version=self.version))
    external_version = self.get_external_version(path=command)
    if self.version != external_version and external_version is not None:
        raise RuntimeError("Internal version {internal_version} does "
                           "not match external version {external_version}".format(internal_version=self.version,
                                                                                   external_version=external_version))

    if isinstance(aa_seq, (Peptide, Protein)):
        pep_seqs = {str(aa_seq): aa_seq}
    else:
        pep_seqs = {}
        for p in aa_seq:
            if not isinstance(p, (Peptide, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    chunksize = kwargs.get('chunks', len(pep_seqs))

    # allow customary executable specification (same for every chunk)
    if command is not None:
        exe = self.command.split()[0]
        _command = self.command.replace(exe, command)
    else:
        _command = self.command

    result = {}
    peps = list(pep_seqs.values())
    # range() instead of xrange() keeps the method usable on Python 2 and 3
    for start in range(0, len(peps), chunksize):
        tmp_out = NamedTemporaryFile(delete=False)
        tmp_file = NamedTemporaryFile(delete=False)
        try:
            self.prepare_input(peps[start:start + chunksize], tmp_file)
            tmp_file.close()

            try:
                # NOTE: options is interpolated into a shell command line
                # (shell=True); only pass trusted values.
                cmd = _command.format(input=tmp_file.name,
                                      options="" if options is None else options,
                                      out=tmp_out.name)
                proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                _, stde = proc.communicate()
                if proc.returncode > 0:
                    # On Python 3 the pipe yields bytes; decode so that
                    # building the message cannot fail and hide the tool's
                    # own error.
                    if not isinstance(stde, str):
                        stde = stde.decode("utf-8", "replace")
                    raise RuntimeError("Unsuccessful execution of " + cmd + " (EXIT!=0) with error: " + stde)
            except Exception as e:
                raise RuntimeError(e)

            # NOTE(review): dict.update replaces values of keys that already
            # exist; if parse_external_result returns the same top-level
            # keys for every chunk, earlier chunks are overwritten --
            # confirm against the parser of this class.
            result.update(self.parse_external_result(tmp_out))
        finally:
            # Remove both temporary files even when the tool or the parser
            # failed.
            tmp_file.close()
            tmp_out.close()
            os.remove(tmp_file.name)
            os.remove(tmp_out.name)

    df_result = CleavageSitePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, j)) for i, j in df_result.index],
                                                    names=['ID', 'Pos'])
    return df_result
def predict(self, aa_seq, command=None, options=None, **kwargs):
    """
    Overwrites ACleavageSitePrediction.predict

    Sequences given as an iterable are identified by integer ids instead of
    their string representation. The id-to-object mapping is handed to both
    ``prepare_input`` and ``parse_external_result``. The external tool is run
    through the shell with ``self.command`` as command template
    (placeholders ``input``, ``options`` and ``out``).

    :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or
                   :class:`~Fred2.Core.Protein.Protein` object
    :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`)
                  or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
    :param str command: The path to an alternative binary (can be used if binary is not globally executable)
    :param str options: A string of additional options directly passed to the external tool
    :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
    :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
    :raises RuntimeError: If the tool is not in PATH and no alternative was
                          given, if the external version differs from
                          ``self.version``, or if running the tool fails
    :raises ValueError: If an input element is neither Peptide nor Protein
    """
    # An explicitly given binary makes the PATH lookup irrelevant.
    if not self.is_in_path() and "path" not in kwargs and command is None:
        raise RuntimeError(
            "{name} {version} could not be found in PATH".format(
                name=self.name, version=self.version))
    external_version = self.get_external_version(path=command)
    if self.version != external_version and external_version is not None:
        raise RuntimeError(
            "Internal version {internal_version} does "
            "not match external version {external_version}".format(
                internal_version=self.version,
                external_version=external_version))

    # According to the original author's note NetChop 3.1 truncates
    # identifiers, so short integer ids are used instead of the sequences.
    # Ids count upwards from 0; once the limit is reached they continue
    # downwards from -1 so they never collide with ids already handed out.
    # NOTE(review): the note speaks of 10 digits but the limit below has 11
    # -- confirm which one the tool actually requires.
    id_limit = 99999999999
    if isinstance(aa_seq, (Peptide, Protein)):
        # NOTE(review): a single input is keyed by its string representation
        # rather than an integer id -- confirm prepare_input and
        # parse_external_result cope with that.
        pep_seqs = {str(aa_seq): aa_seq}
    else:
        pep_seqs = {}
        for i, p in enumerate(aa_seq):
            if not isinstance(p, (Peptide, Protein)):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[i if i < id_limit else id_limit - 1 - i] = p

    # allow customary executable specification
    if command is not None:
        exe = self.command.split()[0]
        _command = self.command.replace(exe, command)
    else:
        _command = self.command

    tmp_out = NamedTemporaryFile(delete=False)
    tmp_file = NamedTemporaryFile(delete=False)
    try:
        self.prepare_input(pep_seqs, tmp_file)
        tmp_file.close()

        try:
            # NOTE: options is interpolated into a shell command line
            # (shell=True); only pass trusted values.
            cmd = _command.format(input=tmp_file.name,
                                  options="" if options is None else options,
                                  out=tmp_out.name)
            proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            _, stde = proc.communicate()
            if proc.returncode > 0:
                # On Python 3 the pipe yields bytes; decode so that building
                # the message cannot fail and hide the tool's own error.
                if not isinstance(stde, str):
                    stde = stde.decode("utf-8", "replace")
                raise RuntimeError("Unsuccessful execution of " + cmd + " (EXIT!=0) with error: " + stde)
        except Exception as e:
            raise RuntimeError(e)

        result = self.parse_external_result(tmp_out, pep_seqs)
    finally:
        # Remove both temporary files even when the tool or the parser failed.
        tmp_file.close()
        tmp_out.close()
        os.remove(tmp_file.name)
        os.remove(tmp_out.name)

    df_result = CleavageSitePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [tuple((i, j)) for i, j in df_result.index], names=['ID', 'Pos'])
    return df_result