Пример #1
0
    def parse_external_result(self, _file):
        """
        Parses the line-wise output of the external tool into a result object.

        Blank lines are ignored. Lines starting with ``-`` are counted as
        separators; once four of them have been seen, the next non-blank line
        is consumed without being parsed and advances the running sequence
        number. Lines starting with a digit are split into five
        whitespace-separated columns of which the first (1-based position),
        the second (residue) and the fourth (score) are used.

        :param _file: An iterable yielding the lines of the tool output
        :return: The object built by ``CleavageSitePredictionResult.from_dict``
                 with its index replaced by a ('ID', 'Pos') MultiIndex
        """
        residues = {}
        scores = {}
        result = {"Seq": residues, self.name: scores}
        seq_no = 0
        separators = 0

        for raw_line in _file:
            line = raw_line.strip()
            if not line:
                continue

            if separators and separators % 4 == 0:
                # four separators passed: this line opens the next record
                seq_no += 1
                separators = 0
                continue

            first_char = line[0]
            if first_char == "-":
                separators += 1
            elif first_char.isdigit():
                position, residue, _, score, _ = line.split()
                # positions are reported 1-based, the result is 0-based
                key = ("seq_%i" % seq_no, int(position) - 1)
                residues[key] = residue
                scores[key] = float(score)

        df_result = CleavageSitePredictionResult.from_dict(result)
        index_tuples = [(seq_id, pos) for seq_id, pos in df_result.index]
        df_result.index = pandas.MultiIndex.from_tuples(index_tuples, names=['ID', 'Pos'])
        return df_result
Пример #2
0
    def predict(self, _aa_seq, command=None, options=None, **kwargs):
        """
        Overwrites ACleavageSitePrediction.predict

        The input sequences are written to a temporary file, the external tool is run on that file through
        the shell, and the tool's output file is parsed into the result object. Both temporary files are
        removed again, also if any of these steps fails.

        :param list(Peptide/Protein)/Peptide/Protein _aa_seq: A list of or a single Peptide or Protein object
        :param str command: The path to an alternative binary (can be used if binary is not globally executable)
        :param str options: A string of additional options passed directly to the external tool.
        :param kwargs: Only the presence of a ``path`` key is inspected; it disables the PATH check
        :return: CleavageSitePredictionResult - A CleavageSitePredictionResult object
        :raises RuntimeError: If the tool is not found, its version does not match the internal version, or
                              its execution fails
        :raises ValueError: If the input contains an object that is neither a Peptide nor a Protein
        """
        # An explicitly given binary makes the PATH lookup irrelevant.
        if not self.is_in_path() and "path" not in kwargs and command is None:
            raise RuntimeError("{name} {version} could not be found in PATH".format(name=self.name,
                                                                                    version=self.version))
        external_version = self.get_external_version(path=command)
        if self.version != external_version and external_version is not None:
            raise RuntimeError("Internal version {internal_version} does "
                               "not match external version {external_version}".format(internal_version=self.version,
                                                                                      external_version=external_version))

        if isinstance(_aa_seq, (Peptide, Protein)):
            pep_seqs = {str(_aa_seq): _aa_seq}
        else:
            # Validate and collect in a single pass so that one-shot iterables (generators) work as well.
            pep_seqs = {}
            for seq in _aa_seq:
                if not isinstance(seq, (Peptide, Protein)):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(seq)] = seq

        # allow a custom executable: swap only the leading executable token of the command template
        if command is not None:
            exe = self.command.split()[0]
            _command = self.command.replace(exe, command, 1)
        else:
            _command = self.command

        tmp_out = NamedTemporaryFile(delete=False)
        tmp_file = NamedTemporaryFile(delete=False)
        try:
            self.prepare_input(iter(pep_seqs), tmp_file)
            tmp_file.close()

            try:
                cmd = _command.format(peptides=tmp_file.name, options="" if options is None else options,
                                      out=tmp_out.name)
                # NOTE: cmd is interpreted by the shell; `command` and `options` must come from a trusted source.
                proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                _, stde = proc.communicate()
            except Exception as e:
                raise RuntimeError(e)

            # Negative return codes (termination by a signal) are failures, too.
            if proc.returncode != 0:
                if not isinstance(stde, str):
                    # Python 3 hands back bytes, which cannot be concatenated to the message.
                    stde = stde.decode(errors="replace")
                raise RuntimeError("Unsuccessful execution of " + cmd + " (EXIT!=0) with error: " + stde)

            result = self.parse_external_result(tmp_out)
        finally:
            # The files were created with delete=False and therefore have to be removed by hand.
            for handle in (tmp_file, tmp_out):
                handle.close()
                try:
                    os.remove(handle.name)
                except OSError:
                    # best-effort cleanup that must not mask the primary error
                    pass

        df_result = CleavageSitePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([(i, j) for i, j in df_result.index],
                                                        names=['ID', 'Pos'])
        return df_result
Пример #3
0
    def predict(self, aa_seq, length=None, **kwargs):
        """
        Returns cleavage site predictions for the given peptides or proteins.

        Every sequence position gets an entry in the result. A window of ``length`` residues ending at
        position ``i`` is scored with the position specific scoring matrix and the score is stored at
        position ``i - (length - self.cleavagePos)``; all other positions keep a score of 0.0. Sequences
        shorter than ``length`` are skipped with a :class:`RuntimeWarning`.

        :param list(Peptide/Protein)/Peptide/Protein aa_seq: A single Peptide or Protein or an iterable of them
        :param int length: The window length of the model. If None, the smallest supported length is used
        :param kwargs: optional parameter (not used yet)
        :return: Returns a CleavageSitePredictionResult object with the prediction results
        :raises ValueError: If the input contains other types, the length is not supported, or no prediction
                            could be made
        :raises KeyError: If no model could be imported for the requested length
        """
        def __load_model(length):
            model = "%s_%i"%(self.name, length)
            return getattr(__import__("Fred2.Data.pssms."+self.name+".mat."+model, fromlist=[model]),
                           model)

        if isinstance(aa_seq, (Peptide, Protein)):
            pep_seqs = {str(aa_seq): aa_seq}
        else:
            # Validate and collect in a single pass so that one-shot iterables (generators) work as well.
            pep_seqs = {}
            for entry in aa_seq:
                if not isinstance(entry, (Peptide, Protein)):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(entry)] = entry

        length = min(self.supportedLength) if length is None else length
        if length not in self.supportedLength:
            raise ValueError("Length %i is not supported by %s"%(length, self.name))

        result = {"Seq": {}, self.name: {}}

        try:
            pssm = __load_model(length)
        except ImportError:
            raise KeyError("No model found for %s with length %i"%(self.name, length))

        diff = length - self.cleavagePos
        # The constant term does not depend on the window, so it is looked up only once.
        constant = pssm.get(-1, {}).get("con", 0)

        for j, seq in enumerate(pep_seqs):
            p = pep_seqs[seq]

            # Prefer a real identifier over the running number.
            seq_id = "seq_%i"%j
            if isinstance(p, Protein):
                if p.transcript_id:
                    seq_id = p.transcript_id
            else:
                for t in p.proteins.keys():
                    if t:
                        seq_id = t
                        break

            if len(seq) < length:
                warnings.warn("Sequence length of %i is to small for specified window of %i"%(len(seq),length), RuntimeWarning)
                continue

            for i, residue in enumerate(seq):
                result["Seq"][(seq_id, i)] = residue
                result[self.name][(seq_id, i)] = 0.0
                if i >= length - 1:
                    # score the window that ends at position i
                    window = seq[i - (length - 1):i + 1]
                    score = sum(pssm.get(k, {}).get(aa, 0) for k, aa in enumerate(window)) + constant
                    result[self.name][(seq_id, i - diff)] = score

        if not result["Seq"]:
            raise ValueError("No predictions could be made for the given input.")
        df_result = CleavageSitePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([(i, j) for i, j in df_result.index],
                                                        names=['ID', 'Pos'])

        return df_result
Пример #4
0
    def predict(self, aa_seq, command=None, options=None, **kwargs):
        """
        Overwrites ACleavageSitePrediction.predict

        The input is processed in chunks. For each chunk the sequences are written to a temporary file,
        the external tool is run on it through the shell and the tool's output file is parsed. The
        temporary files are removed again, also if one of these steps fails.

        :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein` object
        :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`) or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
        :param str command: The path to an alternative binary (can be used if binary is not globally executable)
        :param str options: A string of additional options passed directly to the external tool
        :param kwargs: ``chunks`` sets the number of sequences handed to the tool per run (default: all at
                       once); the presence of ``path`` disables the PATH check
        :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
        :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
        :raises RuntimeError: If the tool is not found, its version does not match the internal version, or
                              its execution fails
        :raises ValueError: If the input is empty or contains other types, or if ``chunks`` is not positive
        """
        if not self.is_in_path() and "path" not in kwargs and command is None:
            raise RuntimeError(
                "{name} {version} could not be found in PATH".format(
                    name=self.name, version=self.version))
        external_version = self.get_external_version(path=command)
        if self.version != external_version and external_version is not None:
            raise RuntimeError(
                "Internal version {internal_version} does "
                "not match external version {external_version}".format(
                    internal_version=self.version,
                    external_version=external_version))

        if isinstance(aa_seq, (Peptide, Protein)):
            pep_seqs = {str(aa_seq): aa_seq}
        else:
            pep_seqs = {}
            for seq in aa_seq:
                if not isinstance(seq, (Peptide, Protein)):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(seq)] = seq

        # An empty input would otherwise end in range() with a step of zero.
        if not pep_seqs:
            raise ValueError("No input sequences were given.")
        chunksize = kwargs.get('chunks', len(pep_seqs))
        if chunksize < 1:
            raise ValueError("Chunk size must be positive, got %r" %
                             (chunksize, ))

        # allow a custom executable: swap only the leading executable token
        # of the command template (identical for every chunk)
        if command is not None:
            exe = self.command.split()[0]
            _command = self.command.replace(exe, command, 1)
        else:
            _command = self.command

        result = {}
        peps = list(pep_seqs.values())

        for start in range(0, len(peps), chunksize):
            tmp_out = NamedTemporaryFile(delete=False)
            tmp_file = NamedTemporaryFile(delete=False)
            try:
                self.prepare_input(peps[start:start + chunksize], tmp_file)
                tmp_file.close()

                try:
                    cmd = _command.format(
                        input=tmp_file.name,
                        options="" if options is None else options,
                        out=tmp_out.name)
                    # NOTE: cmd is interpreted by the shell; `command` and
                    # `options` must come from a trusted source.
                    proc = subprocess.Popen(cmd,
                                            shell=True,
                                            stdin=subprocess.PIPE,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    _, stde = proc.communicate()
                except Exception as e:
                    raise RuntimeError(e)

                # Negative return codes (termination by a signal) are
                # failures, too.
                if proc.returncode != 0:
                    if not isinstance(stde, str):
                        # bytes cannot be concatenated to the message
                        stde = stde.decode(errors="replace")
                    raise RuntimeError("Unsuccessful execution of " + cmd +
                                       " (EXIT!=0) with error: " + stde)

                parsed = dict(self.parse_external_result(tmp_out))
            finally:
                # The files were created with delete=False and therefore
                # have to be removed by hand.
                for handle in (tmp_file, tmp_out):
                    handle.close()
                    try:
                        os.remove(handle.name)
                    except OSError:
                        # best-effort cleanup that must not mask the
                        # primary error
                        pass

            # Merge one level deep: a plain dict.update would replace a whole
            # per-column dict of an earlier chunk with the current one.
            # NOTE(review): if the parser numbers sequences per call, keys of
            # different chunks may still collide -- confirm against
            # parse_external_result.
            for column, values in parsed.items():
                if isinstance(values, dict) and isinstance(
                        result.get(column), dict):
                    result[column].update(values)
                else:
                    result[column] = values

        df_result = CleavageSitePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [(i, j) for i, j in df_result.index], names=['ID', 'Pos'])

        return df_result
Пример #5
0
    def predict(self, aa_seq, length=None, **kwargs):
        """
        Scores every position of the given sequences with a position specific scoring matrix.

        Each sequence position receives an entry. A window of ``length`` residues ending at position
        ``i`` is scored and the score is stored at position ``i - (length - self.cleavagePos)``; the
        remaining positions keep a score of 0.0. Sequences shorter than ``length`` are skipped with a
        :class:`RuntimeWarning`.

        :param aa_seq: A single :class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein`
                       or a list of them
        :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein`)
                      or :class:`~Fred2.Core.Peptide`/:class:`~Fred2.Core.Protein.Protein`
        :param int length: The window length of the cleavage site model. If None, the smallest supported
                           length is used
        :param kwargs: Not used by this method
        :return: Returns a :class:`~Fred2.Core.Result.CleavageSitePredictionResult` object
        :rtype: :class:`~Fred2.Core.Result.CleavageSitePredictionResult`
        :raises ValueError: If the input contains other types, the length is not supported, or no
                            prediction could be made
        :raises KeyError: If no model could be imported for the requested length
        """
        accepted = (Peptide, Protein)
        if isinstance(aa_seq, accepted):
            pep_seqs = {str(aa_seq): aa_seq}
        else:
            pep_seqs = {}
            for candidate in aa_seq:
                if not isinstance(candidate, accepted):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(candidate)] = candidate

        if length is None:
            length = min(self.supportedLength)
        if length not in self.supportedLength:
            raise ValueError("Length %i is not supported by %s" %
                             (length, self.name))

        result = {"Seq": {}, self.name: {}}

        # import the matrix module that belongs to the requested length
        try:
            model_name = "%s_%i" % (self.name, length)
            module = __import__(
                "Fred2.Data.pssms." + self.name + ".mat." + model_name,
                fromlist=[model_name])
            pssm = getattr(module, model_name)
        except ImportError:
            raise KeyError("No model found for %s with length %i" %
                           (self.name, length))

        residues = result["Seq"]
        scores = result[self.name]
        offset = length - self.cleavagePos
        last_unscored = length - 1

        for number, seq in enumerate(pep_seqs):
            entry = pep_seqs[seq]

            # fall back to a running number if no identifier is available
            seq_id = "seq_%i" % number
            if isinstance(entry, Protein):
                if entry.transcript_id:
                    seq_id = entry.transcript_id
            else:
                seq_id = next((t for t in entry.proteins.keys() if t), seq_id)

            if len(seq) < length:
                warnings.warn(
                    "Sequence length of %i is to small for specified window of %i"
                    % (len(seq), length), RuntimeWarning)
                continue

            for pos, residue in enumerate(seq):
                key = (seq_id, pos)
                if pos < last_unscored:
                    # no complete window ends here yet
                    residues[key] = residue
                    scores[key] = 0.0
                    continue

                scores[key] = 0.0
                residues[key] = residue
                window = seq[pos - last_unscored:pos + 1]
                window_score = sum(
                    pssm.get(k, {}).get(aa, 0) for k, aa in enumerate(window))
                scores[(seq_id, pos - offset)] = (
                    window_score + pssm.get(-1, {}).get("con", 0))

        if not result["Seq"]:
            raise ValueError(
                "No predictions could be made for the given input.")

        df_result = CleavageSitePredictionResult.from_dict(result)
        index_tuples = [(seq_id, pos) for seq_id, pos in df_result.index]
        df_result.index = pandas.MultiIndex.from_tuples(index_tuples,
                                                        names=['ID', 'Pos'])
        return df_result
Пример #6
0
    def predict(self, aa_seq, command=None, options=None, **kwargs):
        """
        Overwrites ACleavageSitePrediction.predict

        The input is processed in chunks. For each chunk the sequences are written to a temporary file,
        the external tool is run on it through the shell and the tool's output file is parsed. The
        temporary files are removed again, also if one of these steps fails.

        :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein` object
        :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`) or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
        :param str command: The path to an alternative binary (can be used if binary is not globally executable)
        :param str options: A string of additional options passed directly to the external tool
        :param kwargs: ``chunks`` sets the number of sequences handed to the tool per run (default: all at
                       once); the presence of ``path`` disables the PATH check
        :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
        :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
        :raises RuntimeError: If the tool is not found, its version does not match the internal version, or
                              its execution fails
        :raises ValueError: If the input is empty or contains other types, or if ``chunks`` is not positive
        """
        # An explicitly given binary makes the PATH lookup irrelevant.
        if not self.is_in_path() and "path" not in kwargs and command is None:
            raise RuntimeError("{name} {version} could not be found in PATH".format(name=self.name,
                                                                                    version=self.version))
        external_version = self.get_external_version(path=command)
        if self.version != external_version and external_version is not None:
            raise RuntimeError("Internal version {internal_version} does "
                               "not match external version {external_version}".format(internal_version=self.version,
                                                                                      external_version=external_version))

        if isinstance(aa_seq, (Peptide, Protein)):
            pep_seqs = {str(aa_seq): aa_seq}
        else:
            pep_seqs = {}
            for seq in aa_seq:
                if not isinstance(seq, (Peptide, Protein)):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(seq)] = seq

        # An empty input would otherwise end in a range with a step of zero.
        if not pep_seqs:
            raise ValueError("No input sequences were given.")
        chunksize = kwargs.get('chunks', len(pep_seqs))
        if chunksize < 1:
            raise ValueError("Chunk size must be positive, got %r" % (chunksize,))

        # allow a custom executable: swap only the leading executable token of the command template
        # (identical for every chunk, hence built once)
        if command is not None:
            exe = self.command.split()[0]
            _command = self.command.replace(exe, command, 1)
        else:
            _command = self.command

        result = {}
        peps = list(pep_seqs.values())

        # range instead of xrange keeps the loop usable on Python 2 and Python 3
        for start in range(0, len(peps), chunksize):
            tmp_out = NamedTemporaryFile(delete=False)
            tmp_file = NamedTemporaryFile(delete=False)
            try:
                self.prepare_input(peps[start:start+chunksize], tmp_file)
                tmp_file.close()

                try:
                    cmd = _command.format(input=tmp_file.name, options="" if options is None else options,
                                          out=tmp_out.name)
                    # NOTE: cmd is interpreted by the shell; `command` and `options` must come from a
                    # trusted source.
                    proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    _, stde = proc.communicate()
                except Exception as e:
                    raise RuntimeError(e)

                # Negative return codes (termination by a signal) are failures, too.
                if proc.returncode != 0:
                    if not isinstance(stde, str):
                        # Python 3 hands back bytes, which cannot be concatenated to the message.
                        stde = stde.decode(errors="replace")
                    raise RuntimeError("Unsuccessful execution of " + cmd + " (EXIT!=0) with error: " + stde)

                parsed = dict(self.parse_external_result(tmp_out))
            finally:
                # The files were created with delete=False and therefore have to be removed by hand.
                for handle in (tmp_file, tmp_out):
                    handle.close()
                    try:
                        os.remove(handle.name)
                    except OSError:
                        # best-effort cleanup that must not mask the primary error
                        pass

            # Merge one level deep: a plain dict.update would replace a whole per-column dict of an
            # earlier chunk with the current one.
            # NOTE(review): if the parser numbers sequences per call, keys of different chunks may still
            # collide -- confirm against parse_external_result.
            for column, values in parsed.items():
                if isinstance(values, dict) and isinstance(result.get(column), dict):
                    result[column].update(values)
                else:
                    result[column] = values

        df_result = CleavageSitePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([(i, j) for i, j in df_result.index],
                                                        names=['ID', 'Pos'])

        return df_result
Пример #7
0
    def predict(self, aa_seq, command=None, options=None, **kwargs):
        """
        Overwrites ACleavageSitePrediction.predict

        The sequences are written to a temporary file under integer identifiers, the external tool is run
        on that file through the shell and the tool's output file is parsed together with the identifier
        mapping. Both temporary files are removed again, also if one of these steps fails.

        :param aa_seq: A list of or a single :class:`~Fred2.Core.Peptide.Peptide` or :class:`~Fred2.Core.Protein.Protein` object
        :type aa_seq: list(:class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`) or :class:`~Fred2.Core.Peptide.Peptide`/:class:`~Fred2.Core.Protein.Protein`
        :param str command: The path to an alternative binary (can be used if binary is not globally executable)
        :param str options: A string of additional options passed directly to the external tool
        :param kwargs: Only the presence of a ``path`` key is inspected; it disables the PATH check
        :return: A :class:`~Fred2.Core.CleavageSitePredictionResult` object
        :rtype: :class:`~Fred2.Core.CleavageSitePredictionResult`
        :raises RuntimeError: If the tool is not found, its version does not match the internal version, or
                              its execution fails
        :raises ValueError: If the input contains an object that is neither a Peptide nor a Protein
        """
        # An explicitly given binary makes the PATH lookup irrelevant.
        if not self.is_in_path() and "path" not in kwargs and command is None:
            raise RuntimeError(
                "{name} {version} could not be found in PATH".format(
                    name=self.name, version=self.version))
        external_version = self.get_external_version(path=command)
        if self.version != external_version and external_version is not None:
            raise RuntimeError(
                "Internal version {internal_version} does "
                "not match external version {external_version}".format(
                    internal_version=self.version,
                    external_version=external_version))

        # According to the original note, NetChop 3.1 truncates identifiers,
        # so running integers are used as unique ids instead. Once the
        # positive range is used up, counting continues with -1, -2, ...
        # NOTE(review): the limit below has eleven digits although the
        # original note speaks of ten -- confirm the intended width.
        id_limit = 99999999999
        if isinstance(aa_seq, (Peptide, Protein)):
            # NOTE(review): a single input is keyed by its string form, not
            # by an integer id -- confirm that the parser copes with that.
            pep_seqs = {str(aa_seq): aa_seq}
        else:
            pep_seqs = {}
            for i, seq in enumerate(aa_seq):
                if not isinstance(seq, (Peptide, Protein)):
                    raise ValueError("Input is not of type Protein or Peptide")
                # i - id_limit would restart at 0 and overwrite the first
                # entries, hence the mapping to negative ids.
                seq_key = i if i < id_limit else id_limit - 1 - i
                pep_seqs[seq_key] = seq

        # allow a custom executable: swap only the leading executable token
        # of the command template
        if command is not None:
            exe = self.command.split()[0]
            _command = self.command.replace(exe, command, 1)
        else:
            _command = self.command

        tmp_out = NamedTemporaryFile(delete=False)
        tmp_file = NamedTemporaryFile(delete=False)
        try:
            self.prepare_input(pep_seqs, tmp_file)
            tmp_file.close()

            try:
                cmd = _command.format(
                    input=tmp_file.name,
                    options="" if options is None else options,
                    out=tmp_out.name)
                # NOTE: cmd is interpreted by the shell; `command` and
                # `options` must come from a trusted source.
                proc = subprocess.Popen(cmd,
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                _, stde = proc.communicate()
            except Exception as e:
                raise RuntimeError(e)

            # Negative return codes (termination by a signal) are failures,
            # too.
            if proc.returncode != 0:
                if not isinstance(stde, str):
                    # bytes cannot be concatenated to the message
                    stde = stde.decode(errors="replace")
                raise RuntimeError("Unsuccessful execution of " + cmd +
                                   " (EXIT!=0) with error: " + stde)

            result = self.parse_external_result(tmp_out, pep_seqs)
        finally:
            # The files were created with delete=False and therefore have to
            # be removed by hand.
            for handle in (tmp_file, tmp_out):
                handle.close()
                try:
                    os.remove(handle.name)
                except OSError:
                    # best-effort cleanup that must not mask the primary
                    # error
                    pass

        df_result = CleavageSitePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [(i, j) for i, j in df_result.index], names=['ID', 'Pos'])

        return df_result