Пример #1
0
    def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
        """Build a new SeqProp holding only the residues listed in ``resnums``.

        Every residue number ``n`` is converted to ``FeatureLocation(n - 1, n)`` and the
        resulting location is extracted from ``self.seq_record``.

        Args:
            resnums (list): Residue numbers to keep
            new_id (str): ID for the new SeqProp; ``<self.id>_subseq`` is used if not given
            copy_letter_annotations (bool): If the letter annotations of the extracted
                subsequence should be assigned to the new SeqProp

        Returns:
            SeqProp: The extracted subsequence, or ``None`` if ``resnums`` is empty

        Raises:
            ValueError: If ``self.seq_record`` is empty

        """
        if not self.seq_record:
            raise ValueError('No chain sequence stored')

        locations = [FeatureLocation(num - 1, num) for num in resnums]

        if not locations:
            log.info('Zero length subsequences')
            return

        # A CompoundLocation is only built when there is more than one part to join
        if len(locations) > 1:
            location = CompoundLocation(locations)
        else:
            log.debug('Subsequence only one residue long')
            location = locations[0]

        extracted = location.extract(self.seq_record)

        subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
        new_sp = SeqProp(id=subseq_id, seq=extracted)
        if copy_letter_annotations:
            new_sp.letter_annotations = extracted.letter_annotations
        return new_sp
Пример #2
0
    def get_subsequence(self, resnums):
        """Get a subsequence as a new SeqProp object given a list of residue numbers.

        Every residue number ``n`` is converted to ``FeatureLocation(n - 1, n)`` and the
        resulting location is extracted from this object.

        Args:
            resnums (list): Residue numbers to keep

        Returns:
            SeqProp: New object with ID ``<self.id>_subseq`` carrying the extracted sequence and
            its letter annotations, or ``None`` if ``resnums`` is empty

        """
        biop_compound_list = [FeatureLocation(resnum - 1, resnum) for resnum in resnums]

        if not biop_compound_list:
            # Nothing to extract
            return None

        # CompoundLocation rejects fewer than two parts, so a lone location is used directly
        if len(biop_compound_list) == 1:
            sub_feature_location = biop_compound_list[0]
        else:
            sub_feature_location = CompoundLocation(biop_compound_list)
        sub_feature = sub_feature_location.extract(self)

        new_sp = SeqProp(id='{}_subseq'.format(self.id), seq=sub_feature)
        new_sp.letter_annotations = sub_feature.letter_annotations
        return new_sp
Пример #3
0
    def nucleic_sequence(self, feature_type):
        """
        Extract the sequence covered by the locations that ``_get_features`` returns
        for ``feature_type`` from the chromosome's nucleic sequence.

        Several locations are joined into one ``CompoundLocation`` before extraction;
        a single location is used as is. ``None`` is returned when there are no locations.

        :type feature_type: str
        :rtype: Bio.Seq.Seq
        """
        parts = self._get_features(feature_type)
        if not parts:
            return None

        location = CompoundLocation(parts) if len(parts) > 1 else parts[0]
        return location.extract(self.chromosome.nucleic_sequence)
Пример #4
0
    def get_subsequence_from_property(self,
                                      property_key,
                                      property_value,
                                      condition,
                                      return_resnums=False):
        """Get a subsequence as a new SeqProp object given a certain property you want to find in
        this chain's letter_annotation

        See documentation for :func:`ssbio.protein.sequence.seqprop.SeqProp.get_subsequence_from_property`

        Args:
            property_key (str): Property key in the ``letter_annotations`` attribute that you want to filter using
            property_value (str): Property value that you want to filter by
            condition (str): ``<``, ``=``, ``>``, ``>=``, or ``<=`` to filter the values by
            return_resnums (bool): If resnums should be returned as well

        Returns:
            SeqProp: New SeqProp object that you can run computations on or just extract its properties.
            If ``return_resnums`` is set, a tuple of the SeqProp and the list of matching residue numbers
            (matching indices plus one) is returned instead. When nothing satisfies the condition the
            result is ``None``, or ``(None, [])`` if ``return_resnums`` is set.

        Raises:
            ValueError: If ``self.seq_record`` is empty
            KeyError: If ``property_key`` is not in the letter annotations of ``self.seq_record``

        """
        if not self.seq_record:
            raise ValueError('No chain sequence stored')

        if property_key not in self.seq_record.letter_annotations:
            raise KeyError(
                '{}: {} not contained in the letter annotations'.format(
                    self.seq_record.id, property_key))

        subfeat_indices = list(
            locate(
                self.seq_record.letter_annotations[property_key], lambda x:
                ssbio.utils.check_condition(x, condition, property_value)))

        if not subfeat_indices:
            # Nothing matched; keep the return shape consistent so callers can still unpack
            return (None, []) if return_resnums else None

        biop_compound_list = [FeatureLocation(idx, idx + 1) for idx in subfeat_indices]

        # CompoundLocation rejects fewer than two parts, so a lone location is used directly
        if len(biop_compound_list) == 1:
            sub_feature_location = biop_compound_list[0]
        else:
            sub_feature_location = CompoundLocation(biop_compound_list)
        sub_feature = sub_feature_location.extract(self.seq_record)

        new_sp = SeqProp(id='{}-{}_{}_{}_{}_extracted'.format(
            self.pdb_parent, self.id, property_key, condition, property_value),
                         seq=sub_feature)
        new_sp.letter_annotations = sub_feature.letter_annotations

        if return_resnums:
            return new_sp, [x + 1 for x in subfeat_indices]
        else:
            return new_sp
Пример #5
0
    def nucleic_coding_sequence(self):
        """
        Return the coding sequence extracted from the chromosome's nucleic sequence.

        The locations in ``self.cds`` are joined into one ``CompoundLocation`` when there
        are several of them; a single location is used as is. When ``self.location.strand``
        is ``-1`` the extracted sequence is reverse-complemented. The result is stored in
        ``self._nucleic_coding_sequence`` and reused by later calls.

        ``None`` is returned when ``self.cds`` is empty, since there is nothing to extract.

        :rtype: Bio.Seq.Seq
        """
        if not self._nucleic_coding_sequence:
            if not self.cds:
                # No CDS locations: indexing self.cds[0] would fail, so report "no sequence"
                return None

            if len(self.cds) > 1:
                cds = CompoundLocation(self.cds)
            else:
                cds = self.cds[0]

            sequence = cds.extract(self.chromosome.nucleic_sequence)
            if self.location.strand == -1:
                sequence = Seq(sequence).reverse_complement()

            # Cache only once the final sequence is known
            self._nucleic_coding_sequence = sequence

        return self._nucleic_coding_sequence
Пример #6
0
    def get_subsequence(self,
                        resnums,
                        new_id=None,
                        copy_letter_annotations=True):
        """Build a new SeqProp holding only the residues listed in ``resnums``.

        Every residue number ``n`` is converted to ``FeatureLocation(n - 1, n)`` and the
        resulting location is extracted from this object.

        Args:
            resnums (list): Residue numbers to keep
            new_id (str): ID for the new SeqProp; ``<self.id>_subseq`` is used if not given
            copy_letter_annotations (bool): If the letter annotations of the extracted
                subsequence should be assigned to the new SeqProp

        Returns:
            SeqProp: The extracted subsequence, or ``None`` if ``resnums`` is empty or the
            extraction raised a ``TypeError`` (which is logged as critical)

        """
        # Could be sped up by merging runs of consecutive residue numbers into ranges
        locations = [FeatureLocation(num - 1, num) for num in resnums]

        if not locations:
            log.debug('Zero length subsequence')
            return

        # A CompoundLocation is only built when there is more than one part to join
        if len(locations) > 1:
            location = CompoundLocation(locations)
        else:
            log.debug('Subsequence only one residue long')
            location = locations[0]

        try:
            extracted = location.extract(self)
        except TypeError:
            message = (
                'SeqProp {}: unknown error when trying to get subsequence - please investigate! '
                'Try using a feature to extract a subsequence from the SeqProp'
            ).format(self.id)
            log.critical(message)
            return

        subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
        new_sp = SeqProp(id=subseq_id, seq=extracted.seq)
        if copy_letter_annotations:
            new_sp.letter_annotations = extracted.letter_annotations
        return new_sp
Пример #7
0
    def get_subsequence_from_property(self,
                                      property_key,
                                      property_value,
                                      condition,
                                      return_resnums=False):
        """Get a subsequence as a new SeqProp object given a certain property you want to find in the
        original SeqProp's letter_annotation

        This can be used to do something like extract the subsequence of exposed residues, so you can run
        calculations on that subsequence. Useful if you have questions like "are there any predicted surface exposed
        cysteines in my protein sequence?"

        Example:
            >>> sp = SeqProp(id='tester', seq='MQSLE')
            >>> sp.letter_annotations['a_key'] = [2, 2, 3, 1, 0]
            >>> pk = 'a_key'
            >>> pv = 2
            >>> cond = '<'
            >>> new_sp = sp.get_subsequence_from_property(pk, pv, cond)
            >>> new_sp.letter_annotations[pk]
            [1, 0]
            >>> new_sp
            SeqProp(seq=Seq('LE', ExtendedIUPACProtein()), id='tester_a_key_<_2_extracted', name='<unknown name>', description='<unknown description>', dbxrefs=[])

        Args:
            property_key (str): Property key in the ``letter_annotations`` attribute that you want to filter using
            property_value (str): Property value that you want to filter by
            condition (str): ``<``, ``=``, ``>``, ``>=``, or ``<=`` to filter the values by
            return_resnums (bool): If resnums should be returned as well

        Returns:
            SeqProp: New SeqProp object that you can run computations on or just extract its properties.
            If ``return_resnums`` is set, a tuple of the SeqProp and the list of matching residue numbers
            (matching indices plus one) is returned instead. When nothing satisfies the condition the
            result is ``None``, or ``(None, [])`` if ``return_resnums`` is set.

        Raises:
            KeyError: If ``property_key`` is not in ``letter_annotations``

        """

        if property_key not in self.letter_annotations:
            raise KeyError(
                '{}: {} not contained in the letter annotations'.format(
                    self.id, property_key))

        subfeat_indices = list(
            locate(
                self.letter_annotations[property_key], lambda x: ssbio.utils.
                check_condition(x, condition, property_value)))

        if not subfeat_indices:
            # Nothing matched; keep the return shape consistent so callers can still unpack
            return (None, []) if return_resnums else None

        biop_compound_list = [FeatureLocation(idx, idx + 1) for idx in subfeat_indices]

        # CompoundLocation rejects fewer than two parts, so a lone location is used directly
        if len(biop_compound_list) == 1:
            sub_feature_location = biop_compound_list[0]
        else:
            sub_feature_location = CompoundLocation(biop_compound_list)
        sub_feature = sub_feature_location.extract(self)

        new_sp = SeqProp(id='{}_{}_{}_{}_extracted'.format(
            self.id, property_key, condition, property_value),
                         seq=sub_feature)
        new_sp.letter_annotations = sub_feature.letter_annotations

        if return_resnums:
            return new_sp, [x + 1 for x in subfeat_indices]
        else:
            return new_sp