Пример #1
0
def get_locations(CDSs, start, end, strand):
    """Return the CDS and mRNA locations for a gene.

    The CDS location has exact boundaries. The mRNA location covers the same
    segments, but its outermost boundaries are fuzzy: the start of the first
    segment is a ``BeforePosition`` and the end of the last segment is an
    ``AfterPosition``.

    Args:
        CDSs: sequence of 1-based, inclusive ``(start, end)`` pairs, one per
            CDS segment.
        start: 1-based start, used only when ``CDSs`` has at most one entry.
        end: 1-based inclusive end, used only when ``CDSs`` has at most one
            entry.
        strand: strand value passed to every location that is created.

    Returns:
        tuple: ``(cdsloc, mrnaloc)``.
    """
    # gff is 1-based, gb also, but sf is 0-based
    if len(CDSs) > 1:
        last_index = len(CDSs) - 1
        parts, mrnaparts = [], []
        for cdsi, (s, e) in enumerate(CDSs):
            parts.append(FeatureLocation(s - 1, e, strand=strand))
            if cdsi == 0:
                # Fuzzy start on the first segment only.
                mrna_part = FeatureLocation(
                    BeforePosition(s - 1), e, strand=strand)
            elif cdsi == last_index:
                # Fuzzy end on the last segment only.
                mrna_part = FeatureLocation(
                    s - 1, AfterPosition(e), strand=strand)
            else:
                mrna_part = FeatureLocation(s - 1, e, strand=strand)
            mrnaparts.append(mrna_part)
        cdsloc = CompoundLocation(parts)
        # Fixed: the mRNA location was previously built from ``parts``, which
        # silently discarded the fuzzy boundaries collected in ``mrnaparts``.
        mrnaloc = CompoundLocation(mrnaparts)
    else:
        cdsloc = FeatureLocation(start - 1, end, strand=strand)
        mrnaloc = FeatureLocation(BeforePosition(start - 1),
                                  AfterPosition(end),
                                  strand=strand)
    return cdsloc, mrnaloc
Пример #2
0
    def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
        """Build a new SeqProp containing only the residues listed in ``resnums``.

        Args:
            resnums: iterable of 1-based residue numbers.
            new_id: ID for the new object; defaults to ``<self.id>_subseq``.
            copy_letter_annotations: if true, the letter annotations of the
                extracted record are copied onto the new object.

        Returns:
            The new SeqProp, or ``None`` when ``resnums`` is empty.

        Raises:
            ValueError: if no sequence record is stored on this object.
        """
        if not self.seq_record:
            raise ValueError('No chain sequence stored')

        # One single-residue location per residue number (1-based -> 0-based).
        residue_spans = [FeatureLocation(number - 1, number) for number in resnums]
        span_count = len(residue_spans)

        if span_count == 0:
            log.info('Zero length subsequences')
            return None

        if span_count == 1:
            log.debug('Subsequence only one residue long')
            sub_feature_location = residue_spans[0]
        else:
            sub_feature_location = CompoundLocation(residue_spans)

        sub_feature = sub_feature_location.extract(self.seq_record)

        subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
        new_sp = SeqProp(id=subseq_id, seq=sub_feature)
        if copy_letter_annotations:
            new_sp.letter_annotations = sub_feature.letter_annotations
        return new_sp
Пример #3
0
 def gb(self):
     """Return this object rendered in GenBank format.

     A SeqRecord is built from the sequence, with the name truncated to
     eight characters for both ``id`` and ``name``. Every feature becomes a
     SeqFeature; on a circular object (``shape == 'c'``) a feature whose end
     lies beyond the sequence length is split into two parts joined in a
     CompoundLocation.
     """
     total_length = self.length()
     short_name = self.name[0:8]
     record = SeqRecord(
         Seq(self.sequence(), IUPAC.IUPACUnambiguousDNA()),
         id=short_name,
         name=short_name,
         description=self.description
     )
     record.features = []
     for feat in self.features():
         strand = 1 if feat.direction == 'f' else -1
         if self.shape == 'c' and feat.end > total_length:
             # Feature crosses the origin: split it at the end of the sequence.
             before_origin = FeatureLocation(
                 ExactPosition(feat.start), ExactPosition(total_length), strand)
             after_origin = FeatureLocation(
                 ExactPosition(0), ExactPosition(feat.end - total_length), strand)
             if strand == 1:
                 ordered_parts = [before_origin, after_origin]
             else:
                 ordered_parts = [after_origin, before_origin]
             location = CompoundLocation(ordered_parts)
         else:
             location = FeatureLocation(
                 ExactPosition(feat.start), ExactPosition(feat.end), strand)
         qualifiers = {q.name: q.data for q in feat.qualifiers.all()}
         record.features.append(
             SeqFeature(location, feat.type, qualifiers=qualifiers))
     return record.format('genbank')
Пример #4
0
    def test_eq_not_identical(self):
        """Locations that differ in any respect must compare as not equal."""
        # Different number of parts.
        two_part = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        three_part = (
            FeatureLocation(12, 17, 1)
            + FeatureLocation(23, 42, 1)
            + FeatureLocation(50, 60, 1)
        )
        self.assertNotEqual(two_part, three_part)

        # Same coordinates, opposite strand.
        forward = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        reverse = FeatureLocation(12, 17, -1) + FeatureLocation(23, 42, -1)
        self.assertNotEqual(forward, reverse)

        # Same parts, different operator.
        default_operator = CompoundLocation(
            [FeatureLocation(12, 17, 1), FeatureLocation(23, 42, 1)]
        )
        order_operator = CompoundLocation(
            [FeatureLocation(12, 17, 1), FeatureLocation(23, 42, 1)], "order"
        )
        self.assertNotEqual(default_operator, order_operator)

        # A location compared against a non-location object.
        compound = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        not_a_location = 5
        self.assertNotEqual(compound, not_a_location)
Пример #5
0
def get_deletions(gb_m1, l, e, nc_margin):
    """Propose deletions according to parameters L and E.

    A deletion is any region of length at least L that contains no gene
    considered essential at threshold E. Each essential gene is padded with a
    non-coding margin on both sides, clipped to the record boundaries.

    Args:
        gb_m1 (Bio.SeqRecord.SeqRecord): GenBank annotation.
        l (int): minimum deletion length.
        e (float): gene essentiality threshold.
        nc_margin (int): non-coding margin around essential genes.
    Returns:
        deletions (Bio.SeqFeature.CompoundLocation): proposed deletions.
    Raises:
        SystemExit: if the deletions cannot be assembled into a
            CompoundLocation (a ValueError from its constructor).
    """
    record_length = len(gb_m1)
    essential_genes = [
        feat for feat in gb_m1.features if is_essential(feat, e)
    ]
    print("Essential gene count with threshold %.3f: %d" %
          (e, len(essential_genes)))

    # Pad every essential gene by the margin, staying inside the record.
    padded_genes = []
    for gene in essential_genes:
        padded_start = max(gene.location.start - nc_margin, 0)
        padded_end = min(gene.location.end + nc_margin, record_length)
        padded_genes.append(FeatureLocation(padded_start, padded_end))

    essential_regions = merge_overlaps(CompoundLocation(padded_genes))
    nonessential_regions = complementary_compoundloc(0, record_length,
                                                     essential_regions)
    try:
        deletions = CompoundLocation([
            candidate for candidate in nonessential_regions.parts
            if len(candidate) >= l
        ])
    except ValueError:
        raise SystemExit("No deletions defined! Try readjusting L and/or E.")
    return deletions
Пример #6
0
def build_location_from_others(
        locations: Sequence[Location]) -> FeatureLocation:
    """ Combines non-overlapping locations into a single location.

        Locations are folded in left to right. When the next location starts
        exactly where the accumulated one ends, the two touching parts are
        fused into one part; otherwise the parts are simply concatenated.

        Arguments:
            locations: a sequence of locations to merge

        Returns:
            a FeatureLocation if everything fused into one continuous part,
            otherwise a CompoundLocation

        Raises:
            ValueError: if no locations are given
    """
    if not locations:
        raise ValueError("at least one FeatureLocation required")

    merged = locations[0]
    for current in locations[1:]:
        if current.start != merged.end:
            # Not adjacent: keep all the parts of both sides.
            merged = CompoundLocation(merged.parts + current.parts)
            continue

        # Adjacent: fuse the last part so far with the first incoming part.
        bridge = FeatureLocation(merged.parts[-1].start,
                                 current.parts[0].end, merged.strand)
        both_simple = len(merged.parts) <= 1 and len(current.parts) <= 1
        if both_simple:
            merged = bridge
        else:
            merged = CompoundLocation(merged.parts[:-1] + [bridge] +
                                      current.parts[1:])
    return merged
Пример #7
0
    def test_other(self):
        """Run ``self.func`` on a three-part location, once for each strand."""
        spans = [(5922, 6190), (5741, 5877), (4952, 5682)]
        expectations = [(1, (5243, 6064)), (-1, (5078, 5854))]
        for strand, expected in expectations:
            location = CompoundLocation([
                FeatureLocation(begin, finish, strand=strand)
                for begin, finish in spans
            ])
            assert self.func(97, 336, location) == expected
Пример #8
0
 def _feature(self, seq, s, e, strand):
     """Return a location for ``s``..``e``, splitting it when ``s >= e``.

     When ``s < e`` a plain FeatureLocation with the given strand is returned.
     Otherwise the span is built from two parts, ``s`` to the end of
     ``seq.seq`` and ``0`` to ``e``. Strand ``1`` joins them in that order
     with strand 1; any other strand joins them in the opposite order with
     strand -1.
     """
     if s < e:
         return FeatureLocation(s, e, strand=strand)

     is_forward = strand == 1
     part_strand = 1 if is_forward else -1
     up_to_end = FeatureLocation(s, len(seq.seq), strand=part_strand)
     from_zero = FeatureLocation(0, e, strand=part_strand)
     if is_forward:
         return CompoundLocation([up_to_end, from_zero])
     return CompoundLocation([from_zero, up_to_end])
Пример #9
0
def check_sub(feature, sequence) -> List[SeqFeature]:
    """ Recursively checks a GFF feature for any subfeatures and generates any
        appropriate SeqFeature instances from them.

        Arguments:
            feature: a feature exposing a 'sub_features' list
            sequence: the record the feature belongs to; its 'seq' attribute
                      is used to build the translation

        Returns:
            a list of SeqFeatures: those produced by recursing into nested
            subfeatures or, at the tip of the tree, a single CDS feature
            built from the direct CDS children (empty if there are none)
    """
    new_features = []
    locations = []  # type: List[FeatureLocation]
    trans_locations = []  # type: List[FeatureLocation]
    qualifiers = {}  # type: Dict[str, List[str]]
    mismatching_qualifiers = set()  # type: Set[str]
    for sub in feature.sub_features:
        if sub.sub_features:  # If there are sub_features, go deeper
            new_features.extend(check_sub(sub, sequence))
        elif sub.type == 'CDS':
            # NOTE(review): the helper is defined elsewhere; it presumably
            # fills qualifiers/locations/trans_locations in place and returns
            # the names of qualifiers that disagree between parts - confirm
            sub_mismatch = generate_details_from_subfeature(
                sub, qualifiers, locations, trans_locations)
            mismatching_qualifiers.update(sub_mismatch)

    # drop qualifiers reported as mismatching, along with the parent reference
    for qualifier in mismatching_qualifiers:
        del qualifiers[qualifier]
    if 'Parent' in qualifiers:
        del qualifiers['Parent']

    # if nothing to work on
    if not new_features and not locations:
        return []

    # Only works in tip of the tree, when there's no new_feature built yet. If there is,
    # it means the script just came out of a check_sub and it's ready to return.
    if not new_features:
        new_loc = locations[0]
        # construct a compound location if required
        if len(locations) > 1:
            locations = sorted(locations, key=lambda x: x.start.real)
            trans_locations = sorted(trans_locations,
                                     key=lambda x: x.start.real)
            # parts are in ascending order for strand 1, and in descending
            # order for any other strand
            if locations[0].strand == 1:
                new_loc = CompoundLocation(locations)
            else:
                new_loc = CompoundLocation(list(reversed(locations)))
                trans_locations = list(reversed(trans_locations))
        # TODO: use new secmet features
        new_feature = SeqFeature(new_loc)
        new_feature.qualifiers = qualifiers
        new_feature.type = 'CDS'
        # each translation location is translated on its own (stop symbols
        # removed) and the pieces are concatenated in order
        trans = ''.join([
            n.extract(sequence.seq).translate(stop_symbol='')._data
            for n in trans_locations
        ])
        new_feature.qualifiers['translation'] = [str(trans)]
        new_features.append(new_feature)

    return new_features
Пример #10
0
    def get_subsequence(self, resnums):
        """Get a subsequence as a new SeqProp object given a list of residue numbers.

        Args:
            resnums: iterable of 1-based residue numbers to keep.

        Returns:
            SeqProp: new object with ID ``<self.id>_subseq`` holding the
            extracted residues and their letter annotations.

        Raises:
            ValueError: if ``resnums`` is empty.
        """
        # One single-residue location per residue number (1-based -> 0-based).
        residue_locations = [
            FeatureLocation(resnum - 1, resnum) for resnum in resnums
        ]

        if not residue_locations:
            raise ValueError(
                'Cannot extract a subsequence from an empty list of residue numbers')

        # CompoundLocation requires at least two parts, so a single residue
        # has to be extracted through its plain FeatureLocation.
        if len(residue_locations) == 1:
            sub_feature_location = residue_locations[0]
        else:
            sub_feature_location = CompoundLocation(residue_locations)
        sub_feature = sub_feature_location.extract(self)

        new_sp = SeqProp(id='{}_subseq'.format(self.id), seq=sub_feature)
        new_sp.letter_annotations = sub_feature.letter_annotations
        return new_sp
Пример #11
0
    def test_position_conversion_nonzero_start_compound(self):
        """Run ``self.func`` on compound locations that do not start at zero."""
        scenarios = [
            (1, [(6, 18), (24, 27)],
             [((0, 2), (6, 12)), ((1, 4), (9, 18)), ((3, 5), (15, 27))]),
            (-1, [(6, 15), (21, 27)],
             [((0, 2), (21, 27)), ((1, 4), (9, 24)), ((3, 5), (6, 12))]),
        ]
        for strand, spans, checks in scenarios:
            location = CompoundLocation([
                FeatureLocation(begin, finish, strand=strand)
                for begin, finish in spans
            ])
            assert len(location) == 15
            for (first, last), expected in checks:
                assert self.func(first, last, location) == expected
Пример #12
0
    def get_subsequence_from_property(self,
                                      property_key,
                                      property_value,
                                      condition,
                                      return_resnums=False):
        """Get a subsequence as a new SeqProp object given a certain property you want to find in
        this chain's letter_annotation

        See documentation for :func:`ssbio.protein.sequence.seqprop.SeqProp.get_subsequence_from_property`

        Args:
            property_key (str): Property key in the ``letter_annotations`` attribute that you want to filter using
            property_value (str): Property value that you want to filter by
            condition (str): ``<``, ``=``, ``>``, ``>=``, or ``<=`` to filter the values by
            return_resnums (bool): If resnums should be returned as well

        Returns:
            SeqProp: New SeqProp object that you can run computations on or just extract its properties.
            When ``return_resnums`` is true, a tuple of that object and the list of
            matching 1-based residue numbers is returned instead.

        Raises:
            ValueError: If no chain sequence is stored, or if no residue satisfies the condition.
            KeyError: If ``property_key`` is not in the letter annotations.

        """
        if not self.seq_record:
            raise ValueError('No chain sequence stored')

        if property_key not in self.seq_record.letter_annotations:
            raise KeyError(
                '{}: {} not contained in the letter annotations'.format(
                    self.seq_record.id, property_key))

        # 0-based indices of the residues whose annotation satisfies the condition
        subfeat_indices = list(
            locate(
                self.seq_record.letter_annotations[property_key], lambda x:
                ssbio.utils.check_condition(x, condition, property_value)))

        if not subfeat_indices:
            # Previously this surfaced as an opaque ValueError raised by
            # CompoundLocation; the exception type is kept, the message is clearer.
            raise ValueError(
                '{}: no residues where {} {} {}'.format(
                    self.seq_record.id, property_key, condition, property_value))

        biop_compound_list = [
            FeatureLocation(idx, idx + 1) for idx in subfeat_indices
        ]

        # CompoundLocation requires at least two parts, so a single match has
        # to be extracted through its plain FeatureLocation.
        if len(biop_compound_list) == 1:
            sub_feature_location = biop_compound_list[0]
        else:
            sub_feature_location = CompoundLocation(biop_compound_list)
        sub_feature = sub_feature_location.extract(self.seq_record)

        new_sp = SeqProp(id='{}-{}_{}_{}_{}_extracted'.format(
            self.pdb_parent, self.id, property_key, condition, property_value),
                         seq=sub_feature)
        new_sp.letter_annotations = sub_feature.letter_annotations

        if return_resnums:
            return new_sp, [x + 1 for x in subfeat_indices]
        return new_sp
Пример #13
0
    def nucleic_sequence(self, feature_type):
        """
        Extract the chromosome sequence covered by the features of one type.

        Several locations are joined into a CompoundLocation before the
        extraction; a single location is used as it is.

        :type feature_type: str
        :rtype: Bio.Seq.Seq
        :return: the extracted sequence, or None when there are no locations
        """
        locations = self._get_features(feature_type)
        if not locations:
            return None

        feature = CompoundLocation(locations) if len(locations) > 1 else locations[0]
        return feature.extract(self.chromosome.nucleic_sequence)
Пример #14
0
    def test_position_conversion_compound_reverse(self):
        """Run ``self.func`` on reverse-strand compound locations."""
        scenarios = [
            ([(0, 6), (9, 18)],
             [((0, 4), (3, 18)), ((1, 5), (0, 15))]),
            ([(0, 6), (12, 15), (21, 27)],
             [((0, 4), (3, 27)), ((1, 5), (0, 24)), ((2, 3), (12, 15))]),
        ]
        for spans, checks in scenarios:
            location = CompoundLocation([
                FeatureLocation(begin, finish, strand=-1)
                for begin, finish in spans
            ])
            assert len(location) == 15
            for (first, last), expected in checks:
                assert self.func(first, last, location) == expected
Пример #15
0
 def set_annotation(self, seq_ident, seq_rec, gb_file):
     """Add the stored repeat annotations to ``seq_rec`` and write it as GenBank.

     Each annotation contributes two ``repeat_region`` features, one per
     repeat copy, each carrying a note that points at the other copy. A repeat
     whose end is not greater than its start is treated as wrapping around
     the end of the sequence (of length ``self._length``).

     Args:
         seq_ident: value assigned to ``seq_rec.id`` before writing.
         seq_rec: record that receives the features and is written out.
         gb_file: destination handed to ``SeqIO.write``.

     Returns:
         ``True`` when annotations were written; ``None`` when there is
         nothing to annotate (nothing is written in that case).
     """
     if self._annotation:
         SeqIO = import_bio_seq_io()
         # If SeqIO exists than these should be installed!
         from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
         from Bio.Alphabet import DNAAlphabet

         def _seg_length(s, e):
             # Fixed: a wrapping segment covers s..length plus 0..e, which
             # matches the location built by _get_loc below. The previous
             # formula (s + length - e) did not.
             return (e - s) if e > s else (self._length - s + e)

         def _get_loc(s, e):
             if e > s:
                 return FeatureLocation(s, e)
             return CompoundLocation(
                 [FeatureLocation(s, self._length), FeatureLocation(0, e)])

         for a in self._annotation:
             # Fixed: the second argument used to repeat start_1/end_1, so the
             # length of the second copy was never taken into account.
             length = max(_seg_length(a.start_1, a.end_1),
                          _seg_length(a.start_2, a.end_2))
             m = 100 if a.matched == length else round(
                 100 * length / a.matched, 2)
             note_1 = f'repeat_hit position {a.start_2} - {a.end_2}, match {m}.%, length {a.matched}'
             note_2 = f'repeat_hit position {a.start_1} - {a.end_1}, match {m}.%, length {a.matched}'
             rpt = ('rpt_type', a.rpt_type)
             seq_rec.features.append(
                 SeqFeature(_get_loc(a.start_1, a.end_1),
                            type='repeat_region',
                            qualifiers=OrderedDict([rpt,
                                                    ('note', note_1)])))
             seq_rec.features.append(
                 SeqFeature(_get_loc(a.start_2, a.end_2),
                            type='repeat_region',
                            qualifiers=OrderedDict([rpt,
                                                    ('note', note_2)])))
         # Fix other data
         seq_rec.id = seq_ident
         seq_rec.seq.alphabet = DNAAlphabet()
         SeqIO.write([seq_rec], gb_file, 'genbank')
         return True
Пример #16
0
def save_genbank_m4(gb_m3, gb_m4):
    """Generate modified-IV GenBank file (reduced version of the genome).

    The regions marked as deletions are removed from the sequence, and every
    feature that lies entirely inside a retained region is shifted to its new
    coordinates and kept. Note that the locations of the features of ``gb_m3``
    are modified in place.

    Args:
        gb_m3 (Bio.SeqRecord.SeqRecord): modified-III GenBank annotation.
        gb_m4 (str): modified-IV GenBank file path.
    """
    deleted = CompoundLocation(
        [feat.location for feat in gb_m3.features if is_deletion(feat)])
    retained = complementary_compoundloc(0, len(gb_m3), deleted)
    reduced_annot = SeqRecord(seq=retained.extract(gb_m3.seq),
                              id=gb_m3.id,
                              name=gb_m3.name,
                              description=gb_m3.description,
                              dbxrefs=gb_m3.dbxrefs,
                              annotations=gb_m3.annotations)

    # Shift features' positions according to deletions
    previous_end = 0
    for segment in retained.parts:
        shift = segment.start - previous_end
        for feat in gb_m3.features:
            starts_inside = feat.location.start in segment
            if starts_inside and feat.location.end in segment:
                feat.location = feat.location + (-shift)
                reduced_annot.features.append(feat)
        previous_end = segment.end - shift + 2
    SeqIO.write(reduced_annot, gb_m4, "genbank")
Пример #17
0
def feature_data_to_seqfeature(coordinates, feature_type):
    """Converts coordinates received from feature table parsing to a SeqFeature.

    Each coordinate pair is 1-based and inclusive. A pair whose first value is
    greater than its second is placed on the reverse strand.

    :param coordinates: Start and end positions for the feature.
    :type coordinates: list[tuple(int, int)]
    :param feature_type: Label of the parsed feature.
    :type feature_type: str
    :returns: Returns a Biopython SeqFeature loaded with given coordinates.
    :rtype: SeqFeature
    """
    locations = []
    for coordinate in coordinates:
        # Convert before comparing: coordinates that arrive as strings would
        # otherwise be compared lexicographically ("10" <= "9"), picking the
        # wrong strand.
        first = int(coordinate[0])
        second = int(coordinate[1])
        if first <= second:
            start, stop, strand = first - 1, second, 1
        else:
            start, stop, strand = second - 1, first, -1

        locations.append(FeatureLocation(start, stop, strand=strand))

    if len(locations) == 1:
        feature = SeqFeature(locations[0], type=feature_type)
    else:
        feature = SeqFeature(CompoundLocation(locations), type=feature_type)

    return feature
Пример #18
0
def create_3_part_seqfeature(start1=0,
                             stop1=0,
                             strand1=1,
                             start2=0,
                             stop2=0,
                             strand2=1,
                             start3=0,
                             stop3=0,
                             strand3=1,
                             type="",
                             qualifiers=None):
    """Build a BioPython SeqFeature whose location is a join of three parts.

    start1, start2, start3 = int
    stop1, stop2, stop3 = int
    strand1, strand2, strand3 = int (-1, 1)
    type = 'CDS', 'Source', 'tRNA', etc.
    qualifiers = dictionary of feature descriptions."""
    segments = (
        (start1, stop1, strand1),
        (start2, stop2, strand2),
        (start3, stop3, strand3),
    )
    parts = [
        FeatureLocation(ExactPosition(begin), ExactPosition(finish),
                        strand=part_strand)
        for begin, finish, part_strand in segments
    ]
    joined = CompoundLocation(parts, "join")
    return SeqFeature(joined,
                      type=type,
                      location_operator="join",
                      qualifiers=qualifiers)
Пример #19
0
    def set_seqfeature(self):
        """
        Build a SeqFeature for this gene and store it in the
        `seqfeature` attribute, then attach the gene's qualifiers to it.
        :return:
        """
        # Biopython locations use 0-based half-open coordinates.
        start, stop = basic.reformat_coordinates(self.start, self.stop,
                                                 self.coordinate_format,
                                                 "0_half_open")

        # Biopython strands are numeric rather than "F"/"R".
        strand = basic.reformat_strand(self.orientation, "numeric")

        if self.start <= self.stop:
            # Ordinary gene: one contiguous span.
            location = FeatureLocation(start, stop)
        else:
            # Wrap-around gene (stop < start): runs to the end of the genome
            # and continues from position 0.
            location = CompoundLocation([
                FeatureLocation(start, self.genome_length),
                FeatureLocation(0, stop)
            ])

        self.seqfeature = SeqFeature(location, strand=strand, type=self.type)
        # Add feature qualifiers
        self.seqfeature.qualifiers = self.get_qualifiers()
Пример #20
0
    def wraparound(feature):
        # Builds a two-part SeqFeature for `feature` after the sequence has
        # been shifted: one part from 0 to `new_end` and one from `new_start`
        # to `length`, joined in a CompoundLocation. Sub-features are rebuilt
        # and attached to the new feature, which is returned.
        #
        # `length` and `shift` are not parameters; they are read from the
        # enclosing scope, which is not visible here.
        # NOTE(review): presumably `length` is the sequence length and `shift`
        # the shift offset - confirm against the enclosing function.
        new_start = length -(shift-feature.location.start)
        new_end   = feature.location.end-shift

        c = SeqFeature(CompoundLocation( [FeatureLocation(0, new_end),
                                          FeatureLocation(new_start, length)]),
                       type=feature.type,
                       location_operator="join",
                       strand=feature.strand,
                       id=feature.id,
                       qualifiers=feature.qualifiers)
        sub_features=[]
        # NOTE(review): inside this loop every branch reads the attributes of
        # `feature`, not of the loop variable `sf`; only the recursive call
        # uses `sf`. Looks unintended - confirm before relying on it.
        for sf in feature.sub_features:
            if feature.location.end<shift:
                sub_features.append(SeqFeature(FeatureLocation(length-feature.location.start,
                                                               length-feature.location.end),
                                    type=feature.type,
                                    location_operator=feature.location_operator,
                                    strand=feature.strand,
                                    id=feature.id,
                                    qualifiers=feature.qualifiers,
                                    sub_features=None))
            elif feature.location.start>shift:
                sub_features.append(SeqFeature(FeatureLocation(feature.location.start-shift,
                                                               feature.location.end-shift),
                                    type=feature.type,
                                    location_operator=feature.location_operator,
                                    strand=feature.strand,
                                    id=feature.id,
                                    qualifiers=feature.qualifiers,
                                     sub_features=None))
            else:
                # NOTE(review): wraparound() returns a single SeqFeature, yet
                # the result is passed to extend(), which expects an iterable
                # - verify whether append() was intended.
                sub_features.extend(wraparound(sf))
        c.sub_features.extend(sub_features)
        return c
Пример #21
0
def new_compound_location(
    indices: List[Union[Tuple[int, int], Tuple[int, int, int]]], strand: int
) -> CompoundLocation:
    """Build a CompoundLocation from a list of index tuples.

    Args:
        indices: tuples of ``(start, end)`` or ``(start, end, strand)``. A
            two-element tuple takes the default ``strand``.
        strand: strand used for every two-element tuple.

    Returns:
        A CompoundLocation with one exact-position part per tuple, in the
        order given.

    Raises:
        ValueError: if an element is not a tuple, does not have two or three
            items, or contains a non-integer value.
    """
    locations = []
    for index in indices:
        if not isinstance(index, tuple):
            # Fixed: report the class of the offending element, not of the
            # enclosing ``indices`` container.
            raise ValueError(
                "Expects a tuple of integers size 2 or 3, not a {}".format(
                    index.__class__
                )
            )
        if len(index) not in (2, 3):
            raise ValueError("Expects a tuple of integers of size 2 or 3")
        if len(index) == 2:
            i, j = index
            s = strand
        else:
            i, j, s = index
        if not all(isinstance(value, int) for value in (i, j, s)):
            raise ValueError(
                "Expects a tuple of integers of size 2 or 3. Found {}".format(index)
            )
        locations.append(FeatureLocation(ExactPosition(i), ExactPosition(j), strand=s))
    return CompoundLocation(locations)
Пример #22
0
def _adjust_location_by_offset(location: Location, offset: int) -> Location:
    """ Moves the leading edge of a location by the given offset
        (e.g. start_codon).

        On strand -1 the end position is moved, otherwise the start position.
        For a CompoundLocation only the first part is adjusted.
    """
    assert -2 <= offset <= 2, "invalid offset %d" % offset

    def shift_leading_edge(part: FeatureLocation) -> FeatureLocation:
        """ only functions on FeatureLocation """
        assert not isinstance(part, CompoundLocation)
        if part.strand == -1:
            # rebuild the position with its own class, so its kind is kept
            moved_end = type(part.end)(part.end + offset)
            return FeatureLocation(part.start, moved_end, part.strand)
        moved_start = type(part.start)(part.start + offset)
        return FeatureLocation(moved_start, part.end, part.strand)

    if not isinstance(location, CompoundLocation):
        return shift_leading_edge(location)

    leading = location.parts[0]
    if location.strand == -1:
        assert leading.end == location.end
    else:
        assert leading.start == location.start
    return CompoundLocation([shift_leading_edge(leading)] + location.parts[1:])
Пример #23
0
    def nucleic_coding_sequence(self):
        """
        Return the coding sequence, extracting and caching it on first use.

        The CDS locations are extracted from the chromosome sequence (joined
        in a CompoundLocation when there are several); when this feature is on
        strand -1 the result is additionally reverse-complemented.

        :rtype: Bio.Seq.Seq
        """
        if self._nucleic_coding_sequence:
            return self._nucleic_coding_sequence

        has_several_parts = bool(self.cds) and len(self.cds) > 1
        cds = CompoundLocation(self.cds) if has_several_parts else self.cds[0]

        self._nucleic_coding_sequence = cds.extract(
            self.chromosome.nucleic_sequence)
        if self.location.strand == -1:
            self._nucleic_coding_sequence = Seq(
                self._nucleic_coding_sequence).reverse_complement()

        return self._nucleic_coding_sequence
Пример #24
0
 def test_get_list_of_locations(self):
     """Decomposing a two-part location yields a list of FeatureLocations."""
     parts = [
         FeatureLocation(10, 40, strand=+1),
         FeatureLocation(50, 59, strand=+1),
     ]
     location_list = hf.decompose_compound_location(CompoundLocation(parts))
     self.assertTrue(isinstance(location_list, list))
     for position in (0, 1):
         self.assertTrue(isinstance(location_list[position], FeatureLocation))
Пример #25
0
def check_sub(feature, sequence):
    """Recursively turn the CDS sub-features of a feature into SeqFeatures.

    Sub-features that have sub-features of their own are processed
    recursively. Direct CDS children are merged into one CDS feature (a
    CompoundLocation when there are several parts) that carries only the
    qualifiers on which all parts agree, plus a translation.

    Returns a list of SeqFeatures, which may be empty.
    """
    new_features = []
    cds_parts = []
    shared_qualifiers = {}
    conflicting = []
    for child in feature.sub_features:
        if child.sub_features:  # nested sub-features: recurse
            new_features.extend(check_sub(child, sequence))
            continue
        if child.type != 'CDS':
            continue
        cds_parts.append(
            FeatureLocation(child.location.start.real,
                            child.location.end.real,
                            strand=child.strand))
        # A split CDS keeps a qualifier only when every part carries the same
        # value for it, e.g. an identical "protein_ID".
        for key in child.qualifiers.keys():
            value = child.qualifiers[key]
            stored = shared_qualifiers.setdefault(key, value)
            if not stored == value:
                conflicting.append(key)

    for key in conflicting:  # drop qualifiers that differ between parts
        shared_qualifiers.pop(key, None)
    shared_qualifiers.pop('Parent', None)  # drop the parent reference

    # A feature is only built at the tip of the tree; if recursion already
    # produced features they are returned untouched.
    if new_features or not cds_parts:
        return new_features

    if len(cds_parts) == 1:
        new_loc = cds_parts[0]
    else:
        ordered = sorted(cds_parts, key=lambda part: part.start.real)
        if ordered[0].strand != 1:
            ordered.reverse()
        new_loc = CompoundLocation(ordered)

    new_feature = SeqFeature(new_loc)
    new_feature.qualifiers = shared_qualifiers
    new_feature.type = 'CDS'
    trans = new_feature.extract(sequence.seq).translate(stop_symbol='')
    new_feature.qualifiers['translation'] = [str(trans)]
    new_features.append(new_feature)

    return new_features
Пример #26
0
	def coordinatesToLocation(self, coordinates):
		"""Parse a ``start,end,strand;start,end,strand;...`` string into a location.

		Returns a FeatureLocation for a single segment, a CompoundLocation for
		several segments, and None when no segment was parsed.
		"""
		segments = [chunk.split(',') for chunk in coordinates.split(';')]
		parsedParts = []
		for fields in segments:
			parsedParts.append(FeatureLocation(int(fields[0]), int(fields[1]), int(fields[2])))

		partCount = len(parsedParts)
		if partCount == 1:
			return parsedParts[0]
		if partCount > 1:
			return CompoundLocation(parsedParts)
		return None
Пример #27
0
    def test_eq_identical(self):
        """Equivalent locations compare equal however they were built."""
        # Both built by adding two FeatureLocations.
        added_first = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        added_second = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        self.assertEqual(added_first, added_second)

        # One built by addition, one through the CompoundLocation constructor.
        added = FeatureLocation(12, 17, 1) + FeatureLocation(23, 42, 1)
        constructed = CompoundLocation(
            [FeatureLocation(12, 17, 1), FeatureLocation(23, 42, 1)]
        )
        self.assertEqual(added, constructed)
Пример #28
0
 def setUp(self):
     """Prepare a three-part CDS fixture and the sequences that go with it."""
     self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")
     self.magic = Seq("ATGGCAGGTATTTGT")
     self.translation = "MAGIC"
     spans = ((0, 6), (12, 15), (21, 27))
     self.sub_locations = [
         FeatureLocation(begin, finish, strand=1) for begin, finish in spans
     ]
     self.location = CompoundLocation(self.sub_locations)
     self.cds = CDSFeature(self.location, locus_tag="compound")
Пример #29
0
def new_feature_location(start: int, end: int, length: int, strand: int):
    """Create a location spanning ``start`` to ``end``.

    When ``start`` is not greater than ``end`` a plain FeatureLocation is
    returned. Otherwise a two-part CompoundLocation is built, which requires
    ``length``; on strand -1 the parts are joined in the opposite order.

    Raises:
        ValueError: if ``start > end`` and ``length`` is None.
    """
    if not start > end:
        return FeatureLocation(start, end, strand=strand)

    if length is None:
        raise ValueError(
            "A length must be provided to create a feature with start > end."
        )

    up_to_length = FeatureLocation(start, length, strand)
    # NOTE(review): the second part starts at 1 rather than 0 - confirm that
    # this matches the coordinate convention used by the callers.
    from_origin = FeatureLocation(1, end, strand)
    if strand == -1:
        ordered_parts = [from_origin, up_to_length]
    else:
        ordered_parts = [up_to_length, from_origin]
    return CompoundLocation(ordered_parts)
Пример #30
0
def find_repeat(self, contig, fn, st, ppno, extra_dna):
    """
    Find repeats in the DNA sequence

    Only repeat pairs whose first copy starts within the first 3 * extra_dna
    positions of the sequence and whose second copy starts within the last
    3 * extra_dna positions are kept. Coordinates in the result are offset by
    ``st``. When ``self.include_all_repeats`` is set, a ``repeat_region``
    feature is also appended to the contig's record for every kept pair.

    :param self: The data object
    :param contig: the name of the contig we are searching on
    :param fn: the nuclotide sequence to search
    :param st: the start to find repeats at
    :param ppno: the prophage number
    :param extra_dna: the extra dna that flanks the sequence
    :return: a dict keyed by a running index; each value is a dict with the
             keys 's1', 'e1', 's2' and 'e2'. Empty if the sequence is empty
             or the repeat finder fails.
    """

    if len(fn) == 0:
        log_and_message("Len sequence is 0 so ignoring\n", c="RED", stderr=True, loglevel="WARNING")
        return {}

    rep = {}
    index = 0

    # with open(os.path.join(output_dir, "repeat_finding"), 'a') as rptout:
    #     rptout.write(f">pp{ppno} {st}\n{fn}\n")

    try:
        # set the False parameter to True to enable debugging of repeat finder
        repeats = PhiSpyRepeatFinder.repeatFinder(fn, 3, self.min_repeat_len, ppno, False)
    except Exception as e:
        # any failure of the repeat finder is logged and treated as "no repeats"
        log_and_message(f"There was an error running repeatfinder for {fn}:{e}\n", c="RED", stderr=True,
                        loglevel="WARNING")
        return {}

    for r in repeats:
        # keep only pairs that sit in the two flanks of the sequence
        if (r['first_start'] < (3 * extra_dna)) and (r['second_start'] > (len(fn) - (3 * extra_dna))):
            # check that start is always less than end
            # This always causes an off by one error, so we have to increment our ends
            # (the swap below rewrites the entries of r in place)
            if r['first_end'] < r['first_start']:
                [r['first_start'], r['first_end']] = [r['first_end'] + 1, r['first_start'] + 1]
            if r['second_end'] < r['second_start']:
                [r['second_start'], r['second_end']] = [r['second_end'] + 1, r['second_start'] + 1]

            # store the coordinates offset by st
            rep[index] = {}
            rep[index]['s1'] = r['first_start'] + st
            rep[index]['e1'] = r['first_end'] + st
            rep[index]['s2'] = r['second_start'] + st
            rep[index]['e2'] = r['second_end'] + st
            if self.include_all_repeats:
                # annotate the pair as one two-part repeat_region feature;
                # the note reports the longer of the two copies
                replen = max(rep[index]['e1'] - rep[index]['s1'], rep[index]['e2'] - rep[index]['s2'])
                r1loc = FeatureLocation(rep[index]['s1'], rep[index]['e1'], strand=+1)
                r2loc = FeatureLocation(rep[index]['s2'], rep[index]['e2'], strand=+1)
                rptloc = CompoundLocation([r1loc, r2loc])
                rptsf = SeqFeature(rptloc,type="repeat_region",
                                   qualifiers={'note':f"{replen}bp repeat identified by PhiSpy v{version.__version__}"})
                self.record.get_entry(contig).features.append(rptsf)
            index += 1

    return rep
Пример #31
0
def _merge_sub_features(parts, feature_type, strand):
    """Collapse same-typed sub-features into a single SeqFeature.

    Keyword arguments:
    parts -- Non-empty list of sub-features that all share one type.
    feature_type -- Value used for the ``type`` of the merged feature.
    strand -- Strand of the parent feature.

    Returns:
    A SeqFeature whose location is the location of the only part, or a
    CompoundLocation joining all parts when there is more than one. The
    id and qualifiers are taken from the first part (after sorting by
    start when there are several).
    """
    if len(parts) > 1:
        parts.sort(key=lambda part: part.location.start)
        locations = [part.location for part in parts]
        if strand == -1:
            # When calling CompoundLocation.extract() the sequences are
            # extracted in the order that they are encountered. For features
            # on the - strand, we need to reverse this order.
            locations.reverse()
        location = CompoundLocation(locations)
    else:
        # A single part keeps its own (non-compound) location.
        location = parts[0].location

    first = parts[0]
    return SeqFeature(
        id=first.id,
        type=feature_type,
        strand=strand,
        qualifiers=first.qualifiers,
        location=location,
    )


def subfeatures(feature):
    """ Flatten the nested sub-features of a GFF feature into a list.

    The BCBio GFF parser adds exons, mRNA, and CDS's to features as
    sub_features which has since been deprecated in Biopython in favour
    of the CompoundLocation object. This function merges the exons and
    the CDSs of a feature into one feature each, whose location can be
    used to extract sequences.

    Keyword arguments:
    feature -- A SeqFeature record with the _sub_features attribute.

    Returns:
    A list of features: the feature itself (its _sub_features attribute is
    reset to an empty list), then any sub-features that are not CDS, exon
    or mRNA, then one merged "exon" feature and one merged "CDS" feature
    (when present), then the flattened result for each mRNA sub-feature.
    If the feature has no _sub_features attribute, or it is None, the
    list contains only the feature.
    """
    if getattr(feature, "_sub_features", None) is None:
        return [feature]

    sub_features = feature._sub_features
    feature._sub_features = []
    new_features = [feature]

    sub_features_cds = [f for f in sub_features if f.type.lower() == 'cds']
    sub_features_exons = [f for f in sub_features if f.type.lower() == 'exon']
    sub_features_mrna = [f for f in sub_features if f.type.lower() == 'mrna']
    sub_features_others = [f for f in sub_features
                           if f.type.lower() not in {'cds', 'exon', 'mrna'}]
    new_features.extend(sub_features_others)

    strand = feature.strand

    # Each group is merged based on its OWN size: the exon location must be
    # compound whenever there are several exons, regardless of how many CDSs
    # the feature has.
    if sub_features_exons:
        new_features.append(
            _merge_sub_features(sub_features_exons, "exon", strand))

    if sub_features_cds:
        new_features.append(
            _merge_sub_features(sub_features_cds, "CDS", strand))

    # mRNAs may carry their own nested sub-features, so flatten recursively.
    for mrna in sub_features_mrna:
        new_features.extend(subfeatures(mrna))

    return new_features