def test_require_parents_equal_except_location(self):
    """Parents with different IDs are rejected; parents differing only in location are accepted."""
    validate = ObjectValidation.require_parents_equal_except_location

    # Mismatched IDs must be reported.
    with pytest.raises(MismatchedParentException):
        validate(Parent(id="parent1"), Parent(id="parent2"))

    # Same ID, different locations: the call must complete without raising.
    plus_parent = Parent(id="parent", location=SingleInterval(0, 5, Strand.PLUS))
    minus_parent = Parent(id="parent", location=SingleInterval(10, 20, Strand.MINUS))
    validate(plus_parent, minus_parent)
def test_init(self):
    """Check the attributes exposed by a Sequence after construction, with and without parent info."""

    def single_base(**kwargs):
        # One-nucleotide NT_STRICT sequence used for the parent-attribute spot checks.
        return Sequence("A", Alphabet.NT_STRICT, **kwargs)

    on_parent = SingleInterval(5, 9, Strand.MINUS, parent="parent")
    sequence = Sequence(
        "ACTG",
        Alphabet.NT_STRICT,
        id="id",
        type="seqtype_1",
        parent=Parent(id="parent", sequence_type="seqtype_2", location=on_parent),
    )

    # Sequence data
    assert sequence.sequence == Seq("ACTG")
    assert str(sequence) == "ACTG"

    # Alphabet, ID and sequence type
    assert sequence.alphabet == Alphabet.NT_STRICT
    assert sequence.id == "id"
    assert sequence.sequence_type == "seqtype_1"

    # Parent ID: given directly, as a bare string, via a location's parent, or absent
    assert sequence.parent_id == "parent"
    assert single_base(parent="parent").parent_id == "parent"
    parent_from_location = Parent(location=SingleInterval(5, 6, Strand.MINUS, parent="parent"))
    assert single_base(parent=parent_from_location).parent_id == "parent"
    assert single_base().parent_id is None

    # Parent type
    assert sequence.parent_type == "seqtype_2"

    # Parent strand: from the full parent, from a bare strand, from a bare location, or absent
    assert sequence.parent_strand == Strand.MINUS
    assert single_base(parent=Strand.UNSTRANDED).parent_strand == Strand.UNSTRANDED
    unstranded_location = SingleInterval(3, 4, Strand.UNSTRANDED)
    assert single_base(parent=unstranded_location).parent_strand == Strand.UNSTRANDED
    assert single_base().parent_strand is None

    # Location on parent
    assert sequence.location_on_parent == SingleInterval(5, 9, Strand.MINUS, parent="parent")

    # No alphabet validation: construction must not raise for out-of-alphabet characters
    Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)
def test_require_parent_has_parent_with_location(self):
    """A parent passes only when its own parent carries a location."""
    validate = ObjectValidation.require_parent_has_parent_with_location

    # Neither a parent with no grandparent, nor one whose grandparent lacks a location, is acceptable.
    rejected_kwargs = (
        {"id": "parent"},
        {"id": "parent", "parent": "grandparent"},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(NullParentException):
            validate(Parent(**kwargs))

    # Grandparent with a location: the call must complete without raising.
    located_grandparent = Parent(id="grandparent", location=SingleInterval(0, 5, Strand.PLUS))
    validate(Parent(id="parent", parent=located_grandparent))
def liftover_to_parent_or_seq_chunk_parent(
    self,
    parent_or_seq_chunk_parent: Parent,
) -> "AbstractInterval":
    """
    Return a copy of this interval lifted over to a new coordinate system.

    The copy is built by serializing this interval with ``to_dict()`` and rebuilding it with
    ``from_dict()`` against ``parent_or_seq_chunk_parent``.

    If this interval is in chunk-relative coordinates, the liftover goes through chromosome
    coordinates. That requires a Parent of type "chromosome" somewhere in the ancestry, and that
    Parent must match the chromosome ancestor of the supplied parent except for location
    information (and except for sequence, when either side has no sequence).

    Validation has to happen here in addition to ``liftover_location_to_seq_chunk_parent()``,
    because at this point the parent of the current interval is still known. Once ``to_dict()``
    has been performed that information is lost, and the new parent is applied under the
    assumption that it is valid.

    Args:
        parent_or_seq_chunk_parent: The parent (or sequence chunk parent) to lift this interval onto.

    Returns:
        A new interval built from this interval's dictionary representation and the supplied parent.

    Raises:
        NoSuchAncestorException: If this location has a sequence chunk ancestor but no
            chromosome ancestor.
    """
    location = self.chunk_relative_location

    # A chunk-relative location can only be lifted through chromosome coordinates.
    is_chunk_relative = location.has_ancestor_of_type(SequenceType.SEQUENCE_CHUNK)
    if is_chunk_relative and not location.has_ancestor_of_type(SequenceType.CHROMOSOME):
        raise NoSuchAncestorException(
            "This location does not have a chromosome ancestor of its sequence chunk, "
            "which means it is not possible to lift to a new a chunk through the chromosome coordinates."
        )

    if location.has_ancestor_of_type(SequenceType.CHROMOSOME):
        own_chromosome = location.first_ancestor_of_type(SequenceType.CHROMOSOME)
        new_chromosome = parent_or_seq_chunk_parent.first_ancestor_of_type(SequenceType.CHROMOSOME)
        # Sequences are only compared when both chromosomes actually carry one.
        validate = (
            ObjectValidation.require_parents_equal_except_location
            if own_chromosome.sequence and new_chromosome.sequence
            else ObjectValidation.require_parents_equal_except_location_and_sequence
        )
        validate(own_chromosome, new_chromosome)

    serialized = self.to_dict()
    return self.from_dict(serialized, parent_or_seq_chunk_parent)
def test_init(self, id, sequence_type, strand, location, sequence, expected):
    """A Parent built from the parametrized arguments compares equal to the expected Parent."""
    constructor_kwargs = {
        "id": id,
        "sequence_type": sequence_type,
        "strand": strand,
        "location": location,
        "sequence": sequence,
    }
    assert expected == Parent(**constructor_kwargs)
def test_seq_chunk_to_parent(strand):
    """seq_chunk_to_parent builds a sequence-chunk Parent whose sequence sits on a chromosome parent."""
    obs = seq_chunk_to_parent("ATGCATGC", "TestSeq", 200, 208, strand=strand)

    # Chromosome parent as it is expected to appear inside the chunk's location.
    chromosome_in_location = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand),
        sequence=None,
        parent=None,
    )
    # Chromosome parent as it is expected to appear as the parent of the sequence's parent.
    chromosome_as_parent = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand, parent=None),
        sequence=None,
        parent=None,
    )
    sequence_parent = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand, parent=chromosome_in_location),
        sequence=None,
        parent=chromosome_as_parent,
    )
    chunk_sequence = Sequence(
        data="ATGCATGC",
        id="TestSeq:200-208",
        alphabet=Alphabet.NT_EXTENDED_GAPPED,
        type=SequenceType.SEQUENCE_CHUNK,
        parent=sequence_parent,
    )

    assert obs == Parent(
        id="TestSeq:200-208",
        sequence_type=SequenceType.SEQUENCE_CHUNK,
        strand=None,
        location=None,
        sequence=chunk_sequence,
    )
    # Both the chromosome and the chunk itself must be discoverable through the ancestry.
    assert obs.has_ancestor_of_type(SequenceType.CHROMOSOME)
    assert obs.has_ancestor_of_type(SequenceType.SEQUENCE_CHUNK)
def test_init_error(self, id, sequence_type, strand, location, sequence, parent, expected_exception):
    """Constructing a Parent from the parametrized arguments raises the expected exception."""
    constructor_kwargs = {
        "id": id,
        "sequence_type": sequence_type,
        "strand": strand,
        "location": location,
        "sequence": sequence,
        "parent": parent,
    }
    with pytest.raises(expected_exception):
        Parent(**constructor_kwargs)
def seq_chunk_to_parent(
    seq: str,
    sequence_name: Union[UUID, str],
    start: int,
    end: int,
    strand: Optional[Strand] = Strand.PLUS,
    alphabet: Optional[Alphabet] = Alphabet.NT_EXTENDED_GAPPED,
) -> Parent:
    """Build a sequence chunk Parent from a slice of sequence.

    Intended for instantiating an annotation collection with only a subset of a genome sequence.
    The chunk is given the ID ``"<sequence_name>:<start>-<end>"`` and is placed at
    ``start``-``end`` on a chromosome-typed parent named ``sequence_name``.

    NOTE: The sequence is assumed to be a subset of a chromosome; this function cannot validate that.

    Args:
        seq: Sequence subset to use.
        sequence_name: The name of the sequence.
        start: The genomic start position of this sequence.
        end: The genomic end position of this sequence.
        strand: The strand this chunk is relative to the genome.
        alphabet: The alphabet the sequence is in.

    Returns:
        An instantiated Parent object ready to be passed to a constructor.
    """
    chunk_id = f"{sequence_name}:{start}-{end}"

    # Build from the inside out: chromosome -> location on chromosome -> chunk sequence.
    chromosome = Parent(id=sequence_name, sequence_type=SequenceType.CHROMOSOME)
    location_on_chromosome = SingleInterval(start, end, strand, parent=chromosome)
    chunk_sequence = Sequence(
        seq,
        alphabet,
        id=chunk_id,
        type=SequenceType.SEQUENCE_CHUNK,
        parent=Parent(location=location_on_chromosome),
    )
    return Parent(id=chunk_id, sequence=chunk_sequence)
def test_seq_to_parent():
    """seq_to_parent wraps a string in a chromosome-typed Sequence located over its full length."""
    seq = "ATGCATGC"
    seq_id = "TestSeq"
    obs = seq_to_parent(seq, seq_id=seq_id)

    expected_sequence = Sequence(seq, Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.CHROMOSOME, id=seq_id)
    expected_location = SingleInterval(0, len(seq), Strand.PLUS)
    assert obs == Parent(sequence=expected_sequence, location=expected_location)
def test_require_location_has_parent_with_sequence(self):
    """A location passes only when it has a parent and that parent carries a sequence."""
    validate = ObjectValidation.require_location_has_parent_with_sequence

    # No parent at all.
    with pytest.raises(NullParentException):
        validate(SingleInterval(0, 5, Strand.PLUS))

    # Parent present, but without a sequence.
    with pytest.raises(NullSequenceException):
        validate(SingleInterval(0, 5, Strand.PLUS, parent="parent"))

    # Parent with a sequence: the call must complete without raising.
    parent_with_sequence = Parent(id="parent", sequence=Sequence("AAAAA", Alphabet.NT_STRICT))
    validate(SingleInterval(0, 5, Strand.PLUS, parent=parent_with_sequence))
def first_ancestor_of_type(self, sequence_type: str, include_self: bool = True) -> Parent:
    """Find the closest ancestor (parent, parent of parent, etc.) of this sequence with the given type.

    When ``include_self`` is True and this sequence itself has the requested type, a new Parent
    wrapping this sequence is returned. Otherwise the search is delegated to this sequence's
    parent, which is always allowed to match itself.

    Parameters
    ----------
    sequence_type:
        Sequence type to look for
    include_self:
        Whether this sequence is itself a candidate

    Raises
    ------
    NoSuchAncestorException
        If this sequence does not match (or may not match) and it has no parent to search.
    """
    matches_self = include_self and self.sequence_type == sequence_type
    if matches_self:
        return Parent(sequence=self)

    # Nothing further up the chain to inspect.
    if not self.parent:
        raise NoSuchAncestorException

    return self.parent.first_ancestor_of_type(sequence_type, True)
def test_init_invalid_params(
    self,
    data,
    alphabet,
    parent_id,
    parent_type,
    parent_strand,
    location_on_parent,
    expected_exception,
):
    """Building a Sequence (and its Parent) from the parametrized arguments raises the expected exception."""
    parent_kwargs = {
        "id": parent_id,
        "sequence_type": parent_type,
        "strand": parent_strand,
        "location": location_on_parent,
    }
    # The Parent is built inside the context manager so that errors from either constructor count.
    with pytest.raises(expected_exception):
        Sequence(data, alphabet, parent=Parent(**parent_kwargs))
def seq_to_parent(
    seq: str,
    alphabet: Optional[Alphabet] = Alphabet.NT_EXTENDED_GAPPED,
    seq_id: Optional[str] = None,
    seq_type: Optional[str] = SequenceType.CHROMOSOME,
) -> Parent:
    """Wrap a sequence string in a Parent object.

    This is the intermediate step that carries a BioPython sequence over to a BioCantor sequence.
    The returned Parent holds the sequence together with a plus-strand location covering
    ``0`` to ``len(seq)``.

    NOTE: The sequence is assumed to be the entire chromosome.

    Args:
        seq: String of sequence.
        alphabet: Alphabet this sequence is in.
        seq_id: ID to attach to the Parent.
        seq_type: Sequence type to attach to the Parent.

    Returns:
        A :class:`Parent` object.
    """
    wrapped_sequence = Sequence(seq, alphabet, type=seq_type, id=seq_id)
    full_length = SingleInterval(0, len(seq), Strand.PLUS)
    return Parent(sequence=wrapped_sequence, location=full_length)
def reverse_complement(self, new_id: str = None, new_type: str = None) -> "Sequence":
    """Returns a new Sequence corresponding to the reverse complement of this Sequence.

    Location on parent and parent strand, if they exist, are reversed to match. The returned
    Sequence keeps this Sequence's alphabet and is built without alphabet re-validation.

    Parameters
    ----------
    new_id
        ID for the returned Sequence. If no value is provided, None is used.
    new_type
        Sequence type for the returned Sequence. If no value is provided, None is used.

    Raises
    ------
    AlphabetError
        If the alphabet is not a nucleotide alphabet, or if a character of this sequence has
        no entry in the complement table of the alphabet.
    """
    if not self.alphabet.is_nucleotide_alphabet():
        raise AlphabetError(f"Cannot reverse complement sequence with alphabet {self.alphabet}")

    location = self.location_on_parent.reverse_strand() if self.location_on_parent else None
    strand = self.parent_strand.reverse() if self.parent_strand else None

    rc_map = ALPHABET_TO_NUCLEOTIDE_COMPLEMENT[self.alphabet]
    try:
        seq_data = "".join(rc_map[c] for c in reversed(str(self)))
    except KeyError as e:
        # Chain explicitly so the traceback shows the failed lookup as the direct cause.
        raise AlphabetError(f"Character {e!s} not found for alphabet {self.alphabet}") from e

    # Only attach a parent when there is strand or location information to carry over.
    rc_parent = Parent(strand=strand, location=location) if strand or location else None
    return Sequence(
        seq_data,
        self.alphabet,
        id=new_id,
        type=new_type,
        parent=rc_parent,
        validate_alphabet=False,
    )
def test_require_parent_has_location(self):
    """A parent without a location is rejected; one with a location is accepted."""
    validate = ObjectValidation.require_parent_has_location

    with pytest.raises(NullParentException):
        validate(Parent(id="parent"))

    # Must complete without raising.
    located_parent = Parent(location=SingleInterval(5, 6, Strand.PLUS))
    validate(located_parent)
def _(obj) -> Parent:
    """Build a Parent whose strand is ``obj``.

    NOTE(review): presumably registered as a type-specific overload of a dispatching
    converter; the decorator is not visible here -- confirm.
    """
    strand_only_parent = Parent(strand=obj)
    return strand_only_parent
def _(obj) -> Parent:
    """Build a Parent whose location is ``obj``.

    NOTE(review): presumably registered as a type-specific overload of a dispatching
    converter; the decorator is not visible here -- confirm.
    """
    location_only_parent = Parent(location=obj)
    return location_only_parent
class TestParent:
    """Parametrized tests for ``Parent``: construction, equality, attribute access,
    ancestor lookup, lifting of child locations, and location reset."""

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,expected",
        [
            (None, None, None, None, None, Parent()),
            (
                "id",
                "seqtype",
                Strand.MINUS,
                SingleInterval(
                    0,
                    1,
                    Strand.MINUS,
                    Parent(
                        id="id",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence(
                            "AAA",
                            Alphabet.NT_STRICT,
                            id="id",
                            type="seqtype",
                            parent=Parent(
                                id="id2",
                                sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                            ),
                        ),
                    ),
                ),
                Sequence(
                    "AAA",
                    Alphabet.NT_STRICT,
                    id="id",
                    type="seqtype",
                    parent=Parent(id="id2", sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2")),
                ),
                Parent(
                    id="id",
                    sequence_type="seqtype",
                    strand=Strand.MINUS,
                    location=SingleInterval(
                        0,
                        1,
                        Strand.MINUS,
                        Parent(
                            id="id",
                            sequence_type="seqtype",
                            strand=Strand.MINUS,
                            sequence=Sequence(
                                "AAA",
                                Alphabet.NT_STRICT,
                                id="id",
                                type="seqtype",
                                parent=Parent(
                                    id="id2",
                                    sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                                ),
                            ),
                        ),
                    ),
                    sequence=Sequence(
                        "AAA",
                        Alphabet.NT_STRICT,
                        id="id",
                        type="seqtype",
                        parent=Parent(
                            id="id2",
                            sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                        ),
                    ),
                ),
            ),
        ],
    )
    def test_init(self, id, sequence_type, strand, location, sequence, expected):
        """A Parent built from the parametrized arguments equals the expected Parent."""
        assert expected == Parent(
            id=id,
            sequence_type=sequence_type,
            strand=strand,
            location=location,
            sequence=sequence,
        )

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,parent,expected_exception",
        [
            ("id1", None, None, SingleInterval(0, 5, Strand.PLUS, parent="id2"), None, None, ParentException),
            ("id1", None, None, None, Sequence("AAA", Alphabet.NT_STRICT, id="id2"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent="id1"),
                Sequence("AAC", Alphabet.NT_STRICT, id="id2"),
                None,
                ParentException,
            ),
            (
                None,
                "seqtype",
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                None,
                None,
                ParentException,
            ),
            (None, "seqtype", None, None, Sequence("AAT", Alphabet.NT_STRICT, type="unknown"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                Sequence("AAG", Alphabet.NT_STRICT, type="seqtype"),
                None,
                ParentException,
            ),
            (None, None, Strand.MINUS, SingleInterval(0, 5, Strand.PLUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                SingleInterval(0, 10, Strand.PLUS),
                Sequence("A", Alphabet.NT_STRICT),
                None,
                InvalidPositionException,
            ),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                LocationException,
            ),
            (None, None, Strand.PLUS, SingleInterval(5, 10, Strand.MINUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT, parent="id1"),
                Parent(id="id2"),
                MismatchedParentException,
            ),
        ],
    )
    def test_init_error(self, id, sequence_type, strand, location, sequence, parent, expected_exception):
        """Constructing a Parent from the parametrized arguments raises the expected exception."""
        with pytest.raises(expected_exception):
            Parent(
                id=id,
                sequence_type=sequence_type,
                strand=strand,
                location=location,
                sequence=sequence,
                parent=parent,
            )

    @pytest.mark.parametrize(
        "obj,expected",
        [
            (
                Sequence("AAA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
            ),
            ("parent", Parent(id="parent")),
            (
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS)),
            ),
            (
                CompoundInterval([5], [10], Strand.PLUS),
                Parent(location=CompoundInterval([5], [10], Strand.PLUS)),
            ),
            (EmptyLocation(), Parent(location=EmptyLocation())),
            (Strand.MINUS, Parent(strand=Strand.MINUS)),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
            ),
        ],
    )
    def test_make_parent(self, obj, expected):
        """``make_parent`` converts each supported input object into the expected Parent."""
        assert make_parent(obj) == expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(), Parent(id=None, sequence_type=None), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), False),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent=Parent(id="parent"))),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_eq(self, parent1, parent2, expected):
        """``==`` on two Parents returns exactly the expected boolean."""
        assert (parent1 == parent2) is expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            # Differs from test_eq: a strand-only difference is expected to be ignored here
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), True),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent="parent")),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_equals_except_location(self, parent1, parent2, expected):
        """``equals_except_location`` returns exactly the expected boolean."""
        assert parent1.equals_except_location(parent2) is expected

    @pytest.mark.parametrize(
        "id,location,sequence,expected",
        [
            ("id", None, None, "id"),
            (
                None,
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                None,
                "id",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
            (
                "id",
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
        ],
    )
    def test_id(self, id, location, sequence, expected):
        """``Parent.id`` is the expected value whether given directly, via location, or via sequence."""
        assert Parent(id=id, location=location, sequence=sequence).id == expected

    @pytest.mark.parametrize(
        "sequence_type,location,sequence,expected",
        [
            ("seqtype", None, None, "seqtype"),
            (
                None,
                SingleInterval(
                    0,
                    5,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype"),
                ),
                None,
                "seqtype",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, type="seqtype"),
                "seqtype",
            ),
            (
                None,
                None,
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(sequence_type="seqtype_2"),
                ),
                "seqtype",
            ),
        ],
    )
    def test_sequence_type(self, sequence_type, location, sequence, expected):
        """``Parent.sequence_type`` is the expected value for each way of supplying it."""
        assert Parent(sequence_type=sequence_type, location=location, sequence=sequence).sequence_type == expected

    @pytest.mark.parametrize(
        "strand,location,sequence,expected",
        [
            (Strand.PLUS, None, None, Strand.PLUS),
            (None, SingleInterval(0, 5, Strand.MINUS), None, Strand.MINUS),
            (
                Strand.PLUS,
                None,
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.PLUS,
            ),
        ],
    )
    def test_strand(self, strand, location, sequence, expected):
        """``Parent.strand`` is the expected value for each combination of strand, location and sequence."""
        assert Parent(strand=strand, location=location, sequence=sequence).strand == expected

    def test_location(self):
        """``Parent.location`` returns a location equal to the one passed in."""
        assert Parent(location=SingleInterval(0, 1, Strand.PLUS)).location == SingleInterval(0, 1, Strand.PLUS)

    def test_sequence(self):
        """``Parent.sequence`` returns a sequence equal to the one passed in."""
        assert Parent(sequence=Sequence("A", Alphabet.NT_STRICT)).sequence == Sequence("A", Alphabet.NT_STRICT)

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(parent="id"), Parent(id="id")),
            (
                Parent(
                    sequence=Sequence(
                        "AA",
                        Alphabet.NT_STRICT,
                        parent=Parent(sequence_type="chr"),
                    )
                ),
                Parent(sequence_type="chr"),
            ),
        ],
    )
    def test_parent(self, parent, expected):
        """``Parent.parent`` equals the expected Parent, whether given directly or via the sequence."""
        assert parent.parent == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(), Parent()),
            (Parent(strand=Strand.PLUS), Parent()),
            (
                Parent(strand=Strand.PLUS, location=SingleInterval(5, 10, Strand.PLUS)),
                Parent(),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(0, 1, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
            ),
        ],
    )
    def test_strip_location_info(self, parent, expected):
        """``strip_location_info`` yields a Parent equal to the input minus strand and location."""
        assert parent.strip_location_info() == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """``first_ancestor_of_type`` returns the expected closest ancestor of the given type."""
        assert parent.first_ancestor_of_type(sequence_type, include_self=include_self) == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self",
        [
            (Parent(id="self"), "seqtype_2", True),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, parent, sequence_type, include_self):
        """``first_ancestor_of_type`` raises NoSuchAncestorException when no ancestor has the type."""
        with pytest.raises(NoSuchAncestorException):
            parent.first_ancestor_of_type(sequence_type, include_self=include_self)

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Parent(id="self"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """``has_ancestor_of_type`` returns exactly the expected boolean."""
        assert parent.has_ancestor_of_type(sequence_type, include_self=include_self) is expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 5, Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                SingleInterval(195, 200, Strand.MINUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(6, 9, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=SingleInterval(6, 8, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.UNSTRANDED, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_single_interval(self, parent, expected):
        """Lifting a SingleInterval child location onto a SingleInterval grandparent location."""
        assert parent.lift_child_location_to_parent() == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval([13, 17], [15, 20], Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 10], [5, 15], Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [185, 195],
                    [190, 200],
                    Strand.MINUS,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([6], [9], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=CompoundInterval([6], [8], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval(
                    [13, 17],
                    [15, 20],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3], [5], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_single_interval(self, parent, expected):
        """Lifting a CompoundInterval child location onto a SingleInterval grandparent location."""
        assert parent.lift_child_location_to_parent() == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # No location
            (
                Parent(parent=SingleInterval(5, 10, Strand.PLUS)),
                NullParentException,
            ),
            # Parent has no location
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent="grandparent",
                ),
                NullParentException,
            ),
            # Location on parent can't be unstranded
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 100, Strand.UNSTRANDED),
                    ),
                ),
                InvalidStrandException,
            ),
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(30, 31, Strand.PLUS)),
                ),
                ValueError,
            ),
        ],
    )
    def test_lift_child_location_to_parent_single_interval_error(self, parent, expected_error):
        """``lift_child_location_to_parent`` raises the expected error for each invalid setup."""
        with pytest.raises(expected_error):
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(10, 20, Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [14, 20],
                    [18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=SingleInterval(5, 10, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 4, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                SingleInterval(41, 45, Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_compound_interval(self, parent, expected):
        """Lifting a SingleInterval child location onto a CompoundInterval grandparent location."""
        assert parent.lift_child_location_to_parent() == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 5], [5, 10], Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([10, 22], [20, 23], Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [11, 14, 20],
                    [12, 18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([5], [10], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 3], [1, 4], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                CompoundInterval([41, 44], [42, 45], Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_compound_interval(self, parent, expected):
        """Lifting a CompoundInterval child location onto a CompoundInterval grandparent location."""
        assert parent.lift_child_location_to_parent() == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 50, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20], [15, 25], Strand.PLUS),
                    ),
                ),
                InvalidPositionException,
            ),
        ],
    )
    def test_lift_child_location_to_parent_compound_interval_error(self, parent, expected_error):
        """``lift_child_location_to_parent`` raises when the child location overruns the grandparent location."""
        with pytest.raises(expected_error):
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,location,expected",
        [
            (
                Parent(),
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS), strand=Strand.PLUS),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                SingleInterval(2, 3, Strand.PLUS),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(2, 3, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
            # Resetting to None is expected to drop both location and strand
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                None,
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
        ],
    )
    def test_reset_location(self, parent, location, expected):
        """``reset_location`` returns a Parent equal to the expected one for each new location."""
        assert parent.reset_location(location) == expected

    @pytest.mark.parametrize(
        "parent,location,expected_exception",
        [
            (
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(0, 5, Strand.PLUS),
                InvalidPositionException,
            ),
            (
                Parent(id="id1", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(
                    0,
                    1,
                    Strand.PLUS,
                    parent=Parent(id="id2", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                ),
                ParentException,
            ),
        ],
    )
    def test_reset_location_error(self, parent, location, expected_exception):
        """``reset_location`` raises the expected exception for an incompatible location."""
        with pytest.raises(expected_exception):
            parent.reset_location(location)

    @pytest.mark.parametrize(
        "parent,sequence,include_self,expected",
        [
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), True, False),
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), False, False),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                True,
                True,
            ),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                False,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AA", Alphabet.NT_STRICT),
                ),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT, id="id"),
                True,
                False,
            ),
            (
                Parent(
                    parent=Parent(parent=Parent(parent=Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT, id="seq"))))
                ),
                Sequence("AAA", Alphabet.NT_STRICT, id="seq"),
                True,
                True,
            ),
        ],
    )
    def test_has_ancestor_sequence(self, parent, sequence, include_self, expected):
        """``has_ancestor_sequence`` returns the expected result for each ancestry chain."""
        assert parent.has_ancestor_sequence(sequence, include_self) == expected
def test_id(self, id, location, sequence, expected):
    """The ID reported by a Parent built from the parametrized arguments matches the expected ID."""
    built = Parent(id=id, location=location, sequence=sequence)
    assert built.id == expected
def test_sequence_type(self, sequence_type, location, sequence, expected):
    """The sequence type reported by a Parent built from the parametrized arguments matches the expected type."""
    built = Parent(sequence_type=sequence_type, location=location, sequence=sequence)
    assert built.sequence_type == expected
def test_strand(self, strand, location, sequence, expected):
    """Build a ``Parent`` from the given arguments and check its ``strand``."""
    parent = Parent(strand=strand, location=location, sequence=sequence)
    assert parent.strand == expected
class TestSequence:
    """Tests for ``Sequence``: construction, equality, slicing, alphabet
    validation, parent accessors, reverse complement, append, ancestor lookup
    and FASTA export."""

    def test_init(self):
        """Construct a fully specified ``Sequence`` and check each accessor."""
        sequence = Sequence(
            "ACTG",
            Alphabet.NT_STRICT,
            id="id",
            type="seqtype_1",
            parent=Parent(
                id="parent",
                sequence_type="seqtype_2",
                location=SingleInterval(5, 9, Strand.MINUS, parent="parent"),
            ),
        )
        # Sequence data
        assert sequence.sequence == Seq("ACTG")
        assert str(sequence) == "ACTG"
        # Alphabet
        assert sequence.alphabet == Alphabet.NT_STRICT
        # ID
        assert sequence.id == "id"
        # Sequence type
        assert sequence.sequence_type == "seqtype_1"
        # Parent ID
        assert sequence.parent_id == "parent"
        assert Sequence("A", Alphabet.NT_STRICT, parent="parent").parent_id == "parent"
        assert (Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=Parent(
                location=SingleInterval(5, 6, Strand.MINUS, parent="parent")),
        ).parent_id == "parent")
        assert Sequence("A", Alphabet.NT_STRICT).parent_id is None
        # Parent type
        assert sequence.parent_type == "seqtype_2"
        # Parent strand
        assert sequence.parent_strand == Strand.MINUS
        assert Sequence(
            "A", Alphabet.NT_STRICT,
            parent=Strand.UNSTRANDED).parent_strand == Strand.UNSTRANDED
        # Parent strand when the parent is given as a bare location
        assert (Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=SingleInterval(3, 4, Strand.UNSTRANDED),
        ).parent_strand == Strand.UNSTRANDED)
        assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None
        # Location on parent
        assert sequence.location_on_parent == SingleInterval(5, 9, Strand.MINUS, parent="parent")
        # No alphabet validation
        Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)

    @pytest.mark.parametrize(
        "data,alphabet,parent_id,parent_type,parent_strand,location_on_parent,expected_exception",
        [
            # "-" is passed with the NT_STRICT alphabet
            ("A-C", Alphabet.NT_STRICT, None, None, None, None, AlphabetError),
            # Location has length 4 but the sequence has length 3
            (
                "ACG",
                Alphabet.NT_STRICT,
                None,
                None,
                None,
                SingleInterval(0, 4, Strand.PLUS),
                ParentException,
            ),
            # Parent ID differs from the ID of the location's parent
            (
                "ATT",
                Alphabet.NT_STRICT,
                "parent1",
                None,
                None,
                SingleInterval(0, 3, Strand.PLUS, parent="parent2"),
                ParentException,
            ),
            # Parent strand differs from the location strand
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                None,
                Strand.MINUS,
                SingleInterval(0, 3, Strand.PLUS),
                InvalidStrandException,
            ),
            # Parent type differs from the type of the location's parent
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                "seqtype_2",
                None,
                SingleInterval(
                    0,
                    3,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype_3"),
                ),
                ParentException,
            ),
        ],
    )
    def test_init_invalid_params(
        self,
        data,
        alphabet,
        parent_id,
        parent_type,
        parent_strand,
        location_on_parent,
        expected_exception,
    ):
        """Constructing a ``Sequence`` from these arguments must raise
        ``expected_exception``."""
        with pytest.raises(expected_exception):
            Sequence(
                data,
                alphabet,
                parent=Parent(
                    id=parent_id,
                    sequence_type=parent_type,
                    strand=parent_strand,
                    location=location_on_parent,
                ),
            )

    @pytest.mark.parametrize(
        "sequence,other,expected",
        [
            # Compared against a plain string
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                "AAAA",
                False,
            ),
            # Differ only in the validate_alphabet flag
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=True),
                True,
            ),
            # Identical arguments
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                True,
            ),
            # Sequence data differs in case
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAa",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Alphabets differ
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_EXTENDED,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # IDs differ
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq2",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Sequence types differ
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_2",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Strand of the location on the parent differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.UNSTRANDED, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Parent IDs differ
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent2"),
                False,
            ),
            # Only one side has a parent
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent"),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # Parent strands differ
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.PLUS),
                False,
            ),
            # Only one side has a parent strand
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # Only one side has a parent (here given as the string "seqtype")
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    parent="seqtype",
                ),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
        ],
    )
    def test_equals(self, sequence, other, expected):
        """Check ``==`` in both operand orders against ``expected``."""
        assert (sequence == other) is expected
        assert (other == sequence) is expected

    def test_str(self):
        """``str()`` returns the sequence data with its original case."""
        assert str(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == "AAAAt"

    def test_len(self):
        """``len()`` returns the number of characters, including 0 for empty data."""
        assert len(Sequence("", Alphabet.NT_EXTENDED_GAPPED)) == 0
        assert len(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == 5

    @pytest.mark.parametrize(
        "seq,key,exp",
        [
            # No parent
            (Sequence("acgtacgt", Alphabet.NT_STRICT), 3, Sequence("t", Alphabet.NT_STRICT)),
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(
                3, 6), Sequence("tac", Alphabet.NT_STRICT)),
            # Slice end (10) is past the end of the 8-character sequence
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(
                3, 10), Sequence("tacgt", Alphabet.NT_STRICT)),
            # Parent with location; slice
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(3, 6, Strand.PLUS)),
            ),
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(2, 5, Strand.MINUS)),
            ),
            # Parent with location; single position
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)),
                3,
                Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(3, 4, Strand.PLUS)),
            ),
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)),
                3,
                Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(4, 5, Strand.MINUS)),
            ),
            # Parent without full location
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent="parent"),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent="parent"),
            ),
            (
                Sequence(
                    "actgactg", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
            ),
        ],
    )
    def test_getitem(self, seq, key, exp):
        """Indexing or slicing ``seq`` with ``key`` yields the ``Sequence`` ``exp``."""
        assert seq[key] == exp

    def test_getitem_error(self):
        """Slicing a sequence whose parent location is unstranded raises
        ``InvalidStrandException``."""
        with pytest.raises(InvalidStrandException):
            Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.UNSTRANDED))[3:6]

    @pytest.mark.parametrize(
        "sequence,alphabet,validate_alphabet",
        [
            ("", Alphabet.NT_STRICT, True),
            ("acgtACGT", Alphabet.NT_STRICT, True),
            ("N", Alphabet.NT_STRICT, False),
            ("acNNNw", Alphabet.NT_EXTENDED, True),
            ("AN-", Alphabet.NT_EXTENDED, False),
            ("GG--AAA", Alphabet.NT_STRICT_GAPPED, True),
            ("AN-", Alphabet.NT_STRICT_GAPPED, False),
            ("nnAAw-cg", Alphabet.NT_EXTENDED_GAPPED, True),
            ("xxx", Alphabet.NT_EXTENDED_GAPPED, False),
            ("MWT*", Alphabet.AA, True),
            ("T*-", Alphabet.AA, False),
            ("ABCDE-", Alphabet.GENERIC, True),
            ("*", Alphabet.GENERIC, False),
        ],
    )
    def test_validate_alphabet(self, sequence, alphabet, validate_alphabet):
        """Construction succeeds for each combination; the test passes if no
        exception is raised."""
        Sequence(sequence, alphabet, validate_alphabet=validate_alphabet)

    @pytest.mark.parametrize(
        "sequence,alphabet",
        [
            ("N", Alphabet.NT_STRICT),
            ("A-", Alphabet.NT_EXTENDED),
            ("AN-", Alphabet.NT_STRICT_GAPPED),
            ("E", Alphabet.NT_EXTENDED_GAPPED),
            ("R-", Alphabet.AA),
            ("?", Alphabet.GENERIC),
        ],
    )
    def test_validate_alphabet_error(self, sequence, alphabet):
        """With ``validate_alphabet=True`` each combination raises ``AlphabetError``."""
        with pytest.raises(AlphabetError):
            Sequence(sequence, alphabet, validate_alphabet=True)

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # Parent given as an ID string
            (Sequence("A", Alphabet.NT_STRICT, parent="parent"), "parent"),
            # ID set on the Parent only
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(
                                      10, 11, Strand.UNSTRANDED)),
                ),
                "parent",
            ),
            # ID set on both the Parent and the location's parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        location=SingleInterval(
                            10, 11, Strand.UNSTRANDED, parent="parent"),
                    ),
                ),
                "parent",
            ),
            # ID set on the location's parent only
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        10, 11, Strand.UNSTRANDED, parent="parent")),
                ),
                "parent",
            ),
            # Parent given as a bare location with no ID
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.UNSTRANDED),
                ),
                None,
            ),
            # No parent
            (Sequence("A", Alphabet.NT_STRICT), None),
        ],
    )
    def test_parent_id(self, sequence, expected):
        """``parent_id`` equals ``expected`` for each way of supplying a parent."""
        assert sequence.parent_id == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # No parent
            (Sequence("A", Alphabet.NT_STRICT), None),
            # Type set on the Parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(sequence_type="seqtype"),
                ),
                "seqtype",
            ),
            # Type set on the location's parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Parent(sequence_type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
            # Location's parent is a Sequence carrying the type
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Sequence(
                            "AA", Alphabet.NT_STRICT, type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
        ],
    )
    def test_parent_type(self, sequence, expected):
        """``parent_type`` equals ``expected`` for each way of supplying a parent."""
        assert sequence.parent_type == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # No parent
            (Sequence("A", Alphabet.NT_STRICT), None),
            # Parent given as a bare strand
            (
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.MINUS,
            ),
            # Parent given as a bare location
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            # NOTE(review): this case is identical to the previous one; it looks
            # like an accidental duplicate - confirm before removing.
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            # Strand set on both the Parent and its location
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        strand=Strand.MINUS,
                        location=SingleInterval(10, 11, Strand.MINUS),
                    ),
                ),
                Strand.MINUS,
            ),
        ],
    )
    def test_parent_strand(self, sequence, expected):
        """``parent_strand`` equals ``expected`` for each way of supplying a parent."""
        assert sequence.parent_strand == expected

    @pytest.mark.parametrize(
        "sequence,new_id,new_type,expected",
        [
            # Empty sequence
            (
                Sequence("", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("", Alphabet.NT_STRICT),
            ),
            # Mixed case
            (
                Sequence("ACGtacgT", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("AcgtaCGT", Alphabet.NT_STRICT),
            ),
            # Extended alphabet
            (
                Sequence("ATUGCYRSWKMBdhvnNVHDbmkwsrycguta", Alphabet.NT_EXTENDED),
                None,
                None,
                Sequence("taacgryswmkvHDBNnbdhVKMWSYRGCAAT", Alphabet.NT_EXTENDED),
            ),
            # Gapped alphabets
            (
                Sequence("--A-CGta", Alphabet.NT_STRICT_GAPPED),
                None,
                None,
                Sequence("taCG-T--", Alphabet.NT_STRICT_GAPPED),
            ),
            (
                Sequence("AtUC-N-", Alphabet.NT_EXTENDED_GAPPED),
                None,
                None,
                Sequence("-N-GAaT", Alphabet.NT_EXTENDED_GAPPED),
            ),
            # New ID and type are applied to the result
            (
                Sequence("ACGta", Alphabet.NT_STRICT),
                "new_id",
                "seqtype",
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    type="seqtype",
                ),
            ),
            # Parent strand PLUS becomes MINUS in the expected result
            (
                Sequence("ACGta", Alphabet.NT_STRICT, parent=Strand.PLUS),
                None,
                None,
                Sequence("taCGT", Alphabet.NT_STRICT, parent=Strand.MINUS),
            ),
            # Parent location keeps its coordinates; its strand becomes MINUS
            (
                Sequence(
                    "ACGta",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.PLUS),
                ),
                None,
                None,
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.MINUS),
                ),
            ),
        ],
    )
    def test_reverse_complement(self, sequence, new_id, new_type, expected):
        """``reverse_complement(new_id=..., new_type=...)`` returns ``expected``."""
        assert sequence.reverse_complement(new_id=new_id, new_type=new_type) == expected

    @pytest.mark.parametrize(
        "sequence",
        [
            Sequence("AAA", Alphabet.AA),
            Sequence("AAA", Alphabet.GENERIC),
            Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False),
        ],
    )
    def test_reverse_complement_error(self, sequence):
        """``reverse_complement()`` raises ``AlphabetError`` for these sequences."""
        with pytest.raises(AlphabetError):
            sequence.reverse_complement()

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only,expected",
        [
            # data_only=True: expected results carry no parent or type
            (
                Sequence("", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("TT", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("AATT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype_2",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.MINUS,
                        location=SingleInterval(20, 22, Strand.MINUS),
                    ),
                ),
                None,
                True,
                Sequence("AATT", Alphabet.NT_STRICT),
            ),
            # data_only=False, only one side has a location: the expected
            # result keeps the type and parent ID but has no location
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            # data_only=True with locations on the same parent, in either order
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(10, 12, Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(0, 2, Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            # Appending an empty sequence; the expected result has no ID
            (
                Sequence("AA", Alphabet.NT_STRICT, id="seq1", parent="parent"),
                Sequence("", Alphabet.NT_STRICT, id="seq2", parent="parent"),
                None,
                False,
                Sequence("AA", Alphabet.NT_STRICT, parent="parent"),
            ),
            # data_only=False with two locations on the same parent: the
            # expected parent location is a CompoundInterval of both
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.PLUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.PLUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.PLUS),
                            SingleInterval(8, 11, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
            # Minus strand: the first sequence has the higher coordinates
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.MINUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.MINUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.MINUS),
                            SingleInterval(8, 11, Strand.MINUS),
                        ]),
                    ),
                ),
            ),
            # Fully specified parents; the expected result keeps the types but
            # has no ID because new_id is None
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 8, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "GCG",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(15, 18, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "ACTGCG",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(5, 8, Strand.PLUS),
                            SingleInterval(15, 18, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
        ],
    )
    def test_append(self, seq1, seq2, new_id, data_only, expected):
        """``seq1.append(seq2, new_id, data_only)`` returns ``expected``."""
        assert seq1.append(seq2, new_id, data_only) == expected

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only",
        [
            # Alphabets differ
            (
                Sequence("AA", Alphabet.NT_STRICT),
                Sequence("TT", Alphabet.NT_EXTENDED),
                None,
                True,
            ),
            # Sequence types differ
            (
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_1"),
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_2"),
                None,
                False,
            ),
            # Parent IDs differ
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AA", Alphabet.NT_STRICT, parent="parent2"),
                None,
                False,
            ),
            # Plus strand; second location (5-7) lies before the first (10-12)
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.PLUS),
                ),
                None,
                False,
            ),
            # Minus strand; second location (5-7) lies after the first (2-4)
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(2, 4, Strand.MINUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.MINUS),
                ),
                None,
                False,
            ),
            # Locations overlap (10-12 and 11-13)
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(11, 13, Strand.PLUS),
                ),
                None,
                False,
            ),
            # Both locations are unstranded
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.UNSTRANDED),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(15, 17, Strand.UNSTRANDED),
                ),
                None,
                False,
            ),
            # Parents differ only in id
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent1",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent2",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in sequence_type
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in strand
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in their own sequence data
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAAT", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in their grandparent
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent1",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent2",
                    ),
                ),
                None,
                False,
            ),
        ],
    )
    def test_append_error(self, seq1, seq2, new_id, data_only):
        """``seq1.append(seq2, new_id, data_only)`` raises ``ValueError`` for
        each of these pairs."""
        with pytest.raises(ValueError):
            seq1.append(seq2, new_id, data_only)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            # include_self=True and the sequence itself has the requested type:
            # the expected result is a Parent wrapping the sequence
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(sequence=Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                )),
            ),
            # include_self=False: the expected result is the direct parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            # Parent and grandparent both have the type; the parent is expected
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, sequence, sequence_type, include_self, expected):
        """``first_ancestor_of_type`` returns the ``Parent`` given in ``expected``."""
        assert sequence.first_ancestor_of_type(
            sequence_type, include_self=include_self) == expected

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self",
        [
            # No type and no parent
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
            ),
            # Parent given by ID only, with no type
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
            ),
            # Requested type matches neither the sequence nor any ancestor
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent", sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, sequence, sequence_type, include_self):
        """``first_ancestor_of_type`` raises ``NoSuchAncestorException`` when
        nothing has the requested type."""
        with pytest.raises(NoSuchAncestorException):
            sequence.first_ancestor_of_type(sequence_type, include_self=include_self)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            # The first three cases reuse the inputs of test_first_ancestor_of_type
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            # The last three cases reuse the inputs of
            # test_first_ancestor_of_type_error
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent", sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, sequence, sequence_type, include_self, expected):
        """``has_ancestor_of_type`` returns exactly the bool ``expected``."""
        assert sequence.has_ancestor_of_type(
            sequence_type, include_self=include_self) is expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # 19 characters are expected as lines of 10 and 9
            (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT, id="test"),
             ">test\nATGCATATTT\nGGAAACCAA"),
            # With no ID the expected header is ">None"
            (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT),
             ">None\nATGCATATTT\nGGAAACCAA"),
            # Shorter than one line
            (Sequence("GGAAACCAA", Alphabet.NT_STRICT, id="test"),
             ">test\nGGAAACCAA"),
            # Three lines
            (
                Sequence("ATGCATATTTGGAAACCAAGGAAACCAA", Alphabet.NT_STRICT, id="test"),
                ">test\nATGCATATTT\nGGAAACCAAG\nGAAACCAA",
            ),
            # A parent location does not appear in the expected output
            (
                Sequence(
                    data="AAAAAAA",
                    alphabet=Alphabet.NT_STRICT,
                    id="test",
                    parent=Parent(
                        location=SingleInterval(33, 40, Strand.MINUS)),
                ),
                ">test\nAAAAAAA",
            ),
        ],
    )
    def test_to_fasta(self, sequence, expected):
        """``to_fasta(num_chars=10)`` returns the string ``expected``."""
        s_fa = sequence.to_fasta(num_chars=10)
        assert s_fa == expected

    def test_empty_to_fasta(self):
        """``to_fasta()`` on an empty sequence raises ``EmptySequenceFastaError``."""
        s = Sequence("", Alphabet.NT_STRICT)
        with pytest.raises(EmptySequenceFastaError):
            s.to_fasta()
def _(obj) -> Parent:
    """Return a new ``Parent`` whose ``sequence`` is ``obj``."""
    wrapped = Parent(sequence=obj)
    return wrapped
def test_location(self):
    """A ``Parent`` built with a location exposes an equal ``location``."""
    parent = Parent(location=SingleInterval(0, 1, Strand.PLUS))
    # Built separately so the comparison is by equality, not identity.
    expected = SingleInterval(0, 1, Strand.PLUS)
    assert parent.location == expected
def test_sequence(self):
    """A ``Parent`` built with a sequence exposes an equal ``sequence``."""
    parent = Parent(sequence=Sequence("A", Alphabet.NT_STRICT))
    # Built separately so the comparison is by equality, not identity.
    expected = Sequence("A", Alphabet.NT_STRICT)
    assert parent.sequence == expected
def test_require_parent_has_parent(self):
    """``require_parent_has_parent`` rejects a parent with no parent and
    accepts one that has a parent."""
    validate = ObjectValidation.require_parent_has_parent

    # No grandparent: validation must raise.
    with pytest.raises(NullParentException):
        validate(Parent(id="parent"))

    # Grandparent present: the call must complete without raising.
    validate(Parent(id="parent", parent="grandparent"))