Exemplo n.º 1
0
def test_initialization():
    """Check ``Dseq`` construction: repetition, equality, invalid arguments
    and the interplay of the ``linear`` / ``circular`` keyword arguments."""
    import pytest
    from pydna.dseq import Dseq

    # A single blunt base pair: repetition and comparison behaviour.
    single_bp = Dseq("a", "t", 0)
    assert single_bp * 3 == Dseq("aaa", "ttt", 0)
    assert not single_bp == 123
    assert single_bp * 0 == Dseq("")

    # Only integer repetition is accepted.
    with pytest.raises(TypeError):
        single_bp * 2.3

    assert single_bp.seguid() == "bc1M4j2I4u6VaLpUbAB8Y9kTHBs"

    assert single_bp == Dseq("a", "t", circular=False, linear=True)

    # Argument combinations the constructor is expected to reject.
    rejected_calls = (
        (("a",), {"ovhg": 0}),
        (("ttt", "tt"), {}),
        (("ttt", "aa"), {}),
    )
    for positional, keywords in rejected_calls:
        with pytest.raises(ValueError):
            Dseq(*positional, **keywords)

    # Topology flags of a sequence built with default arguments.
    default_seq = Dseq("gata")

    assert default_seq.linear == True
    assert default_seq.circular == False

    open_gt = Dseq("gt")
    closed_gt = open_gt.looped()

    assert open_gt.linear
    assert not open_gt.circular
    assert closed_gt.circular
    assert not closed_gt.linear

    # (linear, circular, expected object) for every combination of the two
    # keyword arguments over None / False / True.
    topology_cases = (
        (None, None, open_gt),
        (None, False, open_gt),
        (None, True, closed_gt),
        (False, None, closed_gt),
        (False, False, open_gt),
        (False, True, closed_gt),
        (True, None, open_gt),
        (True, False, open_gt),
        (True, True, open_gt),
    )
    for linear_arg, circular_arg, expected in topology_cases:
        assert Dseq("gt", linear=linear_arg, circular=circular_arg) == expected

    # ``from_string`` must agree with the regular constructor.
    assert Dseq.from_string("A") == Dseq("A") == Dseq("A", linear=True)

    circular_from_string = Dseq.from_string("A", linear=False, circular=True)
    assert circular_from_string == Dseq("A", circular=True) == Dseq("A", linear=False)
Exemplo n.º 2
0
def makeDseqFromDF(part, partslist, col="part"):
    """Look up ``part`` in ``partslist`` and convert it into a pydna ``Dseq``.

    The first row whose ``col`` value equals ``part`` is used. Its
    ``sequence`` (lower-cased) becomes the top strand and the reverse
    complement of that sequence becomes the bottom strand. Both strands are
    then trimmed according to the ``5pend`` and ``3pend`` columns:

    * ``5pend > 0`` removes that many bases from the start of the top strand;
      ``5pend < 0`` removes ``abs(5pend)`` bases from the end of the bottom
      strand.
    * ``3pend < 0`` removes ``abs(3pend)`` bases from the end of the top
      strand; ``3pend > 0`` removes that many bases from the start of the
      bottom strand.

    The ``5pend`` value is also passed to ``Dseq`` as ``ovhg``.

    Args:
        part: Value to search for in column ``col``.
        partslist: Table with the columns ``col``, ``sequence``, ``circular``,
            ``5pend`` and ``3pend`` (presumably a pandas DataFrame, given the
            ``.iloc`` access -- confirm against callers).
        col: Name of the column that holds the part identifiers.

    Returns:
        The resulting ``Dseq``; it is looped when the row's ``circular``
        value is truthy.

    Raises:
        IndexError: Presumably raised by ``.iloc[0]`` when no row matches
            ``part`` (pandas behaviour; not handled here).
    """
    # Filter once and reuse the matching rows for every column lookup.
    matches = partslist[partslist[col] == part]
    top = matches.sequence.iloc[0].lower()
    is_circular = matches.circular.iloc[0]
    p5pover = int(matches["5pend"].iloc[0])
    p3pover = int(matches["3pend"].iloc[0])

    # The bottom strand is derived from the untrimmed top strand.
    bottom = str(Dseq(top).rc()).lower()

    if p5pover > 0:
        top = top[p5pover:]
    elif p5pover < 0:
        bottom = bottom[:p5pover]

    if p3pover < 0:
        top = top[:p3pover]
    elif p3pover > 0:
        # Trim by the 3' value here; using the 5' value would leave the
        # bottom strand untrimmed (5pend == 0) or cut it down to its tail
        # (5pend < 0).
        bottom = bottom[p3pover:]

    # This defines a linear double-stranded sequence.
    pDseq = Dseq(top, bottom, ovhg=p5pover)
    if is_circular:
        # Make the sequence circular when the parts table says so.
        pDseq = pDseq.looped()
    return pDseq
Exemplo n.º 3
0
def test_dseq():
    """Broad regression test for ``Dseq``.

    Covers addition, slicing, end reporting, ``fill_in`` / ``t4`` / ``mung``,
    ``repr`` output for staggered and long sequences, ``seguid`` values,
    ``find``, molecular weight, and restriction enzyme cutting.
    """
    import textwrap

    import pytest
    from Bio.Restriction import BamHI, BglII, RestrictionBatch
    from pydna.dseq import Dseq

    # --- addition -----------------------------------------------------------
    obj1 = Dseq("a", "t", circular=True)
    obj2 = Dseq("a", "t")

    # A circular sequence cannot take part in an addition, on either side.
    with pytest.raises(TypeError):
        obj1 + obj2

    with pytest.raises(TypeError):
        obj2 + obj1

    with pytest.raises(TypeError):
        obj1 + ""

    # Adding a plain str to a linear sequence fails with AttributeError.
    with pytest.raises(AttributeError):
        obj2 + ""

    obj1 = Dseq("at", "t")
    obj2 = Dseq("a", "t")

    # These two linear fragments are expected not to join.
    with pytest.raises(TypeError):
        obj1 + obj2

    # --- slicing ------------------------------------------------------------
    obj = Dseq("aaa", "ttt", circular=True)
    assert obj[1:2] == Dseq("a", "t", 0)

    assert obj[:] == Dseq("aaa", "ttt", circular=False)

    obj = Dseq("atg", "cat", 0, circular=False)

    assert obj[1:2]._data == "atg"[1:2]

    assert obj[2:1]._data == "atg"[2:1]

    assert obj.reverse_complement() == obj.rc() == Dseq("cat", "atg", 0)

    obj = Dseq("atg", "cat", circular=True)

    assert obj.looped() == obj

    assert obj[:] == Dseq("atg", "cat", 0, circular=False)

    assert obj[1:2]._data == "atg"[1:2]

    # On a circular sequence a start > stop slice wraps around the origin.
    assert obj[2:1]._data == "ga"

    # --- single stranded input ---------------------------------------------
    obj = Dseq("G", "", 0)
    assert obj.five_prime_end() == ("5'", "g")
    obj = Dseq("", "C", 0)
    assert obj.five_prime_end() == ("3'", "c")

    # --- staggered ends: _data, mung, repr, fill_in -------------------------
    obj = Dseq("ccGGATCC", "aaggatcc", -2)
    assert obj._data == "ccGGATCCtt"
    assert str(obj.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
    ccGGATCC
      cctaggaa
    """
    ).strip()
    assert repr(obj) == rpr

    assert obj[3] == Dseq("G", "c", 0)

    assert obj.fill_in() == Dseq("ccGGATCCtt", "aaggatccgg", 0)

    # The empty sequence is neutral for addition.
    assert obj + Dseq("") == obj
    assert Dseq("") + obj == obj

    # --- fill_in with a restricted nucleotide set ---------------------------
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.fill_in("gatc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.fill_in("gatc") == obj
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    # --- t4 and mung --------------------------------------------------------
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.t4() == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")

    assert obj.t4("at") == obj
    assert obj.t4("atg") == Dseq("gatcAAAAAAgat", "gatcTTTTTTgat")
    assert obj.t4("atgc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.mung() == Dseq("AAAAAA", "TTTTTT")

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.t4() == obj.t4("at") == Dseq("AAAAAA")
    assert obj.t4("atc") == obj.t4("atg") == obj.t4("atcg") == Dseq("AAAAAA")

    assert Dseq("GGATCC", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCCa", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("aGGATCC", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("aGGATCCa", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("GGATCC", "aGGATCC").t4() == Dseq("GGATCCt", "aGGATCC")
    assert Dseq("GGATCC", "GGATCCa").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCC", "aGGATCCa").t4() == Dseq("GGATCCt", "aGGATCC")

    assert Dseq("GGATCC", "ATCC").t4("g") == Dseq("gg", "", ovhg=0)
    assert Dseq("GGATCC", "GGATCC").t4("gat") == Dseq("ggat", "ggat", ovhg=-2)

    # --- repr / _data / mung for each stagger layout ------------------------
    a2 = Dseq("ccGGATCCaa", "ggatcc", -2)
    assert a2._data == "ccGGATCCaa"
    assert str(a2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
    ccGGATCCaa
      cctagg
    """
    ).strip()
    assert repr(a2) == rpr

    a3 = Dseq("ccGGATCC", "ggatcc", -2)
    assert a3._data == "ccGGATCC"
    assert str(a3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-8)
    ccGGATCC
      cctagg
    """
    ).strip()
    assert repr(a3) == rpr

    b = Dseq("GGATCC", "aaggatcccc", 2)
    assert b._data == "ggGGATCCtt"
    assert str(b.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
      GGATCC
    cccctaggaa
    """
    ).strip()
    assert repr(b) == rpr

    b2 = Dseq("GGATCCaa", "ggatcccc", 2)
    assert b2._data == "ggGGATCCaa"
    assert str(b2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
      GGATCCaa
    cccctagg
    """
    ).strip()
    assert repr(b2) == rpr

    assert b2.seguid() == "hPNrcQ0sluXyfu4XuUh1trsnygc"

    b3 = Dseq("GGATCC", "ggatcccc", 2)
    assert b3._data == "ggGGATCC"
    assert str(b3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-8)
      GGATCC
    cccctagg
    """
    ).strip()
    assert repr(b3) == rpr

    c = Dseq("GGATCCaaa", "ggatcc", 0)
    assert c._data == "GGATCCaaa"
    assert str(c.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-9)
    GGATCCaaa
    cctagg
    """
    ).strip()
    assert repr(c) == rpr

    d = Dseq("GGATCC", "aaaggatcc", 0)
    assert d._data == "GGATCCttt"
    assert str(d.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-9)
    GGATCC
    cctaggaaa
    """
    ).strip()
    assert repr(d) == rpr

    # --- cutting with a single enzyme and re-joining ------------------------
    obj = Dseq("GGATCCaaa", "ggatcc", 0)

    frag1 = Dseq("G", "gatcc", 0)
    frag2 = Dseq("GATCCaaa", "g", -4)

    assert obj.cut(BamHI) == (frag1, frag2)

    assert frag1 + frag2 == obj

    # seguid of the uncut fragment and of each cut product.
    assert obj.seguid() == "HtK7-_BmOJw0BmtYE8f1yGdHc0c"

    assert frag1.seguid() == "yJkorWG5V2etvSLp6E6QNK-KMlQ"
    assert frag2.seguid() == "Aw3buI-N85OztBZAzeGJvXGlwO8"

    # --- repr and find on longer sequences ----------------------------------
    obj = Dseq("tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta")
    assert (
        repr(obj)
        == "Dseq(-30)\ntagcgtagctgtagtatgtgatctggtcta\natcgcatcgacatcatacactagaccagat"
    )

    obj2 = Dseq("tagcgtagctgtagtatgtgatctggtcta")

    obj3 = obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", 0
    )

    assert obj == obj2 == obj3

    assert obj.find("ggatcc") == -1

    assert obj.find("tgtagta") == 9

    # Construction only: this pair of strands must be accepted without error.
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")

    # Sequences longer than 30 are abbreviated with ".." in repr.
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "CCCttagaccagatcacatactacagctacgcta")

    assert repr(obj) == "Dseq(-34)\ntagc..ctaa   \natcg..gattCCC"

    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaaCCC", "ttagaccagatcacatactacagctacgcta")

    assert repr(obj) == "Dseq(-34)\ntagc..ctaaCCC\natcg..gatt   "

    obj = Dseq("agcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-31)\n agcg..ctaa\natcgc..gatt"

    obj = Dseq("Atagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-32)\nAtagc..ctaa\n atcg..gatt"

    obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtctaa", "tatcgcatcgacatcatacactagaccagatt"[::-1]
    )

    assert repr(obj) == "Dseq(-32)\n tagc..ctaa\ntatcg..gatt"

    assert round(obj.mw(), 1) == 19535.6

    # --- circular construction variants are equivalent ----------------------
    obj1 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
        linear=False,
    )
    obj2 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
    )
    obj3 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", linear=False
    )

    assert obj1 == obj2 == obj3

    assert obj1.find("ggatcc") == -1

    assert obj1.find("tgtagta") == 9

    assert (
        Dseq(
            "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta"
        ).looped()
        == obj1
    )

    # --- enzyme classification helpers --------------------------------------
    obj = Dseq("ggatcc")

    assert BglII in obj.no_cutters()
    assert BamHI not in obj.no_cutters()

    assert BamHI in obj.unique_cutters()

    assert BamHI in obj.once_cutters()

    assert BamHI in (obj + obj).twice_cutters()
    assert BamHI not in obj.twice_cutters()

    assert BamHI in obj.n_cutters(1)
    assert BamHI in obj.cutters()

    # --- cutting with several enzymes ---------------------------------------
    rb = RestrictionBatch((BamHI, BglII))

    # For linear sequences the order of the enzymes does not matter.
    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("ggatccAGATCT")

    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc")

    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    # For circular sequences the two enzyme orders give different results.
    obj = Dseq("ggatccAGATCT", circular=True)

    assert obj.cut(rb) == obj.cut(BamHI, BglII) != obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc", circular=True)

    assert obj.cut(rb) == obj.cut(BglII, BamHI) != obj.cut(BamHI, BglII)