示例#1
0
def test_avoid_repeated_small_kmers():
    """Check that resolution clears an AvoidPattern(RepeatedKmerPattern).

    The hard-coded sequence is expected to violate the constraint at first
    and to satisfy it after ``resolve_constraints()``.
    """
    repeat_rich_sequence = (
        "AGAAGAAGAAGAAGAAGATTTTTTTTTTTTTGGAGGAGGAGGACCCCCCCCCCCCGAGG"
    )
    no_repeated_kmers = AvoidPattern(RepeatedKmerPattern(3, 3))
    problem = DnaOptimizationProblem(
        sequence=repeat_rich_sequence,
        constraints=[no_repeated_kmers],
    )

    # Before solving: the constraint must be reported as broken.
    assert not problem.all_constraints_pass()

    # After solving: the constraint must hold.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidStopCodons():
    """Check that AvoidStopCodons is violated by "ATTGCCATCTAA", then fixed."""
    numpy.random.seed(123)  # seed numpy's global RNG before solving

    stop_free = AvoidStopCodons()
    problem = DnaOptimizationProblem(
        sequence="ATTGCCATCTAA",
        constraints=[stop_free],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidNonUniqueSegments_from_polyAs():
    """Check AvoidNonUniqueSegments(3) on positions (10, 30) of a poly-A.

    A 40-nucleotide run of "A" must fail the constraint initially and pass
    once the problem has been resolved.
    """
    poly_a = 40 * "A"
    unique_segments = AvoidNonUniqueSegments(3, location=(10, 30))
    problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[unique_segments],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#4
0
def test_avoid_pattern_basics():
    """Check that BsaI sites can be removed from a 10000-nt random sequence."""
    numpy.random.seed(123)

    # Sequence first, constraint second: same evaluation order as a single
    # keyword-argument call.
    random_sequence = random_dna_sequence(10000, seed=123)
    no_bsai = AvoidPattern(enzyme="BsaI")
    problem = DnaOptimizationProblem(
        sequence=random_sequence,
        constraints=[no_bsai],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#5
0
def test_AvoidNonuniqueSegments_as_constraint():
    """Check AvoidNonuniqueSegments(8) as a hard constraint.

    A seeded 1000-nt random sequence is expected to break the constraint
    before resolution and to satisfy it afterwards.
    """
    numpy.random.seed(123)
    sequence = random_dna_sequence(1000, seed=123)

    uniqueness = AvoidNonuniqueSegments(8)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[uniqueness],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#6
0
def test_avoid_pattern_overlapping_locations():
    """Check AvoidPattern("NAN") on a sequence alternating "A" and "G".

    After resolution, no "A" may remain anywhere except possibly at the
    first or last position of the sequence.
    """
    numpy.random.seed(123)

    alternating_ag = "AGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG"
    problem = DnaOptimizationProblem(
        sequence=alternating_ag,
        constraints=[AvoidPattern("NAN")],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Ignore both extremities when looking for leftover "A"s.
    inner_part = problem.sequence[1:-1]
    assert "A" not in inner_part
示例#7
0
def test_UniquifyAllKmers_as_constraint():
    """Check UniquifyAllKmers(8) as a hard constraint on a random sequence."""
    numpy.random.seed(123)
    sequence = random_dna_sequence(1000, seed=123)

    unique_kmers = UniquifyAllKmers(8)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[unique_kmers],
        logger=None,
    )

    # Violated on the raw sequence, satisfied after solving.
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#8
0
def test_UniquifyAllKmers_from_polyAs():
    """Check UniquifyAllKmers(3) on positions (10, 30) of a 40-nt poly-A."""
    poly_a = 40 * "A"
    unique_kmers = UniquifyAllKmers(3, location=(10, 30))
    problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[unique_kmers],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceTranlation():
    """Check that "AAA" can be avoided while EnforceTranslation() holds.

    The input is the reverse translation of a seeded 50-residue random
    protein sequence.
    """
    numpy.random.seed(1234)

    protein = random_protein_sequence(50, seed=123)
    sequence = reverse_translate(protein)

    specifications = [AvoidPattern("AAA"), EnforceTranslation()]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=specifications,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#10
0
def test_UniquifyAllKmers_from_polyAs_uncached():
    """Run the poly-A UniquifyAllKmers scenario with ``use_cache`` disabled.

    Per the original author's note, disabling the cache makes the
    constraint go through another function, ``get_kmer_extractor``.
    """
    uncached_constraint = UniquifyAllKmers(3, location=(10, 30))
    uncached_constraint.use_cache = False

    poly_a = 40 * "A"
    problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[uncached_constraint],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#11
0
def test_AvoidStopCodons():
    """Check that AvoidStopCodons leaves no "*" in the translation.

    The input is built from six codons; after resolution the constraint
    must pass and ``translate`` of the result must not contain "*".
    """
    numpy.random.seed(123)

    codons = ["ATT", "TAG", "GCC", "TGA", "ATC", "TAA"]
    problem = DnaOptimizationProblem(
        sequence="".join(codons),
        constraints=[AvoidStopCodons()],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    translated = translate(problem.sequence)
    assert "*" not in translated
示例#12
0
def test_basics():
    """Check EnforceTerminalGCContent together with a BsaI AvoidPattern.

    The 10000-nt random sequence is drawn with the nucleotide
    probabilities given in ``probas``.
    """
    numpy.random.seed(123)

    probas = {'A': 0.2, 'T': 0.2, 'G': 0.3, 'C': 0.3}
    sequence = random_dna_sequence(10000, probas=probas, seed=123)

    no_bsai = AvoidPattern(enzyme="BsaI")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[no_bsai, terminal_gc],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#13
0
def test_codon_optimize_as_hard_constraint():
    """Check CodonOptimize used as a constraint on Location(1000, 1300).

    EnforceTranslation is applied to the same region of a seeded 2000-nt
    random sequence.
    """
    numpy.random.seed(123)
    sequence = random_dna_sequence(2000, seed=123)

    # Each specification receives its own Location instance.
    keep_translation = EnforceTranslation(location=Location(1000, 1300))
    optimize_codons = CodonOptimize(
        location=Location(1000, 1300), species='e_coli'
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[keep_translation, optimize_codons],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidPattern_with_regular_expression():
    """Check AvoidPattern given the regular expression r"GGT(.*)GAT".

    EnforceTranslation() is applied at the same time, over the whole
    sequence.
    """
    sequence = (
        "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTG"
        "GTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGCGCGGC"
        "GAGGGCGAGGGCGATGCCACCAACGGCAAGCTGACCCTGAAGTTCATC"
    )
    specifications = [EnforceTranslation(), AvoidPattern(r"GGT(.*)GAT")]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=specifications,
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_blast_matches():
    """Check AvoidBlastMatches against an explicit list of sequences.

    The constraint must report 10 locations before resolution and pass
    after it.
    """
    # NOTE(review): `sequence` is not defined inside this function; it is
    # presumably a module-level variable of the test file -- confirm.
    avoided_seqs = [
        "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT",
        "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT",
    ]
    constraint = AvoidBlastMatches(sequences=avoided_seqs, min_align_length=8)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[constraint],
    )
    assert not problem.all_constraints_pass()

    # Evaluate the constraint directly to count the offending locations.
    evaluation = constraint.evaluate(problem)
    assert len(evaluation.locations) == 10

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_EnforceTranlationReversed():
    """Check EnforceTranslation with a location whose third item is -1.

    The problem sequence is the reverse complement of a reverse-translated
    random protein; "AGC" must be avoided at the same time.
    """
    numpy.random.seed(1234)

    protein = random_protein_sequence(50, seed=123)
    sequence = reverse_translate(protein)
    rev_sequence = reverse_complement(sequence)

    no_agc = AvoidPattern("AGC")
    keep_translation = EnforceTranslation(location=(0, len(sequence), -1))
    problem = DnaOptimizationProblem(
        sequence=rev_sequence,
        constraints=[no_agc, keep_translation],
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#17
0
def test_EnforceGCContents():
    """Check a windowed EnforceGCContent constraint with a BsaI AvoidPattern.

    An EnforceGCContent(target=0.4) objective is also attached to the
    problem, but only the constraints are asserted on.
    """
    numpy.random.seed(123)
    sequence = random_dna_sequence(10000, seed=123)

    hard_constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ]
    soft_objectives = [EnforceGCContent(target=0.4)]
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=hard_constraints,
        objectives=soft_objectives,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#18
0
def test_basics():
    """Check EnforceTerminalGCContent with AvoidPattern("BsaI_site").

    The 10000-nt random sequence is drawn with the nucleotide
    probabilities given in ``probas``; logging is disabled.
    """
    numpy.random.seed(123)

    probas = {"A": 0.2, "T": 0.2, "G": 0.3, "C": 0.3}
    sequence = random_dna_sequence(10000, probas=probas, seed=123)

    no_bsai_site = AvoidPattern("BsaI_site")
    terminal_gc = EnforceTerminalGCContent(mini=0.2, maxi=0.4, window_size=50)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[no_bsai_site, terminal_gc],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_phage_blast_matches():
    """Check AvoidBlastMatches against a BLAST database path.

    The path is obtained from ``GenomeCollection.get_taxid_blastdb_path``
    for taxonomic ID "697289" and db_type "nucl".
    """
    PHAGE_TAXID = "697289"
    collection = GenomeCollection()
    blastdb = collection.get_taxid_blastdb_path(PHAGE_TAXID, db_type="nucl")

    short_sequence = random_dna_sequence(30, seed=123)
    no_phage_homology = AvoidBlastMatches(
        blast_db=blastdb,
        min_align_length=10,
        word_size=7,
    )
    problem = DnaOptimizationProblem(
        sequence=short_sequence,
        constraints=[no_phage_homology],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#20
0
def test_AvoidChanges_with_max_edits():
    """Check AvoidChanges(max_edits=2) combined with pattern constraints.

    "ATATATATATA" must end up free of "ATATA" while satisfying the two
    EnforcePatternOccurence constraints ("A" with occurences=6, "T" with
    occurences=4) declared over location (0, 11, 1).
    """
    numpy.random.seed(1)

    whole_sequence = (0, 11, 1)
    specifications = [
        AvoidChanges(max_edits=2),
        AvoidPattern("ATATA"),
        EnforcePatternOccurence("A", occurences=6, location=whole_sequence),
        EnforcePatternOccurence("T", occurences=4, location=whole_sequence),
    ]
    problem = DnaOptimizationProblem(
        sequence="ATATATATATA",
        constraints=specifications,
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#21
0
def test_EnforceRegionsCompatibility():
    """Check EnforceRegionsCompatibility on four 4-nt regions.

    The compatibility condition requires ``sequences_differences`` between
    any two of the regions to be at least 2. An EnforceGCContent window
    constraint is resolved at the same time. Both constraints are expected
    to fail on the initial random sequence and to pass after resolution.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True when the two regions satisfy the "2bp difference"."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    # Every single constraint must be broken before resolution.
    assert not any(e.passes for e in problem.constraints_evaluations())
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Independent re-check of the pairwise condition on the final sequence.
    # all() is required here: asserting the bare list would always succeed,
    # because a non-empty list is truthy whatever booleans it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
示例#22
0
    def verify_constraints(self, sequence):
        """Tell whether `sequence` satisfies all `self.sequence_constraints`.

        Entries of `self.sequence_constraints` that are DNA Chisel
        `Specification` instances are collected once, stored on
        `self.dnachisel_constraints`, and checked together through a
        `DnaOptimizationProblem`. All other entries are called as
        `constraint(sequence)`.

        Raises ImportError if DNA Chisel constraints are present while
        `DNACHISEL_AVAILABLE` is false.
        """
        # Collect the DNA Chisel specifications on first use only.
        if not hasattr(self, "dnachisel_constraints"):
            self.dnachisel_constraints = [
                item
                for item in self.sequence_constraints
                if isinstance(item, Specification)
            ]

        # Without DNA Chisel constraints, every entry is a plain callable.
        if self.dnachisel_constraints == []:
            return all(
                check(sequence) for check in self.sequence_constraints
            )

        if not DNACHISEL_AVAILABLE:
            raise ImportError(
                "Spotted DNA Chisel constraints, while "
                "DNA Chisel is not installed."
            )

        # An empty mutation space is provided so that it will not be
        # recomputed (which would take time and is useless here).
        problem = DnaOptimizationProblem(
            sequence, self.dnachisel_constraints, mutation_space=[]
        )
        checks = [
            item
            for item in self.sequence_constraints
            if not isinstance(item, Specification)
        ]
        # All DNA Chisel constraints are checked last, in one single call.
        checks.append(lambda seq: problem.all_constraints_pass())
        return all(check(sequence) for check in checks)
示例#23
0
def test_avoid_hairpins_on_extremities():
    """Check the locations AvoidHairpins reports near the sequence ends.

    See https://github.com/Edinburgh-Genome-Foundry/DnaChisel/issues/37
    """
    hairpin_free = AvoidHairpins(stem_size=3, hairpin_window=8)
    problem = DnaOptimizationProblem(
        sequence="attcaatgggggggggggggggggggggggggtagccta",
        constraints=[hairpin_free],
    )

    # Only one constraint is declared, so there is only one evaluation.
    all_evaluations = problem.constraints_evaluations()
    evaluation = all_evaluations.evaluations[0]
    assert str(evaluation.locations) == "[0-6, 32-39]"

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#24
0
def test_random_compatible_dna_sequence():
    """Check that random_compatible_dna_sequence honours its constraints.

    The generated 1000-nt sequence must contain no "ATC" and must pass
    both constraints when wrapped in a DnaOptimizationProblem.
    """
    gc_window = EnforceGCContent(mini=0.4, maxi=0.6, window=50)
    no_atc = AvoidPattern('ATC')
    constraints = [gc_window, no_atc]

    seq = random_compatible_dna_sequence(1000, constraints=constraints)
    problem = DnaOptimizationProblem(sequence=seq, constraints=constraints)

    assert "ATC" not in seq
    assert problem.all_constraints_pass()
示例#25
0
def test_avoid_hairpin_basics():
    """Check AvoidHairpins on a sequence seeded with reverse complements.

    Ten random 30-nt segments are each embedded together with their
    reverse complement, separated and flanked by random 50-nt spacers.
    """
    numpy.random.seed(123)
    segments = [random_dna_sequence(30) for _ in range(10)]

    # Spacers are drawn in the order in which they appear in the final
    # sequence, so the stream of random draws stays deterministic.
    pieces = []
    for segment in segments:
        pieces.extend([
            random_dna_sequence(50),
            segment,
            random_dna_sequence(50),
            reverse_complement(segment),
            random_dna_sequence(50),
        ])
    full_sequence = "".join(pieces)

    problem = DnaOptimizationProblem(
        full_sequence,
        constraints=[AvoidHairpins()],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#26
0
def test_AvoidRareCodons_as_constraint():
    """Check AvoidRareCodons(0.11, "e_coli") used as a hard constraint.

    The six-codon sequence must be fixed with exactly 4 edited positions
    (sum of ``sequence_edits_as_array()``) while keeping its translation
    unchanged, as enforced by EnforceTranslation().
    """
    numpy.random.seed(123)

    sequence = "ATG" "TTT" "ATA" "CCA" "CTT" "TAG"
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[EnforceTranslation(),
                     AvoidRareCodons(0.11, "e_coli")],
    )
    # The expected 4 edits imply that the initial sequence is not compliant.
    assert not problem.all_constraints_pass()

    # Without this step the sequence is never modified, so the edit count
    # below could not be 4.
    problem.resolve_constraints()

    assert problem.all_constraints_pass()
    assert problem.sequence_edits_as_array().sum() == 4
    assert translate(problem.sequence) == translate(sequence)
示例#27
0
def test_AvoidPattern_with_jaspar_motifs():
    """Check AvoidPattern fed with MotifPssmPattern objects.

    The patterns are read from JASPAR_CONTENT (file_format="jaspar",
    relative_threshold=0.9). Two locations must be reported before
    resolution, and all constraints must pass after it.
    """
    jaspar_handle = StringIO(JASPAR_CONTENT)
    motif_patterns = MotifPssmPattern.list_from_file(
        jaspar_handle,
        file_format="jaspar",
        relative_threshold=0.9,
    )

    # One AvoidPattern constraint per motif.
    motif_constraints = [AvoidPattern(p) for p in motif_patterns]
    problem = DnaOptimizationProblem(
        sequence="GGGGGGGGGGTGCGTGATTAAAGGGGG",
        constraints=motif_constraints,
    )

    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 2

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#28
0
def test_EnforceSequence():
    """Check EnforceSequence, then EnforcePatternOccurence with strand=-1.

    Part 1: for the symbols "W" and "S", a 15-nt region starting at
    position 50 is constrained with EnforceSequence; after resolution
    every nucleotide of that region must belong to "AT" (for "W") or
    "GC" (for "S").

    Part 2: EnforcePatternOccurence("CTG", occurences=2, strand=-1) must
    fail on the initial sequence and pass once resolved.
    """
    numpy.random.seed(1234)

    n_nucleotides = 15
    start = 50
    end = start + n_nucleotides

    for symbol, nucleotides in [("W", "AT"), ("S", "GC")]:
        specifications = [
            AvoidPattern("ATGC"),
            AvoidPattern("AAA"),
            AvoidPattern("GGG"),
            EnforceSequence(n_nucleotides * symbol, location=(start, end)),
        ]
        problem = DnaOptimizationProblem(
            sequence=25 * "ATGC",
            constraints=specifications,
        )
        problem.max_random_iters = 10000
        problem.resolve_constraints()

        # The enforced region may only contain the allowed nucleotides.
        enforced_region = problem.sequence[start:end]
        assert all(n in nucleotides for n in enforced_region)

    # Test -1 strand:
    seq = "ATG" + "CAG" + "AGCAAGGTGCTGCT"
    reverse_strand_ctg = EnforcePatternOccurence(
        pattern="CTG",  # CAG on strand +1
        occurences=2,
        strand=-1,
        location=Location(start=0, end=50),
    )
    problem = DnaOptimizationProblem(
        sequence=seq,
        constraints=[reverse_strand_ctg],
    )
    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#29
0
def test_AllowPrimer():
    """Check that an AllowPrimer constraint can be resolved on a poly-A.

    The constraint covers location (10, 30) of a 40-nt poly-A, with
    tmin=50, tmax=60, max_homology_length=5 and three primers passed as
    ``avoid_heterodim_with``.
    """
    primers = ["ATTGCGCCAAACT", "TAATCCACCCTAAT", "ATTCACACTTCAA"]
    poly_a = 40 * "A"

    primer_friendly = AllowPrimer(
        tmin=50,
        tmax=60,
        max_homology_length=5,
        location=(10, 30),
        avoid_heterodim_with=primers,
    )
    problem = DnaOptimizationProblem(
        sequence=poly_a,
        constraints=[primer_friendly],
    )

    # Only the resolved state is asserted here.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
示例#30
0
def test_enforce_pattern_basics():
    """Check EnforcePatternOccurence("ANANANANTT") for several seeds.

    Each seed yields a 5000-nt random sequence on which two
    EnforceTranslation regions are declared along with the pattern
    constraint. The constraints must fail before resolution and pass
    after it.
    """
    numpy.random.seed(123)

    # According to the original author, the seeds cover various cases:
    #   2: the problem has no occurrences instead of 1 wanted
    #   3: the pattern has no occurrences instead of 1 wanted
    #   123456: the pattern is over-represented (4 times instead of 1)
    seeds = [2, 3, 123456]

    for seed in seeds:
        sequence = random_dna_sequence(5000, seed=seed)

        first_gene = EnforceTranslation(location=Location(1000, 2500))
        second_gene = EnforceTranslation(location=Location(3000, 4500))
        single_motif = EnforcePatternOccurence(
            "ANANANANTT", location=Location(1100, 2150)
        )
        constraints = [first_gene, second_gene, single_motif]

        problem = DnaOptimizationProblem(
            sequence=sequence,
            constraints=constraints,
            logger=None,
        )
        assert not problem.all_constraints_pass()
        problem.resolve_constraints()
        assert problem.all_constraints_pass()