Пример #1
0
def test_EnforceRegionsCompatibility():
    """Resolve an EnforceRegionsCompatibility constraint next to a GC constraint.

    Four 4-character regions of a seeded random 200-nucleotide sequence must
    pairwise differ (per ``sequences_differences``) by at least 2, while an
    ``EnforceGCContent(mini=0.4, maxi=0.6, window=40)`` constraint is also
    enforced on the sequence.
    """
    numpy.random.seed(123)
    min_differences = 2

    def compatibility_condition(location1, location2, problem):
        # Two regions are compatible when their sequences differ enough.
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= min_differences

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )

    # Both constraints are expected to fail on the initial sequence.
    assert not any(e.passes for e in problem.constraints_evaluations())
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Independently re-check every pair of regions on the final sequence.
    # This must go through all(): asserting the bare list would always pass,
    # because a non-empty list is truthy whatever booleans it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= min_differences
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
Пример #2
0
def test_avoid_hairpins_on_extremities():
    """Check hairpin breaches reported at both ends of a sequence, then resolve.

    See https://github.com/Edinburgh-Genome-Foundry/DnaChisel/issues/37
    """
    hairpin_constraint = AvoidHairpins(stem_size=3, hairpin_window=8)
    problem = DnaOptimizationProblem(
        sequence="attcaatgggggggggggggggggggggggggtagccta",
        constraints=[hairpin_constraint],
    )

    # The first (and only) constraint's evaluation carries the breach
    # locations; compare their string representation to the expected one.
    first_evaluation = problem.constraints_evaluations().evaluations[0]
    reported_locations = str(first_evaluation.locations)
    assert reported_locations == "[0-6, 32-39]"

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Пример #3
0
def test_AvoidPattern_with_jaspar_motifs():
    """Avoid every motif pattern loaded from JASPAR-formatted content.

    The patterns are read from ``JASPAR_CONTENT`` through an in-memory
    file handle; the initial sequence is expected to yield 2 breach
    locations, all of which must be gone after resolution.
    """
    jaspar_handle = StringIO(JASPAR_CONTENT)
    motif_patterns = MotifPssmPattern.list_from_file(
        jaspar_handle, file_format="jaspar", relative_threshold=0.9
    )
    avoid_constraints = [AvoidPattern(pattern) for pattern in motif_patterns]
    problem = DnaOptimizationProblem(
        sequence="GGGGGGGGGGTGCGTGATTAAAGGGGG",
        constraints=avoid_constraints,
    )

    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 2

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Пример #4
0
def test_avoid_matches_with_phage():
    """Avoid matches of length 10 against a bowtie index, then resolve.

    The index path is obtained from a ``GenomeCollection`` for taxonomic
    ID "697289" (named ``PHAGE_TAXID`` in this test). The seeded random
    30-nucleotide sequence is expected to yield 5 breach locations before
    resolution.
    """
    PHAGE_TAXID = "697289"
    bowtie_index = GenomeCollection().get_taxid_bowtie_index_path(
        PHAGE_TAXID, version="1"
    )
    match_constraint = AvoidMatches(bowtie_index=bowtie_index, match_length=10)
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(30, seed=123),
        constraints=[match_constraint],
        logger=None,
    )

    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 5

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Пример #5
0
 def compute_forbidden_patterns_locations(self, record):
     """Return an array marking positions covered by a forbidden pattern.

     ``arr[i] == 1`` means that position ``i`` is surrounded by a
     user-forbidden pattern. The forbidden patterns are the ones produced
     by ``homopolymer_pattern(c, 5)`` for each of A, T, G, C and by
     ``repeated_kmers(k, n)`` for (k, n) in (4, 2), (3, 3), (2, 4).
     """
     constraints = [
         AvoidPattern(homopolymer_pattern(nucleotide, 5))
         for nucleotide in 'ATGC'
     ]
     constraints.extend(
         AvoidPattern(repeated_kmers(k, n))
         for k, n in [(4, 2), (3, 3), (2, 4)]
     )
     problem = DnaOptimizationProblem(sequence=record,
                                      constraints=constraints)

     # Collect the (start, end) span of every feature reported by a failing
     # evaluation; features of passing evaluations are discarded.
     breach_spans = []
     for evaluation in problem.constraints_evaluations():
         for feature in evaluation.locations_to_biopython_features():
             if evaluation.passes:
                 continue
             location = feature.location
             breach_spans.append((location.start, location.end))

     # Merge overlapping spans before converting them to a per-position array.
     merged_spans = group_overlapping_segments(breach_spans)
     return segments_to_array(merged_spans, len(record))