Пример #1
0
def load_user_options(args, location):
	"""Translate parsed command-line options into DnaChisel specifications.

	Parameters
	----------
	args
	  Namespace-like object carrying the option attributes read below
	  (``harmonized``, ``species``, ``avoid_homopolymers``, ...).

	location
	  ``Location`` instance that every location-aware specification created
	  here is restricted to.

	Returns
	-------
	(objectives, constraints)
	  Two lists of specifications, in the order the options are examined.
	"""
	assert isinstance(location, Location)
	constraints = []
	objectives = []

	# Codon optimization and translation enforcement are always active.
	opt_mode = 'harmonized' if args.harmonized else 'best_codon'
	objectives += [
		CodonOptimize(species=args.species, location=location, mode=opt_mode)
	]
	constraints += [
		EnforceTranslation(location=location)
	]

	if args.avoid_homopolymers:
		# One pattern per nucleotide; the option value is the run length.
		constraints += [
			AvoidPattern(
				HomopolymerPattern(nucleotide, args.avoid_homopolymers),
				location=location,
			)
			for nucleotide in "ATGC"
		]

	if args.avoid_hairpins:
		constraints += [AvoidHairpins(location=location)]

	if args.avoid_patterns:
		constraints += [
			AvoidPattern(pattern, location=location)
			for pattern in args.avoid_patterns
		]

	# NOTE! Printing this to a template is broken
	if args.avoid_restriction_sites:
		constraints += [
			AvoidPattern(EnzymeSitePattern(enzy), location=location)
			for enzy in args.avoid_restriction_sites
		]

	if args.constrain_global_GC_content:
		constraints += [
			EnforceGCContent(
				mini=args.global_GC_content_min,
				maxi=args.global_GC_content_max,
				location=location,
			)
		]

	if args.constrain_local_GC_content:
		# The windowed constraint must use its own upper bound. The global
		# bound is kept only as a fallback for namespaces that do not define
		# ``local_GC_content_max``.
		local_maxi = getattr(
			args, "local_GC_content_max", args.global_GC_content_max
		)
		constraints += [
			EnforceGCContent(
				mini=args.local_GC_content_min,
				maxi=local_maxi,
				window=args.local_GC_content_window,
				location=location,
			)
		]

	if args.constrain_terminal_GC_content:
		constraints += [
			EnforceTerminalGCContent(
				mini=args.terminal_GC_content_min,
				maxi=args.terminal_GC_content_max,
				window_size=8,
				location=location,
			)
		]

	if args.constrain_CAI:
		constraints += [
			ConstrainCAI(
				species=args.species,
				minimum=args.constrain_CAI_minimum,
				location=location,
			)
		]

	if args.optimize_dicodon_frequency:
		# NOTE(review): unlike the other specifications, no location is
		# passed here - confirm this is intended.
		objectives += [MaximizeDicodonAdaptiveIndex()]

	if args.kmers:
		objectives += [
			MinimizeKmerScore(
				k=args.kmers,
				boost=args.avoid_kmers_boost,
				location=location,
			)
		]

	if args.avoid_secondary_structure:
		objectives += [
			MinimizeSecondaryStructure(
				max_energy=args.avoid_secondary_structure_max_e,
				location=location,
				boost=args.avoid_secondary_structure_boost,
			)
		]

	if args.avoid_initiator_secondary_structure:
		objectives += [
			MinimizeSecondaryStructure(
				max_energy=args.avoid_initiator_secondary_structure_max_e,
				location=location,
				optimize_initiator=True,
				boost=args.avoid_initiator_secondary_structure_boost,
			)
		]

	return objectives, constraints
Пример #2
0
def test_EnforceGCContents():
    """Constraints fail on the random start sequence and pass once resolved."""
    numpy.random.seed(123)
    start_sequence = random_dna_sequence(10000, seed=123)
    hard_constraints = [
        AvoidPattern(enzyme="BsaI"),
        EnforceGCContent(mini=0.3, maxi=0.7, window=50),
    ]
    gc_target = EnforceGCContent(target=0.4)
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=hard_constraints,
        objectives=[gc_target],
    )

    passes_initially = problem.all_constraints_pass()
    assert not passes_initially
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Пример #3
0
 def optimize(self, codon_table):
     """Re-optimize the generated vaccine codons under GC/translation rules.

     Runs ``optimize_frequent`` first, turns the resulting codon list into a
     strand with ``get_strand``, optimizes that strand with a
     ``DnaOptimizationProblem`` and finally stores the optimized sequence
     back as a list of 3-character codons. Returns ``None``.
     """
     self.optimize_frequent(codon_table)

     # Work on a copy; the stored list is emptied in place before solving.
     frequent_codons = self.__vaccine_codons_gen.copy()
     self.__vaccine_codons_gen.clear()
     starting_strand = self.get_strand(frequent_codons)
     usage_table = pct.get_codons_table(codon_table)

     translation_rule = EnforceTranslation(genetic_table='Standard',
                                           start_codon='ATG')
     gc_rule = EnforceGCContent(mini=0.54, maxi=0.9, window=120)
     codon_goal = CodonOptimize(method="use_best_codon",
                                codon_usage_table=usage_table)
     problem = DnaOptimizationProblem(
         sequence=starting_strand,
         constraints=[translation_rule, gc_rule],
         objectives=[codon_goal],
     )
     problem.resolve_constraints()
     problem.optimize()

     # Regroup the optimized sequence three characters at a time; a trailing
     # group shorter than three characters is not stored.
     self.__vaccine_codons_gen = []
     pending = []
     for position, base in enumerate(problem.sequence, start=1):
         pending.append(base)
         if position % 3 == 0:
             self.__vaccine_codons_gen.append("".join(pending))
             pending = []
     return
Пример #4
0
def test_EnforceRegionsCompatibility():
    """Resolve EnforceRegionsCompatibility together with a GC window.

    Four 4bp regions must be pairwise compatible, where compatibility means
    ``sequences_differences`` between the two regions is at least 2, while
    the whole sequence also respects a windowed GC content constraint.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True when the two regions score >= 2 differences."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )
    # No constraint is expected to pass on the initial random sequence.
    assert not any(e.passes for e in problem.constraints_evaluations())
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
    seq = problem.sequence
    # all(...) is required: asserting a bare non-empty list is always true,
    # so the pairwise check would never be able to fail.
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
Пример #5
0
def test_optimization_2():
    """Optimizing for manufacturability lowers the assembly station's quote.

    The quote price for the first 5500 characters of the test sequence must
    be above 850 before optimization and below 580 afterwards.
    """
    fasta_path = os.path.join(
        "tests", "data", "test_optimization_sequence_2.fa"
    )
    full_sequence = str(load_record(fasta_path).seq)
    target_sequence = full_sequence[:5500]

    # Two suppliers with different pricing, lead times and constraints.
    premium_offer = CommercialDnaOffer(
        name="DeluxeDNA.com",
        sequence_constraints=[SequenceLengthConstraint(max_length=4000)],
        pricing=PerBasepairPricing(0.20),
        lead_time=10,
    )
    budget_offer = CommercialDnaOffer(
        name="CheapDNA.com",
        sequence_constraints=[
            NoPatternConstraint(enzyme="BsaI"),
            EnforceGCContent(0.3, 0.7, window=60),
        ],
        pricing=PerBasepairPricing(0.10),
        lead_time=15,
    )

    # Blocks-to-chunks assembly station fed by both suppliers.
    gibson_method = GibsonAssemblyMethod(
        overhang_selector=FixedSizeSegmentSelector(10),
        min_segment_length=1000,
        max_segment_length=6000,
        duration=8,
        cost=16,
    )
    station = DnaAssemblyStation(
        name="Gibson Blocks Assembly",
        assembly_method=gibson_method,
        supplier=[premium_offer, budget_offer],
        coarse_grain=30,
        fine_grain=False,
        memoize=True,
    )

    initial_quote = station.get_quote(target_sequence)
    assert initial_quote.price > 850

    manufacturability = OptimizeManufacturability(station)
    problem = DnaOptimizationProblem(
        sequence=target_sequence,
        constraints=[EnforceTranslation(location=(0, 4998))],
        objectives=[manufacturability],
    )

    problem.randomization_threshold = 0  # Forces "random search" mode
    problem.max_random_iters = 5
    problem.optimize()

    print("OPTIMIZATION DONE, GENERATING REPORT")

    final_quote = station.get_quote(problem.sequence)
    assert final_quote.price < 580
Пример #6
0
def test_constraints_text_summary():
    """The text summary reports exactly one failing constraint evaluation."""
    gc_rule = EnforceGCContent(mini=0.4, maxi=0.6)
    pattern_rule = AvoidPattern('ATT')
    problem = DnaOptimizationProblem(
        sequence="ATTGCCATATGCGC",
        constraints=[gc_rule, pattern_rule],
    )
    summary = problem.constraints_text_summary()
    assert 'FAILURE: 1 constraints evaluations failed' in summary
Пример #7
0
def test_no_solution_error_random_search():
    """An unsatisfiable problem raises NoSolutionError from random search."""
    frozen_region = AvoidChanges((0, 10))
    gc_rule = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTTTTTTTTTTTTTTTTTTTTTTT",
        constraints=[frozen_region, gc_rule],
    )
    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()
    message = str(raised.value)
    assert 'Random search did not' in message
Пример #8
0
def test_no_solution_error_exhaustive_search():
    """An unsatisfiable short problem raises NoSolutionError from exhaustive search."""
    frozen_region = AvoidChanges((0, 4))
    gc_rule = EnforceGCContent(mini=0.8)
    problem = DnaOptimizationProblem(
        sequence="TTTTTTT",
        constraints=[frozen_region, gc_rule],
    )
    with pytest.raises(NoSolutionError) as raised:
        problem.resolve_constraints()
    message = str(raised.value)
    assert 'Exhaustive search failed' in message
Пример #9
0
def test_EnforceChoice():
    """The site picked by EnforceChoice depends on the GC content constraint.

    Two enzymes are offered: BsmBI (CGTCTC) is GC-rich and EcoRI (GAATTC) is
    GC-poor, so a low GC ceiling should select EcoRI and a high GC floor
    should select BsmBI.
    """
    numpy.random.seed(123)
    spec = EnforceChoice(choices=['BsmBI_site', 'EcoRI_site'], location=(2, 8))

    scenarios = (
        ({"maxi": 0.3}, 'GAATTC'),
        ({"mini": 0.7}, 'CGTCTC'),
    )
    for gc_bounds, expected_site in scenarios:
        problem = DnaOptimizationProblem(
            sequence="AGCCCCCCGT",
            constraints=[spec, EnforceGCContent(**gc_bounds)],
        )
        problem.resolve_constraints()
        assert expected_site in problem.sequence
Пример #10
0
def test_random_compatible_dna_sequence():
    """A generated compatible sequence satisfies the constraints it was built for."""
    gc_rule = EnforceGCContent(mini=0.4, maxi=0.6, window=50)
    pattern_rule = AvoidPattern('ATC')
    constraints = [gc_rule, pattern_rule]

    generated = random_compatible_dna_sequence(1000, constraints=constraints)
    problem = DnaOptimizationProblem(sequence=generated,
                                     constraints=constraints)
    assert "ATC" not in generated
    assert problem.all_constraints_pass()
Пример #11
0
def test_parameters_from_string():
    """string_to_parameters yields (mini, maxi, target, window) per pattern."""
    cases = {
        "35%": (None, None, 0.35, None),
        "35%/20bp": (None, None, 0.35, 20),
        "5-55%": (0.05, 0.55, None, None),
        "5-55%/400bp": (0.05, 0.55, None, 400),
    }
    for pattern, expected in cases.items():
        parsed = EnforceGCContent.string_to_parameters(pattern)
        lower, upper, target, window = parsed
        assert (lower, upper, target, window) == expected
def test_avoid_change_as_objectives_basics():
    """A larger AvoidChanges boost leaves fewer modified nucleotides."""
    numpy.random.seed(123)

    def count_changes(boost):
        """Optimize a fresh problem and return how many bases changed."""
        start_sequence = random_dna_sequence(1000, seed=123)
        gc_window = EnforceGCContent(mini=0.45, maxi=0.55, window=80)
        problem = DnaOptimizationProblem(
            sequence=start_sequence,
            objectives=[
                gc_window.copy_with_changes(locations_span=300),
                AvoidChanges(boost=boost).as_passive_objective(),
            ],
        )
        problem.optimize()
        return sequences_differences(problem.sequence,
                                     problem.sequence_before)

    results = [count_changes(boost) for boost in (0, 0.1, 0.2, 1)]
    assert results[0] > 40
    assert (results[0] > results[1] > results[2] > results[3])
    assert results[-1] == 0
Пример #13
0
# Download a GenBank record from NCBI, constrain every suitable gene, resolve
# the constraints and write the result to a GenBank file.
url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    + "db=nucleotide&id=48994873&rettype=gb&retmode=txt"
)
# Close the HTTP response deterministically instead of leaking the socket.
with request.urlopen(url) as response:
    genbank_data = response.read().decode("utf-8")
genbank_record = load_record(StringIO(genbank_data), file_format="genbank")

print("INITIALIZING THE PROBLEM WITH CONSTRAINTS FOR EACH GENE...")

constraints = []
for feature in genbank_record.features:
    # Only single-part "gene" features are considered.
    if feature.type == "gene" and len(feature.location.parts) == 1:
        location = Location.from_biopython_location(feature.location)
        # Keep genes whose length is a multiple of 3 and above 100.
        if (len(location) % 3 == 0) and len(location) > 100:
            gene_constraints = [
                EnforceTranslation(location=location),
                AvoidPattern("BsmBI_site", location),
                EnforceGCContent(
                    mini=0.40, maxi=0.60, window=150, location=location
                ),
            ]
            constraints.extend(gene_constraints)
problem = DnaOptimizationProblem(genbank_record, constraints)

print("RESOLVING THE CONSTRAINTS...")

problem.logger.ignore_bars_under = 50
problem.resolve_constraints()
problem.to_record("ecoli_genes_optimization.gb")
Пример #14
0
    random_protein_sequence,
    reverse_translate,
    CodonOptimize,
    EnforceTranslation,
    AvoidPattern,
    EnforceGCContent,
)

# Reverse-translate a random protein, then resolve the constraints and
# codon-optimize it, printing the summaries before and after.
protein = random_protein_sequence(1000, seed=123)
sequence = reverse_translate(protein)

hard_constraints = [
    EnforceTranslation(),
    AvoidPattern("BsmBI_site"),
    EnforceGCContent(mini=0.4, maxi=0.6, window=60),
]
codon_objective = CodonOptimize(species="s_cerevisiae")
problem = DnaOptimizationProblem(
    sequence=sequence,
    constraints=hard_constraints,
    objectives=[codon_objective],
)

print("\nBefore optimization:\n")
for summary in (problem.constraints_text_summary,
                problem.objectives_text_summary):
    print(summary())

problem.resolve_constraints(final_check=True)
problem.optimize()

print("\nAfter optimization:\n")
for summary in (problem.constraints_text_summary,
                problem.objectives_text_summary):
    print(summary())
Пример #15
0
    def from_specs(
        n_barcodes=96,
        barcode_length=20,
        spacer="AA",
        forbidden_enzymes=("BsaI", "BsmBI", "BbsI"),
        barcode_tmin=55,
        barcode_tmax=70,
        other_primer_sequences=(),
        heterodim_tmax=5,
        max_homology_length=10,
        include_spacers=True,
        names_template="B_%03d",
    ):
        """Return a BarcodesCollection object with compatible barcodes.

        All barcodes are designed at once as consecutive
        ``barcode + spacer`` units of a single random sequence whose
        constraints are then resolved.

        Parameters
        ----------

        n_barcodes
          Number of barcodes to design.

        barcode_length
          Length of each barcode.

        spacer
          Spacer to place between each barcode during the optimization,
          ideally the same spacer that will be used when adding the barcode
          to a part.

        include_spacers
          Whether the spacers should be part of the final sequence of the
          barcodes (they still won't be considered part of the annealing
          primer and won't be used for melting temperature computations).

        forbidden_enzymes
          Name of enzymes whose sites should not be in the barcodes.

        barcode_tmin, barcode_tmax
          Interval of acceptable values for the melting temperature.

        other_primer_sequences
          External sequences with which the primers should not anneal.
          When empty, no heterodimer check is requested.

        heterodim_tmax
          Max acceptable melting temperature for the annealing of a barcode
          and one of the other_primer_sequences.

        max_homology_length
          Maximal homology between any two barcodes in the sequence.

        names_template
          The template used to name barcode number "i".
        """
        spacer_length = len(spacer)
        unit_length = barcode_length + spacer_length
        seq_len = n_barcodes * unit_length
        units_coordinates = [(i, i + unit_length)
                             for i in range(0, seq_len, unit_length)]
        # An empty collection is passed as None so that the default call
        # keeps requesting no heterodimer check at all.
        avoid_heterodim_with = list(other_primer_sequences) or None
        constraints = [
            AvoidPattern(EnzymeSitePattern(enzyme))
            for enzyme in forbidden_enzymes
        ]
        for start, end in units_coordinates:
            barcode_location = (start, end - spacer_length)
            constraints += [
                AllowPrimer(
                    tmin=barcode_tmin,
                    tmax=barcode_tmax,
                    max_homology_length=max_homology_length,
                    # Forward the documented parameters instead of the
                    # previously hard-coded None / 5.
                    avoid_heterodim_with=avoid_heterodim_with,
                    max_heterodim_tm=heterodim_tmax,
                    location=barcode_location,
                ),
                EnforceSequence(spacer, location=(end - spacer_length, end)),
                EnforceGCContent(mini=0.4,
                                 maxi=0.6,
                                 location=barcode_location),
            ]
        problem = DnaOptimizationProblem(sequence=random_dna_sequence(seq_len),
                                         constraints=constraints)
        problem.logger.ignored_bars.add("location")
        problem.resolve_constraints()

        barcodes = [
            problem.sequence[start:end] for (start, end) in units_coordinates
        ]
        if not include_spacers:
            # Slice by barcode length: b[:-len(spacer)] would return an empty
            # string for every barcode when the spacer is "".
            barcodes = [b[:barcode_length] for b in barcodes]
        names = [(names_template % (i + 1)) for i in range(len(barcodes))]
        return BarcodesCollection(zip(names, barcodes))
Пример #16
0
"""Example of use of the AvoidChanges as an objective to minimize modifications
of a sequence."""

from dnachisel import (DnaOptimizationProblem, random_dna_sequence,
                       AvoidPattern, AvoidChanges, sequences_differences,
                       EnforceGCContent)

# Note: we are not providing a location for AvoidChanges: it applies globally

# For each boost value, optimize the same seeded random sequence and report
# how many nucleotides were modified.
for boost in (0, 0.1, 1, 10.0):
    sequence = random_dna_sequence(1000, seed=123)
    gc_objective = EnforceGCContent(mini=0.45, maxi=0.55, window=80)
    keep_objective = AvoidChanges(boost=boost).as_passive_objective()
    problem = DnaOptimizationProblem(
        sequence=sequence,
        objectives=[gc_objective, keep_objective],
    )

    problem.optimize()
    differences = sequences_differences(
        problem.sequence, problem.sequence_before
    )

    report = "%d nucleotides modified for boost=%.1f" % (differences, boost)
    print(report)
Пример #17
0
		for k2, v2 in v.items():
			if k2 in RSCU_list:
				codon_table_11[k][k2] = RSCU_list[k2]


	print("\nOptimizing codons for input gene list")
	#Read gene fasta sequence and initiate optimizer


	problem = DnaOptimizationProblem(
	sequence=gene,
	constraints=[
		EnforceTranslation(),
		AvoidPattern("BsmBI_site", "BamHI"),
		EnforceTranslation(),
		EnforceGCContent(mini=0.35, maxi=0.65, window=50), #TWIST: 25% and 65% GC
	],
	objectives=[CodonOptimize(codon_usage_table=codon_table_11)],
	)


if taxid and not input_path:
	print("\nOptimizing codons for taxonomic ID: " + taxid)
	#Read gene fasta sequence and initiate optimizer
	if not protein_flag:
		problem = DnaOptimizationProblem(
			sequence=gene,
			constraints=[
				#EnforceSequence(sequence = "ATG", location=(0, 2)),
				AvoidChanges(location=(0, 2)),
				AvoidPattern("BsmBI_site", "BamHI"),
Пример #18
0
    DnaOptimizationProblem,
    random_dna_sequence,
    EnforceGCContent,
    AvoidPattern,
    EnforceGCContent,
)
import numpy

# Seed numpy's random generator so the same sequence is produced on each run.
numpy.random.seed(123)

start_sequence = random_dna_sequence(10000)
hard_constraints = [
    AvoidPattern("BsaI_site"),
    EnforceGCContent(mini=0.3, maxi=0.7, window=50),
]
gc_target = EnforceGCContent(target=0.4)
problem = DnaOptimizationProblem(
    sequence=start_sequence,
    constraints=hard_constraints,
    objectives=[gc_target],
)

print("\n\n=== Status before optimization ===")
for summary in (problem.constraints_text_summary,
                problem.objectives_text_summary):
    print(summary())

# First satisfy the hard constraints, then improve the objectives.
print("Now solving constraints...")
problem.resolve_constraints()
print("Done. Now optimizing objectives...")
problem.max_random_iters = 10000
problem.optimize()

print("\n\n=== Status after optimization ===\n")
Пример #19
0
    def from_specs(
        n_barcodes=384,
        barcode_length=20,
        spacer="",
        forbidden_enzymes=("BsaI",),
        include_spacers=True,
        names_template="B_%03d",
    ):
        """Return a CustomBarcodesCollection object with compatible barcodes.

        All barcodes are designed at once as consecutive
        ``barcode + spacer`` units of a single random sequence whose
        constraints are then resolved.

        **Parameters**

        **n_barcodes**
        > Number of barcodes to design.

        **barcode_length**
        > Length of each barcode.

        **spacer**
        > Spacer to place between each barcode during the optimization,
          ideally the same spacer that will be used when adding the barcode
          to a part.

        **include_spacers**
        > Whether the spacers should be part of the final sequence of the
          barcodes (they still won't be considered part of the annealing
          primer and won't be used for melting temperature computations).

        **forbidden_enzymes**
        > Name of enzymes whose sites should not be in the barcodes.

        **names_template**
        > The template used to name barcode number "i".
        """
        spacer_length = len(spacer)
        unit_length = barcode_length + spacer_length
        seq_len = n_barcodes * unit_length
        units_coordinates = [
            (i, i + unit_length) for i in range(0, seq_len, unit_length)
        ]

        constraints = [
            AvoidPattern(EnzymeSitePattern(enzyme)) for enzyme in forbidden_enzymes
        ]
        constraints += [AvoidPattern(RepeatedKmerPattern(4, 1))]

        for start, end in units_coordinates:
            constraints += [
                # NOTE(review): this location covers only the spacer region
                # (empty with the default spacer) - confirm the barcode
                # region (start, end - len(spacer)) was not intended.
                UniquifyAllKmers(
                    barcode_length,
                    reference=None,
                    location=(end - spacer_length, end),
                ),
                EnforceGCContent(
                    mini=0.3, maxi=0.7, location=(start, end - spacer_length)
                ),
            ]
        problem = DnaOptimizationProblem(
            sequence=random_dna_sequence(seq_len), constraints=constraints
        )
        problem.logger.ignored_bars.add("location")
        problem.resolve_constraints()

        barcodes = [problem.sequence[start:end] for (start, end) in units_coordinates]
        if not include_spacers:
            # Slice by barcode length: with the default empty spacer,
            # b[:-len(spacer)] is b[:0] and would empty every barcode.
            barcodes = [b[:barcode_length] for b in barcodes]
        names = [(names_template % (i + 1)) for i in range(len(barcodes))]
        return CustomBarcodesCollection(zip(names, barcodes))
Пример #20
0
record = load_record(record_file, fmt="genbank")

# (start, end, strand) triple of every CDS feature in the record.
CDS_list = [
    (
        int(feature.location.start),
        int(feature.location.end),
        int(feature.location.strand),
    )
    for feature in record.features
    if feature.type == "CDS"
]

# DEFINE CONSTRAINTS

dna_provider_constraints = [
    AvoidPattern("BsaI_site"),
    AvoidPattern("AarI_site"),
    AvoidPattern("9xA"),
    AvoidPattern("9xT"),
    AvoidPattern(HomopolymerPattern("6xG")),
    AvoidPattern(HomopolymerPattern("6xC")),
    EnforceGCContent(0.4, 0.65),
    EnforceGCContent(0.25, 0.80, window=50),
]

CDS_constraints = []
for (start, end, strand) in CDS_list:
    # The protected region sits before the start on strand 1 and after the
    # end otherwise.
    promoter_region = (
        (start - 30, start - 1) if strand == 1 else (end + 1, end + 30)
    )
    CDS_constraints.extend([
        AvoidChanges(promoter_region),
        EnforceTranslation((start, end, strand)),
    ])

# DEFINE OBJECTIVES