Exemplo n.º 1
0
def generate_charts(ga: pyeasyga.GeneticAlgorithm):
    """Evolve ``ga`` one generation at a time and chart its fitness history.

    The first generation is created, then up to ``ga.generations - 1`` more
    generations are produced, stopping early as soon as the first individual
    of the current generation has a fitness of 0.  Afterwards the elapsed
    wall-clock time is printed and a chart is shown with two series per
    generation: the first individual's fitness ("Max") and the mean fitness
    of the whole population ("Avg").

    Args:
        ga: The genetic algorithm to run.
            NOTE(review): index 0 of ``ga.current_generation`` is presumably
            the best-ranked individual -- confirm against pyeasyga's sorting.
    """
    population_fitness = []
    leader_fitness = []

    def record_generation():
        # Snapshot every individual's fitness, plus the first individual's.
        population_fitness.append(
            [member.fitness for member in ga.current_generation])
        leader_fitness.append(ga.current_generation[0].fitness)

    started_at = time_ns()
    ga.create_first_generation()
    record_generation()

    for _ in range(1, ga.generations):
        # A fitness of exactly 0 ends the run early.
        if ga.current_generation[0].fitness == 0:
            break
        ga.create_next_generation()
        record_generation()

    # time_ns() reports nanoseconds; convert to milliseconds for display.
    elapsed_ms = (time_ns() - started_at) / 1000000
    print(f'\nin {elapsed_ms}ms')

    mean_fitness = [np.average(values) for values in population_fitness]

    _, axes = plt.subplots()
    axes.plot(leader_fitness, label="Max")
    axes.plot(mean_fitness, label="Avg")
    plt.xlabel("Generations")
    plt.ylabel("Value")
    axes.legend(loc='lower right')
    plt.show()
Exemplo n.º 2
0
def generate(target_params,
             insert_aa_seq,
             population_size=100,
             mutation_probability=0.3,
             max_gens_since_improvement=50,
             genetic_code=11,
             verbose=False):
    """Generate a DNA sequence for ``insert_aa_seq`` matching target usage.

    The amino acid sequence is back-translated to an initial DNA sequence,
    then a genetic algorithm (mutation only, crossover disabled) swaps
    synonymous codons to minimise the Jensen-Shannon divergence between the
    sequence's frequency vector and the target frequency vector.

    Args:
        target_params (dict): Target frequencies. Each key is either a value
            of k or the string ``"codons"``; each value is a dict mapping an
            item to its target frequency.
        insert_aa_seq (str): The amino acid sequence to encode. ``"*"`` is
            back-translated to a stop codon; any other symbol missing from
            the codon table is skipped.
        population_size (int, optional): GA population size. Defaults to 100.
        mutation_probability (float, optional): Mutation probability handed
            to the GA. Defaults to 0.3.
        max_gens_since_improvement (int, optional): Stop after this many
            consecutive generations without a better best fitness.
            Defaults to 50.
        genetic_code (int, optional): Key into
            ``Bio.Data.CodonTable.unambiguous_dna_by_id``. Defaults to 11.
        verbose (bool, optional): Print a progress line per generation.
            Defaults to False.

    Returns:
        str: The best DNA sequence found.

    Raises:
        KeyError: If ``genetic_code`` is not a known codon table id.
    """
    # back translate to an initial seq
    back_table = Bio.Data.CodonTable.unambiguous_dna_by_id[
        genetic_code].back_table
    initial_codons = []
    for aa in insert_aa_seq:
        if aa in back_table:
            initial_codons.append(back_table[aa])
        elif aa == "*":
            # the stop codon is stored under the None key
            initial_codons.append(back_table[None])
        # any other symbol has no codon in this table and is skipped
    insert = "".join(initial_codons)

    # create the genetic algorithm instance
    ga = GeneticAlgorithm(dna_to_vector(insert),
                          crossover_probability=0,
                          maximise_fitness=False,
                          population_size=population_size,
                          mutation_probability=mutation_probability)

    # get the target values of k
    k = list(target_params.keys())
    k_mer_sizes = [x for x in k if x != "codons"]
    use_codons = "codons" in k

    def values_by_sorted_key(freqs):
        # fixed ordering so target and candidate vectors line up
        return [freqs[key] for key in sorted(freqs)]

    # generate the target vector from the input dict ("codons" goes last)
    target_parts = [
        values_by_sorted_key(target_params[_k]) for _k in sorted(k_mer_sizes)
    ]
    if use_codons:
        target_parts.append(values_by_sorted_key(target_params["codons"]))
    target = np.concatenate([np.array([])] + target_parts)

    def vector(seq):
        # frequency vector of `seq`, laid out like `target`
        output = k_mer_frequencies(seq,
                                   k_mer_sizes,
                                   include_missing=True,
                                   vector=True)
        if use_codons:
            output = np.concatenate(
                (output,
                 values_by_sorted_key(codon_frequencies(seq, genetic_code))))
        return output

    def fitness(individual, data):
        # `data` belongs to the GA callback signature but is not needed here
        return jensen_shannon_divergence([
            dit.ScalarDistribution(target),
            dit.ScalarDistribution(vector(vector_to_dna(individual)))
        ])

    ga.fitness_function = fitness

    synonymous_codons = _synonymous_codons(genetic_codes[genetic_code])

    def mutate(individual):
        # Each codon spans 6 vector entries.  Visit codons in random order
        # and take the first one with a synonymous alternative; this picks
        # uniformly among mutable codons but, unlike an unbounded retry
        # loop, terminates when no codon can be changed.
        for codon_number in np.random.permutation(len(individual) // 6):
            codon_idx = int(codon_number) * 6
            codon = vector_to_dna(individual[codon_idx:codon_idx + 6])
            if len(synonymous_codons[codon]) != 1:
                break
        else:
            # nothing can be mutated without changing the protein
            return individual

        # choose a new one at random for the AA
        new_codon = dna_to_vector(
            np.random.choice(
                [x for x in synonymous_codons[codon] if x != codon]))

        # replace it in the individual (in place)
        individual[codon_idx:codon_idx + 6] = new_codon

        return individual

    ga.mutate_function = mutate

    def create_individual(seed_data):
        # randomise every codon that has a synonymous alternative
        individual = vector_to_dna(seed_data)
        new_codons = []
        for i in range(0, len(individual), 3):
            codon = individual[i:i + 3]
            if len(synonymous_codons[codon]) == 1:
                new_codons.append(codon)
                continue
            new_codons.append(
                np.random.choice(
                    [x for x in synonymous_codons[codon] if x != codon]))

        return dna_to_vector("".join(new_codons))

    ga.create_individual = create_individual

    # set up for GA run
    ga.create_first_generation()
    gens_since_improvement = 0
    best_indv_fitness = ga.best_individual()[0]
    counter = 1

    # run the GA until the best fitness stops improving
    while gens_since_improvement < max_gens_since_improvement:
        ga.create_next_generation()
        current_fitness = ga.best_individual()[0]
        if current_fitness < best_indv_fitness:
            best_indv_fitness = current_fitness
            gens_since_improvement = 0
        else:
            gens_since_improvement += 1
        if verbose:
            print(
                "Gen: %s\tSince Improvement: %s/%s\tFitness: %s".expandtabs(15)
                % (counter, gens_since_improvement, max_gens_since_improvement,
                   current_fitness),
                end="\r")
        counter += 1

    if verbose:
        # move past the "\r"-terminated progress line
        print()

    return vector_to_dna(ga.best_individual()[1])
Exemplo n.º 3
0
def generate(target_params,
             insert_aa_seq,
             population_size=100,
             mutation_probability=0.3,
             crossover_probability=0.8,
             max_gens_since_improvement=50,
             genetic_code=11,
             verbose=False):
    """Generate a sequence matching :math:`k`-mer usage.

    The amino acid sequence is back-translated to an initial DNA sequence,
    then a genetic algorithm swaps synonymous codons (mutation) and
    recombines individuals at codon boundaries (crossover) to minimise the
    Jensen-Shannon divergence between the sequence's frequency vector and
    the target frequency vector.  Pressing Ctrl-C stops the optimisation
    early and returns the best sequence found so far.

    Args:
        target_params (dict): The parameters to optimize towards. Should be
            of the format {:math:`k_n`: {:math:`k_{n1}`: 0.2,
            :math:`k_{n2}`: 0.3,...}...}. The special key ``"codons"`` maps
            to target codon frequencies.
        insert_aa_seq (str): The amino acid sequence for the optimized
            sequence. ``"*"`` is back-translated to a stop codon; any other
            symbol missing from the codon table is skipped.
        population_size (int, optional): The size of the population for the
            genetic algorithm. Defaults to 100.
        mutation_probability (float, optional): The likelihood of changing
            each member of each generation. Defaults to 0.3.
        crossover_probability (float, optional): The likelihood of each
            member of the population undergoing crossover. Defaults to 0.8.
        max_gens_since_improvement (int, optional): The number of
            generations of no improvement after which to stop optimization.
            Defaults to 50.
        genetic_code (int, optional): The genetic code to use, as a key into
            ``Bio.Data.CodonTable.unambiguous_dna_by_id``. Defaults to 11
            (NCBI's bacterial, archaeal and plant plastid table; the
            standard code is table 1).
        verbose (bool, optional): Whether to print the generation number,
            generations since improvement, and fitness. Defaults to False.

    Returns:
        str: The generated sequence.

    Raises:
        KeyError: If ``genetic_code`` is not a known codon table id.
        AssertionError: If the result does not translate to the same protein
            as the initial back-translation (internal sanity check).
    """
    # back translate to an initial seq
    back_table = Bio.Data.CodonTable.unambiguous_dna_by_id[
        genetic_code].back_table
    initial_codons = []
    for aa in insert_aa_seq:
        if aa in back_table:
            initial_codons.append(back_table[aa])
        elif aa == "*":
            # the stop codon is stored under the None key
            initial_codons.append(back_table[None])
        # any other symbol has no codon in this table and is skipped
    insert = "".join(initial_codons)

    # create the genetic algorithm instance
    ga = GeneticAlgorithm(dna_to_vector(insert),
                          crossover_probability=crossover_probability,
                          maximise_fitness=False,
                          population_size=population_size,
                          mutation_probability=mutation_probability)

    # get the target values of k
    k = list(target_params.keys())
    k_mer_sizes = [x for x in k if x != "codons"]
    use_codons = "codons" in k

    def values_by_sorted_key(freqs):
        # fixed ordering so target and candidate vectors line up
        return [freqs[key] for key in sorted(freqs)]

    # generate the target vector from the input dict ("codons" goes last)
    target_parts = [
        values_by_sorted_key(target_params[_k]) for _k in sorted(k_mer_sizes)
    ]
    if use_codons:
        target_parts.append(values_by_sorted_key(target_params["codons"]))
    target = np.concatenate([np.array([])] + target_parts)

    def vector(seq):
        # frequency vector of `seq`, laid out like `target`
        output = k_mer_frequencies(seq,
                                   k_mer_sizes,
                                   include_missing=True,
                                   vector=True)
        if use_codons:
            output = np.concatenate(
                (output,
                 values_by_sorted_key(codon_frequencies(seq, genetic_code))))
        return output

    def fitness(individual, data):
        # `data` belongs to the GA callback signature but is not needed here
        return jensen_shannon_divergence([
            dit.ScalarDistribution(target),
            dit.ScalarDistribution(vector(vector_to_dna(individual)))
        ])

    ga.fitness_function = fitness

    synonymous_codons = _synonymous_codons(genetic_codes[genetic_code])

    def mutate(individual):
        # Each codon spans 6 vector entries.  Visit codons in random order
        # and take the first one with a synonymous alternative; this picks
        # uniformly among mutable codons but, unlike an unbounded retry
        # loop, terminates when no codon can be changed.
        for codon_number in np.random.permutation(len(individual) // 6):
            codon_idx = int(codon_number) * 6
            codon = vector_to_dna(individual[codon_idx:codon_idx + 6])
            if len(synonymous_codons[codon]) != 1:
                break
        else:
            # nothing can be mutated without changing the protein
            return individual

        # choose a new one at random for the AA
        new_codon = dna_to_vector(
            np.random.choice(
                [x for x in synonymous_codons[codon] if x != codon]))

        # replace it in the individual (in place)
        individual[codon_idx:codon_idx + 6] = new_codon

        return individual

    ga.mutate_function = mutate

    def crossover(parent_1, parent_2):
        # single-point crossover restricted to codon boundaries
        parent_1, parent_2 = list(parent_1), list(parent_2)
        n_codons = len(parent_1) // 6
        if n_codons < 2:
            # no interior codon boundary exists; children copy the parents
            return parent_1, parent_2
        # integer bound: randrange rejects floats on Python 3.12+
        index = random.randrange(1, n_codons) * 6
        child_1 = parent_1[:index] + parent_2[index:]
        child_2 = parent_2[:index] + parent_1[index:]
        return child_1, child_2

    ga.crossover_function = crossover

    def create_individual(seed_data):
        # randomise every codon that has a synonymous alternative
        individual = vector_to_dna(seed_data)
        new_codons = []
        for i in range(0, len(individual), 3):
            codon = individual[i:i + 3]
            if len(synonymous_codons[codon]) == 1:
                new_codons.append(codon)
                continue
            new_codons.append(
                np.random.choice(
                    [x for x in synonymous_codons[codon] if x != codon]))

        return dna_to_vector("".join(new_codons))

    ga.create_individual = create_individual

    # set up for GA run
    ga.create_first_generation()
    gens_since_improvement = 0
    best_indv_fitness = ga.best_individual()[0]
    counter = 1

    # run the GA until the best fitness stops improving (or Ctrl-C)
    try:
        while gens_since_improvement < max_gens_since_improvement:
            ga.create_next_generation()
            current_fitness = ga.best_individual()[0]
            if current_fitness < best_indv_fitness:
                best_indv_fitness = current_fitness
                gens_since_improvement = 0
            else:
                gens_since_improvement += 1
            if verbose:
                print(
                    "Gen: %s\tSince Improvement: %s/%s\tFitness: %s"
                    .expandtabs(15)
                    % (counter, gens_since_improvement,
                       max_gens_since_improvement, current_fitness),
                    end="\r")
            counter += 1
    except KeyboardInterrupt:
        print("\nStopping early...")

    if verbose:
        # move past the "\r"-terminated progress line
        print()

    best_seq = vector_to_dna(ga.best_individual()[1])
    # sanity check: only synonymous changes may have been made
    assert Seq(best_seq).translate(genetic_code) == Seq(insert).translate(genetic_code)
    return best_seq