def select_creatures(self):
    """Replace ``self.gene_pool`` with the pool for the next generation.

    When the current pool is non-empty it is sorted in place by ``fitness``
    (best first), a deep copy of the best entry is stored in
    ``self.best_from_last_gen``, and a new 100-entry pool is assembled from:

    * the three fittest entries (elitism, same object references),
    * 94 entries drawn by fitness-proportionate (roulette-wheel) selection,
    * three fresh ``genome.Genome(None)`` instances.

    When the current pool is empty, 100 ``None`` placeholders are appended
    to it instead.
    """
    # The original tested ``self.gene_pool.__len__() is not 0``. Comparing an
    # int by identity only works because CPython caches small integers (and
    # triggers a SyntaxWarning on Python 3.8+); compare by value instead.
    if len(self.gene_pool) != 0:
        self.gene_pool.sort(key=operator.attrgetter('fitness'), reverse=True)
        self.best_from_last_gen = copy.deepcopy(self.gene_pool[0])

        total_fitness = sum(gene.fitness for gene in self.gene_pool)

        # Elitism: the three best creatures survive unchanged. Explicit
        # indexing keeps the original IndexError for pools smaller than three.
        new_pool = [self.gene_pool[rank] for rank in range(3)]

        # Roulette wheel: walk the sorted pool until the running fitness
        # total reaches a random threshold in [0, int(total_fitness)].
        for _ in range(94):
            threshold = random.randint(0, int(total_fitness))
            running_total = 0
            for candidate in self.gene_pool:
                if running_total + candidate.fitness >= threshold:
                    new_pool.append(candidate)
                    break
                running_total += candidate.fitness

        # Top the pool up with three brand-new genomes.
        new_pool.extend(genome.Genome(None) for _ in range(3))
        self.gene_pool = new_pool
    else:
        self.gene_pool.extend([None] * 100)
def blast_csv2fasta(genome_sequence, blast_csv):
    """Print every BLAST match of a genome as FASTA text.

    Builds a ``genome.Genome`` from ``genome_sequence``, loads ``blast_csv``
    through ``read_blast_csv`` and prints the ``get_fasta()`` output of each
    entry in ``annotations.match``, joined by newlines.
    """
    my_genome = genome.Genome(genome_sequence)
    my_genome.read_blast_csv(blast_csv)
    matches = my_genome.annotations.match
    outfasta = [matches[match].get_fasta() for match in matches]
    # Single-argument call form: identical output under the Python 2 print
    # statement and the Python 3 print function (the bare statement form is a
    # SyntaxError on Python 3).
    print('\n'.join(outfasta))
def exonerate2fasta(genome_sequence, exonerate_file):
    """Print every exonerate match of a genome as FASTA text.

    Builds a ``genome.Genome`` from ``genome_sequence``, loads
    ``exonerate_file`` through ``read_exonerate`` and prints the
    ``get_fasta()`` output of each entry in ``annotations.match``, joined by
    newlines.
    """
    my_genome = genome.Genome(genome_sequence)
    my_genome.read_exonerate(exonerate_file)
    matches = my_genome.annotations.match
    outfasta = [matches[match].get_fasta() for match in matches]
    # Single-argument call form: identical output under the Python 2 print
    # statement and the Python 3 print function (the bare statement form is a
    # SyntaxError on Python 3).
    print('\n'.join(outfasta))
def gff2fasta(genome_sequence, gff, from_exons="False", seq_type="nucleotide", longest="False", genomic="False"):
    """Print the FASTA text of the 'gene' features described by a GFF file.

    ``from_exons``, ``longest`` and ``genomic`` are string flags ("True" /
    "False"). When ``from_exons`` is "True" the GFF is read with CDS features
    ignored and exon features relabelled as CDS; otherwise it is read as-is.
    ``seq_type``, ``longest`` and ``genomic`` are forwarded to
    ``annotations.get_fasta``.
    """
    # Local import so this block does not depend on the file's import header.
    import ast

    my_genome = genome.Genome(genome_sequence)
    if from_exons == "True":
        my_genome.read_gff(gff, features_to_ignore="CDS", features_to_replace=[('exon', 'CDS')])
    else:
        my_genome.read_gff(gff)

    # NOTE: the original ran eval() on these caller-supplied strings, which
    # executes arbitrary expressions. ast.literal_eval accepts the same
    # literal spellings ("True", "False", ...) but nothing executable.
    longest_flag = ast.literal_eval(longest)
    genomic_flag = ast.literal_eval(genomic)

    # Single-argument call form works under both Python 2 and Python 3.
    print(my_genome.annotations.get_fasta('gene', seq_type=seq_type, longest=longest_flag, genomic=genomic_flag))
def __init__(self, pos, genes, lock, copy=False):
    """Create a creature at ``pos`` from the given genes.

    ``genes`` and ``copy`` are passed to ``genome.Genome``; ``lock`` is
    stored as-is. Size comes from body node 0, health is twice the size, and
    the base acceleration grows with the number of "flagella" body nodes and
    with size. The sprite is ``assets/gradient.png`` scaled to a square of
    ``2 * size + 1`` pixels. Finishes by building the parts and the brain.
    """
    self.lock = lock
    self.genes = genome.Genome(genes, copy)
    self.size = self.genes.body_nodes.get(0).size
    self.health = 2 * self.size
    self.energy = 250000
    self.food_collected = 0
    self.dmg = False
    self.vel = vec(0, 0)

    # Counter deliberately starts at 1, as in the original formula.
    # The original compared with ``is "flagella"``: string identity only
    # succeeds when both strings happen to be interned, so equal strings
    # built at runtime were silently not counted. Compare by value.
    num_flag = 1
    for node in self.genes.body_nodes.values():
        if node.part_type == "flagella":
            num_flag += 1

    self.acceleration = vec(
        0, -0.2 * (1 + 0.05 * num_flag) + ((self.size - 15) / 100))
    self.position = vec(pos[0], pos[1])
    self.angle_speed = 0
    self.angle = self.set_angle()
    # Point the base acceleration along the creature's initial heading.
    self.acceleration.rotate_ip(self.angle)

    self.img = pygame.image.load(os.path.join(
        "assets", "gradient.png")).convert_alpha()
    self.img = pygame.transform.scale(
        self.img, (self.size * 2 + 1, self.size * 2 + 1))

    self.home = False
    self.parts = []
    self.animated_parts = []
    self.dmg_parts = []
    self.setup_parts()
    self.brain = brain.Brain(self)
def test_genome_expansion_and_mutation():
    """Expand a fresh genome, mutate it, and compare ``sequence_A`` to a copy.

    Passes when the set intersection of the mutated sequence and the
    pre-mutation copy has fewer members than the copy has elements.
    """
    subject = genome.Genome()
    subject.expand(n=10)  # expansion test
    snapshot = copy_genome(subject.sequence_A)
    subject.mutate(4)  # mutation test
    after = set(subject.sequence_A)
    before = set(snapshot)
    shared = after & before
    assert len(shared) < len(snapshot)  # overlap
def test_mutate_substitutions():
    """Run ``mutate.mutate`` with only 'singles' enabled on an expanded genome.

    Passes when the set intersection of the mutated ``sequence_A`` and its
    pre-mutation copy has fewer members than the copy has elements.
    """
    subject = genome.Genome()
    subject.expand(n=10)
    snapshot = copy_genome(subject.sequence_A)
    rates = {'singles': 50, 'expansions': 0, 'deletions': 0}
    mutate.mutate(subject, rates)
    after = set(subject.sequence_A)
    before = set(snapshot)
    shared = after & before
    assert len(shared) < len(snapshot)  # overlap
def __get_population(self):
    '''
    Initialise the population; its size is ``pop_size``.

    Resets ``self.pop`` to an empty list and fills it with ``self.pop_size``
    new ``genome.Genome(self.layers)`` instances.
    '''
    self.pop = []
    self.pop.extend(genome.Genome(self.layers) for _ in range(self.pop_size))
def __init__(self, reference_fasta, person, output, header=_anon_header):
    """Set up a writer from a reference, a person, and an output stream.

    ``reference_fasta`` is loaded into a ``genome.Genome``; ``output`` is a
    file-like object. One line, ``header + person``, is printed to it
    immediately.

    Ideally the reference would be encoded in the FASTA.
    """
    self._ref_genome = genome.Genome(reference_fasta)
    self._output = output
    first_line = header + person
    print(first_line, file=self._output)
    self._person = person
def test_mutate_expansions():
    """Run ``mutate.mutate`` with only 'expansions' enabled.

    Passes when ``sequence_A`` is longer after mutation than its
    pre-mutation copy.
    """
    subject = genome.Genome()
    subject.expand(n=10)
    snapshot = copy_genome(subject.sequence_A)
    rates = {'singles': 0.0, 'expansions': 50.0, 'deletions': 0.0}
    mutate.mutate(subject, rates)
    assert len(subject.sequence_A) > len(snapshot)
def __init__(self, mother, father, location, energy, species):
    """Create an individual from two parents at ``location``.

    The genotype is ``genome.Genome(mother, father, 1).get_geno()``.
    ``location`` is indexed as ``(x, y)``. The energy cost is 5% of the
    starting energy, truncated to an int, and speed is taken from
    ``self.getStats(1, energy)``.
    """
    self.data = []
    self.age = 1
    parental_genome = genome.Genome(mother, father, 1)
    self.geno = parental_genome.get_geno()
    self.x, self.y = location[0], location[1]
    self.loc = [self.x, self.y]
    self.energy = energy
    self.energy_cost = int(.05 * energy)
    # getStats is called after energy is set and receives the stored value.
    self.speed = self.getStats(1, self.energy)
    self.species = species
def __init__(self, mutation_rate=1):
    """
    Construct an empty generation.

    :param mutation_rate: documented by the author as a number between 0
        and 100 giving the mutation rate as a percentage (not validated here)
    :type mutation_rate: int
    """
    super(Generation, self).__init__()
    self.population, self.mating_pool = [], []
    self.generation = 0
    # -1 is the initial value before any mean has been stored.
    self.fitness_mean = -1
    self.mutation_rate = mutation_rate
    self.best_genome = genome.Genome([])
def initialize(self, n_in, n_out, pop_size=100, folder=None, delta_t=1., c1=1., c2=1., c3=0.5,
               desired_species=1, min_species=1, p_weight_mut=0.4, p_weight_random=0.02,
               weight_mut_sigma=0.3, node_mut_rate=0.05, edge_mut_rate=0.05, p_child_clone=0.02,
               p_mutate=0.8, p_inter_species=0.02, weight_amplitude=1.):
    """Create the first generation and prepare the output folder.

    A prototype ``genome_mod.Genome(n_in, n_out)`` is deep-copied
    ``pop_size`` times into a ``population.Population``; every remaining
    keyword argument except ``folder`` is forwarded unchanged to that
    constructor. The population is then immediately replaced by the result
    of its ``generate_offspring()``.

    :param folder: directory for output files; created if missing. With
        ``None``, ``self.folder`` becomes the empty string.
    """
    self.generation = 1
    self.n_in = n_in
    self.n_out = n_out

    prototype = genome_mod.Genome(self.n_in, self.n_out)
    founders = [deepcopy(prototype) for _ in range(pop_size)]
    self.population = population.Population(
        founders, delta_t=delta_t, c1=c1, c2=c2, c3=c3,
        desired_species=desired_species, min_species=min_species,
        p_weight_mut=p_weight_mut, p_weight_random=p_weight_random,
        weight_mut_sigma=weight_mut_sigma, node_mut_rate=node_mut_rate,
        edge_mut_rate=edge_mut_rate, p_child_clone=p_child_clone,
        p_mutate=p_mutate, p_inter_species=p_inter_species,
        weight_amplitude=weight_amplitude)
    self.population = self.population.generate_offspring()

    if folder is not None:
        if not os.path.exists(folder):
            os.makedirs(folder)
        self.folder = folder + "/"
    else:
        # The original set folder = "" and then appended "/", leaving
        # self.folder == "/" so that ``self.folder + name`` pointed at the
        # filesystem root. An empty prefix keeps paths relative to the
        # current working directory.
        self.folder = ""
    self.best_genome = None
def main():
    """Set up the world and run it.

    Builds two metadata strings that differ only in their "colors" entry,
    crosses them with one hand-written gene string to make 30 genomes,
    mutates each, and hands their sequences to a 100x100 ``world.World``.
    Then steps the world forever; this function never returns.
    """
    metadesc = OrderedDict()
    metadesc["colors"] = "0.3 0.8 0.8 0.3 0.8 0.8 0.8 0.3 0.8 0.8 0.3 0.8"
    metadesc["fuzziness"] = "1.0"
    metadesc["const_bounds"] = "-5.0 5.0"
    metadesc["fun_gen_depth"] = "3"
    metadesc["incr_range"] = "5.0"
    metadesc["mult_range"] = "2.0"

    muterates = OrderedDict([
        ("mute", "0.1"),
        ("genome", "0.2"),
        ("gene_action", "0.5"),
        ("struct_mod", "0.5"),
        ("leaf_type", "0.3"),
        ("genome_rel", "2 1 1 1 2"),
        ("const_rel", "1 1 1 1"),
        ("leaf_rel", "1 1 4 4"),
        ("enum_rel", "1 1 1"),
        ("struct_rel", "1 1 1"),
    ])
    mute_part = " ".join(muterates.values())

    def describe(meta):
        # Metadata values first, then the mutation rates, space separated.
        return " ".join(meta.values()) + " " + mute_part

    meta1 = describe(metadesc)
    metadesc["colors"] = "1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0"
    meta2 = describe(metadesc)

    # Author's notes on what the gene string encodes (which gene implements
    # which behavior is not verified here):
    #   - Increment state per step, random turn
    #   - Suck up stuff if it's present underneath bot
    #   - Turn towards food
    #   - If bumped, turn away from obstacle
    #   - Baseline instinct to move forward
    gen = (" 12 + 1 $10 | 4 * = 0 % $10 23 * 100 $1 | "
           " 5 * 100 $2 | "
           " 4 * 90 $4 | 3 * 90 $3 | 1 * 100 $5 | "
           " 3 * * 80 $1 $0 | 4 * * 80 $1 - 1 $0 | "
           " 1 20 ")

    print("Generating Goombas")
    goombas = [
        genome.Genome(*genome.cross_genome_sequences((meta2, gen), (meta1, gen)))
        for _ in range(30)
    ]
    for goomba in goombas:
        goomba.mutate()
    sequences = [goomba.sequences() for goomba in goombas]

    print("Building World")
    wrld = world.World((100, 100), sequences, meta1, [3, 10], 1000)

    print("Constructing geometry (can take a bit because I'm a retard)")
    # Headless run: no display is attached, the simulation just steps.
    while True:
        wrld.step()
def exclude_from_fasta(fasta, exclude_list, just_firstword="False"):
    """Print a FASTA file without the entries named in ``exclude_list``.

    ``exclude_list`` is either the name of a file holding one sequence name
    per line, or a comma-separated string of names (used when the value
    cannot be opened as a file). With ``just_firstword="True"`` only the
    first whitespace-delimited word of each sequence id is compared against
    the exclusion names; the full id is still printed.
    """
    my_fasta = genome.Genome(fasta)
    try:
        # ``with`` closes the handle; the original leaked it.
        with open(exclude_list) as handle:
            names = handle.read().replace('\r', '').split('\n')
    except (IOError, OSError):
        # Not a readable file: treat the argument as comma-separated names.
        # (The original bare ``except:`` also swallowed unrelated errors,
        # including KeyboardInterrupt.)
        names = exclude_list.split(',')

    # A set gives O(1) membership tests inside the loop.
    excluded = set(names)
    first_word_only = just_firstword == "True"
    for seqid in my_fasta.genome_sequence:
        seqid_fixed = seqid.split()[0] if first_word_only else seqid
        if seqid_fixed not in excluded:
            # Single-argument call form works under Python 2 and Python 3.
            print('>' + seqid + '\n' + my_fasta.genome_sequence[seqid])
def __init__(self, x, y, gene=0):
    """Create a creature at ``(x, y)`` whose genome is built from ``gene``.

    Aggression is read from the genome's ``dna['aggro']``; both sprites
    (``self.img1`` and ``self.img2``) are scaled to a square of side
    ``2 * rad``.
    """
    self.gene = genome.Genome(gene)
    self.x_pos, self.y_pos = x, y
    self.rad = 10
    self.aggro = self.gene.dna['aggro']
    self.speed = 2

    sprite_size = (self.rad * 2, self.rad * 2)
    self.c_img = pygame.transform.scale(self.img1, sprite_size)
    self.ac_img = pygame.transform.scale(self.img2, sprite_size)

    self.closest_food = 0
    self.wander_dir = random.randrange(4)
    self.wander_count = 1000
    self.energy = 100
    self.vision = 100
def __init__(self, x, y, color, gc, stat):
    """Create a bot, bump the global bot counters, and zero its statistics.

    ``x``, ``y`` and ``color`` go to the parent constructor; ``gc`` is used
    to build the bot's ``gm.Genome``; ``stat`` is stored as-is. The bot's
    ``id`` is the value of the module-level ``bot_last_number`` after it
    has been incremented.
    """
    global bot_last_number, bots_counter
    bot_last_number += 1
    bots_counter += 1

    super().__init__(x, y, color)
    self.stat = stat
    self.genome = gm.Genome(gc)
    self.timer = 160
    self.id = bot_last_number

    self.all_consumed_protein = 0  # protein consumed over the whole lifetime
    self.protein_plant = 0
    self.protein_predator = 0
    self.protein_mushroom = 0
    self.moves = 0  # how many moves the bot has made
    self.children = 0  # how many times the bot has divided
    self.death_cycle = None
def create_child(self, genome1, genome2):
    """Cross two genomes and return the child genome.

    The parent with the higher ``score`` is the "fitter" one (ties go to
    ``genome2``). Both connection lists are walked in parallel by ``innov``:

    * equal innovation numbers: if either gene is disabled the fitter
      parent's gene is taken; otherwise one of the two is chosen at random,
      the fitter with probability ``fitter.score / (sum of both scores)``
      (0.5 when that sum is zero);
    * the fitter parent's innovation is lower: its gene is taken;
    * otherwise the unfitter cursor advances, or, once it is on its last
      gene, the fitter parent's gene is taken.

    The unfitter cursor never moves past its last gene.
    """
    if genome1.score > genome2.score:
        fitter, unfitter = genome1, genome2
    else:
        fitter, unfitter = genome2, genome1

    fit_genes = fitter.connections
    unfit_genes = unfitter.connections
    last_unfit = len(unfit_genes) - 1

    child = []
    i = j = 0
    while i < len(fit_genes):
        gene_fit = fit_genes[i]
        gene_unfit = unfit_genes[j]

        if gene_fit.innov == gene_unfit.innov:
            if gene_fit.enabled and gene_unfit.enabled:
                try:
                    p_fit = fitter.score / (fitter.score + unfitter.score)
                except ZeroDivisionError:
                    p_fit = 0.5
                child.append(gene_fit if random.random() < p_fit else gene_unfit)
            else:
                child.append(gene_fit)
            i += 1
            if j < last_unfit:
                j += 1
        elif gene_fit.innov < gene_unfit.innov:
            child.append(gene_fit)
            i += 1
        elif j < last_unfit:
            # Unfitter gene has the lower innovation number: skip it.
            j += 1
        else:
            # Unfitter list exhausted: keep the rest of the fitter genes.
            child.append(gene_fit)
            i += 1

    return genome_mod.Genome(self.n_in, self.n_out, child)
def get_CDS_peptides(genome_sequence, gff, output_location, gene_name_filters=None, gene_length_filter=None, names_from="CDS"):
    """Write the longest ORF of every CDS to ``output_location`` as FASTA.

    :param genome_sequence: passed to ``genome.Genome``.
    :param gff: passed to the genome's ``read_gff3``.
    :param output_location: path of the FASTA file to (over)write.
    :param gene_name_filters: iterable of substrings; a gene whose ``ID``
        contains any of them is skipped. ``None`` (default) filters nothing.
    :param gene_length_filter: if given, genes whose sequence (second line
        of ``get_fasta()``) is shorter than ``int(gene_length_filter)`` are
        skipped.
    :param names_from: ``'CDS'`` names each record after the CDS ID;
        ``'transcript'`` / ``'gene'`` use that feature's ID plus
        ``-CDS<n>``, numbered from 1 in output order. Any other value prints
        a message and skips the transcript's records.

    Within a transcript, CDS records are written sorted by ``coords``,
    reversed for transcripts on the "-" strand.
    """
    # None replaces the original mutable ``[]`` default; callers that pass
    # nothing see the same behavior.
    name_filters = gene_name_filters if gene_name_filters is not None else ()
    min_length = None if gene_length_filter is None else int(gene_length_filter)

    my_genome = genome.Genome(genome_sequence)
    my_genome.read_gff3(gff)

    # ``with`` guarantees the output is flushed and closed; the original
    # never closed the handle.
    with open(output_location, 'w') as out:
        for gene in my_genome.annotations.gene:
            gene_obj = my_genome.annotations.gene[gene]
            if any(name_filter in gene_obj.ID for name_filter in name_filters):
                continue
            if min_length is not None:
                seqlen = len(gene_obj.get_fasta().split('\n')[1])
                if seqlen < min_length:
                    continue

            for transcript in gene_obj.child_list:
                transcript_obj = my_genome.annotations.transcript[transcript]

                # coords -> (CDS ID, longest ORF); keyed by coords so the
                # records can be emitted in positional order.
                cds_by_coords = {}
                for cds_key in transcript_obj.child_list:
                    cds_obj = my_genome.annotations.CDS[cds_key]
                    cds_by_coords[cds_obj.coords] = (
                        cds_obj.ID, cds_obj.get_seq().get_orfs(longest=True))

                ordered_coords = sorted(cds_by_coords)
                if transcript_obj.strand == "-":
                    ordered_coords.reverse()

                for counter, coords in enumerate(ordered_coords, 1):
                    cds_id, peptide = cds_by_coords[coords]
                    if names_from == 'CDS':
                        pep_name = cds_id
                    elif names_from == 'transcript':
                        pep_name = transcript_obj.ID + '-CDS' + str(counter)
                    elif names_from == 'gene':
                        pep_name = gene_obj.ID + '-CDS' + str(counter)
                    else:
                        # Call form works under Python 2 and Python 3.
                        print("invalid option for 'names_from' argument")
                        break
                    out.write('>' + pep_name + '\n' + peptide + '\n')
def test_build_nn():
    """Build a network from clean genome rules and check its exact layout.

    Seeds ``random`` with 2, calls ``build_nn`` on a new network and a
    genome prepared with ``make_clean_rules()``, then compares
    ``nn.neurons`` and the ``nn.links`` matrix against fixed expectations.
    """
    # Kept from the original; not referenced by the assertions below.
    SENSORS = [[0], [1], [0, 0]]
    ACTIONS = [[0, 1], [1, 0], [1, 1]]

    random.seed(2)
    nn = neuralnetwork.Neuralnetwork()
    gen = genome.Genome()
    gen.make_clean_rules()
    build_nn(nn, gen, verbose=False)

    expected_neurons = [[0], [1], [0, 1],
                        [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1],
                        [1], [0]]
    assert nn.neurons == expected_neurons

    expected_links = numpy.array([
        [0., 1., 0., 1., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 0., 1., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 1., 0., 1., 1., 1., 0., 0., 0.],
        [1., 1., 0., 1., 1., 1., 1., 0., 0.],
        [1., 1., 0., 1., 1., 1., 1., 1., 0.],
    ])
    assert numpy.array_equal(nn.links, expected_links)
def dna2orfs(fasta_location, output_file, from_atg=False, longest=False):
    """Translate every sequence of a FASTA file and write its ORFs as FASTA.

    Each sequence is translated in frames 0, 1, 2 on both strands and the
    translation is split on ``*`` (stop).

    :param fasta_location: passed to ``genome.Genome``.
    :param output_file: path of the FASTA file to (over)write.
    :param from_atg: if true, each ORF is trimmed to start at its first
        ``M``; ORFs containing no ``M`` are skipped.
    :param longest: if true, only the single longest ORF per input sequence
        is written (named ``<seq>_longestORF``); otherwise every ORF is
        written, named ``<seq>-pos:<start>``.
    """
    dna = genome.Genome(fasta_location)
    # ``with`` closes the file even if translation raises part-way through.
    with open(output_file, 'w') as out:
        for seq in dna.genome_sequence:
            sequence = dna.genome_sequence[seq]
            best_record = None
            best_len = 0
            for frame in [0, 1, 2]:
                for strand in ['-', '+']:
                    translated = sequence.translate(frame=frame, strand=strand).split('*')
                    # Running nucleotide offset, advanced by (len + stop) * 3
                    # per ORF; replaces re-summing a growing list each time.
                    # NOTE(review): on the '-' strand the offset starts at
                    # len - frame and still increases; confirm this is the
                    # intended coordinate convention.
                    position = frame if strand == '+' else len(sequence) - frame
                    for orf in translated:
                        orf_start = position
                        position += (1 + len(orf)) * 3
                        if from_atg:
                            # The original rebuilt the ORF with
                            # 'M' + ''.join(orf.split('M')[1:]), which deleted
                            # every internal M and yielded a bare "M" when the
                            # ORF had none (its IndexError guard never fired).
                            first_met = orf.find('M')
                            if first_met == -1:
                                continue
                            output_orf = orf[first_met:]
                        else:
                            output_orf = orf
                        if longest:
                            if len(output_orf) > best_len:
                                best_record = '>' + seq + '_longestORF\n' + output_orf + '\n'
                                best_len = len(output_orf)
                        else:
                            out.write('>' + seq + '-pos:' + str(orf_start) + '\n' + output_orf + '\n')
            # Nothing is written for a sequence with no non-empty ORF (the
            # original raised IndexError on the empty candidate list).
            if longest and best_record is not None:
                out.write(best_record)
def __init__(self, x, y):
    """Create a creature at ``(x, y)`` with stats derived from a new genome.

    Health, damage, speed, vision and aggression each start from a base
    value and are raised by ``base * (20 * dna[i]) / 100`` for genome loci
    0-4 respectively. Hunger is ``8000 - 1000 * speed - 6 * vision``. The
    three sprites are scaled to a square of side ``2 * rad``.
    """
    self.gene = genome.Genome()
    self.x_pos = x
    self.y_pos = y
    self.rad = 10

    dna = self.gene.dna

    def boosted(base, locus):
        # Same expression shape as the original, kept for identical results.
        return base + base * ((20 * dna[locus]) / 100)

    self.health = boosted(30, 0)
    self.damage = boosted(10, 1)
    self.speed = boosted(0.5, 2)
    self.vision = boosted(80, 3)
    self.aggro = boosted(20, 4)

    self.satiation = 0
    self.last_starve = 0
    self.hunger = 8000 - 1000 * self.speed - 6 * self.vision

    sprite_size = (self.rad*2, self.rad*2)
    self.c_img = pygame.transform.scale(self.img1, sprite_size)
    self.mc_img = pygame.transform.scale(self.img2, sprite_size)
    self.ac_img = pygame.transform.scale(self.img3, sprite_size)

    self.attack_ticks = 0
    self.last_attack = 0
    self.closest_food = 0
    self.wander_dir = random.randrange(4)
    self.wander_count = 1000
import genome

# Load the two genome sequences to compare.
g1 = genome.Genome('MN908947_China_01_05_2020.txt')
g2 = genome.Genome('MT483564_California_11_10_2020.txt')

# Report the C and G frequencies of each genome.
print('China 01 05 2020 C frequency: ', g1.get_c_frequency())
print('China 01 05 2020 G frequency: ', g1.get_g_frequency())
print('California 11 10 2020 C frequency: ', g2.get_c_frequency())
# This label previously read "11 05 2020", disagreeing with both the input
# file name and the C-frequency label for the same genome.
print('California 11 10 2020 G frequency: ', g2.get_g_frequency())
filename=self.folder + "best_gen{}".format(self.generation)) #logging.info("Generation {}: Best score {}".format(self.generation, self.best_genome.score)) def print_gen_info(self): logger.info("Generation {}: {} species".format( self.generation, len(self.population.all_species))) logger.info(" Best score: {}".format(self.get_best().score)) def species_sizes(self): return self.population.species_sizes() if __name__ == '__main__': logging.basicConfig(stream=sys.stderr, level=logging.INFO) genotype = genome_mod.Genome(3, 2) genotype2 = genome_mod.Genome(3, 2, genotype.connections) genotype.add_random_node() genotype2.add_random_node() for i in range(2): #genotype.add_random_node() genotype2.add_random_node() #genotype2.random_connection() #print_edges(genotype2) #draw_genome_net(genotype2, show_innov=True, show_disabled=True, filename="genome2_run%i"%i) # genome_mod.draw_genome_net(genotype, show_weights=True, show_disabled=True, show_innov=False, filename="genome1") # genome_mod.draw_genome_net(genotype2,show_weights=True, show_disabled=True, show_innov=False, filename="genome2") #funktion = genotype2.build_phenotype() #print(funktion([1,1,1]))
def prep4apollo(genome_sequence, suppress_fasta="False", output_directory='apollo_gffs', exon_fasta=None,
                full_length_seqs=None, exon_blast_csv=None, exonerate_output=None, starjuncs=None,
                other_gff=None, other_gff_format='gff3', blast_evalue='0.01', exonerate_percent='50',
                output_empty_scaffolds="False", exonerate_intron_steps="2000,5000,200000",
                mapping_threads="1"):
    """takes evidence inputs and returns gff files to open in apollo

    Optionally maps ``exon_fasta`` with tblastn and ``full_length_seqs`` with
    exonerate (once per comma-separated value in ``exonerate_intron_steps``),
    running at most ``int(mapping_threads)`` commands at a time. Results are
    concatenated with any pre-existing ``exon_blast_csv`` /
    ``exonerate_output``, loaded into a ``genome.Genome`` together with
    ``other_gff``, and one ``<seqid>.gff`` file per sequence id is written to
    ``output_directory``. ``starjuncs`` entries, if given, are written at the
    top of the matching file. ``suppress_fasta`` and
    ``output_empty_scaffolds`` are string flags ("True" / "False").

    NOTE(review): the external commands are still assembled by string
    concatenation and run with ``shell=True``, so paths containing spaces or
    shell metacharacters will break or be interpreted by the shell. Only
    pass trusted paths.
    """
    # Local import so this block does not depend on the file's import header.
    import ast

    subprocess.call("mkdir -p " + output_directory, shell=True)
    subprocess.call("mkdir -p " + output_directory + "/temp", shell=True)
    mapping_cmds = []
    blast_run = False
    exonerate_run = False
    # The original ran eval() on these string flags, which executes
    # arbitrary expressions; literal_eval accepts the same literals only.
    suppress_fasta = ast.literal_eval(suppress_fasta)
    output_empty_scaffolds = ast.literal_eval(output_empty_scaffolds)

    if exon_fasta is not None:
        subprocess.call(config.makeblastdb + ' -in ' + genome_sequence + ' -out ' + output_directory
                        + '/temp/tempdb -dbtype nucl', shell=True)
        mapping_cmds.append(config.tblastn + ' -query ' + exon_fasta + ' -db ' + output_directory
                            + '/temp/tempdb -evalue ' + blast_evalue + " -out " + output_directory
                            + "/exon_tblastn.csv -outfmt 10")
        blast_run = True
    if full_length_seqs is not None:
        for intron_length in exonerate_intron_steps.split(','):
            mapping_cmds.append(config.exonerate + ' --model protein2genome --percent ' + exonerate_percent
                                + ' --maxintron ' + intron_length + ' ' + full_length_seqs + ' '
                                + genome_sequence + ' > ' + output_directory + '/exonerate_output_'
                                + intron_length + 'bp_introns.txt')
        exonerate_run = True

    running_cmds = []
    if mapping_cmds:
        # Single-argument call form works under Python 2 and Python 3.
        if blast_run:
            print("mapping exons with tblastn")
        if exonerate_run and blast_run:
            print(" and")
        if exonerate_run:
            print("mapping full length sequences with exonerate")
        batch_size = int(mapping_threads)
        last_index = len(mapping_cmds) - 1
        for cmd_index, mapping_cmd in enumerate(mapping_cmds):
            running_cmds.append(subprocess.Popen(mapping_cmd, shell=True))
            # Wait for the whole batch once it is full, or after the final
            # command has been started.
            if (cmd_index + 1) % batch_size == 0 or cmd_index == last_index:
                for proc in running_cmds:
                    proc.wait()
                running_cmds = []

    if blast_run:
        if exon_blast_csv is not None:
            subprocess.call('cat ' + exon_blast_csv + ' ' + output_directory + '/exon_tblastn.csv > '
                            + output_directory + '/cat_exon_tblastn.csv', shell=True)
            exon_blast_csv = output_directory + '/cat_exon_tblastn.csv'
        else:
            exon_blast_csv = output_directory + '/exon_tblastn.csv'
    if exonerate_run:
        if exonerate_output is not None:
            subprocess.call('cat ' + exonerate_output + ' ' + output_directory + '/exonerate_output* > '
                            + output_directory + '/cat_exonerate_output.txt', shell=True)
        else:
            subprocess.call('cat ' + output_directory + '/exonerate_output* > '
                            + output_directory + '/cat_exonerate_output.txt', shell=True)
        exonerate_output = output_directory + '/cat_exonerate_output.txt'

    print("building apollo gffs")
    my_genome = genome.Genome(genome_sequence, other_gff, annotation_format=other_gff_format)
    if exon_blast_csv is not None:
        my_genome.read_blast_csv(exon_blast_csv, find_truncated_locname=True)
    if exonerate_output is not None:
        my_genome.read_exonerate(exonerate_output)

    if output_empty_scaffolds:
        seqids = my_genome.get_seqids()
    else:
        seqids = my_genome.annotations.get_all_seqids()

    if starjuncs is not None:
        # Group junction lines by the sequence id in their first tab field.
        starjunc_dic = {}
        for junc in genome.starjunc2gff(starjuncs, output="list"):
            starjunc_dic.setdefault(junc.split('\t')[0], []).append(junc)

    for seqid in seqids:
        # ``with`` closes each file even if writing raises.
        with open(output_directory + '/' + sanitize_pathname(seqid) + '.gff', 'w') as out:
            if starjuncs is not None and seqid in starjunc_dic:
                out.write('\n'.join(starjunc_dic[seqid]) + '\n')
            out.write(my_genome.write_apollo_gff(seqid, suppress_fasta=suppress_fasta))

    subprocess.call('rm -rf ' + output_directory + '/temp', shell=True)
def get_seq_from_fasta(genome_sequence, seq_name, truncate_names="False"):
    """Print the FASTA entry named ``seq_name`` from ``genome_sequence``.

    ``truncate_names`` is a string flag ("True" / "False") that is parsed
    and passed to ``genome.Genome``; the output comes from the genome's
    ``get_scaffold_fasta``.
    """
    # Local import so this block does not depend on the file's import header.
    import ast

    # The original ran eval() on the caller-supplied string, which executes
    # arbitrary expressions; literal_eval accepts the same literals only.
    my_genome = genome.Genome(genome_sequence, truncate_names=ast.literal_eval(truncate_names))
    # Single-argument call form works under Python 2 and Python 3.
    print(my_genome.get_scaffold_fasta(seq_name))
def test_genome_haploidy():
    """The first element of a haploid must carry a value from one parent.

    The genome is built with ``seqA`` all ones and ``seqB`` all zeros, so
    ``haploid()[0].val`` must be 1 or 0.
    """
    diploid = genome.Genome(seqA=[1, 1, 1], seqB=[0, 0, 0])
    haploid = diploid.haploid()
    first_val = haploid[0].val
    assert first_val == 1 or first_val == 0
qtnfile = ddir + '/potato.qtn.pos' # gene dropping pedigree pedfile = ddir + '/potato.ped' # snp file in gen format genfile = ddir + '/potato.gen.gz' # goto working directory os.chdir(wdir) # STEP 1: # uploads genotypes and generates snp positions (snpFile) # NOTE that ploidy level must be specified if gen format gbase = gg.GFounder(vcfFile=genfile, snpFile=seqfile, ploidy=4) # STEP 2: generates Genome object with chr names, recombination map, etc gfeatures = gg.Genome(snpFile=seqfile, ploidy=gbase.ploidy) # prints some basic info gfeatures.print() # STEP 3: read QTN file qtn = gg.QTNs(h2=[0.5, 0.7], genome=gfeatures, qtnFile=qtnfile) qtn.get_var(gfeatures, gbase) # STEP 4: generate base population pop = gg.Population(gfeatures, pedFile=None, generation=None, qtns=qtn, gfounders=gbase) qtn.print(gfeatures)
import genome

# Build a genome with the constructor argument 400 and print whatever its
# interpret() method returns.
gen = genome.Genome(400)
print(gen.interpret())
def coords2fasta(fasta_file, seqid, start, stop, truncate_names="False"):
    """prints fasta-format sequence between coordinates (1-based, as in
    gff-format) within a specific entry in a fasta file.

    ``start`` and ``stop`` are strings: they are concatenated into the
    header line and converted with ``int()`` for slicing.
    truncate_names="True" can be used if you only want to provide the first
    word after the ">" as the seqid (assuming it's unique of course)."""
    # Local import so this block does not depend on the file's import header.
    import ast

    # Single-argument call form works under Python 2 and Python 3.
    print(">" + seqid + ":" + start + "-" + stop)
    # The original ran eval() on the caller-supplied flag, which executes
    # arbitrary expressions; literal_eval accepts the same literals only.
    sequences = genome.Genome(fasta_file, truncate_names=ast.literal_eval(truncate_names)).genome_sequence
    # 1-based inclusive coordinates -> 0-based half-open slice.
    print(sequences[seqid][int(start) - 1:int(stop)])