Exemplo n.º 1
0
    def select_creatures(self):
        """Replace ``self.gene_pool`` with the next generation's pool.

        When the pool is empty it is filled with 100 ``None`` placeholders.
        Otherwise the pool is sorted by descending ``fitness``, a deep copy of
        the best genome is stored in ``self.best_from_last_gen`` and a new pool
        is assembled from:

        * the (up to) three fittest genomes, kept unchanged,
        * 94 genomes drawn by fitness-proportional (roulette) selection,
        * three fresh ``genome.Genome(None)`` instances.
        """
        # Compare by value: ``is not 0`` tested object identity and only
        # worked because CPython happens to cache small integers.
        if len(self.gene_pool) == 0:
            for _ in range(100):
                self.gene_pool.append(None)
            return

        self.gene_pool.sort(key=operator.attrgetter('fitness'), reverse=True)
        self.best_from_last_gen = copy.deepcopy(self.gene_pool[0])
        total_fitness = sum(gene.fitness for gene in self.gene_pool)

        # Elitism: a slice keeps this safe for pools with fewer than 3 genomes.
        new_pool = list(self.gene_pool[:3])

        last_index = len(self.gene_pool) - 1
        for _ in range(94):
            threshold = random.randint(0, int(total_fitness))
            index = 0
            running = 0
            # Walk the cumulative fitness until it reaches the threshold.
            # The index bound prevents running off the end of the pool when
            # the running sum never reaches the threshold.
            while (index < last_index
                   and running + self.gene_pool[index].fitness < threshold):
                running += self.gene_pool[index].fitness
                index += 1
            new_pool.append(self.gene_pool[index])

        for _ in range(3):
            new_pool.append(genome.Genome(None))

        self.gene_pool = new_pool
Exemplo n.º 2
0
def blast_csv2fasta(genome_sequence,blast_csv):
    """Print, in FASTA format, every match read from a BLAST CSV file.

    genome_sequence -- passed to ``genome.Genome`` to build the genome.
    blast_csv       -- passed to ``Genome.read_blast_csv``.

    The FASTA records of all entries in ``annotations.match`` are joined with
    newlines and written to standard output.
    """
    my_genome = genome.Genome(genome_sequence)
    my_genome.read_blast_csv(blast_csv)
    matches = my_genome.annotations.match
    outfasta = [matches[match].get_fasta() for match in matches]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('\n'.join(outfasta))
Exemplo n.º 3
0
def exonerate2fasta(genome_sequence,exonerate_file):
    """Print, in FASTA format, every match read from an exonerate output file.

    genome_sequence -- passed to ``genome.Genome`` to build the genome.
    exonerate_file  -- passed to ``Genome.read_exonerate``.

    The FASTA records of all entries in ``annotations.match`` are joined with
    newlines and written to standard output.
    """
    my_genome = genome.Genome(genome_sequence)
    my_genome.read_exonerate(exonerate_file)
    matches = my_genome.annotations.match
    outfasta = [matches[match].get_fasta() for match in matches]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('\n'.join(outfasta))
Exemplo n.º 4
0
def gff2fasta(genome_sequence,gff,from_exons = "False",seq_type = "nucleotide", longest = "False", genomic = "False"):
    """Print the FASTA of all 'gene' annotations read from a GFF file.

    genome_sequence -- passed to ``genome.Genome``.
    gff             -- annotation file passed to ``Genome.read_gff``.
    from_exons      -- the string "True" makes the reader ignore CDS features
                       and treat 'exon' features as 'CDS' instead.
    seq_type        -- forwarded to ``annotations.get_fasta``.
    longest, genomic -- string literals ("True"/"False") converted to Python
                       values and forwarded to ``annotations.get_fasta``.
    """
    import ast

    my_genome = genome.Genome(genome_sequence)
    if from_exons == "True":
        my_genome.read_gff(gff, features_to_ignore = "CDS", features_to_replace = [('exon','CDS')])
    else:
        my_genome.read_gff(gff)
    # The flags arrive as strings; ast.literal_eval parses the literal without
    # executing arbitrary code the way eval() would.
    longest_flag = ast.literal_eval(longest)
    genomic_flag = ast.literal_eval(genomic)
    print(my_genome.annotations.get_fasta('gene', seq_type = seq_type, longest = longest_flag, genomic = genomic_flag))
Exemplo n.º 5
0
    def __init__(self, pos, genes, lock, copy=False):
        """Create a creature at ``pos`` from the given genes.

        pos   -- indexable pair; ``pos[0]`` and ``pos[1]`` become the position.
        genes -- passed to ``genome.Genome`` together with ``copy``.
        lock  -- stored on the instance as ``self.lock``.
        copy  -- forwarded to ``genome.Genome``.
        """
        self.lock = lock

        self.genes = genome.Genome(genes, copy)
        self.size = self.genes.body_nodes.get(0).size
        self.health = 2 * self.size
        self.energy = 250000
        self.food_collected = 0
        self.dmg = False

        self.vel = vec(0, 0)
        # The count starts at 1, so the speed factor below is at least 1.05
        # even without any flagella.  Strings are compared with ``==``:
        # ``is`` tests identity and only matched when the two strings
        # happened to be the same interned object.
        num_flag = 1 + sum(
            1 for node in self.genes.body_nodes.values()
            if node.part_type == "flagella")
        self.acceleration = vec(
            0, -0.2 * (1 + 0.05 * num_flag) + ((self.size - 15) / 100))
        self.position = vec(pos[0], pos[1])
        self.angle_speed = 0
        self.angle = self.set_angle()
        self.acceleration.rotate_ip(self.angle)

        # Body sprite, scaled to a square of side 2 * size + 1.
        self.img = pygame.image.load(os.path.join(
            "assets", "gradient.png")).convert_alpha()
        self.img = pygame.transform.scale(
            self.img, (self.size * 2 + 1, self.size * 2 + 1))
        self.home = False

        self.parts = []
        self.animated_parts = []
        self.dmg_parts = []
        self.setup_parts()

        self.brain = brain.Brain(self)
0
def test_genome_expansion_and_mutation():
    """Expand a genome, mutate it, and check sequence_A no longer fully overlaps its copy."""
    subject = genome.Genome()
    subject.expand(n=10)  # expansion step
    before = copy_genome(subject.sequence_A)
    subject.mutate(4)  # mutation step
    shared = set(subject.sequence_A) & set(before)
    # The set overlap must be smaller than the pre-mutation sequence length.
    assert len(shared) < len(before)
Exemplo n.º 7
0
def test_mutate_substitutions():
    """Apply single substitutions only and check sequence_A no longer fully overlaps its copy."""
    subject = genome.Genome()
    subject.expand(n=10)
    before = copy_genome(subject.sequence_A)
    rates = {'singles': 50, 'expansions': 0, 'deletions': 0}
    mutate.mutate(subject, rates)
    shared = set(subject.sequence_A) & set(before)
    # The set overlap must be smaller than the pre-mutation sequence length.
    assert len(shared) < len(before)
Exemplo n.º 8
0
 def __get_population(self):
     """Initialise the population with ``pop_size`` genomes.

     Each genome is built as ``genome.Genome(self.layers)`` and the
     resulting list is stored in ``self.pop``.
     """
     members = self.pop = []
     members.extend(genome.Genome(self.layers) for _ in range(self.pop_size))
Exemplo n.º 9
0
    def __init__(self, reference_fasta, person, output, header=_anon_header):
        """Load the reference genome and write the header line for ``person``.

        reference_fasta -- passed to ``genome.Genome``.
        person          -- appended to ``header`` to form the first output line.
        output          -- file-like object that receives the printed output.
        header          -- prefix of the first output line.

        Ideally the reference would be encoded in the FASTA.
        """
        self._output = output
        self._person = person
        self._ref_genome = genome.Genome(reference_fasta)
        print(header + person, file=output)
Exemplo n.º 10
0
def test_mutate_expansions():
    """Apply expansions only and check that sequence_A grew."""
    subject = genome.Genome()
    subject.expand(n=10)
    before = copy_genome(subject.sequence_A)
    rates = {'singles': 0.0, 'expansions': 50.0, 'deletions': 0.0}
    mutate.mutate(subject, rates)
    assert len(subject.sequence_A) > len(before)
Exemplo n.º 11
0
 def __init__(self, mother, father, location, energy, species):
     """Create an individual from two parents at ``location``.

     mother, father -- passed to ``genome.Genome`` (with a third argument 1);
                       the resulting genotype is stored in ``self.geno``.
     location       -- indexable pair giving the x and y coordinates.
     energy         -- starting energy; 5% of it (truncated to int) is
                       stored as ``energy_cost``.
     species        -- stored on the instance as ``self.species``.
     """
     self.data = []
     self.age = 1
     parental_genome = genome.Genome(mother, father, 1)
     self.geno = parental_genome.get_geno()
     self.x, self.y = location[0], location[1]
     self.loc = [self.x, self.y]
     self.energy = energy
     self.energy_cost = int(0.05 * energy)
     # getStats is called only after the attributes above are in place.
     self.speed = self.getStats(1, self.energy)
     self.species = species
Exemplo n.º 12
0
 def __init__(self, mutation_rate=1):
     """
     Build an empty generation.

     :param mutation_rate: a number between 0 and 100 representing the
     mutation rate as a percentage
     :type mutation_rate: int
     """
     super(Generation, self).__init__()
     self.generation = 0
     self.mutation_rate = mutation_rate
     self.fitness_mean = -1
     self.population = []
     self.mating_pool = []
     self.best_genome = genome.Genome([])
Exemplo n.º 13
0
 def initialize(self,
                n_in,
                n_out,
                pop_size=100,
                folder=None,
                delta_t=1.,
                c1=1.,
                c2=1.,
                c3=0.5,
                desired_species=1,
                min_species=1,
                p_weight_mut=0.4,
                p_weight_random=0.02,
                weight_mut_sigma=0.3,
                node_mut_rate=0.05,
                edge_mut_rate=0.05,
                p_child_clone=0.02,
                p_mutate=0.8,
                p_inter_species=0.02,
                weight_amplitude=1.):
     """Create the first population and set up the output folder.

     A prototype ``genome_mod.Genome(n_in, n_out)`` is deep-copied
     ``pop_size`` times to seed a ``population.Population``; the population
     is then replaced by the result of its ``generate_offspring()``.

     n_in, n_out -- stored on the instance and passed to the prototype genome.
     pop_size    -- number of prototype copies in the initial population.
     folder      -- output directory; created when missing.  When omitted
                    (or empty) ``self.folder`` is the empty string.
     All remaining keyword arguments are forwarded unchanged to
     ``population.Population``.
     """
     self.generation = 1
     self.n_in = n_in
     self.n_out = n_out
     prototype = genome_mod.Genome(self.n_in, self.n_out)
     self.population = population.Population(
         [deepcopy(prototype) for _ in range(pop_size)],
         delta_t=delta_t,
         c1=c1,
         c2=c2,
         c3=c3,
         desired_species=desired_species,
         min_species=min_species,
         p_weight_mut=p_weight_mut,
         p_weight_random=p_weight_random,
         weight_mut_sigma=weight_mut_sigma,
         node_mut_rate=node_mut_rate,
         edge_mut_rate=edge_mut_rate,
         p_child_clone=p_child_clone,
         p_mutate=p_mutate,
         p_inter_species=p_inter_species,
         weight_amplitude=weight_amplitude)
     self.population = self.population.generate_offspring()
     if folder:
         if not os.path.exists(folder):
             os.makedirs(folder)
         self.folder = folder + "/"
     else:
         # Without a folder the prefix must stay empty: the previous value
         # "/" made every path built from self.folder point at the
         # filesystem root.
         self.folder = ""
     self.best_genome = None
Exemplo n.º 14
0
Arquivo: run.py Projeto: zyzek/goombas
def main():
    """Create a mutated goomba population, build the world and step it forever."""
    metadesc = OrderedDict([
        ("colors", "0.3 0.8 0.8  0.3 0.8 0.8  0.8 0.3 0.8  0.8 0.3 0.8"),
        ("fuzziness", "1.0"), ("const_bounds", "-5.0 5.0"),
        ("fun_gen_depth", "3"), ("incr_range", "5.0"), ("mult_range", "2.0")
    ])
    muterates = OrderedDict([("mute", "0.1"), ("genome", "0.2"),
                             ("gene_action", "0.5"), ("struct_mod", "0.5"),
                             ("leaf_type", "0.3"), ("genome_rel", "2 1 1 1 2"),
                             ("const_rel", "1 1 1 1"), ("leaf_rel", "1 1 4 4"),
                             ("enum_rel", "1 1 1"), ("struct_rel", "1 1 1")])

    # Two metadata strings that differ only in their "colors" entry.
    mute_part = " ".join(muterates.values())
    meta1 = " ".join(metadesc.values()) + " " + mute_part
    metadesc["colors"] = "1.0 0.0 0.0  1.0 0.0 0.0  1.0 0.0 0.0  1.0 0.0 0.0"
    meta2 = " ".join(metadesc.values()) + " " + mute_part

    # Gene sequence, in order:
    #   increment state per step, random turn
    #   suck up stuff if it's present underneath bot
    #   turn towards food
    #   if bumped, turn away from obstacle
    #   baseline instinct to move forward
    base_genes = (" 12 + 1 $10 | 4 * = 0 % $10 23 * 100 $1 | "
                  " 5 * 100 $2 | "
                  " 4 * 90 $4 | 3 * 90 $3 | 1 * 100 $5 | "
                  " 3 * * 80 $1 $0 | 4 * * 80 $1 - 1 $0 | "
                  " 1 20 ")

    print("Generating Goombas")
    goombas = []
    for _ in range(30):
        crossed = genome.cross_genome_sequences((meta2, base_genes),
                                                (meta1, base_genes))
        goombas.append(genome.Genome(*crossed))
    for goomba in goombas:
        goomba.mutate()
    sequences = [goomba.sequences() for goomba in goombas]

    print("Building World")
    wrld = world.World((100, 100), sequences, meta1, [3, 10], 1000)

    print("Constructing geometry (can take a bit because I'm a retard)")

    # Headless run: the world is stepped indefinitely.
    while True:
        wrld.step()
Exemplo n.º 15
0
def exclude_from_fasta(fasta, exclude_list, just_firstword = "False"):
    """Print every FASTA entry whose name is not in the exclusion list.

    fasta          -- passed to ``genome.Genome``.
    exclude_list   -- either the name of a file with one sequence name per
                      line, or a comma-separated string of names (used when
                      the value cannot be opened as a file).
    just_firstword -- the string "True" compares only the first
                      whitespace-delimited word of each sequence id.
    """
    my_fasta = genome.Genome(fasta)
    try:
        # The context manager closes the handle even if reading fails.
        with open(exclude_list) as handle:
            exlist = handle.read().replace('\r','').split('\n')
    except (IOError, OSError):
        # Not a readable file: treat the argument as comma-separated names.
        exlist = exclude_list.split(',')
    excluded = set(exlist)  # constant-time membership tests in the loop
    for seqid in my_fasta.genome_sequence:
        if just_firstword == "True":
            seqid_fixed = seqid.split()[0]
        else:
            seqid_fixed = seqid
        if seqid_fixed not in excluded:
            print('>' + seqid + '\n' + my_fasta.genome_sequence[seqid])
Exemplo n.º 16
0
 def __init__(self, x, y, gene=0):
     """Create a creature at (x, y) whose genome is built from ``gene``.

     ``aggro`` is read from the genome's ``dna`` mapping; both sprites are
     scaled to a square whose side is twice the radius.
     """
     self.gene = genome.Genome(gene)
     self.aggro = self.gene.dna['aggro']

     self.x_pos, self.y_pos = x, y
     self.rad = 10
     self.speed = 2
     self.energy = 100
     self.vision = 100

     diameter = self.rad * 2
     sprite_size = (diameter, diameter)
     self.c_img = pygame.transform.scale(self.img1, sprite_size)
     self.ac_img = pygame.transform.scale(self.img2, sprite_size)

     self.closest_food = 0
     self.wander_dir = random.randrange(4)
     self.wander_count = 1000
Exemplo n.º 17
0
    def __init__(self, x, y, color, gc, stat):
        """Create a bot, assign it the next id and reset its statistics.

        x, y, color -- forwarded to the parent constructor.
        gc          -- passed to ``gm.Genome`` to build the bot's genome.
        stat        -- stored on the instance as ``self.stat``.

        Increments the module-level ``bot_last_number`` and ``bots_counter``.
        """
        global bot_last_number, bots_counter
        bot_last_number += 1
        bots_counter += 1
        super().__init__(x, y, color)

        self.id = bot_last_number
        self.stat = stat
        self.genome = gm.Genome(gc)
        self.timer = 160

        self.all_consumed_protein = 0  # protein consumed over the whole lifetime
        self.protein_plant = 0
        self.protein_predator = 0
        self.protein_mushroom = 0
        self.moves = 0  # how many moves the bot has made
        self.children = 0  # how many times the bot has divided
        self.death_cycle = None
Exemplo n.º 18
0
    def create_child(self, genome1, genome2):
        """Cross two parent genomes and return the resulting child genome.

        The parent with the strictly higher ``score`` is the "fitter" one
        (on a tie ``genome2`` is treated as fitter).  The fitter parent's
        ``connections`` are walked in order while a second index tracks the
        other parent's connections, pairing genes by their ``innov`` number.

        Returns ``genome_mod.Genome(self.n_in, self.n_out, child)`` where
        ``child`` is the list of selected connection genes.

        NOTE(review): ``unfitter.connections[unfit_index]`` is read on every
        iteration, so an empty connection list on the less fit parent would
        raise IndexError -- confirm callers never pass one.
        """
        if genome1.score > genome2.score:
            fitter, unfitter = genome1, genome2
        else:
            fitter, unfitter = genome2, genome1

        child = []
        fit_index = unfit_index = 0
        # The loop ends once every gene of the fitter parent was consumed.
        while fit_index < len(fitter.connections):
            gene_fit = fitter.connections[fit_index]
            gene_unfit = unfitter.connections[unfit_index]

            if gene_fit.innov == gene_unfit.innov:
                # Matching genes: if either copy is disabled take the fitter
                # parent's copy, otherwise pick randomly, weighted by score.
                if not gene_fit.enabled or not gene_unfit.enabled:
                    child.append(gene_fit)
                else:
                    try:
                        p_fit = fitter.score / (fitter.score + unfitter.score)
                    except ZeroDivisionError:
                        # Scores sum to zero: fall back to an even chance.
                        p_fit = 0.5
                    prob = random.random()
                    if prob < p_fit:
                        child.append(gene_fit)
                    else:
                        child.append(gene_unfit)
                fit_index += 1
                # Advance the second index only while it stays in range.
                if unfit_index + 1 < len(unfitter.connections):
                    unfit_index += 1
            elif gene_fit.innov < gene_unfit.innov:
                # Gene present only in the fitter parent: inherit it.
                child.append(gene_fit)
                fit_index += 1
            else:
                # The other parent's gene has the lower innovation number:
                # skip it, or, once that parent is exhausted, inherit the
                # fitter parent's gene.
                if unfit_index + 1 < len(unfitter.connections):
                    unfit_index += 1
                else:
                    child.append(gene_fit)
                    fit_index += 1
        child_genome = genome_mod.Genome(self.n_in, self.n_out, child)
        return child_genome
Exemplo n.º 19
0
def get_CDS_peptides(genome_sequence,gff,output_location,gene_name_filters = [], gene_length_filter = None, names_from = "CDS"):
    """Write the longest ORF peptide of every CDS to a FASTA file.

    genome_sequence    -- passed to ``genome.Genome``.
    gff                -- GFF3 annotation file passed to ``Genome.read_gff3``.
    output_location    -- path of the FASTA file to write.
    gene_name_filters  -- genes whose ID contains any of these substrings are
                          skipped (the default list is never mutated here).
    gene_length_filter -- when given, genes whose FASTA sequence line is
                          shorter than this many characters are skipped.
    names_from         -- 'CDS' names peptides by CDS ID; 'transcript' and
                          'gene' name them ``<ID>-CDS<n>`` using a counter
                          that restarts for every transcript.
    """
    my_genome = genome.Genome(genome_sequence)
    my_genome.read_gff3(gff)
    # The context manager guarantees the output is flushed and closed; the
    # handle was previously never closed.
    with open(output_location,'w') as out:
        for gene in my_genome.annotations.gene:
            gene_obj = my_genome.annotations.gene[gene]
            keepgene = not any(name_filter in gene_obj.ID
                               for name_filter in gene_name_filters)
            if gene_length_filter is not None:
                seqlen = len(gene_obj.get_fasta().split('\n')[1])
                if seqlen < int(gene_length_filter):
                    keepgene = False
            if not keepgene:
                continue
            for transcript in gene_obj.child_list:
                CDSdict = {}
                transcript_obj = my_genome.annotations.transcript[transcript]
                for CDS in transcript_obj.child_list:
                    CDS_obj = my_genome.annotations.CDS[CDS]
                    CDSdict[CDS_obj.coords] = (CDS_obj.ID,CDS_obj.get_seq().get_orfs(longest = True))
                # Order CDSs by coordinate; reversed for minus-strand
                # transcripts.
                CDSlist = sorted(CDSdict)
                if transcript_obj.strand == "-":
                    CDSlist.reverse()
                counter = 1
                for CDS in CDSlist:
                    if names_from == 'CDS':
                        pep_name = CDSdict[CDS][0]
                    elif names_from == 'transcript':
                        pep_name = transcript_obj.ID + '-CDS' + str(counter)
                        counter += 1
                    elif names_from == 'gene':
                        pep_name = gene_obj.ID + '-CDS' + str(counter)
                        counter += 1
                    else:
                        print("invalid option for 'names_from' argument")
                        break
                    out.write('>' + pep_name + '\n' + CDSdict[CDS][1] + '\n')
Exemplo n.º 20
0
def test_build_nn():
    """Build a network from a clean-rule genome and compare it with the expected layout.

    The random generator is seeded with 2 before the genome is created; the
    expected neurons and link matrix below are the values asserted for that
    seed.
    """
    random.seed(2)
    nn = neuralnetwork.Neuralnetwork()
    gen = genome.Genome()
    gen.make_clean_rules()
    build_nn(nn, gen, verbose=False)

    expected_neurons = [[0], [1], [0, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1],
                        [1, 1, 1], [1], [0]]
    expected_links = numpy.array([[0., 1., 0., 1., 0., 1., 0., 0., 0.],
                                  [0., 0., 0., 1., 0., 1., 0., 0., 0.],
                                  [0., 0., 0., 0., 1., 0., 1., 1., 1.],
                                  [0., 0., 0., 0., 0., 0., 0., 0., 0.],
                                  [1., 1., 0., 1., 0., 1., 0., 0., 0.],
                                  [0., 0., 0., 1., 0., 0., 0., 0., 0.],
                                  [1., 1., 0., 1., 1., 1., 0., 0., 0.],
                                  [1., 1., 0., 1., 1., 1., 1., 0., 0.],
                                  [1., 1., 0., 1., 1., 1., 1., 1., 0.]])

    assert nn.neurons == expected_neurons
    assert numpy.array_equal(nn.links, expected_links)
Exemplo n.º 21
0
def dna2orfs(fasta_location,output_file,from_atg = False,longest = False):
    """Write the ORFs found in a DNA FASTA file to ``output_file``.

    Every sequence is translated in frames 0-2 on both strands and split at
    stop codons ('*').

    fasta_location -- passed to ``genome.Genome``.
    output_file    -- path of the FASTA file to write.
    from_atg       -- when true, each ORF is trimmed to start at its first
                      'M'; ORFs without an 'M' are skipped.
    longest        -- when true, only the longest ORF of each sequence is
                      written (named ``<seq>_longestORF``); otherwise every
                      ORF is written as ``<seq>-pos:<offset>``.

    NOTE(review): the reported offset is that of the stop-to-stop segment,
    is not adjusted when from_atg trims the ORF, and grows upwards from the
    sequence length on the '-' strand -- confirm this is intended.
    """
    dna = genome.Genome(fasta_location)
    # The context manager closes the output even if translation fails.
    with open(output_file, 'w') as out:
        for seq in dna.genome_sequence:
            sequence = dna.genome_sequence[seq]
            longest_record = None
            longest_orf_len = 0
            for frame in [0,1,2]:
                for strand in ['-','+']:
                    translated_seq_list = sequence.translate(frame=frame,strand=strand).split('*')
                    # Running offset, used to trace back ORF positions.
                    if strand == '+':
                        orf_start = frame
                    else:
                        orf_start = len(sequence) - frame
                    for orf in translated_seq_list:
                        this_start = orf_start
                        orf_start += (1 + len(orf)) * 3
                        if from_atg:
                            # Keep everything from the first methionine on.
                            # The old split/join dropped every internal 'M'
                            # and emitted a bare 'M' for ORFs without one.
                            first_met = orf.find('M')
                            if first_met == -1:
                                continue
                            output_orf = orf[first_met:]
                        else:
                            output_orf = orf
                        if longest:
                            if len(output_orf) > longest_orf_len:
                                longest_record = '>'+seq+'_longestORF\n'+output_orf+'\n'
                                longest_orf_len = len(output_orf)
                        else:
                            out.write('>'+seq+'-pos:'+str(this_start)+'\n'+output_orf+'\n')
            # A sequence may yield no non-empty ORF at all; skip it instead
            # of failing on an empty candidate list.
            if longest and longest_record is not None:
                out.write(longest_record)
Exemplo n.º 22
0
 def __init__(self, x, y):
     """Create a creature at (x, y) with stats derived from a fresh genome.

     Each of the five stats (health, damage, speed, vision, aggro) is a base
     value plus that base scaled by ``(20 * dna[i]) / 100``, using the
     genome's ``dna`` entries 0 to 4 in that order.
     """
     self.gene = genome.Genome()
     dna = self.gene.dna

     def boosted(base, locus):
         # Base value plus the gene-dependent bonus.
         return base + base * ((20 * dna[locus]) / 100)

     self.x_pos, self.y_pos = x, y
     self.rad = 10

     self.health = boosted(30, 0)
     self.damage = boosted(10, 1)
     self.speed = boosted(0.5, 2)
     self.vision = boosted(80, 3)
     self.aggro = boosted(20, 4)

     self.satiation = 0
     self.last_starve = 0
     self.hunger = 8000 - 1000 * self.speed - 6 * self.vision

     sprite_size = (self.rad*2, self.rad*2)
     self.c_img = pygame.transform.scale(self.img1, sprite_size)
     self.mc_img = pygame.transform.scale(self.img2, sprite_size)
     self.ac_img = pygame.transform.scale(self.img3, sprite_size)

     self.attack_ticks = 0
     self.last_attack = 0
     self.closest_food = 0
     self.wander_dir = random.randrange(4)
     self.wander_count = 1000
Exemplo n.º 23
0
import genome

# Load the two genome sequences to compare.
g1 = genome.Genome('MN908947_China_01_05_2020.txt')
g2 = genome.Genome('MT483564_California_11_10_2020.txt')

# Report the C and G frequencies of each sequence.
print('China 01 05 2020 C frequency: ', g1.get_c_frequency())
print('China 01 05 2020 G frequency: ', g1.get_g_frequency())

print('California 11 10 2020 C frequency: ', g2.get_c_frequency())
# Label fixed to the sample date of the loaded file (11 10 2020, not 11 05).
print('California 11 10 2020 G frequency: ', g2.get_g_frequency())
Exemplo n.º 24
0
                                   filename=self.folder +
                                   "best_gen{}".format(self.generation))
        #logging.info("Generation {}: Best score {}".format(self.generation, self.best_genome.score))

    def print_gen_info(self):
        """Log the generation number, the number of species and the best score."""
        species_count = len(self.population.all_species)
        summary = "Generation {}: {} species".format(self.generation,
                                                     species_count)
        logger.info(summary)
        best = self.get_best()
        logger.info("    Best score: {}".format(best.score))

    def species_sizes(self):
        """Return whatever the population's own ``species_sizes()`` reports."""
        sizes = self.population.species_sizes()
        return sizes


if __name__ == '__main__':
    # Manual smoke run: build two genomes sharing the same initial
    # connections and add random nodes to each (one to the first, three to
    # the second).
    logging.basicConfig(level=logging.INFO, stream=sys.stderr)
    genotype = genome_mod.Genome(3, 2)
    genotype2 = genome_mod.Genome(3, 2, genotype.connections)
    genotype.add_random_node()
    for _ in range(3):
        genotype2.add_random_node()
Exemplo n.º 25
0
def prep4apollo(genome_sequence, suppress_fasta = "False", output_directory = 'apollo_gffs', exon_fasta = None, full_length_seqs = None,
                               exon_blast_csv = None, exonerate_output = None, starjuncs = None, other_gff = None, other_gff_format = 'gff3',
                               blast_evalue = '0.01', exonerate_percent = '50',output_empty_scaffolds = "False",
                               exonerate_intron_steps = "2000,5000,200000", mapping_threads = "1"):
    """Take evidence inputs and write one GFF file per scaffold to open in Apollo.

    genome_sequence        -- genome FASTA; used for mapping and passed to
                              ``genome.Genome``.
    suppress_fasta         -- "True"/"False" string forwarded (as a Python
                              value) to ``write_apollo_gff``.
    output_directory       -- directory receiving the ``.gff`` files; a
                              ``temp`` sub-directory is created and removed.
    exon_fasta             -- when given, mapped to the genome with tblastn.
    full_length_seqs       -- when given, mapped with exonerate once per
                              value in ``exonerate_intron_steps``.
    exon_blast_csv         -- existing BLAST CSV, concatenated with the
                              tblastn result when both exist.
    exonerate_output       -- existing exonerate output, concatenated with
                              the new exonerate results when both exist.
    starjuncs              -- passed to ``genome.starjunc2gff``; the
                              junction lines are prepended per scaffold.
    other_gff, other_gff_format -- forwarded to ``genome.Genome``.
    blast_evalue, exonerate_percent -- inserted into the command lines.
    output_empty_scaffolds -- "True"/"False" string; when true every sequence
                              id of the genome gets a file, otherwise only
                              those reported by the annotations.
    mapping_threads        -- number of mapping commands run concurrently.

    NOTE(review): every external command is built by string concatenation
    and run with ``shell=True`` (the redirections and the ``*`` glob need a
    shell), so paths containing spaces or shell metacharacters are not safe.
    """
    import ast

    subprocess.call("mkdir -p " + output_directory, shell = True)
    subprocess.call("mkdir -p " + output_directory + "/temp", shell = True)
    mapping_cmds = []
    blast_run = False
    exonerate_run = False
    # The flags arrive as strings; ast.literal_eval parses the literal
    # without executing arbitrary code the way eval() would.
    suppress_fasta = ast.literal_eval(suppress_fasta)
    output_empty_scaffolds = ast.literal_eval(output_empty_scaffolds)
    if exon_fasta is not None:
        subprocess.call(config.makeblastdb + ' -in ' + genome_sequence + ' -out ' + output_directory
                        + '/temp/tempdb -dbtype nucl', shell = True)
        mapping_cmds.append(config.tblastn + ' -query ' + exon_fasta + ' -db ' + output_directory + '/temp/tempdb -evalue '
                        + blast_evalue + " -out " + output_directory + "/exon_tblastn.csv -outfmt 10")
        blast_run = True
    if full_length_seqs is not None:
        exonerate_intron_lengths = exonerate_intron_steps.split(',')
        for intron_length in exonerate_intron_lengths:
            mapping_cmds.append(config.exonerate + ' --model protein2genome --percent ' + exonerate_percent + ' --maxintron '
                            + intron_length + ' ' + full_length_seqs + ' ' + genome_sequence + ' > ' + output_directory
                            + '/exonerate_output_' + intron_length + 'bp_introns.txt')
        exonerate_run = True
    running_cmds = []
    if mapping_cmds:
        if blast_run:
            print("mapping exons with tblastn")
        if exonerate_run and blast_run:
            print("       and")
        if exonerate_run:
            print("mapping full length sequences with exonerate")
    # Launch the mapping commands in batches of ``mapping_threads`` and wait
    # for each batch (and for the final, possibly partial, batch) to finish.
    last_index = len(mapping_cmds) - 1
    for cmd_index, mapping_cmd in enumerate(mapping_cmds):
        running_cmds.append(subprocess.Popen(mapping_cmd,shell = True))
        if (cmd_index + 1) % int(mapping_threads) == 0 or cmd_index == last_index:
            for cmd in running_cmds:
                cmd.wait()
            running_cmds = []
    if blast_run:
        if exon_blast_csv is not None:
            subprocess.call('cat ' + exon_blast_csv + ' ' + output_directory + '/exon_tblastn.csv > ' + output_directory
                            + '/cat_exon_tblastn.csv', shell = True)
            exon_blast_csv = output_directory + '/cat_exon_tblastn.csv'
        else:
            exon_blast_csv = output_directory + '/exon_tblastn.csv'
    if exonerate_run:
        if exonerate_output is not None:
            subprocess.call('cat ' + exonerate_output + ' ' + output_directory + '/exonerate_output* > ' + output_directory
                            + '/cat_exonerate_output.txt', shell = True)
        else:
            subprocess.call('cat ' + output_directory + '/exonerate_output* > ' + output_directory + '/cat_exonerate_output.txt', shell = True)
        exonerate_output = output_directory + '/cat_exonerate_output.txt'
    print("building apollo gffs")
    my_genome = genome.Genome(genome_sequence,other_gff,annotation_format = other_gff_format)
    if exon_blast_csv is not None:
        my_genome.read_blast_csv(exon_blast_csv, find_truncated_locname = True)
    if exonerate_output is not None:
        my_genome.read_exonerate(exonerate_output)
    if output_empty_scaffolds:
        seqids = my_genome.get_seqids()
    else:
        seqids = my_genome.annotations.get_all_seqids()
    # Group the junction lines by the sequence id in their first column.
    starjunc_dic = {}
    if starjuncs is not None:
        starjunc_list = genome.starjunc2gff(starjuncs,output = "list")
        for junc in starjunc_list:
            starjunc_dic.setdefault(junc.split('\t')[0], []).append(junc)
    for seqid in seqids:
        # The context manager closes each file even if writing fails.
        with open(output_directory + '/' + sanitize_pathname(seqid) + '.gff','w') as out:
            if seqid in starjunc_dic:
                out.write('\n'.join(starjunc_dic[seqid]) + '\n')
            out.write(my_genome.write_apollo_gff(seqid, suppress_fasta = suppress_fasta))
    subprocess.call('rm -rf ' + output_directory + '/temp', shell = True)
Exemplo n.º 26
0
def get_seq_from_fasta(genome_sequence, seq_name, truncate_names = "False"):
    """Print the FASTA record of one scaffold of a genome.

    genome_sequence -- passed to ``genome.Genome``.
    seq_name        -- passed to ``Genome.get_scaffold_fasta``.
    truncate_names  -- "True"/"False" string, converted to a Python value
                       and forwarded to ``genome.Genome``.
    """
    import ast

    # ast.literal_eval parses the string literal without executing
    # arbitrary code the way eval() would.
    my_genome = genome.Genome(genome_sequence, truncate_names = ast.literal_eval(truncate_names))
    print(my_genome.get_scaffold_fasta(seq_name))
Exemplo n.º 27
0
def test_genome_haploidy():
    """The first element of a haploid drawn from sequences of 1s and 0s has value 1 or 0."""
    diploid = genome.Genome(seqA=[1, 1, 1], seqB=[0, 0, 0])
    haploid = diploid.haploid()
    first_value = haploid[0].val
    assert first_value == 1 or first_value == 0
Exemplo n.º 28
0
# Input files, all located in the data directory:
#   QTN positions, gene-dropping pedigree, SNP genotypes in gen format.
qtnfile = '/'.join((ddir, 'potato.qtn.pos'))
pedfile = '/'.join((ddir, 'potato.ped'))
genfile = '/'.join((ddir, 'potato.gen.gz'))

# Switch to the working directory.
os.chdir(wdir)

# STEP 1: load the genotypes and generate the SNP positions (snpFile).
# NOTE: the ploidy level must be specified for gen-format input.
gbase = gg.GFounder(vcfFile=genfile, snpFile=seqfile, ploidy=4)

# STEP 2: build the Genome object (chr names, recombination map, etc).
gfeatures = gg.Genome(snpFile=seqfile, ploidy=gbase.ploidy)

# Print some basic info.
gfeatures.print()

# STEP 3: read the QTN file.
qtn = gg.QTNs(h2=[0.5, 0.7], genome=gfeatures, qtnFile=qtnfile)
qtn.get_var(gfeatures, gbase)

# STEP 4: generate the base population.
pop = gg.Population(
    gfeatures,
    pedFile=None,
    generation=None,
    qtns=qtn,
    gfounders=gbase,
)
qtn.print(gfeatures)
Exemplo n.º 29
0
import genome


# Build a genome from the argument 400 and print the result of interpret().
# NOTE(review): the meaning of 400 is not visible here -- presumably a
# genome length; confirm against genome.Genome.
gen = genome.Genome(400)
print(gen.interpret())
Exemplo n.º 30
0
def coords2fasta(fasta_file,seqid,start,stop,truncate_names = "False"):
    """Print the FASTA-format sequence between two coordinates of one entry.

    Coordinates are 1-based and inclusive, as in GFF format.

    fasta_file     -- passed to ``genome.Genome``.
    seqid          -- name of the entry within the FASTA file.
    start, stop    -- coordinates, given as strings.
    truncate_names -- "True" can be used to give only the first word after
                      the ">" as the seqid (assuming it is unique).
    """
    import ast

    print(">" + seqid + ":" + start + "-" + stop)
    # ast.literal_eval parses the string literal without executing
    # arbitrary code the way eval() would.
    fasta = genome.Genome(fasta_file, truncate_names=ast.literal_eval(truncate_names))
    print(fasta.genome_sequence[seqid][int(start) - 1:int(stop)])