Python Gene示例，gene.Gene Python示例

示例#1

0

显示文件

文件： genome.py 项目： jamesrichter/NEAT_breakout

    def addGene(self, node1, node2):
        """
        Connect two nodes with a new, randomly weighted gene.

        The weight is drawn uniformly from [-2, 2).  If the module-level
        ``gene_index`` already holds a gene with the same endpoints, the new
        gene is stored in ``self.genes`` under that existing innovation number
        and the index is left untouched.  Otherwise the next free innovation
        number (``max(gene_index) + 1``, or 0 for an empty index) is used and
        the gene is recorded in both ``self.genes`` and ``gene_index``.

        Args:
           node1: the source node of the connection
           node2: the target node of the connection
        """
        link = Gene()
        link.source_neuron = node1
        link.target_neuron = node2
        link.weight = random.random() * 4 - 2

        # Reuse the innovation number of an identical, already indexed link.
        for innovation, known in gene_index.items():
            same_endpoints = (known.source_neuron == node1
                              and known.target_neuron == node2)
            if same_endpoints:
                self.genes[innovation] = link
                return

        # Brand-new connection: allocate the next innovation number.
        fresh_innovation = max(gene_index) + 1 if gene_index else 0
        self.genes[fresh_innovation] = link
        gene_index[fresh_innovation] = link

示例#2

0

显示文件

文件： genome.py 项目： jamesrichter/NEAT_breakout

 def mutateAllWeights(self):
     """Nudge the weight of every gene by a small random amount.

     Each entry of ``self.genes`` is replaced by a fresh ``Gene`` populated
     through ``Gene.copy`` and shifted by a value drawn uniformly from
     [-0.1, 0.1), the same range ``mutateOneWeight`` uses.
     """
     for innov in self.genes:
         # One new Gene per innovation number: reusing a single instance
         # would leave every entry of self.genes bound to the same object.
         g = Gene()
         g.copy(self.genes[innov])
         # Centered perturbation; the previous "- 1" offset pushed every
         # weight down by 0.8..1.0 on each call.
         g.weight += random.random() * 0.2 - 0.1
         self.genes[innov] = g

示例#3

0

显示文件

 def init_genes(self, keys):
     """Replace every cell of ``self.maze`` with a ``Gene``.

     Cells holding anything other than ``"_"`` are wrapped as
     ``Gene(value, True)``; ``"_"`` cells get a ``Gene`` built from a
     randomly chosen element of ``keys``.

     Args:
         keys: non-empty indexable sequence of candidate values for the
             ``"_"`` cells.
     """
     for row in self.maze:
         # Use each row's own length so non-square mazes are handled too
         # (the row count was previously used for both dimensions).
         for y in range(len(row)):
             if row[y] != "_":
                 row[y] = Gene(row[y], True)
             else:
                 row[y] = Gene(keys[randint(0, len(keys) - 1)])

示例#4

0

显示文件

文件： chromosome.py 项目： MayankKharbanda/MSc

 def __init__(self, timetables):
     """Build one permuted ``Gene`` per timetable.

     Args:
         timetables: iterable whose items are each passed to ``Gene``.
     """

     def _permuted(source):
         # Construct the gene and permute it before it joins the list.
         candidate = Gene(source)
         candidate.permutate()
         return candidate

     self.fitness = 1
     self.genes = []
     self.genes.extend(_permuted(entry) for entry in timetables)

示例#5

0

显示文件

文件： algorithm.py 项目： lukeplaisance/Genetic_Algorithm

 def gen_population(self, size):
     """Create and return the initial list of genes.

     Builds ``size - 1`` genes, each created as ``Gene("")`` and initialised
     with ``gen_gene(size)``.

     NOTE(review): producing one individual fewer than ``size`` looks like
     an off-by-one; the count is kept as-is here -- confirm with callers.
     """
     members = []
     for _ in range(size - 1):
         individual = Gene("")
         individual.gen_gene(size)
         members.append(individual)
     return members

示例#6

0

显示文件

文件： geneSet.py 项目： dyermd/legos

    def addGene(self, geneName, transcript, chromosome, strand,
                startPosition, endPosition, exonStarts, exonEnds):
        """Create a gene with one transcript and its exons, stored by name.

        The new ``Gene`` is registered in the private gene table under
        ``geneName``.
        """
        entry = Gene(geneName, chromosome, strand, startPosition, endPosition)
        entry.addTranscriptID(transcript)
        entry.addExonsToGene(exonStarts, exonEnds)

        # Register the finished gene under its name.
        self.__genes[geneName] = entry

示例#7

0

显示文件

文件： chromosome.py 项目： samyakahuja/class

    def __init__(self, timetables):
        """Create a chromosome holding one permuted ``Gene`` per timetable.

        Args:
            timetables: iterable whose items are each passed to ``Gene``.
        """
        self.fitness = 1
        self.genes = []

        for timetable in timetables:
            permuted = Gene(timetable)
            permuted.perm()
            self.genes.append(permuted)

示例#8

0

显示文件

文件： genome.py 项目： jamesrichter/NEAT_breakout

 def mutateOneWeight(self):
     """Pick one gene at random and nudge its weight.

     The chosen entry of ``self.genes`` is replaced by a fresh ``Gene``
     populated through ``Gene.copy`` and shifted by a value drawn uniformly
     from [-0.1, 0.1).  Does nothing when there are no genes.
     """
     if not self.genes:
         return
     # random.sample() rejects dicts on Python 3, so draw the innovation
     # number from an explicit list of the keys instead.
     innov = random.choice(list(self.genes))
     v = random.random()
     g = Gene()
     g.copy(self.genes[innov])
     g.weight += v * 0.2 - 0.1
     self.genes[innov] = g

示例#9

0

显示文件

 def mutation(self, mutate_gene: gene.Gene):
     """Insert a small random sub-gene into ``mutate_gene``.

     The sub-gene wraps a square matrix of random 0/1 values whose side
     length is an exponential draw (scale ``self.mutation_scale``) truncated
     to an int; a side length of 0 means no mutation happens.  The insertion
     coordinates are drawn from ``[0, genesize_x)`` and ``[0, genesize_y)``
     and handed to ``insert_subgene``.
     """
     side = int(np.random.exponential(self.mutation_scale))
     if side != 0:
         patch = gene.Gene(np.random.randint(0, 2, (side, side)))
         at_x = np.random.randint(0, self.genesize_x)
         at_y = np.random.randint(0, self.genesize_y)
         mutate_gene.insert_subgene(patch, at_x, at_y)

示例#10

0

显示文件

文件： genome.py 项目： LaoKpa/convNEAT

 def init_genome(self):
     """Return the starting genome as a two-element list ``[nodes, genes]``.

     The nodes are an input, a flatten and an output node (ids 0, 1, 2).
     The genes are two ``Gene`` objects (ids 3 and 4) linking 0 -> 1 and
     1 -> 2, each already passed through ``mutate_random()``; their
     ``mutate_to`` tables pair ``[KernelGene, DenseGene]`` with ``[1, 0]``
     and ``[0, 1]`` respectively.
     """
     nodes = [
         Node(0, 0, role='input'),
         Node(1, 1, role='flatten'),
         Node(2, 2, role='output'),
     ]
     candidates = [KernelGene, DenseGene]
     first_link = Gene(3, 0, 1,
                       mutate_to=[candidates, [1, 0]]).mutate_random()
     second_link = Gene(4, 1, 2,
                        mutate_to=[[KernelGene, DenseGene],
                                   [0, 1]]).mutate_random()
     return [nodes, [first_link, second_link]]

示例#11

0

显示文件

    def generate_genes(self, max_genes, max_conds, seed):
        '''
        Randomly generate a variable size chromosome.

        Seeds the ``random`` module with ``seed``, picks ``default_class``
        from 0..9, then appends between 1 and ``max_genes`` freshly generated
        genes to ``self.genes``.  When ``self.one_vs_all`` is not -1, each new
        gene's ``hand_class`` is set to that value.
        '''
        random.seed(seed)
        self.default_class = random.randint(0, 9)
        gene_total = random.randint(1, max_genes)
        for _ in range(gene_total):
            fresh = Gene()
            fresh.generate_conditions(max_conds, random.random())
            if self.one_vs_all != -1:
                fresh.hand_class = self.one_vs_all
            self.genes.append(fresh)

示例#12

0

显示文件

    def insertion(self, max_genes, max_conds, insertion_rate, seed):
        '''
        Mutation function: randomly append new genes to the chromosome.

        After seeding ``random`` with ``seed``, runs ``max_genes`` independent
        trials; a trial appends a freshly generated gene when its uniform draw
        is below ``insertion_rate``.  When ``self.one_vs_all`` is not -1, each
        new gene's ``hand_class`` is set to that value.
        '''
        random.seed(seed)
        for _ in range(max_genes):
            if not random.random() < insertion_rate:
                continue
            extra = Gene()
            extra.generate_conditions(max_conds, random.random())
            if self.one_vs_all != -1:
                extra.hand_class = self.one_vs_all
            self.genes.append(extra)

示例#13

0

显示文件

文件： chromosome.py 项目： atulshanbhag/EvolveImage

    def load(self, s):
        """Restore this Chromosome from a saved state.

        Args:
            s: mapping with the keys "size", "gene_count", "genes" and
                "generation"; every item of "genes" is handed to
                ``Gene.load``.

        Returns:
            The value stored under "generation".
        """
        self._size = s["size"]
        self._gene_count = s["gene_count"]

        self._genes = []
        for saved_gene in s["genes"]:
            restored = Gene(self._size)
            restored.load(saved_gene)
            self._genes.append(restored)

        generation = s["generation"]
        return generation

示例#14

0

显示文件

def cross_over(geneOne, geneTwo, pivot):
    """Single-point crossover of two genes' ``info`` sequences.

    The first child takes positions ``[0, pivot)`` from ``geneOne`` and the
    remainder (up to ``len(geneOne.info)``) from ``geneTwo``; the second child
    is the mirror image.  Every element is converted with ``str`` and the
    pieces are concatenated before being wrapped in a new ``Gene``.

    Returns:
        A two-element list ``[first_child, second_child]``.
    """
    head = range(0, pivot)
    tail = range(pivot, len(geneOne.info))

    first_child = "".join(
        [str(geneOne.info[i]) for i in head]
        + [str(geneTwo.info[i]) for i in tail])
    second_child = "".join(
        [str(geneTwo.info[i]) for i in head]
        + [str(geneOne.info[i]) for i in tail])

    return [Gene(first_child), Gene(second_child)]

示例#15

0

显示文件

文件： genome.py 项目： afhuertass/ne-evolution

    def __init__(self, InputUnits=16, OutputUnits=4, graph=None, N_hidden=1):
        """Create a genome graph seeded with input, output and bias genes.

        Adds ``InputUnits`` input genes (layer index 0), ``OutputUnits``
        output genes (layer index ``N_hidden + 1``) and one bias gene (layer
        index 0) as nodes of ``self.genome``, then calls
        ``add_connection2()`` twice and ``add_node_hidden()`` once before
        assigning a name via ``new_name()``.

        Args:
            InputUnits: number of input genes to create.
            OutputUnits: number of output genes to create.
            graph: object forwarded to every ``Gene`` as ``graph``.
            N_hidden: stored as ``self.HLayers``; also fixes the output
                layer index.
        """
        self.inovationNumber = 0
        self.calculated_fitness = 0
        self.graph = graph
        self.HLayers = N_hidden

        self.genome = nx.DiGraph()
        self.inputGenes = []
        self.outputGenes = []

        # Input genes sit on layer 0.
        for _ in range(InputUnits):
            input_gene = Gene(1.0,
                              input_gene=True,
                              response=1.0,
                              graph=self.graph,
                              indexLayer=0)
            self.inputGenes.append(input_gene)
            self.genome.add_node(input_gene)

        # Output genes sit one layer past the hidden layers.
        for _ in range(OutputUnits):
            output_gene = Gene(0.0,
                               output_gene=True,
                               graph=self.graph,
                               indexLayer=N_hidden + 1)
            self.outputGenes.append(output_gene)
            self.genome.add_node(output_gene)

        # The bias gene is a graph node but is kept out of both gene lists.
        bias = Gene(0.0, bias_gene=True, graph=self.graph, indexLayer=0)
        self.genome.add_node(bias)

        # Initial structure: two connections, then one hidden node.
        for _ in range(2):
            self.add_connection2()
        self.add_node_hidden()

        self.specie = ""
        self.champion = False
        self.name = self.new_name()

示例#16

0

显示文件

def elitism(population):
    """Keep the top half of the population and breed replacements.

    ``population`` is sorted in place in descending order; the second item of
    each of the first ``generationsize // 2`` entries is kept.  Then
    ``generationsize // 4`` random pairs of survivors are crossed, and both
    mutated children of every pair are wrapped in a new ``Gene``.

    Args:
        population: list of indexable entries whose item ``[1]`` is the
            individual (an object with a ``chromosone`` attribute).

    Returns:
        The survivors followed by the new children.
    """
    population.sort(reverse=True)

    # Floor division: range() needs an int and "/" yields a float on
    # Python 3 (the result is unchanged for ints on Python 2).
    survivors = [population[rank][1] for rank in range(generationsize // 2)]

    offspring = []
    for _ in range(generationsize // 4):
        parent1 = random.choice(survivors)
        parent2 = random.choice(survivors)
        children = crossover(parent1.chromosone, parent2.chromosone)
        offspring.append(Gene(mutation(children[0])))
        offspring.append(Gene(mutation(children[1])))

    return survivors + offspring

示例#17

0

显示文件

文件： gene_scored.py 项目： Wan-Yifei/AH_Project

 def __init__(self, sample_ID, sample_ICD, gene_name, Output_geno,
              amplicon_name, Active_score, Drug_action, Low_coverage, Range,
              Gene_KB):
     """Initialise the base ``Gene`` and derive the scored attributes.

     When ``self.ICD_relevant`` (set up by ``Gene.__init__``) is truthy, the
     genotype, phenotype/score allele, potential allele and potential
     phenotype are computed from ``Output_geno``, ``Active_score`` and
     ``Gene_KB``.  Otherwise they are set to ``None``, except
     ``potential_phenotype`` which is set to ``True``.

     ``Low_coverage`` is accepted but not used in this method.
     """
     Gene.__init__(self, sample_ID, sample_ICD, gene_name, Drug_action,
                   amplicon_name, Range)

     if not self.ICD_relevant:
         # Nothing to score for this sample: fill in placeholders.
         self.genotype = None
         self.phenotype = None
         self.score_allele = None
         self.potential_allele = None
         self.potential_phenotype = True
         return

     self.genotype = self.get_geno(Output_geno)
     self.phenotype, self.score_allele = self.get_pheno(Active_score)
     self.potential_allele = self.get_pot_allele(Gene_KB)
     self.potential_phenotype = self.get_pot_pheno(Active_score)

示例#18

0

显示文件

def before_first_request():
    """Load all genes into ``app.config['GENES']`` and set up logging.

    Every file yielded by ``content_file_iterator(options.content)`` is parsed
    with ``Gene.fromfile``; results that are ``None`` are dropped and the rest
    are stored in a dict keyed by ``id``.  Outside debug mode, an e-mail
    handler (ERROR level) and a rotating file handler (INFO level) are
    attached to ``app.logger``.
    """
    options = app.config['OPTIONS']

    genes_by_id = {}
    for content_file in content_file_iterator(options.content):
        loaded = Gene.fromfile(content_file)
        if loaded is not None:
            genes_by_id[loaded.id] = loaded
    app.config['GENES'] = genes_by_id
    print("In __init__ before_first_request, objects={}".format(genes_by_id))

    if app.debug:
        return

    # Configure logging
    from logging.handlers import SMTPHandler, RotatingFileHandler
    from logging import Formatter

    mail_handler = SMTPHandler('127.0.0.1', '*****@*****.**',
                               app.config['ADMINS'], 'ogrdb failed')
    mail_handler.setLevel(logging.ERROR)

    file_handler = RotatingFileHandler(app.config['LOG_PATH'],
                                       maxBytes=1 << 20,
                                       backupCount=5)
    line_format = Formatter('%(asctime)s %(levelname)s: %(message)s '
                            '[in %(pathname)s:%(lineno)d]')
    file_handler.setFormatter(line_format)

    app.logger.setLevel(logging.INFO)
    file_handler.setLevel(logging.INFO)
    for handler in (file_handler, mail_handler):
        app.logger.addHandler(handler)

示例#19

0

显示文件

    def makeNewPopulation(self,
                          list_of_genes=None,
                          population_size=None,
                          num_of_chromo=None,
                          length_of_chromo=None):
        """Fill the population with players, from random or supplied genes.

        Args:
            list_of_genes: genes to wrap in players.  When ``None``,
                ``population_size`` random genes are generated instead.
            population_size: number of players to add.  With
                ``list_of_genes`` given it may be omitted, in which case one
                player is created per supplied gene.
            num_of_chromo: forwarded to ``Gene`` for random genes.
            length_of_chromo: forwarded to ``Gene`` for random genes.
        """
        if list_of_genes is None:
            for _ in range(population_size):
                gene = Gene([], num_of_chromo, length_of_chromo)
                gene.makeRandomGene()
                self.addPlayer(Player(gene))
        else:
            # Default to the whole list instead of failing on range(None).
            if population_size is None:
                population_size = len(list_of_genes)
            for i in range(population_size):
                self.addPlayer(Player(list_of_genes[i]))

示例#20

0

显示文件

文件： environment.py 项目： lansiz/neuron

    def selection_1(self, fit_data, strength_threshhold):
        """Select a mating pool by accuracy, prune weak links and breed.

        Steps:
          1. Divide every entry's 'accuracy' by ``accuracy_a.max()``.
          2. Draw random entries, accepting one whenever its accuracy exceeds
             a fresh uniform number, until the pool holds
             ``mating_pool_size`` entries.
          3. For each pooled entry, AND its gene's ``connections`` with the
             mask ``strength_matrix >= strength_threshhold`` and refresh
             ``connections_number``.
          4. Produce ``P`` children with ``Gene.crossover`` from random pairs
             of pooled genes.

        Args:
            fit_data: list of dicts with the keys 'accuracy',
                'strength_matrix' and 'gene'; the accuracies are modified in
                place.
            strength_threshhold: minimum strength for a connection to be
                kept.

        Returns:
            List of the ``P`` newly bred genes.

        NOTE(review): ``accuracy_a``, ``mating_pool_size`` and ``P`` are not
        defined in this method; they are assumed to be module-level names --
        confirm.
        """
        # normalize accuracy for pool selection
        # (the loop header had been merged into the comment above, which
        # left ``i`` undefined on the next line)
        for i in fit_data:
            i['accuracy'] /= accuracy_a.max()

        # pool selection
        mating_pool = []
        while len(mating_pool) < mating_pool_size:
            choice = random.choice(fit_data)
            r = np.random.rand()
            if choice['accuracy'] > r:
                mating_pool.append(choice)

        # perish connections with weak strength from gene
        for i in mating_pool:
            temp = i['strength_matrix'] >= strength_threshhold
            i['gene'].connections &= temp
            i['gene'].connections_number = i['gene'].connections.sum()

        # mating pool for reproduction
        mating_pool = [i['gene'] for i in mating_pool]

        # Sample two parents from the mating pool and reproduce children.
        gene_pool = []
        for _ in range(P):
            new_gene = Gene.crossover(random.sample(mating_pool, 2))
            gene_pool.append(new_gene)

        # mutation: no mutation step is applied here yet

        return gene_pool

示例#21

0

显示文件

文件： neat.py 项目： azuse/NEAT-python

 def read_genome_from_file(self, filename):
     """Parse a genome file into node and gene objects.

     Layout as read by this method: one skipped line, one line holding the
     fitness and original fitness separated by spaces, one more skipped line,
     then node lines up to a line reading ``gene``, then gene lines until the
     end of the file.

     NOTE(review): the parsed nodes, genes and fitness values are only held
     in locals and nothing is returned; the method looks unfinished --
     confirm what callers expect before relying on it.
     """
     # Local imports as in the original (hoisted out of the loops).
     from node import Node
     from gene import Gene

     # The context manager closes the file even if parsing raises.
     with open(filename, "r") as f:
         f.readline()
         var = f.readline().split(" ")
         fitness = var[0]
         orig_fitness = var[1]
         f.readline()

         nodes = []
         while True:
             line = f.readline()
             # readline() keeps the trailing newline, so compare the
             # stripped text; an empty string means end of file.
             if line == "" or line.strip() == "gene":
                 break
             var = line.split(" ")
             nodes.append(Node().new_node(var[0], var[1]))

         genes = []
         while True:
             line = f.readline()
             if line == "":
                 break
             # The content of the gene line is not used yet.
             genes.append(Gene().add_gene_with_no_trait())

示例#22

0

显示文件

文件： breeder.py 项目： agb91/ExperimentsXGBoost

    def get_son(self, parents):
        """Create a child ``Gene`` by mixing attributes of random parents.

        Each of the eight attributes is inherited from an independently
        chosen parent.

        Args:
            parents: non-empty sequence of parent genes.

        Returns:
            The new ``Gene``.
        """

        def pick():
            # One independent uniform draw per inherited attribute.
            return parents[random.randint(0, (len(parents) - 1))]

        cbt = pick().col_by_tree
        ss = pick().subsample
        mcw = pick().min_child_weight
        md = pick().max_depth
        ne = pick().n_estimators
        lr = pick().learning_rate
        way = pick().way
        # This used to read ``.way`` (copy-paste slip), so the child never
        # inherited a parent's n_neighbors value.
        n_neighbors = pick().n_neighbors

        return Gene(cbt, ss, mcw, md, ne, lr, way, n_neighbors)

示例#23

0

显示文件

def to_chromosome(solution):
    """Create a simple/lighter internal representation of a solution to be
    used by the GA operations.

    Args:
        solution: dict with a "hosts" list (entries carry "host_name" and
            optionally "failure_rate") and a "vnfs" list (entries carry
            "vnf_name" and optionally "place_at" and "failure_rate").
            Missing failure rates default to 0.0.

    Returns:
        A ``Chromosome`` built from one ``Gene`` per host; each gene receives
        the host name, the list of VNFs placed on that host and the host's
        failure rate.

    Raises:
        KeyError: if a VNF is placed on a host that is not listed in
            "hosts".
    """
    genes = {}
    host_failure_rates = {}
    for h in solution["hosts"]:
        genes[h["host_name"]] = []
        host_failure_rates[h["host_name"]] = h.get("failure_rate", 0.0)

    for v in solution["vnfs"]:
        placements = v.get("place_at")
        if placements is None:
            continue
        vfrate = v.get("failure_rate", 0.0)
        # "place_at" is a list, although it normally has just 1 element
        for p in placements:
            genes[p].append({
                "vnf_name": v["vnf_name"],
                "failure_rate": vfrate
            })

    # dict.items() exists on Python 2 and 3; iteritems() is Python 2 only.
    G = [
        Gene(hname, vnfs, host_failure_rate=host_failure_rates[hname])
        for hname, vnfs in genes.items()
    ]
    return Chromosome(G)

示例#24

0

显示文件

def build_site_id_dictionaries():
    """Populate the module-level site/gene lookup tables.

    If ``site_id_dictionaries.pkl`` exists, the six tables are loaded from it
    and rebound as module globals.  Otherwise they are filled from
    ``processed/sites_with_scored_interactions.tsv`` (header row skipped,
    columns 0-5 read) and then written to the pickle file.

    Tables:
        site_id_dictionary_left / site_id_dictionary_right:
            Site -> running site id, and the reverse mapping.
        gene_id_dictionary_left / gene_id_dictionary_right:
            Gene -> running gene id, and the reverse mapping.
        gene_id_site_id_arcs: gene id -> list of site ids.
        site_id_gene_id_arcs: site id -> gene id.

    The process is terminated with ``os._exit(1)`` if a site id is seen
    twice.

    NOTE(review): the TSV branch mutates the tables in place, so they must
    already exist at module level before this is called -- confirm.
    NOTE(review): ``pickle.load`` can execute arbitrary code from the file;
    this is only acceptable while the cache file is trusted.
    """
    global site_id_dictionary_left
    global site_id_dictionary_right
    global gene_id_dictionary_left
    global gene_id_dictionary_right
    global gene_id_site_id_arcs
    global site_id_gene_id_arcs

    if os.path.isfile('site_id_dictionaries.pkl'):
        with open('site_id_dictionaries.pkl', 'rb') as f:
            print('loading site_id_dictionaries.pkl... ', end='', flush=True)
            (site_id_dictionary_left, site_id_dictionary_right,
             gene_id_dictionary_left, gene_id_dictionary_right,
             gene_id_site_id_arcs, site_id_gene_id_arcs) = pickle.load(f)
            print('DONE')
        return

    with open('processed/sites_with_scored_interactions.tsv', 'r') as infile:
        content = csv.reader(infile, delimiter='\t', quoting=csv.QUOTE_NONE)
        next(content)  # skip the header row
        next_gene_index = 0
        for site_id, row in enumerate(content):
            # Columns: gene id, gene symbol, transcript id, UTR start,
            # UTR end, seed match type.
            site = Site(row[0], row[1], row[2], row[3], row[4], row[5])
            site_id_dictionary_left[site] = site_id
            site_id_dictionary_right[site_id] = site

            gene = Gene(row[0], row[1], row[2])
            if gene not in gene_id_dictionary_left:
                gene_id_dictionary_left[gene] = next_gene_index
                gene_id_dictionary_right[next_gene_index] = gene
                next_gene_index += 1

            gene_index = gene_id_dictionary_left[gene]
            gene_id_site_id_arcs.setdefault(gene_index, []).append(site_id)
            if site_id in site_id_gene_id_arcs:
                print(
                    f'error: site_id = {site_id} already in site_id_gene_id_arcs'
                )
                os._exit(1)
            site_id_gene_id_arcs[site_id] = gene_index

    with open('site_id_dictionaries.pkl', 'wb') as f:
        print('saving site_id_dictionaries.pkl... ', end='', flush=True)
        tables = [
            site_id_dictionary_left, site_id_dictionary_right,
            gene_id_dictionary_left, gene_id_dictionary_right,
            gene_id_site_id_arcs, site_id_gene_id_arcs
        ]
        pickle.dump(tables, f)
        print('DONE')

示例#25

0

显示文件

文件： geneDB(cp).py 项目： AsimovsFuture/NCBI_Genetic_Database_Tool

    def openDB(self, fileName=''):
        """Load the gene database into ``self.geneList``, one Gene per line.

        Args:
            fileName: path of the file to read; the empty string (default)
                selects ``self.geneDBname``.  The file is opened in ``'r+'``
                mode, so it must exist and be writable.
        """
        path = self.geneDBname if fileName == '' else fileName

        with open(path, 'r+') as handle:
            self.geneList = []
            for raw_line in handle:
                record = Gene(raw_line)
                self.geneList.append(record)

示例#26

0

显示文件

文件： genome.py 项目： t-wilkinson/N.E.A.T.

 def kill(self):
     """Reset the genome to fresh input->output links and regrow it.

     Clears ``self.Genes``, recreates one ``Gene`` for every
     (input, output) node pair, mutates each of them, then calls
     ``_add_node_()`` and ``_add_link_()``.
     """
     self.Genes.clear()
     for pair in it.product(self.inputs, self.outputs):
         source_node = pair[0]
         self.Genes[source_node].append(Gene(pair))

     # Snapshot the genes first so mutate() never runs while the mapping
     # is being iterated.
     every_gene = [gene for bucket in self.Genes.values() for gene in bucket]
     for gene in every_gene:
         gene.mutate()

     self._add_node_()
     self._add_link_()

示例#27

0

显示文件

文件： tests.py 项目： uycire/goldenworm

 def testFromBounds(self):
     """``Gene.from_bounds`` must yield the reverse complement of 100..400.

     Two labelled features are added to the record: 'bound1' at 300-400 on
     the minus strand and 'bound2' at 100-200 on the plus strand.
     """
     first_bound = SeqFeature(FeatureLocation(300, 400, strand=-1),
                              qualifiers={'label': ['bound1']})
     second_bound = SeqFeature(FeatureLocation(100, 200, strand=1),
                               qualifiers={'label': ['bound2']})
     self.record.features += [first_bound, second_bound]

     gene = Gene.from_bounds(self.record, 'test_gene', 'bound1', 'bound2')

     actual = str(gene.extract(gene.record).seq)
     expected = str(self.record[100:400].reverse_complement().seq)
     self.assertEqual(actual, expected)

示例#28

0

显示文件

文件： gene_panel.py 项目： VillemP/illumina-pipeline

 def add_genes(self, unpacked_json):
     """Store the decoded panel JSON and append one Gene per listed entry.

     Args:
         unpacked_json: decoded JSON whose ``['result']['Genes']`` is the
             list of gene records.
     """
     self.genes_json = unpacked_json
     # A lookup of pre-existing gene objects by 'GeneSymbol' (to save time
     # on name matching) was sketched here but is disabled; a new Gene is
     # always built.
     for record in self.genes_json['result']['Genes']:
         self.genes.append(
             Gene(self, json=record, hgncHandler=self.hgncHandler))

示例#29

0

显示文件

文件： main.py 项目： tylerburnham42/PythonSudokuGASolver

def create_population(population_size, input_board, width, height):
    """Build ``population_size`` genes from one board, sorted ascending.

    Every gene is constructed as ``Gene(input_board, width, height)``; the
    ordering is whatever ``Gene`` defines for comparison.
    """
    population = [
        Gene(input_board, width, height) for _ in range(population_size)
    ]
    population.sort()
    return population

示例#30

0

显示文件

文件： person.py 项目： Temmon/SampleProjects

    def reproduce(self, other):
        """Produce a child with ``other``, or ``None`` when mating with self.

        Genes are paired positionally (``zip`` stops at the shorter gene
        list); each child gene is built from one ``choose()`` result of each
        parent's gene.
        """
        if self == other:
            return None

        childGenes = []
        for mine, theirs in zip(self.genes, other.genes):
            childGenes.append(Gene(mine.choose(), theirs.choose()))

        return Person(self, self.genome, childGenes)

示例#31

0

显示文件

文件： chromosome.py 项目： atulshanbhag/EvolveImage

    def add_gene(self):
        """Possibly add a new Gene to the chromosome.

        A gene is appended (and ``_gene_count`` incremented) when a uniform
        draw is greater than ``Chromosome.ADD_CHANCE``.

        NOTE(review): this adds with probability ``1 - ADD_CHANCE``; given
        the constant's name the comparison may be inverted -- confirm.
        """
        threshold = Chromosome.ADD_CHANCE
        roll = random.random()
        if threshold < roll:
            self._gene_count += 1
            self._genes.append(Gene(self._size))

示例#32

0

显示文件

文件： genome.py 项目： t-wilkinson/N.E.A.T.

    def __init__(self, inputs, outputs):
        """Create a fully connected input->output genome.

        Args:
            inputs: number of input nodes (ids ``0 .. inputs - 1``).
            outputs: number of output nodes (ids continue after the inputs).

        Also resets the class-wide ``Gene.node_counter`` to
        ``inputs + outputs``.
        """
        node_total = inputs + outputs
        self.inputs = [InputNode(node_id) for node_id in range(inputs)]
        self.outputs = [
            OutputNode(node_id) for node_id in range(inputs, node_total)
        ]
        Gene.node_counter = node_total

        # One gene per (input, output) pair, grouped by input node.
        self.Genes = defaultdict(list)
        for in_node in self.inputs:
            for out_node in self.outputs:
                self.Genes[in_node].append(Gene((in_node, out_node)))

示例#33

0

显示文件

文件： chromosome.py 项目： MayankKharbanda/class

    def __init__(self, timetables, cRate = 1e0, mRate = 1e-1):
        """Create a chromosome with one ``Gene`` per timetable.

        Args:
            timetables: iterable whose items are each passed to ``Gene``.
            cRate: crossover rate, stored as ``self.crossoverRate``.
            mRate: mutation rate, stored as ``self.mutationRate``.
        """
        self.genes = []
        self.crossoverRate = cRate
        self.mutationRate = mRate
        self.fitness = 0

        for tt in timetables:
            self.genes.append(Gene(tt))

        # Call through the instance: Chromosome.getFitness() without an
        # instance fails when getFitness is a regular method, while
        # self.getFitness() works for regular, class and static methods.
        self.fitness = self.getFitness()

示例#34

0

显示文件

 def crossover(self, creature1: Creature, creature2: Creature) -> Creature:
     """Breed two creatures into a child placed on a free map cell.

     A half chromosome is taken from each parent and paired position by
     position; every pair is copied into new ``Gene`` objects and appended
     to the child's chromosome.  The child's position is re-drawn until the
     chosen cell of ``self.map_`` is not ``c.OCCUPIED``.
     """
     # Both parents are expected to carry exactly the same set of traits.
     half_a = creature1.getHalfChromosomeAsList()  # random half chromosome
     half_b = creature2.getHalfChromosomeAsList()

     child_chromosome = Chromosome()
     for position, from_a in enumerate(half_a):
         from_b = half_b[position]
         child_chromosome.appendGene(
             Gene(from_a.geneName, from_a.stat, from_a.isSuperior),
             Gene(from_b.geneName, from_b.stat, from_b.isSuperior))

     # Keep drawing coordinates until an unoccupied cell is found.
     while True:
         x = random.randrange(0, c.MAP_WIDTH)
         y = random.randrange(0, c.MAP_HEIGHT)
         if self.map_[x][y] != c.OCCUPIED:
             return Creature(x, y, child_chromosome)

示例#35

0

显示文件

    def create(self, k):
        """Return a list of ``k`` genes, each built from three uniform draws.

        Every ``Gene`` receives three independent values from
        ``random.random()``.
        """
        return [
            Gene(random.random(), random.random(), random.random())
            for _ in range(0, k)
        ]

示例#36

0

显示文件

文件： mate.py 项目： hdl730/atlantis

def mate(gene1,gene2):
    """Uniform crossover of two genes' weights and thresholds.

    For every position the child takes the value of ``gene1`` or ``gene2``
    according to a fresh ``randrange(0, 2)`` draw.  The child reuses
    ``gene1.structure`` and has ``set()`` called on it before being returned.
    """

    def _mix(first, second):
        # Position by position: a non-zero draw keeps ``first``'s value,
        # zero takes ``second``'s.
        return [
            first[i] if randrange(0, 2) else second[i]
            for i in range(0, len(first))
        ]

    mixed_weights = _mix(gene1.weights, gene2.weights)
    mixed_thresholds = _mix(gene1.thresholds, gene2.thresholds)

    child = Gene(structure=gene1.structure,
                 weights=mixed_weights,
                 thresholds=mixed_thresholds)
    child.set()
    return child

示例#37

0

显示文件

文件： genetic_tools.py 项目： PotentialParadox/AM1Reparm2

def breed(job_data, elites, peasants):
    """Build the next generation of genes and write their input files.

    Elites are carried over as new ``Gene`` objects created from their
    existing file names.  The remaining ``job_data.population -
    job_data.elites`` slots are filled with children whose ``p_floats`` come
    from ``mate`` applied to two different parents picked by
    ``select_parent``.  Every new gene is then renamed to
    ``<job_data.file_name>AM1_0P<i>``, its ``.com`` input is written with
    ``build_input``, and ``job_data.genes[i][0]`` is replaced by a ``Gene``
    created from that name.

    Args:
        job_data: job description providing ``genes``, ``population``,
            ``elites`` (a count) and ``file_name``.
        elites: selected top entries; item ``[0]`` of each indexes
            ``job_data.genes``.
        peasants: remaining parent entries.
    """
    parents = elites + peasants
    print("Parents")
    print(parents)
    print("************")

    # The elite get to pass their genes directly
    new_genes = [Gene(job_data.genes[e[0]][0].file_name) for e in elites]

    # Now let everyone randomly mate with one another,
    # using the fitness as the chance of mating
    for _ in range(job_data.population - job_data.elites):
        child = Gene(job_data.genes[0][0].file_name)
        r1 = select_parent(parents)
        # Redraw the second parent until it differs from the first.
        r2 = select_parent(parents)
        while r2 == r1:
            r2 = select_parent(parents)
        child.p_floats = mate(job_data, parents[r1][0], parents[r2][0])
        new_genes.append(child)

    # Update the naming
    for i, n in enumerate(new_genes):
        n.file_name = job_data.file_name + "AM1_0P" + str(i)
        build_input(n.file_name + ".com", n.header, n.coordinates,
                    n.params, n.p_floats)

    # Reload every population slot from its freshly named file.
    for i in range(job_data.population):
        job_data.genes[i][0] = Gene(job_data.file_name + "AM1_0P" + str(i))

示例#38

0

显示文件

文件： genome.py 项目： jamesrichter/NEAT_breakout

    def mutateAddNeuron(self):
        """
        Add a neuron at the location of a random gene.

        The chosen gene is replaced by a disabled copy, a new node is picked
        strictly between the gene's source and target nodes, and two new
        genes are added: source -> node with weight 1 and node -> target
        with the original weight.  Both new genes get fresh innovation
        numbers and are recorded in the module-level ``gene_index``.

        Nothing happens when there are no genes, when no integer lies
        strictly between the two endpoints, or when the picked node already
        exists in ``self.nodes``.
        """
        if not self.genes:
            return
        # random.sample() rejects dicts on Python 3, so draw the innovation
        # number from an explicit list of the keys instead.
        innov = random.choice(list(self.genes))
        gene = self.genes[innov]

        node1 = gene.source_neuron
        node2 = gene.target_neuron
        # No integer lies strictly between the endpoints: nothing to insert.
        if node1 + 1 > node2 - 1:
            return

        # create a new node
        node = random.randint(node1 + 1, node2 - 1)

        # make sure our node isn't the same as any existing nodes
        if node in self.nodes:
            return
        # should be a hidden node
        assert node >= 1 and node <= MAX_LAYER

        # disabled copy that replaces the original gene
        g = Gene()
        g.copy(gene)
        g.enabled = False

        # morph the gene into two genes and add the node
        g1 = Gene()
        g1.source_neuron = node1
        g1.target_neuron = node
        g1.weight = 1
        g2 = Gene()
        g2.source_neuron = node
        g2.target_neuron = node2
        g2.weight = gene.weight
        self.nodes.add(node)

        # Two consecutive fresh innovation numbers, computed once so that the
        # genome and the global index provably use the same keys.
        next_innov = max(gene_index) + 1

        self.genes[innov] = g
        self.genes[next_innov] = g1
        self.genes[next_innov + 1] = g2

        # add the new genes to the index
        gene_index[next_innov] = g1
        gene_index[next_innov + 1] = g2

示例#39

0

显示文件

文件： join_with_gtf.py 项目： FAANG/faang-methylation

	def read_reference(self,file_ref):
		"""Read a GTF reference file and build per-chromosome interval trees.

		The file is parsed line by line into Gene objects keyed by gene_id;
		"gene" lines define the gene location and strand, every other feature
		line is attached to its gene with add_feature(). Interval trees are
		then filled with promoter, utr5, tss, exon, intron, tts, utr3 and any
		other recorded feature of each gene.

		Args:
			file_ref: path of the GTF reference file.

		Returns:
			Tuple (features, gene_boundaries): two dicts keyed by chromosome
			name (lower-cased, "chr" removed), each holding an IntervalTree.

		Side effects: updates self.features_found and self.feature_priorities,
		prints progress and may write warnings to stderr. Exits the process on
		an unexpected strand value.

		NOTE(review): Python 2 code (print statements); indentation mixes tabs
		and spaces and relies on 8-column tab stops.
		NOTE(review): the file handle is closed explicitly after the loop, so
		it stays open if parsing raises.
		"""
	
		print "Read reference ..."


		#get file size
		file_size=os.stat(file_ref).st_size

		#Parse GTF file
	        in_reference = open(file_ref)
	
		genes={}
		no_line = 0
		# running count of characters read, used for progress reporting
		current_position=0
	        for gtf_line in in_reference:
			no_line = no_line + 1
			current_position+=len(gtf_line)

			#if (no_line< 1e5 and no_line % 1000 == 0) or (no_line<1e6 and no_line % 1e4 ==0) or (no_line>1e6 and no_line % 1e5 ==0) :
			if no_line % 1e5 ==0 :
				self.log_already_completed("{0} lines read from reference".format(no_line),file_size,current_position)
	
			# skip comment/header lines
			if re.match("^#.*$",gtf_line):
				continue

	                gtf_line = gtf_line.rstrip('\r\n')
	
	                elmts = gtf_line.split('\t')
			# normalise the chromosome name: lower case, without "chr"
			gene_chr=elmts[0]
			gene_chr=gene_chr.lower().replace("chr","")
			start=int(elmts[3])
			end=int(elmts[4])

			# debug mode: stop reading at the first line that is not on chromosome 1
			if Join_with_gtf.debug and gene_chr != '1' :
				break

			feature=elmts[2]

			annot=elmts[8]
			# the attribute column is expected to start with gene_id "..."
			me=re.match('^gene_id "([^"]+)".*$',annot)
			if me :
				gene_id=me.group(1)
			else :
				#Feature not related to a gene_id
				# NOTE(review): all such features end up attached to a single Gene keyed by ""
				gene_id=""
				#sys.exit("Unable to find gene_id value  on line #{0} of file '{1}'. Exiting".format(no_line,file_ref))


			if feature == "gene":
				gene_start=start
				gene_end=end
				strand=elmts[6]

				# convert the strand symbol to -1 / 1
				if strand == "-" : strand=-1
				elif strand == '+' : strand=1
				else: sys.exit("Unexpected strand value on line #{0} of file '{1}' : '{2}'. Exiting".format(no_line,file_ref,strand))

				if gene_id not in genes :
					gene=Gene(gene_id,gene_chr,gene_start,gene_end,strand)
					genes[gene_id]=gene
				else :
					# the gene was first created from one of its sub-features: complete it
					gene=genes[gene_id]
					gene.set_location(gene_chr,gene_start,gene_end)
					gene.set_strand(strand)
				#gene start and end are defined in this line, therefore we can compute :
				#tss, promoter and tts
				self.features_found["promoter"]=1
				self.features_found["tss"]=1
				self.features_found["tts"]=1
				self.features_found["gene"]=1
				gene.gene_model_has_been_defined()

			#elif feature not in("CDS","UTR","transcript") :
			else :
				if gene_id not in genes :
					gene=Gene(gene_id,gene_chr)
					genes[gene_id]=gene
				else :
					gene=genes[gene_id]
				# record which derived feature types become available
				if feature == "start_codon" :
					self.features_found["utr5"]=1
				elif feature == "stop_codon" :
					self.features_found["utr3"]=1
				elif feature == "exon" :
					self.features_found["exon"]=1
					self.features_found["intron"]=1
				else :
					self.features_found[feature.lower()]=1
				gene.add_feature(feature,start,end)

	        in_reference.close()
		print "\n\t{0} lines read from reference in total.".format(no_line)

		#Check that all features listed in configuration file has been found at least once
		for feature in self.features_found :
			if self.features_found[feature.lower()] == 0 :
				sys.stderr.write(("Warning : feature named '{0}' found in 'feature_priorities' parameter. "+
						  "This feature has never been found in reference file '{1}'.\n").format(
							feature, file_ref
				))

		#Complete feature_properties with the one found in gtf files but not requested by user
		#Otherwise when we will try to order feature overlapping with a given region
		#sorted(overlaps, key=lambda ovlp: self.feature_priorities[ ovlp.value[0] ])
		#It will raise an exception.
		for feature in self.features_found :
			if feature.lower() not in self.feature_priorities :
				self.feature_priorities[feature.lower()]=None

		#define downstream/upstream boundaries
		promoter_downstream= self.theme.get_parameter("promoter_downstream")
		promoter_upstream= self.theme.get_parameter("promoter_upstream")
		tss_downstream= self.theme.get_parameter("tss_downstream")
		tss_upstream= self.theme.get_parameter("tss_upstream")
		tts_downstream= self.theme.get_parameter("tts_downstream")
		tts_upstream= self.theme.get_parameter("tts_upstream")

		#print "promoter_upstream={0}".format(promoter_upstream)
		#print "promoter_downstream={0}".format(promoter_downstream)
		#print "tss_upstream={0}".format(tss_upstream)
		#print "tss_downstream={0}".format(tss_downstream)
		#print "tts_upstream={0}".format(tts_upstream)
		#print "tts_downstream={0}".format(tts_downstream)

		#Initialize dictionaries
		features={}
		gene_boundaries={}

		#Build gene model
		print "Build gene model ..."
		no_gene=0
		for gene_id in genes :

			gene=genes[gene_id]
			(gene_chr,gene_start,gene_end)=gene.get_coordinates()

			no_gene+=1

			if no_gene % 1000 == 0 :
				self.log_already_completed("{0} genes treated".format(no_gene),len(genes),no_gene)

			# both trees are created together the first time a chromosome is seen
			if gene_chr not in features :
					features[gene_chr]=IntervalTree()
					gene_boundaries[gene_chr]=IntervalTree()

			if gene.gene_model_is_defined() :
				# NOTE(review): redundant with the creation just above
				if gene_chr not in gene_boundaries :
					gene_boundaries[gene_chr]=IntervalTree()

				gene_boundaries[gene_chr].insert_interval( Interval(gene_start,gene_end, value=["gene",gene_id] ) )

				#Promoter
				# upstream/downstream arguments are swapped for genes not on the + strand
				if gene.strand == 1 :
					(start,end)=gene.get_promoter(promoter_upstream,promoter_downstream)
				else :
					(start,end)=gene.get_promoter(promoter_downstream,promoter_upstream)
				features[gene_chr].insert_interval( Interval(start,end, value=["promoter",gene_id] ) )
	
				#5' UTR
				(start,end)=gene.get_utr5()
				if start is not None:
					features[gene_chr].insert_interval( Interval(start,end, value=["utr5",gene_id] ) )
	
				#TSS
				if gene.strand == 1 :
					(start,end)=gene.get_tss(tss_upstream,tss_downstream)
				else :
					(start,end)=gene.get_tss(tss_downstream,tss_upstream)
				features[gene_chr].insert_interval( Interval(start,end, value=["tss",gene_id] ) )
	
				#Intron / Exon
				(intron_coords,exon_coords)=gene.get_introns_exons()

				#Debug
				#if gene.gene_id == "ENSBTAG00000000010" :
				#	print "gene_id '{0} / intron={1} / exon={2}".format(gene.gene_id,intron_coords,exon_coords)

				for exon_coord in exon_coords :
					(start,end)=exon_coord
					features[gene_chr].insert_interval( Interval(start,end, value=["exon",gene_id] ) )
	
				for intron_coord in intron_coords :
					(start,end)=intron_coord
					features[gene_chr].insert_interval( Interval(start,end, value=["intron",gene_id] ) )
	
				#TTS
				if gene.strand == 1 :
					(start,end)=gene.get_tts(tts_upstream,tts_downstream)
				else :
					(start,end)=gene.get_tts(tts_downstream,tts_upstream)
				features[gene_chr].insert_interval( Interval(start,end, value=["tts",gene_id] ) )
	
				#3' UTR
				(start,end)=gene.get_utr3()
				if start is not None:
					features[gene_chr].insert_interval( Interval(start,end, value=["utr3",gene_id] ) )
			
			#Other features
			# added for every gene, whether or not its gene model is defined
			for feature in gene.get_other_features() :
				(start,end,feature)=feature
				features[gene_chr].insert_interval( Interval(start,end, value=[feature,gene_id] ) )

		print "\n\t{0} genes treated in total.".format(no_gene)
	        return (features,gene_boundaries)