Пример #1
0
def create_icd10_lookup():
    """Precomputes the mental and nervous system disorder columns of the ICD10 dataset for faster subject comparison.
    See Chapters V and VI http://biobank.ndph.ox.ac.uk/showcase/field.cgi?id=41270

    The resulting table is also pickled to ``ICD10_LOOKUP``.

    :return: dataframe row-indexed by subject ID (sorted), with one boolean column per disorder code.
        A cell is True when at least one of the subject's ICD10 entries starts with that code.
    """

    icd10 = pd.read_csv(data_icd10, sep=',')
    icd10.index = ['UKB' + str(eid) for eid in icd10['eid']]

    # Keep only the subjects of interest and the ICD10 columns.
    subject_ids = np.load(SUBJECT_IDS, allow_pickle=True)
    biobank_uids = Phenotype.get_biobank_codes(Phenotype.ICD10)
    icd10 = icd10.loc[subject_ids, biobank_uids]

    disorder_codes = np.concatenate((Phenotype.get_icd10_mental_disorder_codes(),
                                     Phenotype.get_icd10_nervous_system_disorder_codes()))

    # Stringify the whole table once instead of once per (subject, code) pair;
    # each row of ``entries`` holds one subject's ICD10 entries as text.
    entries = icd10.to_numpy().astype('str')

    icd10_lookup = pd.DataFrame(index=icd10.index)

    # Determine if the patient has the occurrence of a particular disease:
    # a prefix match on any entry of the subject's row counts as an occurrence.
    for code in disorder_codes:
        icd10_lookup[code] = np.char.startswith(entries, code).any(axis=1)

    icd10_lookup = icd10_lookup.sort_index()
    icd10_lookup.to_pickle(ICD10_LOOKUP)
    return icd10_lookup
Пример #2
0
class Genotype:
    """Genetic encoding of a level: a flat chromosome array and the phenotype tied to it."""

    def __init__(self):
        # Empty integer chromosome array; randomize() replaces it.
        self.chromosomes = np.array([], dtype=int)

    def randomize(self, chromosomeSize, trajectory):
        """Build a fresh level from ``trajectory`` and derive the chromosomes from it.

        :param chromosomeSize: accepted for interface compatibility; not used by this method.
        :param trajectory: object providing ``level_width`` and ``level_height``; it is passed to
            ``Level.generate_from_trajectory`` and kept on the instance.
        """
        width = trajectory.level_width
        height = trajectory.level_height
        self.level = Level(width, height)
        # Second argument is a uniform random draw from [0, 1].
        draw = random.uniform(0, 1)
        self.level.generate_from_trajectory(trajectory, draw)
        self.phenotype = Phenotype(self.level)
        # The chromosomes are the level's cells flattened to one dimension.
        self.chromosomes = self.level.cells.flatten()
        self.trajectory = trajectory

    def getPhenotype(self):
        """Rebuild the phenotype's level from the current chromosomes and return the phenotype."""
        phenotype = self.phenotype
        trajectory = self.trajectory
        phenotype.levelFromChromosomes(
            self.chromosomes,
            trajectory,
            trajectory.level_width,
            trajectory.level_height,
        )
        return phenotype
Пример #3
0
    def randomize(self, chromosomeSize, trajectory):
        """Build a fresh level from ``trajectory`` and derive the chromosomes from it.

        :param chromosomeSize: accepted for interface compatibility; not used by this method.
        :param trajectory: object providing ``level_width`` and ``level_height``; it is passed to
            ``Level.generate_from_trajectory`` and kept on the instance.
        """
        width = trajectory.level_width
        height = trajectory.level_height
        self.level = Level(width, height)
        # Second argument is a uniform random draw from [0, 1].
        draw = random.uniform(0, 1)
        self.level.generate_from_trajectory(trajectory, draw)
        self.phenotype = Phenotype(self.level)
        # The chromosomes are the level's cells flattened to one dimension.
        self.chromosomes = self.level.cells.flatten()
        self.trajectory = trajectory
        
        """
 def __init__(self, nb_entrees, nb_sorties, idInd):
     """Create an individual with a new genome and phenotype.

     :param nb_entrees: stored as ``nb_e`` and forwarded to ``Genome`` and ``Phenotype``.
     :param nb_sorties: stored as ``nb_s`` and forwarded to ``Genome`` and ``Phenotype``.
     :param idInd: identifier stored as ``id``.
     """
     self.nb_e, self.nb_s = nb_entrees, nb_sorties
     self.id = idInd
     self.espece = None
     self.genome = Genome(self.nb_e, self.nb_s)
     self.phenotype = Phenotype(self.nb_e, self.nb_s)
     # This table will act as the interface between the genome and the individual.
     self.idToPos = dict()
     self.fitness = self.sharedFitness = None
Пример #5
0
def evaluate_test_set_performance(model_dir):
    """Measures the test set performance of the model under the specified model directory.

    :param model_dir: directory containing ``config.yaml`` (the model configuration, including the
        population graph parameterisation) and one ``fold-<i>_state_dict.pt`` state dictionary per fold.
    :return: NOTE(review): currently the test-set mean squared error of the FIRST fold only, because
        the loop replaces the per-fold dictionary with the MSE and stops after fold 0 (an empty dict is
        returned when there are no folds). The per-fold dictionary looks like the intended result, but
        the return value is kept as-is for existing callers -- confirm before changing it.
    """

    # NOTE(review): yaml.full_load is not meant for untrusted input; only point this at trusted
    # model directories.
    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    # The similarity entry is a "(feature_names, threshold)" literal; parse it once.
    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    results = {}

    for i, fold in enumerate(folds):
        brain_gnn_train.set_training_masks(graph, *fold)
        graph_transform.graph_feature_transform(graph)

        if ConvTypes(conv_type) == ConvTypes.GCN:
            model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        else:
            model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)

        state_dict_path = os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(i))
        # map_location lets a checkpoint saved on another device load onto the current one.
        model.load_state_dict(torch.load(state_dict_path, map_location=device))
        model = model.to(device)
        model.eval()

        data = graph.to(device)
        # Inference only: no gradients needed. The output gets its own name so ``model``
        # keeps referring to the network.
        with torch.no_grad():
            output = model(data)

        predicted = output[data.test_mask].cpu().detach().numpy()
        actual = graph.y[data.test_mask].cpu().detach().numpy()

        r2 = r2_score(actual, predicted)
        r = pearsonr(actual.flatten(), predicted.flatten())
        # float() accepts both NumPy scalars and plain Python floats.
        results['fold_{}'.format(i)] = {'r': [float(x) for x in r], 'r2': float(r2)}
        mse = mean_squared_error(actual, predicted)
        # NOTE(review): the two lines below discard the per-fold dictionary and stop after the
        # first fold; preserved to keep the current return value (see docstring).
        results = mse
        break

    return results
Пример #6
0
def retrieveData():
    '''
        Retrieves one report per phenotype from the API and records it in the module-level
        ``phenotypeDict``: each ``Phenotype`` is built from the report's display name, summary text
        and summary score, and stored as ``phenotypeDict[p._phenotype] = p._score``.

        Returns nothing; the result is the updated ``phenotypeDict``.
    '''
    token = 'GENOMELINKTEST'
    headers = {'Authorization': 'Bearer {}'.format(token)}

    phenotypes = [
        'carbohydrate-intake', 'protein-intake', 'vitamin-a', 'vitamin-b12',
        'vitamin-d', 'vitamin-e', 'calcium', 'magnesium', 'iron',
        'endurance-performance'
    ]
    population = 'european'

    for phenotype in phenotypes:
        report_url = 'https://genomicexplorer.io/v1/reports/{}?population={}'.format(
            phenotype, population)
        # Without a timeout a stalled server would block this loop indefinitely.
        response = requests.get(report_url, headers=headers, timeout=30)
        # response.json() already yields plain dicts/lists, so no re-serialisation is needed.
        data_dict = response.json()

        p = Phenotype(data_dict["phenotype"]["display_name"],
                      data_dict["summary"]["text"],
                      data_dict["summary"]["score"])
        phenotypeDict[p._phenotype] = p._score
Пример #7
0
    def __init__(self):
        """Set up a new environment, make it the class-wide default and populate it.

        In location mode a square mesh of empty dicts is built and the terrain files are loaded
        first. The instance is stored in ``default`` on its class before the plants are created.
        """
        # ----- location part: spatial mesh and terrain map
        if parameters.location_mode:
            self.mesh_size = 1./parameters.LD0range/2
            side = int(self.mesh_size) + 1
            self.mesh = [[{} for _col in xrange(side)] for _row in xrange(side)]
            map_path = parameters.map_phenotype_image(parameters.maps)
            self.load_terrain(map_path + ".info.tmp", map_path + ".tmp")

        # Imported here rather than at module level (presumably to avoid a circular import).
        from plant import Plant
        from phenotype import Phenotype

        self.plants = {}
        self.allplantslist = []
        self.generation = 0

        env_class = self.__class__
        env_class.default = self
        env_class.environments += 1

        debug.g("niche %d" % parameters.niche_size)
        for _ in xrange(parameters.niche_size):
            # Plants start on the map in location mode, otherwise at the origin.
            start = parameters.get_start_point(parameters.maps) if parameters.location_mode else (0, 0)
            Plant.new(start)
        debug.g("*** %d" % len(self.plants))

        self.optimal_global_phenotype = Phenotype()
        self.base_phenotype = Phenotype()
        self.survivors = parameters.niche_size
        self.randomkiller = selectors.KillerRandom()
        self.killer, self.reproducer = selectors.getSelectors()
        # Keeps the Phenotype class reachable from methods that do not import it.
        self.phenotype_link = Phenotype
        self.history = History(self)
        self.history.update()
Пример #8
0
 def average_distance(self, mc_samples = None):
     """Return the average phenotype distance between distinct plants.

     :param mc_samples: when None, every ordered pair of plants is evaluated and the sum is divided
         by the number of ordered pairs of distinct plants. Otherwise that many random pairs are
         drawn (with replacement) and the average is taken over the draws that picked two
         different plants.
     :return: the average distance; 0.0 when there are fewer than two plants or when no sampled
         pair was valid.
     """
     # Same distance function in both branches (the sampled branch already used phenotype_link).
     distance = self.phenotype_link.distance

     if mc_samples is None:
         plants = list(self.plants.values())
         numpl = len(plants)
         # With fewer than two plants there is no distinct pair; this also avoids
         # dividing by numpl**2 - numpl == 0 when there is exactly one plant.
         if numpl < 2:
             return 0.
         ret = 0.0
         for plant1 in plants:
             for plant2 in plants:
                 ret += distance(plant1.phenotype, plant2.phenotype)
         return ret / (numpl**2 - numpl)

     # Materialise the keys so the emptiness test and random.choice work on a real sequence.
     spk = list(self.plants.keys())
     if not spk:
         return 0.
     ret = 0.0
     tries_done = 0
     tries_valid = 0
     while tries_done < mc_samples:
         p1 = random.choice(spk)
         p2 = random.choice(spk)
         tries_done += 1
         if p1 != p2:
             tries_valid += 1
             ret += distance(self.plants[p1].phenotype, self.plants[p2].phenotype)
     # No valid pair drawn: ret is still 0.0, so divide by 1 instead of 0.
     return ret / max(tries_valid, 1)
Пример #9
0
 def __crossover__(self, population, ori):
     """Produce two children per parent pair and add them all to ``ori``.

     For each ``(f, m)`` pair the attribute positions are shuffled and split at
     ``int(len(f.__dict__) * self.crossover)``. The first child takes ``f``'s items on the first
     part and ``m``'s items on the rest; the second child takes the complementary choice.

     :param population: iterable of ``(f, m)`` parent pairs.
     :param ori: object receiving the children through ``addPhenotype``.
     :return: ``ori``.
     """
     children = []
     for f, m in population:
         gene_count = len(f.__dict__)
         order = np.arange(int(gene_count))
         np.random.shuffle(order)

         cut = int(gene_count * self.crossover)
         head, tail = order[:cut], order[cut:]

         # NOTE(review): np.array over (name, value) pairs uses one common dtype, so non-string
         # values are presumably turned into strings here -- confirm Phenotype expects that.
         f_items = np.array(list(f.__dict__.items()))
         m_items = np.array(list(m.__dict__.items()))

         son1 = Phenotype(dict(np.concatenate((f_items[head], m_items[tail]))))
         son2 = Phenotype(dict(np.concatenate((f_items[tail], m_items[head]))))
         children += [son1, son2]
     ori.addPhenotype(children)
     return ori
Пример #10
0
    def distance_to_opt(self):
        """Return the distance between this plant's effective phenotype and the optimal one.

        The optimal phenotype comes from the default environment: looked up by location in
        location mode, otherwise the map-free optimum. The effective phenotype is the plant's
        phenotype with the ``mutation_rate`` of every autonomous and non-autonomous transposon
        applied.
        """
        if p.location_mode:
            optimal_ph = Environment.default.optimal_phenotype_on_map(self.location)
        else:
            optimal_ph = Environment.default.optimal_phenotype_without_map()

        plant_ph = self.phenotype

        # TODO sum up TEs effect.
        # Both transposon lists contribute in the same way. Each step builds on the phenotype
        # accumulated so far; previously the non-autonomous loop restarted from self.phenotype
        # and so discarded every effect applied before it.
        for transposons in (self.aut_transposons_list, self.nonaut_transposons_list):
            for t in transposons:
                if p.multidim_changes:
                    plant_ph = Phenotype.add(plant_ph, t.mutation_rate)
                else:
                    # NOTE(review): in this mode plant_ph is still the same object as
                    # self.phenotype, so this writes into the plant's own phenotype on every
                    # call -- confirm that is intended.
                    plant_ph[t.trait_no] += t.mutation_rate

        return Phenotype.distance(plant_ph, optimal_ph)
Пример #11
0
    def __init__(self, no_transp = None, transp_activity = None):
        """Create a plant with a new phenotype and the next free id and order number.

        :param no_transp: number of autonomous transposons; when None the value of
            ``p.starting_transposons()`` is used.
        :param transp_activity: transposase activity; when None it is derived from
            ``p.starting_transposase_activity`` and the number of autonomous transposons.
        """
        # (disabled debug output: "creating a new plant, continued in __init__()")
        self.aut_transposons = no_transp
        if no_transp is None:
            self.aut_transposons = p.starting_transposons()
        self.nonaut_transposons = 0

        ##KG--begin
        ## sexual_mode reproduction:
        ## initialise the lists of transposons in a new plant
        if p.sexual_mode:
            self.sex = 1 if random.random() > 0.5 else 0
            self.nonaut_transposons_list = []
            self.aut_transposons_list = []
            for i in range(self.aut_transposons):
                te = Transposone(True)
                self.aut_transposons_list.append(te)
        else:
            # asexual: all plants have sex=0
            # NOTE(review): the transposon lists are not created in this mode.
            self.sex = 0
        ##KG--end

        self.phenotype = Phenotype.new()
        self.id = self.__class__.counter
        # Registration with Environment.default was moved to new().
        self.environment = Environment.default
        self.dead = False
        self.__class__.counter += 1
        self.transposase_activity = transp_activity
        if transp_activity is None:
            self.transposase_activity = p.starting_transposase_activity(self.aut_transposons)
        self.inactive_transposons = 0
        self.transpositions = 0
        self.total_mutations = 0
        self.random_mutations = 0
        # Creation-order number, tracked separately from the id counter.
        self.ord_counter = self.__class__.order_cnt
        self.__class__.order_cnt += 1
Пример #12
0
def create_similarity_lookup():
    """Precomputes the columns of the phenotype dataset for faster subject comparison.

    The resulting table is also pickled to ``SIMILARITY_LOOKUP``.

    :return dataframe containing the values used for similarity comparison, row-indexed by subject ID and
    column-indexed by phenotype code name (e.g. 'AGE', 'FTE' etc.)
    """

    phenotypes = pd.read_csv(data_phenotype, sep=',')
    phenotypes.index = ['UKB' + str(eid) for eid in phenotypes['eid']]

    biobank_feature_list = []
    for feature in Phenotype:
        biobank_feature_list.extend(Phenotype.get_biobank_codes(feature))

    # Work on an explicit copy: the assignments below would otherwise target a column subset of
    # ``phenotypes`` (chained-assignment warnings, ambiguous write target).
    phenotype_processed = phenotypes[biobank_feature_list].copy()

    for feature in Phenotype:
        biobank_feature = Phenotype.get_biobank_codes(feature)
        if feature == Phenotype.MENTAL_HEALTH:
            mental_to_code = Phenotype.get_mental_to_code()
            # Column names for the summary (total number of conditions) followed by 18 possible
            # conditions: MEN0, MEN1, ..., MEN18.
            mental_feature_codes = [
                Phenotype.MENTAL_HEALTH.value + str(i) for i in range(19)
            ]
            # Replace string descriptions with their codes for consistency (unknown ones -> None).
            first_column = biobank_feature[0]
            phenotype_processed.loc[:, first_column] = phenotype_processed[first_column].apply(
                lambda x: mental_to_code[x] if x in mental_to_code.keys() else None)
            # Determine if the patient has the occurrence of a particular disease.
            # NOTE(review): after astype(bool) the row only holds True/False, so ``i in row`` can
            # only succeed for i == 1; the columns for i >= 2 look like they are always 0 --
            # confirm the intended encoding before relying on them.
            si = phenotype_processed.index.to_series()
            for i in range(1, len(mental_feature_codes)):
                phenotype_processed.loc[:, mental_feature_codes[i]] = si.apply(
                    lambda s: int(
                        i in phenotype_processed.loc[s, biobank_feature].to_numpy().astype(bool)))
            # The summary column is the number of per-condition flags set for the subject.
            phenotype_processed.loc[:, mental_feature_codes[0]] = si.apply(
                lambda s: int(np.sum(phenotype_processed.loc[s, mental_feature_codes[1:]])))

        elif len(biobank_feature) > 1:
            # handle the more/less recent values
            si = phenotype_processed.index.to_series().copy()
            phenotype_processed.loc[:, feature.value] = si.apply(
                lambda s: get_most_recent(biobank_feature, s, phenotype_processed))
        else:
            phenotype_processed.loc[:, feature.value] = phenotype_processed[
                biobank_feature[0]].copy()

    # Filter only the subjects used in the final dataset.
    phenotype_processed = phenotype_processed.loc[precompute_subject_ids()]

    # Return only the final feature columns (indexed by code names).
    phenotype_processed.drop(biobank_feature_list, axis=1, inplace=True)
    phenotype_processed = phenotype_processed.sort_index()

    phenotype_processed.to_pickle(SIMILARITY_LOOKUP)
    return phenotype_processed
Пример #13
0
# Population graph parameters
def _str2bool(value):
    """Parse a command-line flag value into a bool.

    ``type=bool`` would turn every non-empty string -- including "0" and "False" -- into True.
    argparse reports the ValueError raised here as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('', '0', 'false', 'f', 'no', 'n'):
        return False
    raise ValueError('expected a boolean value, got {!r}'.format(value))


parser.add_argument('--functional', default=0, type=_str2bool)
parser.add_argument('--structural', default=1, type=_str2bool)
parser.add_argument('--euler', default=1, type=_str2bool)
parser.add_argument('--similarity',
                    default="(['SEX', 'ICD10', 'FTE', 'NEU'], 0.8)",
                    type=str)

args = parser.parse_args()

functional = args.functional
structural = args.structural
euler = args.euler
# --similarity is a "(feature_names, threshold)" literal; parse it once.
_similarity = ast.literal_eval(args.similarity)
similarity_feature_set = [Phenotype(i) for i in _similarity[0]]
similarity_threshold = _similarity[1]

graph_name = graph_construct.get_graph_name(
    functional=functional,
    structural=structural,
    euler=euler,
    similarity_feature_set=similarity_feature_set,
    similarity_threshold=similarity_threshold)

if graph_name not in GRAPH_NAMES:
    graph_construct.construct_population_graph(
        similarity_feature_set=similarity_feature_set,
        similarity_threshold=similarity_threshold,
        functional=functional,
class Individu():
    """An individual: a genome together with the layered phenotype built from it.

    ``idToPos`` maps a node identifier to its ``(layer, index)`` position in the phenotype.
    """

    def __init__(self, nb_entrees, nb_sorties, idInd):
        """Create an individual with a new genome and phenotype.

        :param nb_entrees: stored as ``nb_e`` and forwarded to ``Genome`` and ``Phenotype``.
        :param nb_sorties: stored as ``nb_s`` and forwarded to ``Genome`` and ``Phenotype``.
        :param idInd: identifier stored as ``id``.
        """
        self.nb_e = nb_entrees
        self.nb_s = nb_sorties
        self.id = idInd
        self.espece = None
        self.genome = Genome(self.nb_e, self.nb_s)
        self.phenotype = Phenotype(self.nb_e, self.nb_s)
        # This table is the interface between the genome and the individual.
        self.idToPos = {}
        self.fitness = None
        self.sharedFitness = None

    def __repr__(self):
        # One field per line (the "Espece" field used to be glued to the previous line).
        return ("Ind " + str(self.id) + ":"
                + "\n Fitness: " + str(self.fitness)
                + "\n Shared Fitness: " + str(self.sharedFitness)
                + "\n Espece: " + str(self.espece))

    def generer(self):
        """Generate the genome and phenotype and copy the genome's weights into the phenotype."""
        # First register the inputs (layer 0) and the outputs (layer 1).
        self.idToPos = {i: (0, i) for i in range(self.nb_e)}
        self.idToPos.update({self.nb_e + j: (1, j) for j in range(self.nb_s)})
        # Put the genome's weight values into the phenotype.
        self.genome.generer()
        self.phenotype.generer()
        for innov in self.genome.connexions:
            c = self.genome.connexions[innov]
            k, l = self.idToPos[c.sortie][1], self.idToPos[c.entree][1]
            self.phenotype.liens[0][1][k, l] = c.poids

    def calculateFitness(self):
        pass

    def output(self):
        """Return the last layer of the phenotype."""
        return self.phenotype.couches[-1]

    def rawFitness(self):
        """Return the fitness, initialising it to 0 when it has not been set yet."""
        if self.fitness is None:
            self.fitness = 0
        return self.fitness

    def add_key(self, nouvid, couche, num):
        """Update the idToPos table by adding a node located in layer ``couche``
        whose number within that layer is ``num``."""
        assert nouvid not in self.idToPos, "Le nouvel identifiant ne doit pas être existent"
        self.idToPos[nouvid] = (couche, num)

    def insertLayer(self, couche):
        """Insert a layer after the layer given as parameter."""
        # Shift every position located above ``couche`` by one layer.
        for i in self.idToPos:
            if self.idToPos[i][0] > couche:
                n, h = self.idToPos[i]
                self.idToPos[i] = (n + 1, h)
        # Add the new layer by inserting new link matrices.
        self.phenotype.insertLayer(couche)

    def posToId(self, pos):
        """Return the identifier of the node at ``pos``, or None when no node is there."""
        for i in self.idToPos:
            if self.idToPos[i] == pos:
                return i
        return None

    def estRecursive(self, con):
        """Return True when the connection's input layer is not below its output layer."""
        ce = self.idToPos[con.entree][0]
        cs = self.idToPos[con.sortie][0]
        return ce >= cs

    def insertNoeudCouche(self, couche, idNouvNoeud):
        """Append a node to layer ``couche`` of the phenotype and register its position."""
        assert idNouvNoeud not in self.idToPos, "Noeud déja existant"
        self.phenotype.insertNode(couche)
        self.idToPos[idNouvNoeud] = (couche,
                                     len(self.phenotype.couches[couche]) - 1)

    def _relierNoeudIntermediaire(self, idN1, idN2, idNouvNoeud, p1, p2, innov):
        """Wire idN1 -> new node (weight p1) and new node -> idN2 (weight p2), and zero idN1 -> idN2."""
        self.phenotype.modifierConnexion(idN1, idNouvNoeud, self.idToPos, p1)
        self.phenotype.modifierConnexion(idNouvNoeud, idN2, self.idToPos, p2)
        self.phenotype.modifierConnexion(idN1, idN2, self.idToPos, 0)

        self.genome.ajouterConnexion(idN1, idNouvNoeud, p1, innov)
        self.genome.ajouterConnexion(idNouvNoeud, idN2, p2, innov + 1)

    def insertNoeud(self, con, p1, p2, innov, idNouvNoeud):
        """Replace an existing connection by two new connections and an intermediate node.

        The new node goes into the middle layer when the two endpoints are at least two layers
        apart; when they are in successive layers a new layer is created between them.

        :param con: the connection to split; it is deactivated.
        :param p1: weight of the connection from the old input node to the new node.
        :param p2: weight of the connection from the new node to the old output node.
        :param innov: innovation number of the first new connection (the second uses ``innov + 1``).
        :param idNouvNoeud: identifier of the new node.
        :raises AssertionError: when the connection does not go to a strictly higher layer.
        """
        idN1 = con.entree
        idN2 = con.sortie
        # Positions in the phenotype of the two nodes linked so far.
        c1 = self.idToPos[idN1][0]
        c2 = self.idToPos[idN2][0]
        ecart = c2 - c1

        # Validate before changing anything, so a rejected split leaves the connection active.
        assert ecart > 0, "Un lien recursif ne peut pas etre coupé"

        # Deactivate the previous connection.
        con.desactiver()

        if ecart >= 2:
            # The two nodes are not in successive layers: put the new node in a layer in the
            # middle of the two.
            m = (c1 + c2) // 2
            p = len(self.phenotype.couches[m])
            # Update the idToPos table, then insert the node into layer m.
            self.add_key(idNouvNoeud, m, p)
            self.phenotype.insertNode(m)
            self._relierNoeudIntermediaire(idN1, idN2, idNouvNoeud, p1, p2, innov)
        elif ecart == 1:
            # Successive layers: add the new layer right above the lower of the two.
            c = min(c1, c2)
            self.insertLayer(c)
            self.insertNoeudCouche(c + 1, idNouvNoeud)
            self._relierNoeudIntermediaire(idN1, idN2, idNouvNoeud, p1, p2, innov)

        self.phenotype.reinit()

    def connexionPossible(self):
        """Pick a random connection that could be added or re-activated.

        :return: an existing inactive connection, a new ``Connexion`` with weight 1, or None when
            the phenotype is complete or no candidate was found within 10 retries.
        """
        if self.phenotype.estComplet():
            return None

        # A concrete list, so the random picker always gets an indexable sequence.
        noeuds = list(self.idToPos)
        # Candidate targets: every node except the inputs.
        noeudsSansEntree = [
            i for i in noeuds if (i not in range(self.nb_e))
        ]
        e = ut.randomPick(noeuds)
        s = ut.randomPick(noeudsSansEntree)
        c = self.genome.entreeSortieToCon(e, s)
        tries = 0
        # Redraw while the drawn pair is already linked by an active connection.
        while tries < 10 and c is not None and c.activation:
            e = ut.randomPick(noeuds)
            s = ut.randomPick(noeudsSansEntree)
            c = self.genome.entreeSortieToCon(e, s)
            tries += 1
        if tries >= 10:
            return None
        if c is not None:
            return c
        return Connexion(e, s, 1)

    def mutationPoids(self):
        """Randomly mutate connection weights and mirror each change in the phenotype."""
        for i in self.genome.connexions:
            c = self.genome.connexions[i]
            if rand.random() < prob.mutation.poids:
                if rand.random() < prob.mutation.poids_radical:
                    # Radical mutation: redraw the weight in [-15, 15).
                    c.poids = 30 * rand.random() - 15
                else:
                    c.poids += 0.5 * rand.random()
                self.phenotype.modifierConnexion(c.entree, c.sortie,
                                                 self.idToPos, c.poids)

    def insertLien(self, c, innov):
        """Apply connection ``c`` to the phenotype; re-activate it if inactive, else add it to the genome."""
        self.phenotype.modifierConnexion(c.entree, c.sortie, self.idToPos,
                                         c.poids)
        if not (c.activation):
            c.activer()
        else:
            self.genome.ajouter(c, innov)

    def draw(self, pos):
        """Draw the phenotype at ``pos``."""
        self.phenotype.draw(pos, self.posToId)
Пример #15
0
    def advance_generation(self):
        """Advance the simulation by one generation.

        Steps, in order: eliminate plants (random pressure if enabled, then the configured
        killer, then neighbour competition in location mode), record history, reproduce,
        evolve every plant, optionally perform horizontal transfers, drift the base phenotype
        once the stability period is over, recompute the optimal global phenotype, renumber the
        plants' ``ord_counter`` and increment ``generation``.

        NOTE: the tuple-parameter lambdas below are Python 2 syntax.
        """
        from phenotype import Phenotype
        from plant import Plant
        # NOTE(review): log and sqrt are only referenced from commented-out code below.
        from math import log, sqrt
        #self.avg_transpositions_in_this_generation = 0

    #if self.generation%10==0:
    #    debug.g("====")
    #    debug.g(self.optimal_global_phenotype.properties)
    #    debug.g(self.base_phenotype.properties)

        if parameters.random_pressure > 0.0:
        #    for plant in self.plants.values():
        #if happened(random_pressure):
        #    plant.die()
            self.randomkiller.eliminate(self.plants.values())
        self.killer.eliminate(self.plants.values())

        #=========LOCATION
        # Neighbour competition: a plant may be killed by plants found in nearby mesh fields.
        if parameters.location_mode:
            r = parameters.LD0range
            r2 = r*r

            def important_fields_in_mesh(location):
                # f: the location itself plus its 8 neighbours at offset r
                f = lambda (x,y): [(x,y),(x-r,y-r),(x,y-r),(x+r,y-r),(x-r,y),(x+r,y),(x-r,y+r),(x,y+r),(x+r,y+r)]
                # g: is a scaled coordinate inside the mesh?
                g = lambda x: -1<x and x<self.mesh_size
                # h: convert a location to mesh coordinates via Plant.scale
                h = lambda (x,y): (Plant.scale(x),Plant.scale(y))
                # i: are both coordinates inside the mesh?
                i = lambda (x,y): g(x) and g(y)
            #debug.g(location)
            #debug.g(r)
                fields = set() 
                for x in filter(i, map(h, f(location))): fields.add(x)
                return fields

            def shuffle(l):
                # Shuffles in place and returns the same list for convenience.
                random.shuffle(l)
                return l
          
            def maybe_neighbour_kill(p1, p2):
                fitness_val = p1.fitness()
            
                # squared distance between the two plants
                d2_fun = lambda ((x1,y1), (x2,y2)): (x2-x1)**2+(y2-y1)**2
                d2 = d2_fun((p1.location, p2.location))
            
            ##range_val = -2*log(d/r)
            #try: range_val = 2*logr-log(d2)
            #except: range_val = 1
                import math
                # Rescale e**d2 to the interval between 1 and e**r2;
                # the result is <= 1 exactly when d2 <= r2.
                f = lambda (y,ymax,ymin): (y-ymin)/(ymax-ymin)
                range_val = f((math.e**(d2), math.e**(r2), 1.))
                if range_val <= 1.:
                    return not distributions.happened(range_val * fitness_val)
                else: return False
            #debug.g("%f, %f" %(range_val, math.sqrt(d2)))

            for plant in shuffle(self.plants.values()):
                if not plant.dead:
                    for (x,y) in important_fields_in_mesh(plant.location):
                        if plant.dead: break
                        for killer_plant in (self.mesh[x][y]).values():
                            # NOTE(review): the random draw is evaluated before the id check, so
                            # it is also made when killer_plant is the plant itself.
                            if maybe_neighbour_kill(plant, killer_plant) and plant.id != killer_plant.id: 
                                plant.die()
                                break
        #=================

        #transpositions_itg = 0
        #for plant in self.plants.values():
        #    transpositions_itg += plant.transpositions
        #self.avg_transpositions_in_this_generation = float(transpositions_itg) / float(len(self.plants.values()))
    
        self.history.update()

        v = self.plants.values()
        self.survivors = len(v)
        self.reproducer.reproduce(self.plants)

        ##KG
#        for plant in self.plants.values():
#            print " >> after reproduction part" + str(plant.aut_transposons) + "==" + str(len(plant.aut_transposons_list))
#            for x in plant.aut_transposons_list : 
#                print "TE #" + str(x.id) + ", parent: " + str(x.parent) + ", aut= " + str(x.is_aut)

        for plant in self.plants.values():            
            plant.evolve()

        self.allplantslist = self.plants.values()

        if parameters.expected_horiz_transfers > 0.0:
            for plant in self.plants.values():
                plant.perform_horizontal_transfers()

        # Environmental drift of the base phenotype starts once the stability period is over.
        if self.generation >= parameters.stability_period:
            if parameters.is_drift_directed:
                # directed drift: shift randomly chosen properties by a fixed amount
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype[distributions.runifint(0, parameters.no_phenotype_properties-1)] += parameters.expected_mutation_shift
            else:
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype.mutate_once(stdev = parameters.expected_mutation_shift)

        # The optimal phenotype is the base phenotype, plus a random fluctuation if enabled.
        self.optimal_global_phenotype = None
        if parameters.fluctuations_magnitude > 0.0:
            self.optimal_global_phenotype = self.base_phenotype.add(Phenotype.new_random(parameters.fluctuations_magnitude))
        else:
            self.optimal_global_phenotype = self.base_phenotype

        # Renumber ord_counter to 0..n-1 while keeping the plants' relative order.
        allpl = sorted(self.plants.values(), key = lambda p: p.ord_counter)

        i = 0
        for p in allpl:
            p.ord_counter = i
            i += 1

        #self.history.update()
        self.generation += 1
Пример #16
0
class Environment:
    """World of the simulation: owns the plant population and the optimum.

    The most recently constructed instance is published as
    ``Environment.default`` and ``Environment.environments`` counts how many
    instances have been created.

    Attributes set by ``__init__``:
        plants: dict mapping plant number -> plant.
        allplantslist: snapshot list of the plants, refreshed every generation.
        generation: number of generations advanced so far.
        base_phenotype / optimal_global_phenotype: the drifting base optimum
            and the (optionally fluctuating) optimum used in this generation.
        mesh, mesh_size, map_size, phenotype_map: only in location mode; a
            square grid of ``{number: plant}`` dicts and the terrain map.

    NOTE: this module targets Python 2 (``xrange``, implicit relative imports).
    """
    environments = 0

    def __init__(self):
        #-----location part
        if parameters.location_mode:
            self.mesh_size = 1./parameters.LD0range/2
            cells = int(self.mesh_size)+1
            # Square grid of buckets; each bucket maps plant number -> plant.
            self.mesh = [[{} for x in xrange(cells)] for y in xrange(cells)]
            map_path = parameters.map_phenotype_image(parameters.maps)
            self.load_terrain(map_path+".info.tmp", map_path+".tmp")
        #------------------

        # Imported here rather than at module level (presumably to avoid a
        # circular import between environment, plant and phenotype - confirm).
        from plant import Plant
        from phenotype import Phenotype

        self.plants = {}
        self.allplantslist = []
        self.generation = 0
        # Publish this instance before any plant is created.
        # NOTE(review): Plant.new presumably registers itself through
        # Environment.default - confirm against plant.py.
        self.__class__.default = self
        self.__class__.environments += 1
        debug.g("niche %d" % parameters.niche_size)
        for _unused in xrange(parameters.niche_size):
            if parameters.location_mode:
                Plant.new(parameters.get_start_point(parameters.maps))
            else:
                Plant.new((0,0))
        debug.g("*** %d" % len(self.plants))
        self.optimal_global_phenotype = Phenotype()
        self.base_phenotype = Phenotype()
        self.survivors = parameters.niche_size
        self.randomkiller = selectors.KillerRandom()
        (self.killer, self.reproducer) = selectors.getSelectors()
        self.phenotype_link = Phenotype
        self.history = History(self)
        self.history.update()

    def load_terrain(self, info_filename, data_filename):
        """Load the terrain map used to localise the optimal phenotype.

        :param info_filename: binary file holding two native unsigned longs
            (array type code 'L'): the map size along x and along y.
        :param data_filename: binary file holding three unsigned bytes per map
            cell, stored row by row with x varying fastest.

        Sets ``self.map_size`` and ``self.phenotype_map`` (indexed ``[x][y]``,
        each cell a list of three scaled floats).
        """
        # Both files are raw binary, so they must be opened in binary mode
        # (text mode can corrupt the bytes on some platforms).
        # NOTE(review): the byte width of 'L' is platform dependent, so the
        # info file must be written on a compatible platform.
        with open(info_filename, 'rb') as f:
            arrinfo = array.array('L')
            arrinfo.fromfile(f, 2)
        size = (arrinfo[0], arrinfo[1])
        self.map_size = size
        self.phenotype_map = [[(1,1,1) for y in xrange(size[1])] for x in xrange(size[0])]

        # Maps a byte 0..255 to a value centred on 127 and scaled by half of
        # the configured amplitude.
        scale = lambda x: (float(x-127)/128)*parameters.map_phenotype_amplitude/2

        with open(data_filename, 'rb') as f:
            arr = array.array('B')
            arr.fromfile(f, size[0]*size[1]*3)

        for y in xrange(size[1]):
            for x in xrange(size[0]):
                offset = 3*x+3*size[0]*y
                self.phenotype_map[x][y] = [scale(c) for c in arr[offset:offset+3]]

    def register_new_plant(self, number, plant):
        """Add ``plant`` under ``number`` (and to its mesh bucket in location mode).

        If the plant cannot be placed in the mesh, its scaled coordinates and
        number are logged and the original exception is re-raised.
        """
        self.plants[number] = plant
        if parameters.location_mode:
            try:
                self.mesh[plant.scalex()][plant.scaley()][number] = plant
            except Exception:
                # Log the offending coordinates, then let the real error
                # propagate instead of masking it behind a forced crash.
                debug.g(plant.scalex())
                debug.g(plant.scaley())
                debug.g(number)
                raise

    def unregister_plant(self, number):
        """Remove plant ``number`` from the population (and from the mesh)."""
        if parameters.location_mode:
            plant = self.plants[number]
            del self.mesh[plant.scalex()][plant.scaley()][number]
        del self.plants[number]

    def advance_generation(self):
        """Run one generation: selection, reproduction, evolution, drift.

        Order of operations: random and fitness-based elimination, optional
        neighbour competition (location mode), history snapshot, reproduction,
        per-plant evolution, optional horizontal transfers, drift of the base
        phenotype, recomputation of the optimum, and renumbering of
        ``ord_counter`` so that it is dense (0..n-1) in the existing order.
        """
        import math
        from phenotype import Phenotype
        from plant import Plant

        if parameters.random_pressure > 0.0:
            self.randomkiller.eliminate(self.plants.values())
        self.killer.eliminate(self.plants.values())

        #=========LOCATION
        if parameters.location_mode:
            r = parameters.LD0range
            r2 = r*r

            def important_fields_in_mesh(location):
                # Mesh cells covering `location` and its eight neighbours at
                # distance r, restricted to cells inside the mesh.
                x, y = location
                neighbourhood = [(x,y),(x-r,y-r),(x,y-r),(x+r,y-r),(x-r,y),
                                 (x+r,y),(x-r,y+r),(x,y+r),(x+r,y+r)]
                fields = set()
                for (px, py) in neighbourhood:
                    cx, cy = Plant.scale(px), Plant.scale(py)
                    if -1 < cx < self.mesh_size and -1 < cy < self.mesh_size:
                        fields.add((cx, cy))
                return fields

            def maybe_neighbour_kill(p1, p2):
                # True when p2 should kill p1. The squared distance is
                # normalised so that 0 maps to 0 and r**2 maps to 1; plants
                # further apart than r never interact.
                fitness_val = p1.fitness()
                (x1, y1), (x2, y2) = p1.location, p2.location
                d2 = (x2-x1)**2+(y2-y1)**2
                range_val = (math.e**(d2)-1.)/(math.e**(r2)-1.)
                if range_val <= 1.:
                    return not distributions.happened(range_val * fitness_val)
                return False

            shuffled = list(self.plants.values())
            random.shuffle(shuffled)
            for plant in shuffled:
                if plant.dead:
                    continue
                for (x,y) in important_fields_in_mesh(plant.location):
                    if plant.dead: break
                    # Iterate over a snapshot: die() may alter the bucket.
                    for killer_plant in list(self.mesh[x][y].values()):
                        # The draw happens before the identity check on
                        # purpose, to keep the random stream unchanged.
                        if maybe_neighbour_kill(plant, killer_plant) and plant.id != killer_plant.id:
                            plant.die()
                            break
        #=================

        self.history.update()

        self.survivors = len(self.plants)
        self.reproducer.reproduce(self.plants)

        for plant in list(self.plants.values()):
            plant.evolve()

        self.allplantslist = list(self.plants.values())

        if parameters.expected_horiz_transfers > 0.0:
            for plant in list(self.plants.values()):
                plant.perform_horizontal_transfers()

        # After the stability period the base optimum starts to drift.
        if self.generation >= parameters.stability_period:
            if parameters.is_drift_directed:
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype[distributions.runifint(0, parameters.no_phenotype_properties-1)] += parameters.expected_mutation_shift
            else:
                for _unused in range(parameters.number_of_mutations):
                    self.base_phenotype.mutate_once(stdev = parameters.expected_mutation_shift)

        if parameters.fluctuations_magnitude > 0.0:
            self.optimal_global_phenotype = self.base_phenotype.add(Phenotype.new_random(parameters.fluctuations_magnitude))
        else:
            self.optimal_global_phenotype = self.base_phenotype

        # Compact ord_counter to 0..n-1 while keeping the relative order.
        allpl = sorted(self.plants.values(), key = lambda p: p.ord_counter)
        for i, p in enumerate(allpl):
            p.ord_counter = i

        self.generation += 1

    def optimal_phenotype_on_map(self, location):
        """Return the optimal phenotype for the map cell under ``location``.

        :param location: ``(x, y)`` pair. Each coordinate is converted to a map
            index with ``int(round((length-1)*(v+1)/2))``, so -1 lands on index
            0 and 1 on the last index of that axis.
        """
        x, y = location
        scale = lambda v, length: int(round((length-1)*(v+1)/2))
        xp = scale(x, self.map_size[0])
        yp = scale(y, self.map_size[1])
        return self.optimal_global_phenotype.get_map_phenotype(self.phenotype_map[xp][yp])
Пример #17
0
 def __init__(self, data_dict, nphenom):
     """Wrap every entry of ``data_dict`` in a Chromosome and build phenotypes.

     The instance ``__dict__`` is replaced wholesale, so each key of
     ``data_dict`` becomes an attribute holding ``Chromosome(value)``.
     ``nphenom`` Phenotype objects are then created from ``data_dict["J"]``
     and stored in ``__phenotype__``.
     """
     chromosomes = {}
     for key in data_dict:
         chromosomes[key] = Chromosome(data_dict[key])
     self.__dict__ = chromosomes

     self.__phenotype__ = [Phenotype(data_dict["J"]) for _ in range(nphenom)]
Пример #18
0
def label_permutation_test(model_dir, n_permutations=1000):
    """Permutation test measuring the performance of the model when the labels are shuffled.

    The trained fold-0 model is loaded once and evaluated on the test mask of
    fold 0 after each label permutation (permutation ``i`` is passed to
    ``permute_population_graph_labels`` as its second argument).

    Side effects: prints the metrics of every permutation and saves three
    arrays (Pearson r, R^2, MSE) under the ``notebooks`` directory as
    ``permutations_<conv_type>_<metric>.npy``.

    :param model_dir: directory containing the model state dictionaries for each fold and the model
        configuration (including the population graph parameterisation)
    :param n_permutations: number of label permutations to evaluate.
    :return: ``[rs, r2s]`` - the per-permutation Pearson correlations and R^2 scores on the test set
        (the MSE values are only saved to disk).
    """

    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]
    brain_gnn_train.set_training_masks(graph, *fold)
    graph_transform.graph_feature_transform(graph)

    # The architecture and the weights do not depend on the permutation, so
    # the model is built and the checkpoint is read from disk only once.
    if ConvTypes(conv_type) == ConvTypes.GCN:
        model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
    else:
        model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)

    state_dict_path = os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(0))
    # map_location lets a checkpoint saved on another device be loaded here.
    model.load_state_dict(torch.load(state_dict_path, map_location=device))
    model = model.to(device)
    model.eval()

    rs = []
    r2s = []
    mses = []

    for i in range(n_permutations):
        graph.to('cpu')
        permute_population_graph_labels(graph, i)

        data = graph.to(device)
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            output = model(data)

        predicted = output[data.test_mask].detach().cpu().numpy()
        actual = graph.y[data.test_mask].detach().cpu().numpy()

        r2 = r2_score(actual, predicted)
        r = pearsonr(actual.flatten(), predicted.flatten())
        mse = mean_squared_error(actual, predicted)

        rs.append(r[0])
        r2s.append(r2)
        mses.append(mse)
        print(r[0], r2, mse)

    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r')), rs)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r2')), r2s)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'mse')), mses)

    return [rs, r2s]
Пример #19
0
from json import JSONEncoder

# Ad-hoc driver script: builds a Phenotype over a directory of genomes and
# runs its peptide extraction and conserved-sequence steps.
# NOTE(review): all paths below are absolute paths on one developer machine.

# Prokka GFF annotation file; not referenced anywhere below in this script.
GFF = '/home/ethan/Documents/github/CoRNonCOB/corncob/killers/Lc20.fasta/prokka_results/PROKKA_03222020.gff'

# First positional argument of Phenotype (presumably the genome directory).
GENOMES = '/home/ethan/Documents/ecoli_genome/putonti_seqs/nice'
# Second positional argument of Phenotype (presumably the output directory).
RUN_DIR = '/home/ethan/Documents/phenotype_test'
# Path of the Prokka executable handed to pull_peptides.
PROKA = '/home/ethan/prokka/bin/./prokka'

from phenotype import Phenotype

# NOTE(review): the meaning of phenotype='n' is defined by Phenotype - confirm.
p = Phenotype(GENOMES, RUN_DIR, phenotype='n')
p.pull_peptides(prokka_exec=PROKA)
p.get_conserved_sequences()

Пример #20
0
 def translate_to_phenotype(self):
     """Return a new Phenotype constructed from this object."""
     phenotype = Phenotype(self)
     return phenotype
Пример #21
0
# Locations of the input data, given relative to the working directory.
# NOTE(review): the file contents are inferred from the names only - confirm.
data_timeseries = 'data/raw_ts'  # raw timeseries
data_phenotype = 'data/phenotype.csv'  # phenotype table
data_similarity = 'data/similarity'  # similarity data
data_ct = 'data/CT.csv'
data_sa = 'data/SA.csv'
data_vol = 'data/Vol.csv'
data_euler = 'data/Euler.csv'
data_computed_fcms = 'data/processed_ts'  # processed timeseries

# NumPy file holding the subject IDs.
SUBJECT_IDS = 'data/subject_ids.npy'

# Exclude the following raw timeseries due to incorrect size.
EXCLUDED_UKB_IDS = ['UKB2203847', 'UKB2208238', 'UKB2697888']

# Graph construction phenotypic parameters.
# First biobank code returned for the AGE phenotype.
AGE_UID = Phenotype.get_biobank_codes(Phenotype.AGE)[0]


def get_subject_ids(num_subjects=None, randomise=True, seed=0):
    """Gets the list of subject IDs for a spcecified number of subjects.

    :param num_subjects: number of subjects. Use the entire dataset when set to None.
    :param randomise: indicates whether to use a random seed for selection of subjects.
    :param seed: random seed value.
    :return: list of subject IDs.
    """

    if not os.path.isfile(os.path.join(data_root, 'subject_ids.npy')):
        ukb_preprocess.precompute_subject_ids()

    subject_ids = np.load(os.path.join(data_root, 'subject_ids.npy'),
Пример #22
0
 def init(self, cartSpace, cartPrice, limit):
     """Fill the population and pick its first member as the best solution.

     Appends ``self.populationSize`` new Phenotype objects, each built from
     ``cartSpace``, ``cartPrice`` and ``limit``, to ``self.population`` and
     stores ``self.population[0]`` in ``self.bestSolution``.
     """
     self.population.extend(
         Phenotype(cartSpace, cartPrice, limit)
         for _ in range(self.populationSize)
     )
     self.bestSolution = self.population[0]
Пример #23
0
def evaluate_noise_performance(model_dir, noise_type='node'):
    """Measures the test set performance of the model under the specified model directory when noise is added.

    For every random seed and every noise probability a model is trained on
    fold 0 and evaluated on its test mask. Each metric is also written to
    ``wandb.run.summary``.

    :param model_dir: directory containing the model state dictionaries for each fold and the model
        configuration (including the population graph parameterisation)
    :param noise_type: 'node', 'edge' or 'node-feature-permutation' (the spelling
        'node_feature_permutation' is accepted as well).
    :return: the dictionary of results, keyed by '<conv_type>_<noise_type>_<seed>', for the random seeds
        1 to 4 and increasing probabilities of added noise; each value maps 'p=<p>_metric=r' and
        'p=<p>_metric=r2' to floats.
    :raises ValueError: if ``noise_type`` is not one of the supported values.
    """

    feature_permutation_names = ('node-feature-permutation', 'node_feature_permutation')
    if noise_type not in ('node', 'edge') + feature_permutation_names:
        # Fail before any training: an unknown type would otherwise run the
        # whole experiment without adding noise and label it as if it had.
        raise ValueError('Unknown noise_type: {!r}'.format(noise_type))

    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    lr = cfg['learning_rate']['value']
    weight_decay = cfg['weight_decay']['value']

    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]
    results = {}

    # NOTE(review): the same graph object is transformed and perturbed in
    # every iteration without being reloaded, so noise may accumulate across
    # probabilities and seeds - confirm whether that is intended.
    for i in range(1, 5):
        brain_gnn_train.set_training_masks(graph, *fold)
        results_fold = {}

        for p in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 0.95]:
            graph.to('cpu')
            graph_transform.graph_feature_transform(graph)
            if noise_type == 'node':
                add_population_graph_noise(graph, p, random_state=i)
            elif noise_type == 'edge':
                remove_population_graph_edges(graph, p, random_state=i)
            else:
                permute_population_graph_features(graph, p, random_state=i)

            data = graph.to(device)
            epochs = 10000
            model, _ = brain_gnn_train.train(conv_type, graph, device, n_conv_layers, layer_sizes, epochs, lr,
                                             dropout_p, weight_decay, patience=100)
            model.eval()
            output = model(data)

            predicted = output[data.test_mask].cpu().detach().numpy()
            actual = data.y[data.test_mask].cpu().detach().numpy()

            # float() accepts both NumPy scalars and plain Python floats.
            r2 = float(r2_score(actual, predicted))
            r = float(pearsonr(actual.flatten(), predicted.flatten())[0])

            results_fold['p={}_metric=r'.format(p)] = r
            wandb.run.summary['{}_{}_{}_p={}_metric=r'.format(conv_type, noise_type, i, p)] = r
            results_fold['p={}_metric=r2'.format(p)] = r2
            wandb.run.summary['{}_{}_{}_p={}_metric=r2'.format(conv_type, noise_type, i, p)] = r2

            gc.collect()

        results['{}_{}_{}'.format(conv_type, noise_type, i)] = results_fold

    return results