Exemplo n.º 1
0
def retrieveData():
    '''
    Fetch the report for each phenotype from the API and record its score.

    For every phenotype name in the hard-coded list, the report for the
    'european' population is requested, a ``Phenotype`` is built from the
    report's display name, summary text and summary score, and the score is
    stored in ``phenotypeDict`` under the key ``p._phenotype``.

    ``phenotypeDict`` is not defined in this function; it is presumably a
    module-level dictionary (verify against the rest of the file).

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
        KeyError: if the report lacks the expected keys.
    '''
    token = 'GENOMELINKTEST'
    headers = {'Authorization': 'Bearer {}'.format(token)}

    phenotypes = [
        'carbohydrate-intake', 'protein-intake', 'vitamin-a', 'vitamin-b12',
        'vitamin-d', 'vitamin-e', 'calcium', 'magnesium', 'iron',
        'endurance-performance'
    ]
    population = 'european'

    for phenotype in phenotypes:
        report_url = 'https://genomicexplorer.io/v1/reports/{}?population={}'.format(
            phenotype, population)
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(report_url, headers=headers, timeout=30)
        # Fail on HTTP errors here instead of with an opaque KeyError below.
        response.raise_for_status()
        # response.json() already returns decoded Python objects, so no
        # dumps/loads round trip is needed.
        data_dict = response.json()

        p = Phenotype(data_dict["phenotype"]["display_name"],
                      data_dict["summary"]["text"],
                      data_dict["summary"]["score"])
        phenotypeDict[p._phenotype] = p._score
Exemplo n.º 2
0
def evaluate_test_set_performance(model_dir):
    """Measure the test-set mean squared error of the saved first-fold model.

    The model configuration is read from ``config.yaml`` inside ``model_dir``.
    If the configured graph name is not in ``GRAPH_NAMES`` the population
    graph is constructed first. The graph is then split into five folds and
    the state dictionary ``fold-0_state_dict.pt`` is evaluated on the test
    mask of the first fold.

    NOTE(review): the previous docstring promised results for every fold, but
    the code replaced the per-fold results dictionary with the first fold's
    MSE and broke out of the loop. That return value is preserved here; the
    Pearson r / r2 values that were computed and then discarded are no longer
    calculated. Confirm with callers before restoring per-fold reporting.

    :param model_dir: directory containing the model state dictionaries for each fold and the model
        configuration (including the population graph parameterisation)
    :return: the mean squared error on the test mask of the first fold, or an empty dictionary
        if the split produces no folds.
    """

    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    # The similarity entry is a literal "(feature names, threshold)"; parse it once.
    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)

    for i, fold in enumerate(folds):
        brain_gnn_train.set_training_masks(graph, *fold)
        graph_transform.graph_feature_transform(graph)

        if ConvTypes(conv_type) == ConvTypes.GCN:
            model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        else:
            model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)

        # Load onto the CPU first so checkpoints saved on another device
        # still open; the model is moved to the target device afterwards.
        state_path = os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(i))
        model.load_state_dict(torch.load(state_path, map_location='cpu'))
        model = model.to(device)
        model.eval()

        data = graph.to(device)
        # Evaluation only: no autograd bookkeeping is needed.
        with torch.no_grad():
            output = model(data)

        predicted = output[data.test_mask].detach().cpu().numpy()
        actual = graph.y[data.test_mask].detach().cpu().numpy()

        # Only the first fold is evaluated and only its MSE is reported.
        return mean_squared_error(actual, predicted)

    return {}
 def __init__(self, nb_entrees, nb_sorties, idInd):
     """Initialise an individual with a new genome and phenotype.

     :param nb_entrees: stored as ``nb_e`` and passed as the first argument
         to ``Genome`` and ``Phenotype`` (the name suggests an input count).
     :param nb_sorties: stored as ``nb_s`` and passed as the second argument
         to ``Genome`` and ``Phenotype`` (the name suggests an output count).
     :param idInd: identifier stored as ``id``.
     """
     self.nb_e, self.nb_s, self.id = nb_entrees, nb_sorties, idInd
     self.espece = None
     # The genome is created before the phenotype, from the same two counts.
     self.genome = Genome(self.nb_e, self.nb_s)
     self.phenotype = Phenotype(self.nb_e, self.nb_s)
     # This table will act as the interface between the genome and the
     # individual.
     self.idToPos = {}
     # Neither fitness value is known until the individual is evaluated.
     self.fitness = self.sharedFitness = None
Exemplo n.º 4
0
    def randomize(self, chromosomeSize, trajectory):
        self.level = Level(trajectory.level_width,trajectory.level_height)
        self.level.generate_from_trajectory(trajectory, random.uniform(0,1))
        self.phenotype = Phenotype(self.level)
        self.chromosomes = self.level.cells.flatten()
        self.trajectory = trajectory
#        np.set_printoptions(threshold=np.nan)
#        print(self.chromosomes)
#        print("another genotype")
        
        """
Exemplo n.º 5
0
 def __crossover__(self, population, ori):
     """Create two children per parent pair and add them all to ``ori``.

     For each ``(f, m)`` pair the positions of ``f.__dict__``'s items are
     shuffled and split at ``int(len(f.__dict__) * self.crossover)``. The
     first child takes the items of ``f`` at the leading positions and the
     items of ``m`` at the remaining ones; the second child takes the
     opposite selection. Each item mapping is wrapped in a ``Phenotype``.

     :param population: iterable of ``(f, m)`` parent pairs.
     :param ori: object whose ``addPhenotype`` receives the list of children.
     :return: ``ori``, after ``addPhenotype`` has been called on it.
     """
     offspring = []
     for father, mother in population:
         gene_count = int(len(father.__dict__))
         order = np.arange(gene_count)
         np.random.shuffle(order)

         # Split the shuffled positions into the two complementary parts.
         cut = int(gene_count * self.crossover)
         head, tail = order[:cut], order[cut:]

         father_items = np.array(list(father.__dict__.items()))
         mother_items = np.array(list(mother.__dict__.items()))

         first_child = Phenotype(
             dict(np.concatenate((father_items[head], mother_items[tail]))))
         second_child = Phenotype(
             dict(np.concatenate((father_items[tail], mother_items[head]))))
         offspring.extend([first_child, second_child])

     ori.addPhenotype(offspring)
     return ori
Exemplo n.º 6
0
 def init(self, cartSpace, cartPrice, limit):
     """Fill the population and take its first member as the best solution.

     Appends ``self.populationSize`` new ``Phenotype`` objects, each built
     from ``cartSpace``, ``cartPrice`` and ``limit``, to ``self.population``
     and then sets ``self.bestSolution`` to ``self.population[0]``.
     """
     created = 0
     while created < self.populationSize:
         candidate = Phenotype(cartSpace, cartPrice, limit)
         self.population.append(candidate)
         created += 1
     # The first stored member is the initial best solution.
     self.bestSolution = self.population[0]
Exemplo n.º 7
0
 def __init__(self, data_dict, nphenom):
     """Wrap every entry of ``data_dict`` in a ``Chromosome`` and build phenotypes.

     The instance ``__dict__`` is replaced by a mapping from each key of
     ``data_dict`` to ``Chromosome(data_dict[key])``. Afterwards the
     ``__phenotype__`` attribute receives ``nphenom`` ``Phenotype`` objects,
     each constructed from ``data_dict["J"]``.

     :param data_dict: mapping of names to the values wrapped in chromosomes;
         it must contain the key ``"J"`` when ``nphenom`` is positive.
     :param nphenom: number of ``Phenotype`` objects to create.
     """
     self.__dict__ = {key: Chromosome(data_dict[key]) for key in data_dict}
     self.__phenotype__ = [Phenotype(data_dict["J"]) for _ in range(nphenom)]
Exemplo n.º 8
0
def label_permutation_test(model_dir, n_permutations=1000):
    """Permutation test measuring the performance of the model when the labels are shuffled.

    The configuration is read from ``config.yaml`` in ``model_dir``, the graph is built if its name
    is not in ``GRAPH_NAMES``, and the first of five folds is used throughout. On every iteration
    ``permute_population_graph_labels(graph, i)`` is applied, a model is built, the fold-0 state
    dictionary is loaded into it, and Pearson r, r2 and MSE are computed on the test mask. The
    three metric lists are also saved with ``np.save`` under the ``notebooks`` directory.

    :param model_dir: directory containing the model state dictionaries for each fold and the model
        configuration (including the population graph parameterisation)
    :param n_permutations: number of label permutations to evaluate (default 1000).
    :return: the list ``[rs, r2s]`` holding the Pearson r and r2 value of every permutation
        (the MSE values are saved to disk but not returned).
    """

    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    # The similarity entry is a literal "(feature names, threshold)"; parse it once.
    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]
    brain_gnn_train.set_training_masks(graph, *fold)
    graph_transform.graph_feature_transform(graph)

    # The checkpoint is the same for every permutation, so read it from disk
    # once. It is loaded onto the CPU because the model is still on the CPU
    # when the weights are copied in.
    state_dict = torch.load(os.path.join(model_dir, 'fold-{}_state_dict.pt'.format(0)),
                            map_location='cpu')
    use_gcn = ConvTypes(conv_type) == ConvTypes.GCN

    rs = []
    r2s = []
    mses = []

    for i in range(n_permutations):
        # Return value ignored as in the original; presumably moves the
        # graph in place before permuting (verify against the graph type).
        graph.to('cpu')
        permute_population_graph_labels(graph, i)

        if use_gcn:
            model = BrainGCN(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)
        else:
            model = BrainGAT(graph.num_node_features, n_conv_layers, layer_sizes, dropout_p)

        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()

        data = graph.to(device)
        # Evaluation only: no autograd bookkeeping is needed.
        with torch.no_grad():
            output = model(data)

        predicted = output[data.test_mask].detach().cpu().numpy()
        actual = graph.y[data.test_mask].detach().cpu().numpy()

        r2 = r2_score(actual, predicted)
        r = pearsonr(actual.flatten(), predicted.flatten())
        mse = mean_squared_error(actual, predicted)

        rs.append(r[0])
        r2s.append(r2)
        mses.append(mse)
        print(r[0], r2, mse)

    # Make sure the output directory exists so the results of the whole run
    # are not lost to a missing folder.
    os.makedirs('notebooks', exist_ok=True)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r')), rs)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'r2')), r2s)
    np.save(os.path.join('notebooks', 'permutations_{}_{}'.format(conv_type, 'mse')), mses)

    return [rs, r2s]
Exemplo n.º 9
0
def evaluate_noise_performance(model_dir, noise_type='node'):
    """Measures the test set performance of the model under the specified model directory when noise is added.

    For each random seed in 1..4 and each noise probability in a fixed list, the graph features are
    transformed, the selected kind of noise is applied, a model is trained with the configuration
    from ``config.yaml`` and its Pearson r and r2 on the test mask are recorded, both in the
    returned dictionary and in ``wandb.run.summary``.

    :param model_dir: directory containing the model state dictionaries for each fold and the model
        configuration (including the population graph parameterisation)
    :param noise_type: 'node', 'node-feature-permutation' or 'edge'. The underscore spelling
        'node_feature_permutation' is accepted as well. The value is used verbatim in result keys.
    :return: a dictionary keyed by ``'<conv_type>_<noise_type>_<seed>'`` whose values map
        ``'p=<p>_metric=r'`` and ``'p=<p>_metric=r2'`` to the measured values.
    :raises ValueError: if ``noise_type`` is not one of the supported values.
    """

    # Validate before any expensive work: an unknown value used to train every
    # model without adding noise while still logging it under the given name.
    valid_noise_kinds = ('node', 'edge', 'node-feature-permutation')
    noise_kind = str(noise_type).replace('_', '-')
    if noise_kind not in valid_noise_kinds:
        raise ValueError('Unknown noise_type {!r}; expected one of {}'.format(noise_type, valid_noise_kinds))

    with open(os.path.join(model_dir, 'config.yaml')) as file:
        cfg = yaml.full_load(file)

    graph_name = cfg['graph_name']['value']
    conv_type = cfg['model']['value']

    n_conv_layers = cfg['n_conv_layers']['value']
    layer_sizes = ast.literal_eval(cfg['layer_sizes']['value'])
    dropout_p = cfg['dropout']['value']

    lr = cfg['learning_rate']['value']
    weight_decay = cfg['weight_decay']['value']

    # The similarity entry is a literal "(feature names, threshold)"; parse it once.
    similarity = ast.literal_eval(cfg['similarity']['value'])
    similarity_feature_set = [Phenotype(i) for i in similarity[0]]
    similarity_threshold = similarity[1]

    if graph_name not in GRAPH_NAMES:
        graph_construct.construct_population_graph(similarity_feature_set=similarity_feature_set,
                                                   similarity_threshold=similarity_threshold,
                                                   functional=False,
                                                   structural=True,
                                                   euler=True)

    graph = graph_construct.load_population_graph(graph_root, graph_name)

    folds = brain_gnn_train.get_cv_subject_split(graph, n_folds=5)
    fold = folds[0]
    results = {}
    epochs = 10000

    # ``i`` is the random seed handed to the noise functions.
    for i in range(1, 5):
        brain_gnn_train.set_training_masks(graph, *fold)
        results_fold = {}

        for p in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 0.95]:
            graph.to('cpu')
            graph_transform.graph_feature_transform(graph)
            if noise_kind == 'node':
                add_population_graph_noise(graph, p, random_state=i)
            elif noise_kind == 'edge':
                remove_population_graph_edges(graph, p, random_state=i)
            else:
                permute_population_graph_features(graph, p, random_state=i)

            data = graph.to(device)
            model, _ = brain_gnn_train.train(conv_type, graph, device, n_conv_layers, layer_sizes, epochs, lr,
                                             dropout_p, weight_decay, patience=100)
            model.eval()
            # Evaluation only: no autograd bookkeeping is needed.
            with torch.no_grad():
                output = model(data)

            predicted = output[data.test_mask].detach().cpu().numpy()
            actual = data.y[data.test_mask].detach().cpu().numpy()

            # float() accepts both numpy scalars and plain Python floats,
            # whichever the metric functions return.
            r2_value = float(r2_score(actual, predicted))
            r_value = float(pearsonr(actual.flatten(), predicted.flatten())[0])

            results_fold['p={}_metric=r'.format(p)] = r_value
            wandb.run.summary['{}_{}_{}_p={}_metric=r'.format(conv_type, noise_type, i, p)] = r_value
            results_fold['p={}_metric=r2'.format(p)] = r2_value
            wandb.run.summary['{}_{}_{}_p={}_metric=r2'.format(conv_type, noise_type, i, p)] = r2_value

            gc.collect()

        results['{}_{}_{}'.format(conv_type, noise_type, i)] = results_fold

    return results
Exemplo n.º 10
0
 def translate_to_phenotype(self):
     """Return a new ``Phenotype`` constructed from this object."""
     phenotype = Phenotype(self)
     return phenotype
Exemplo n.º 11
0
# Population graph parameters
def _str2bool(value):
    """Convert a command-line string into a boolean.

    ``type=bool`` treats every non-empty string (including "0" and "False")
    as True, so the flag could never be switched off from the command line.
    An empty string still maps to False, as it did with ``bool``.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    import argparse

    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


# The integer defaults are kept unchanged; only values given on the command
# line pass through the converter.
parser.add_argument('--functional', default=0, type=_str2bool)
parser.add_argument('--structural', default=1, type=_str2bool)
parser.add_argument('--euler', default=1, type=_str2bool)
parser.add_argument('--similarity',
                    default="(['SEX', 'ICD10', 'FTE', 'NEU'], 0.8)",
                    type=str)

args = parser.parse_args()

functional = args.functional
structural = args.structural
euler = args.euler

# --similarity is a Python literal "(feature names, threshold)"; parse it once.
_similarity_spec = ast.literal_eval(args.similarity)
similarity_feature_set = [Phenotype(i) for i in _similarity_spec[0]]
similarity_threshold = _similarity_spec[1]

graph_name = graph_construct.get_graph_name(
    functional=functional,
    structural=structural,
    euler=euler,
    similarity_feature_set=similarity_feature_set,
    similarity_threshold=similarity_threshold)

if graph_name not in GRAPH_NAMES:
    graph_construct.construct_population_graph(
        similarity_feature_set=similarity_feature_set,
        similarity_threshold=similarity_threshold,
        functional=functional,
Exemplo n.º 12
0
from json import JSONEncoder

# Machine-specific absolute path to a .gff file inside a prokka_results
# directory. It is not referenced again in the lines visible here.
GFF = '/home/ethan/Documents/github/CoRNonCOB/corncob/killers/Lc20.fasta/prokka_results/PROKKA_03222020.gff'

# First positional argument passed to Phenotype below (presumably the
# directory holding the genome sequences -- confirm against Phenotype).
GENOMES = '/home/ethan/Documents/ecoli_genome/putonti_seqs/nice'
# Second positional argument passed to Phenotype below (presumably the
# working/output directory of the run -- confirm against Phenotype).
RUN_DIR = '/home/ethan/Documents/phenotype_test'
# Path handed to pull_peptides as prokka_exec.
PROKA = '/home/ethan/prokka/bin/./prokka'

from phenotype import Phenotype

# Build the Phenotype for the two paths above with phenotype='n', then run
# its two processing steps in order.
p = Phenotype(GENOMES, RUN_DIR, phenotype='n')
p.pull_peptides(prokka_exec=PROKA)
p.get_conserved_sequences()