def test_get_cas_numbers(self):
    """read the CAS numbers from the R package (rownames)

    Exports the data to a CSV below ``self.tmp_path``, loads it again and
    checks the first value returned by ``load_response_matrix``.
    """
    csv_path = os.path.join(self.tmp_path, 'rm.csv')
    rdl.get_data_from_r(csv_path)
    try:
        cas_numbers, _, _ = rdl.load_response_matrix(csv_path)
        self.assertEqual(len(cas_numbers), 249)
        self.assertIn('89-78-1', cas_numbers)
        self.assertNotIn('solvent', cas_numbers)
    finally:
        # remove the exported file even when one of the assertions fails,
        # so a failing run does not leave it behind in tmp_path
        os.remove(csv_path)
def test_get_response_matrix(self):
    """read the response matrix from the DoOR R package

    Exports the data to a CSV below ``self.tmp_path``, loads it again and
    checks that the matrix shape agrees with the number of row and column
    names returned alongside it.
    """
    csv_path = os.path.join(self.tmp_path, 'rm.csv')
    rdl.get_data_from_r(csv_path)
    try:
        row_names, col_names, rm = rdl.load_response_matrix(csv_path)
        self.assertEqual(249, rm.shape[0])
        self.assertEqual(67, rm.shape[1])
        self.assertEqual(249, len(row_names))
        self.assertEqual(67, len(col_names))
    finally:
        # remove the exported file even when one of the assertions fails,
        # so a failing run does not leave it behind in tmp_path
        os.remove(csv_path)
def load_data_targets(config, features):
    """load the targets for a glomerulus

    Args:
        config: mapping with the keys ``data_path`` (directory holding
            ``door2id.json`` and the response matrix CSV files) and
            ``glomerulus`` (name looked up in the glomeruli returned by
            ``rdl.load_response_matrix``). If ``normed_responses`` is
            present and falsy, ``unnorm_response_matrix.csv`` is read
            instead of ``response_matrix.csv``.
        features: mapping from molecule id (as string) to its feature
            values; molecules without an entry are dropped.

    Returns:
        Tuple ``(data, targets, molids)``: ``data`` and ``targets`` are
        numpy arrays and ``molids`` is a list of id strings, all three
        aligned entry by entry.
    """
    # close the json file deterministically instead of leaking the handle
    with open(os.path.join(config['data_path'], 'door2id.json')) as door2id_file:
        door2id = json.load(door2id_file)

    csv_path = os.path.join(config['data_path'], 'response_matrix.csv')
    if 'normed_responses' in config and not config['normed_responses']:
        csv_path = os.path.join(config['data_path'], 'unnorm_response_matrix.csv')
    cas_numbers, glomeruli, rm = rdl.load_response_matrix(csv_path, door2id)
    glom_idx = glomeruli.index(config['glomerulus'])

    # select molecules available for the glomerulus
    targets, tmp_cas_numbers = rdl.get_avail_targets_for_glom(rm, cas_numbers, glom_idx)
    molids = [str(door2id[cas_number][0]) for cas_number in tmp_cas_numbers]
    assert len(molids) == len(targets)

    # for some of them the spectra are not available
    avail = [i for i, molid in enumerate(molids) if molid in features]
    targets = np.array([targets[i] for i in avail])
    data = np.array([features[molids[i]] for i in avail])
    # avail is already in ascending order, so indexing with it keeps the
    # original ordering without a list membership test per element
    molids = [molids[i] for i in avail]
    assert targets.shape[0] == data.shape[0]
    assert targets.shape[0] == len(molids)
    return data, targets, molids
from scipy.stats import scoreatpercentile reload(plib) reload(rdl) desc = 'all' selection = 'linear' method = 'svr' config = { "inpath": "/Users/dedan/projects/master/results/param_search/all_gloms_svrlin_all", "data_path": os.path.join(os.path.dirname(__file__), '..', '..', 'data'), "format": "png", } outpath = os.path.join(config['inpath'], 'plots') door2id = json.load(open(os.path.join(config['data_path'], 'door2id.json'))) path_to_csv = os.path.join(config['data_path'], 'response_matrix.csv') cas_numbers, all_glomeruli, rm = rdl.load_response_matrix(path_to_csv, door2id) # variables for results plt.close('all') search_res, max_overview, sc, _ = rdl.read_paramsearch_results(config['inpath']) glomeruli = search_res[desc][selection].keys() # sort glomeruli according to performance maxes = [np.max(search_res[desc][selection][glom][method]) for glom in glomeruli] picks = [search_res[desc][selection][glom][method][-1, 1] for glom in glomeruli] max_idx = np.argsort(maxes) glomeruli = [glomeruli[i] for i in max_idx] fig = plt.figure(figsize=(3, 20)) for i_glom, glom in enumerate(glomeruli): mat = search_res[desc][selection][glom][method]
from master.libs import utils
import numpy as np
import pylab as plt
from scipy.stats import scoreatpercentile
reload(rdl)

data_path = '/Users/dedan/projects/master/data'
results_path = '/Users/dedan/projects/master/results/summary/'
descriptor = 'ATOMCENTRED_FRAGMENTS'
format = 'png'
N = 50
percentile = 75
percentile_thres = 0.2

# close the json file deterministically instead of leaking the handle
with open(os.path.join(data_path, 'door2id.json')) as door2id_file:
    door2id = json.load(door2id_file)
cas_numbers, glomeruli, rm = rdl.load_response_matrix(os.path.join(data_path, 'response_matrix.csv'))

# which molecules are missing in door2id?
# (single parenthesised argument: same output as the print statement)
print('molecues missing in door2id: \n%s' % [r for r in cas_numbers if not door2id[r]])

# number of measurements available
fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(111)
# one bar per stimulus: count of non-NaN entries in its row of the matrix
ax.bar(range(len(cas_numbers)), np.sum(~np.isnan(rm), axis=1))
ax.set_xticks(np.arange(len(cas_numbers)) + 1)
# every second label gets 40 trailing blanks appended, the others are
# used unchanged
bla = [g + ' ' * 40 if i % 2 == 0 else g for i, g in enumerate(cas_numbers)]
ax.set_xticklabels(bla, rotation='90', ha='right')
ax.set_title('number of glomeruli available for a stimulus')
fig.savefig(os.path.join(results_path, 'glomeruli_per_stimulus.' + format))
#!/usr/bin/env python
# encoding: utf-8
"""
compare normalized and unnormalized response matrices

Loads both matrices from the data directory next to this script's parent
and writes one image per matrix (bla1.png, bla2.png) into the current
working directory.

Created by on 2012-01-27.
Copyright (c) 2012. All rights reserved.
"""

import sys
import os
import numpy as np
import pylab as plt
from master.libs import read_data_lib as rdl

data_path = os.path.join(os.path.dirname(__file__), '..', 'data')
# only the matrix (third return value) is needed for plotting
_, _, rm = rdl.load_response_matrix(os.path.join(data_path, 'response_matrix.csv'))
_, _, urm = rdl.load_response_matrix(os.path.join(data_path, 'unnorm_response_matrix.csv'))

plt.imshow(rm)
plt.axis('off')
plt.savefig('bla1.png')

# clear the current figure first; otherwise the second image is drawn
# into the same axes on top of the first one
plt.clf()
plt.imshow(urm)
plt.axis('off')
plt.savefig('bla2.png')
format = 'png' # selected via the basic statistics script interesting_glomeruli = ['Or19a', 'Or22a', 'Or35a', 'Or43b', 'Or67a', 'Or67b', 'Or7a', 'Or85b', 'Or98a', 'Or9a'] n_glomeruli = 5 resolution = 0.5 recompute = True n_estimators=100 # read in the IR spectra TODO: move them to data when final version exists spectra = pickle.load(open(ir_file)) door2id = json.load(open(os.path.join(base_path, 'data', 'door2id.json'))) # investigate only the glomeruli for which we have most molecules available csv_path = os.path.join(base_path, 'data', 'response_matrix.csv') cas_numbers, glomeruli, rm = rdl.load_response_matrix(csv_path, door2id) # best_glom = rdl.select_n_best_glomeruli(rm, glomeruli, n_glomeruli) # print best_glom kernel_widths = [2, 3, 5, 10, 20, 30, 50] res = {} # data collection if recompute: for glom in interesting_glomeruli: print glom glom_idx = glomeruli.index(glom) # select molecules available for the glomerulus targets , tmp_cas_numbers = rdl.get_avail_targets_for_glom(rm, cas_numbers, glom_idx)
Created by on 2012-01-27. Copyright (c) 2012. All rights reserved. """ import os import numpy as np import pylab as plt from matplotlib.patches import Rectangle, Circle from matplotlib.ticker import NullLocator from master.libs import read_data_lib as rdl subtract_sfr = False outpath = '/Users/dedan/projects/master/results/summary/' rm_path = 'data/response_matrix.csv' cases, gloms, rm = rdl.load_response_matrix(rm_path, door2id=None) # read standard firing rates if subtract_sfr: sfrs = open(rm_path).readlines()[1].split(',')[1:] sfrs = np.array([float(s) if not s == 'NA' else 0 for s in sfrs]) rm = np.subtract(rm, sfrs) # only look at a slice of the matrix rm = rm[70:110] rm[np.isnan(rm)] = 0 fig = plt.figure(figsize=(rm.shape[1]/7, rm.shape[0]/7)) ax = fig.add_subplot(111) ax.set_aspect('equal') ax.xaxis.set_major_locator(NullLocator())