# load in the csv
data = pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')

# keep just males aged 60-74 for now
data = data[(data.sex == 1) & (data.age_group == '60to74')]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.]

### setup temporal indexing

# set year to start at 0
data = pl.rec_append_fields(
    rec=data,
    names='year0',
    arrs=np.array(data.year - np.min(data.year)))

# make a list of years in the data
years = np.arange(np.min(data.year0), np.max(data.year0) + 1, 1)

### make lists/indices by state

# list of states
state_names = np.unique(data.statefips)
state_names.sort()

# make states numeric/sequential in data
# NOTE: this call was truncated in the source; mapping each statefips code to
# its index in the sorted state_names is a plausible reconstruction
data = pl.rec_append_fields(
    rec=data,
    names='state',
    arrs=np.searchsorted(state_names, data.statefips))
full_dir = '%s/v02_prep_%s' % (indir, iso3)

# get cause list
causes = list(set([f.split('+')[1] for f in os.listdir(full_dir) if re.search(age, f)]))
causes.remove('HIV')  # temporary until Miriam fixes the HIV files

# gather data and fit model
cf = data.get_cod_data(full_dir, causes, age, iso3, sex)
m, pi = models.fit_latent_simplex(cf)

# calculate summary measures
N, T, J = pi.shape
mean = pi.mean(0)
lower = pl.array([[st.mquantiles(pi[:, t, j], 0.025)[0] for j in range(J)] for t in range(T)])
upper = pl.array([[st.mquantiles(pi[:, t, j], 0.975)[0] for j in range(J)] for t in range(T)])

# format summary and save
output = pl.np.core.records.fromarrays(mean.T, names=['%s_mean' % c for c in causes])
output = pl.rec_append_fields(output, ['%s_lower' % c for c in causes], lower.T)
output = pl.rec_append_fields(output, ['%s_upper' % c for c in causes], upper.T)
pl.rec2csv(output, '%s/%s+%s+%s+summary.csv' % (outdir, iso3, age, sex))

# format all sims and save
# NOTE: the year labels below assume the fitted period is 1980-2011, i.e. T == 32
pi.shape = (N * T, J)
years = pl.array([t for s in range(N) for t in range(1980, 2012)])
sim = pl.array([s for s in range(N) for t in range(1980, 2012)])
output = pl.np.core.records.fromarrays(pi.T, names=causes)
output = pl.rec_append_fields(output, 'year', years)
output = pl.rec_append_fields(output, 'sim', sim)
pl.rec2csv(output, '%s/%s+%s+%s.csv' % (outdir, iso3, age, sex))
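# --- illustrative aside (not from the original script) ---
# the nested list comprehensions above call st.mquantiles once per (t, j)
# cell; numpy can compute the same (T, J) summaries in one vectorized call
# per bound (its interpolation rule differs slightly from mquantiles, so
# values may disagree in the last decimals). `draws` below is fake data
# standing in for the (N, T, J) array pi had before it was reshaped
import numpy as np
draws = np.random.dirichlet(np.ones(3), size=(100, 32))  # fake (N, T, J) simplex draws
lower_q = np.percentile(draws, 2.5, axis=0)   # (T, J) lower bounds
upper_q = np.percentile(draws, 97.5, axis=0)  # (T, J) upper bounds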
data = pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')
print 'Data loaded'

# keep just the specified age and sex
data = data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.]

### setup temporal indexing

# set year to start at 0
data = pl.rec_append_fields(
    rec=data,
    names='year0',
    arrs=np.array(data.year - np.min(data.year)))

# make a list of years in the data
years = np.arange(np.min(data.year0), np.max(data.year0) + 1, 1)

# find indices of years
year_indices = np.array([data.year0 == y for y in years])

# make a list of which years to sample the random walks at
knot_spacing = 5
syears = np.arange(np.min(data.year0), np.max(data.year0) + knot_spacing, knot_spacing)

# make a lower-triangular matrix for computing the cumulative sum over sample years
syear_cumsum = np.zeros((len(syears), len(syears)))
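# --- illustrative sketch (not from the original script) ---
# how the knot machinery above is typically used: draw one increment per
# knot, turn the increments into a random walk with a lower-triangular
# cumulative-sum matrix, then interpolate the walk back to every year;
# the names below (knots, walk_at_knots) are hypothetical
import numpy as np
knots = np.arange(0, 31, 5)                               # e.g. knots at years 0, 5, ..., 30
cumsum_mat = np.tril(np.ones((len(knots), len(knots))))   # lower-triangular ones
increments = np.random.normal(0., 0.1, len(knots))        # one step per knot
walk_at_knots = np.dot(cumsum_mat, increments)            # random walk sampled at the knots
walk_by_year = np.interp(np.arange(31), knots, walk_at_knots)  # linear interpolation to years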
### setup the data

# load in the csv
data = pl.csv2rec(proj_dir + "data/model inputs/downsampled.csv")
print "Data loaded"

# keep just the specified age and sex
data = data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.0]

### setup temporal indexing

# set year to start at 0
data = pl.rec_append_fields(rec=data, names="year0", arrs=np.array(data.year - np.min(data.year)))

# set years to go from 0 to (num years - 1)
for i, y in enumerate(np.sort(np.unique(data.year0))):
    data.year0[data.year0 == y] = i

# make a list of years in the data
years = np.unique(data.year0)

# find indices of years
year_indices = np.array([data.year0 == y for y in years])

# make a lower-triangular matrix of ones for computing the cumulative sum over years
year_cumsum = np.zeros((len(years), len(years)))
for i in range(len(years)):
    for j in range(len(years)):
        # NOTE: the loop body was truncated in the source; filling the lower
        # triangle with ones is the natural completion for a cumulative sum
        if j <= i:
            year_cumsum[i, j] = 1.
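# --- illustrative check (not from the original script) ---
# the double loop above fills a lower-triangular matrix of ones, so that
# dot(year_cumsum, eps) reproduces np.cumsum(eps); the matrix form is
# convenient inside a deterministic model node. a quick verification:
import numpy as np
eps = np.random.normal(0., 1., 10)   # hypothetical yearly increments
tri = np.tril(np.ones((10, 10)))     # same structure as year_cumsum
assert np.allclose(np.dot(tri, eps), np.cumsum(eps))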
def process_summary(summary_filename):
    summary = physio.summary.Summary(summary_filename)
    logging.debug("Processing %s" % summary._filename)

    fn = os.path.basename(summary_filename)
    animal = fn.split('_')[0]
    date = fn.split('_')[1]
    # convert to datetime
    dt = datetime.datetime(int('20' + date[:2]), int(date[2:4]), int(date[4:]))

    # cull trials by success
    trials = summary.get_trials()
    if len(trials) == 0:
        logging.error("No trials for %s" % summary._filename)
        return
    trials = trials[trials['outcome'] == 0]

    # and by gaze
    try:
        gaze = clean_gaze(summary.get_gaze())
    except Exception as E:
        print "Fetching gaze failed: %s" % E
        gaze = []

    if len(gaze) > 0:
        logging.debug("N trials before gaze culling: %i" % len(trials))
        trials = cull_trials_by_gaze(trials, gaze)
        logging.debug("N trials after gaze culling: %i" % len(trials))

    for ch in xrange(1, 33):
        try:
            cis = summary.get_cluster_indices(ch)
        except Exception as E:
            print "Getting cluster_indices failed: %s" % E
            continue
        for cl in cis:
            ctrials = trials.copy()
            cell = {}
            cell['ch'] = ch
            cell['cl'] = cl
            cell['animal'] = animal
            cell['date'] = date
            cell['datetime'] = dt
            logging.debug("ch: %i, cl: %i" % (ch, cl))

            # rate
            spike_times = summary.get_spike_times(ch, cl)

            # find start of isolation
            isolation_start = \
                physio.spikes.times.find_isolation_start_by_isi(spike_times)
            spike_times = spike_times[spike_times >= isolation_start]

            nspikes = len(spike_times)
            cell['nspikes'] = nspikes
            if nspikes < min_spikes:
                logging.warning("\t%i < min_spikes[%i]" % (nspikes, min_spikes))
                #write_cell(cell)
                continue

            trange = (spike_times.min(), spike_times.max())
            # trange = summary.get_epoch_range()
            rate = nspikes / (trange[1] - trange[0])
            cell['rate'] = rate
            if rate < min_rate:
                logging.warning("\t%g < min_rate[%g]" % (rate, min_rate))
                write_cell(cell)
                continue
            cell['trange'] = trange

            # snr TODO
            try:
                snrs = summary.get_spike_snrs(ch, cl, timeRange=trange)
                cell['snr_mean'] = numpy.mean(snrs)
                cell['snr_std'] = numpy.std(snrs)
            except Exception as E:
                print "Snr measure failed: %s" % str(E)

            # location
            try:
                location = summary.get_location(ch)
            except Exception as E:
                location = (0, 0, 0)
                print "Attempt to get location failed: %s" % str(E)
            cell['location'] = list(location)

            # ---------- responsivity ---------------
            baseline, response, stat = \
                get_responsivity(spike_times, ctrials, bwin, rwin)
            cell['baseline_mean'] = numpy.mean(baseline)
            cell['baseline_std'] = numpy.std(baseline)
            cell['driven_mean'] = numpy.mean(response)
            cell['driven_std'] = numpy.std(response)
            cell['ntrials'] = len(ctrials)
            cell['responsivity'] = stat

            ctrials = pylab.rec_append_fields(
                ctrials, ['baseline', 'response'], [baseline, response])

            # find all distractor trials
            dtrials = summary.filter_trials(
                ctrials, {'name': {'value': 'BlueSquare', 'op': '!='}},
                timeRange=trange)
            if len(dtrials) == 0:
                logging.error("Zero trials for %i %i %s" %
                              (ch, cl, summary._filename))
                continue
            dstims = summary.get_stimuli(
                {'name': {'value': 'BlueSquare', 'op': '!='}})

            # --------- selectivity --------------
            cell['selectivity'] = {}
            cell['separability'] = {}
            for attr in attrs:
                sorted_keys, means, stds, ns, stats = \
                    get_selectivity(summary, dtrials, dstims, attr)
                max_key = sorted_keys[0]
                cell['selectivity'][attr] = {
                    'means': means, 'stds': stds, 'ns': ns,
                    'stats': stats, 'sorted': sorted_keys}
                cell['separability'][attr] = {}

                atrials = summary.filter_trials(dtrials, {attr: max_key})
                for attr2 in attrs:
                    if attr == attr2:
                        continue
                    # selectivity for attr2, restricted to trials at the
                    # best (MAX) level of attr
                    sorted_keys, means, stds, ns, stats = \
                        get_selectivity(summary, atrials, dstims, attr2)
                    max_key = sorted_keys[0]
                    cell['selectivity'][attr][attr2] = {
                        'means': means, 'stds': stds, 'ns': ns,
                        'stats': stats, 'sorted': sorted_keys}

                    # ----------- separability --------------
                    # this one uses all distractor trials
                    M, S, N, L, stats = get_separability(
                        summary, dtrials, dstims, attr, attr2)
                    cell['separability'][attr][attr2] = \
                        {'M': M, 'S': S, 'N': N, 'stats': stats}

            # --------- tolerance ------------
            write_cell(cell)
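# --- illustrative usage (not from the original script) ---
# process_summary is written to be mapped over a directory of summary files;
# the glob pattern below is hypothetical
import glob
for summary_filename in sorted(glob.glob('data/summaries/*_summary.h5')):
    process_summary(summary_filename)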
proj_dir = "D:/Projects/" + project + "/" if (os.environ["OS"] == "Windows_NT") else "/shared/projects/" + project + "/" ### setup the data # load in the csv data = pl.csv2rec(proj_dir + "data/model inputs/state_random_effects_input.csv") # keep just males aged 60-74 for now data = data[(data.sex == 1) & (data.age_group == "60to74")] # remove any instances of population zero, which might blow things up due to having offsets of negative infinity data = data[data.pop > 0] # center and standardize year data = pl.rec_append_fields( rec=data, names="year_std", arrs=np.array((data.year - np.mean(data.year)) / np.std(data.year)) ) ### find indices to speed up later insertion of random effects # list of states state_list = dict([(s, i) for i, s in enumerate(np.unique(data.statefips))]) # indices of observations for each state state_indices = np.array([data.statefips == s for s in state_list]) # list of causes cause_list = dict([(c, i) for i, c in enumerate(np.unique(data.underlying))]) # indices of observations for each cause cause_indices = np.array([data.underlying == c for c in cause_list])
    # (tail of a local percentile helper; the signature and the code that
    # builds `sorted`, `indexer`, and `weights` were lost upstream of this fragment)
    wshape = [1] * sorted.ndim
    wshape[axis] = 2
    weights.shape = wshape
    sumval = weights.sum()
    return np.add.reduce(sorted[indexer] * weights, axis=axis, out=out) / sumval

import time
print 'Finished at %s' % time.ctime()

# save basic predictions
predictions = model.trace('predicted')[:]
mean_prediction = predictions.mean(axis=0)
lower_prediction = percentile(predictions, 2.5, axis=0)
upper_prediction = percentile(predictions, 97.5, axis=0)
output = pl.rec_append_fields(
    rec=data,
    names=['mean', 'lower', 'upper'],
    arrs=[mean_prediction, lower_prediction, upper_prediction])
pl.rec2csv(output, proj_dir + 'outputs/model results/epi transition by state/all_cause_males.csv')

# plot surfaces
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
pp = PdfPages(proj_dir + 'outputs/model results/epi transition by state/surfaces.pdf')
fig = plt.figure()
ax = fig.gca(projection='3d')
X, Y = np.meshgrid(years, ages)
Z = model.trace('alpha_surf')[:].mean(axis=0)
ax.plot_wireframe(X, Y, Z, color='#315B7E')
ax.set_title('National')
pp.savefig()
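# --- illustrative stand-in (not from the original script) ---
# only the tail of the percentile helper survives above; a minimal
# replacement with the same call signature (q on a 0-100 scale), built on
# numpy, would be:
import numpy as np

def percentile(a, q, axis=None):
    # thin wrapper; np.percentile also takes q on the 0-100 scale
    return np.percentile(np.asarray(a), q, axis=axis)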
### setup the data

# load in the csv
data = pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')

# keep just males aged 60-74 for now
data = data[(data.sex == 1) & (data.age_group == '60to74')]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.]

# center and standardize year
data = pl.rec_append_fields(
    rec=data,
    names='year_std',
    arrs=np.array((data.year - np.mean(data.year)) / np.std(data.year)))

### find indices to speed up later insertion of random effects

# list of states
state_list = dict([(s, i) for i, s in enumerate(np.unique(data.statefips))])

# indices of observations for each state
# (iterate np.unique again so rows line up with the enumeration above;
# plain `for s in state_list` would follow arbitrary dict order)
state_indices = np.array([data.statefips == s for s in np.unique(data.statefips)])

# list of causes
cause_list = dict([(c, i) for i, c in enumerate(np.unique(data.underlying))])
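# --- illustrative sketch (not from the original script) ---
# the boolean index matrices above let per-state random effects be expanded
# to one value per observation with a single dot product instead of a python
# loop; the names below (labels, u_state) are hypothetical
import numpy as np
n_obs, n_states = 1000, 51
labels = np.random.randint(n_states, size=n_obs)        # fake state label per observation
idx = np.array([labels == s for s in range(n_states)])  # same structure as state_indices
u_state = np.random.normal(0., 1., n_states)            # one random effect per state
obs_effect = np.dot(idx.T, u_state)                     # (n_obs,) expanded effects
assert np.allclose(obs_effect, u_state[labels])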