Example #1
# load in the csv
data =      pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')

# keep just males aged 60-74 for now
data =      data[(data.sex == 1) & (data.age_group == '60to74')]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data =      data[data.pop > 0.]
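
The "offsets of negative infinity" mentioned in the comment come from the log-population offset of a Poisson-style rate model (the model itself is not shown in this snippet); a one-line illustration of why zero-population rows must be dropped:

# log(0) = -inf, so any row with pop == 0 would produce an undefined offset
offset = np.log(data.pop)  # safe only after the pop > 0 filter above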



### setup temporal indexing
# set year to start at 0
data =          pl.rec_append_fields(
                    rec =   data, 
                    names = 'year0', 
                    arrs =  np.array(data.year - np.min(data.year))
                )

# make a list of years in the data
years =         np.arange(np.min(data.year0), np.max(data.year0)+1, 1)



### make lists/indices by state
# list of states
state_names =   np.unique(data.statefips)
state_names.sort()

# make states numeric/sequential in data
data =          pl.rec_append_fields(
                    # the listing breaks off here; a plausible completion (the
                    # field name 'state_idx' is hypothetical) appends a
                    # sequential numeric index into the sorted state list
                    rec =   data,
                    names = 'state_idx',
                    arrs =  np.searchsorted(state_names, data.statefips)
                )
Example #2
full_dir = '%s/v02_prep_%s' % (indir, iso3)

# get cause list 
causes = list(set([f.split('+')[1] for f in os.listdir(full_dir) if re.search(age, f)]))
causes.remove('HIV') # temporary until Miriam fixes the HIV files 

# gather data and fit model 
cf = data.get_cod_data(full_dir, causes, age, iso3, sex)
m, pi = models.fit_latent_simplex(cf) 

# calculate summary measures
N, T, J = pi.shape  # N posterior draws, T years, J causes
mean = pi.mean(0)   # posterior mean, shape (T, J)
lower = pl.array([[st.mquantiles(pi[:,t,j], 0.025)[0] for j in range(J)] for t in range(T)])
upper = pl.array([[st.mquantiles(pi[:,t,j], 0.975)[0] for j in range(J)] for t in range(T)])

# format summary and save
output = pl.np.core.records.fromarrays(mean.T, names=['%s_mean' % c for c in causes])
output = pl.rec_append_fields(output, ['%s_lower' % c for c in causes], lower.T)
output = pl.rec_append_fields(output, ['%s_upper' % c for c in causes], upper.T)
pl.rec2csv(output, '%s/%s+%s+%s+summary.csv' % (outdir, iso3, age, sex))
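
The nested mquantiles loops above compute elementwise 2.5% and 97.5% quantiles across the draw dimension; a vectorized sketch with np.percentile (note that mquantiles' default plotting positions, alphap=betap=0.4, differ slightly from np.percentile's linear interpolation):

# vectorized quantiles over the draw axis; close to, but not bit-identical
# with, mquantiles' defaults
import numpy as np
lower = np.percentile(pi, 2.5, axis=0)   # shape (T, J)
upper = np.percentile(pi, 97.5, axis=0)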

# format all sims and save 
pi.shape = (N*T, J)  # stack draws: one row per (simulation, year) pair
years = pl.array([t for s in range(N) for t in range(1980, 2012)])  # assumes T == 32 (years 1980-2011)
sim = pl.array([s for s in range(N) for t in range(1980, 2012)])
output = pl.np.core.records.fromarrays(pi.T, names=causes)
output = pl.rec_append_fields(output, 'year', years)
output = pl.rec_append_fields(output, 'sim', sim)
pl.rec2csv(output, '%s/%s+%s+%s.csv' % (outdir, iso3, age, sex))
Example #3
data =      pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')
print 'Data loaded'

# keep just the specified age and sex
data =      data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data =      data[data.pop > 0.]



### setup temporal indexing
# set year to start at 0
data =          pl.rec_append_fields(
                    rec =   data, 
                    names = 'year0', 
                    arrs =  np.array(data.year - np.min(data.year))
                )

# make a list of years in the data
years =         np.arange(np.min(data.year0), np.max(data.year0)+1, 1)

# find indices of years
year_indices =  np.array([data.year0 == y for y in years])
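
year_indices holds one boolean mask per year, each the length of the data; a minimal usage sketch, aggregating the snippet's pop field by year:

# hypothetical usage: total population in each year via the boolean masks
pop_by_year = np.array([data.pop[idx].sum() for idx in year_indices])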

# make a list of which years to sample the random walks at
knot_spacing =  5
syears =        np.arange(np.min(data.year0), np.max(data.year0)+knot_spacing, knot_spacing)

# make a lower-triangular matrix of ones for computing the cumulative sum over sample years
syear_cumsum =  np.zeros((len(syears), len(syears)))
# assumed completion (the listing is cut off here; this mirrors Example #4):
# ones at and below the diagonal, so syear_cumsum.dot(x) accumulates x
for i in range(len(syears)):
    for j in range(i + 1):
        syear_cumsum[i, j] = 1.
Example #4
### setup the data
# load in the csv
data = pl.csv2rec(proj_dir + "data/model inputs/downsampled.csv")
print "Data loaded"

# keep just the specified age and sex
data = data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.0]


### setup temporal indexing
# set year to start at 0
data = pl.rec_append_fields(rec=data, names="year0", arrs=np.array(data.year - np.min(data.year)))

# set years to go from 0 to (num years - 1)
for i, y in enumerate(np.sort(np.unique(data.year0))):
    data.year0[data.year0 == y] = i

# make a list of years in the data
years = np.unique(data.year0)

# find indices of years
year_indices = np.array([data.year0 == y for y in years])

# make a lower-triangular matrix of ones for computing the cumulative sum of years
year_cumsum = np.zeros((len(years), len(years)))
for i in range(len(years)):
    for j in range(len(years)):
        # assumed completion (the listing is cut off here): ones at and below
        # the diagonal, so year_cumsum.dot(x) accumulates x over years
        if j <= i:
            year_cumsum[i, j] = 1.0
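
A lower-triangular ones matrix like this typically converts independent increments into a random walk; a minimal sketch under that assumption (the increment draws are placeholders, not from the snippet):

# hypothetical usage: cumulative-sum matrix applied to i.i.d. increments
increments = np.random.normal(0.0, 1.0, len(years))
random_walk = year_cumsum.dot(increments)  # walk value at each year
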
Example #5
def process_summary(summary_filename):
    summary = physio.summary.Summary(summary_filename)
    logging.debug("Processing %s" % summary._filename)

    fn = os.path.basename(summary_filename)
    # filename is assumed to encode '<animal>_<YYMMDD>_...'
    animal = fn.split('_')[0]
    date = fn.split('_')[1]
    # convert the YYMMDD date to a datetime (assumes years 2000-2099)
    dt = datetime.datetime(int('20' + date[:2]), int(date[2:4]), int(date[4:]))

    # cull trials by success
    trials = summary.get_trials()
    if len(trials) == 0:
        logging.error("No trails for %s" % summary._filename)
        return
    trials = trials[trials['outcome'] == 0]

    # and gaze
    try:
        gaze = clean_gaze(summary.get_gaze())
    except Exception as E:
        print "Fetching gaze failed: %s" % E
        gaze = []

    if len(gaze) > 0:
        logging.debug("N Trials before gaze culling: %i" % len(trials))
        trials = cull_trials_by_gaze(trials, gaze)
        logging.debug("N Trials after gaze culling: %i" % len(trials))

    for ch in xrange(1, 33):
        try:
            cis = summary.get_cluster_indices(ch)
        except Exception as E:
            print "Getting cluster_indices failed: %s" % E
            continue
        for cl in cis:
            ctrials = trials.copy()
            cell = {}
            cell['ch'] = ch
            cell['cl'] = cl
            cell['animal'] = animal
            cell['date'] = date
            cell['datetime'] = dt

            logging.debug("ch: %i, cl: %i" % (ch, cl))
            # rate
            spike_times = summary.get_spike_times(ch, cl)

            # find start of isolation
            isolation_start = physio.spikes.times.\
                    find_isolation_start_by_isi(spike_times)
            spike_times = spike_times[spike_times >= isolation_start]

            nspikes = len(spike_times)
            cell['nspikes'] = nspikes
            if nspikes < min_spikes:
                logging.warning("\t%i < min_spikes[%i]" % \
                        (nspikes, min_spikes))
                #write_cell(cell)
                continue

            trange = (spike_times.min(), spike_times.max())
            # trange = summary.get_epoch_range()
            rate = nspikes / (trange[1] - trange[0])
            cell['rate'] = rate
            if rate < min_rate:
                logging.warning("\t%g < min_rate[%g]" % \
                        (rate, min_rate))
                write_cell(cell)
                continue
            cell['trange'] = trange

            # snr TODO
            try:
                snrs = summary.get_spike_snrs(ch, cl, timeRange=trange)
                cell['snr_mean'] = numpy.mean(snrs)
                cell['snr_std'] = numpy.std(snrs)
            except Exception as E:
                print "Snr measure failed: %s" % str(E)

            # location
            try:
                location = summary.get_location(ch)
            except Exception as E:
                location = (0, 0, 0)
                print "Attempt to get location failed: %s" % str(E)
            cell['location'] = list(location)

            # ---------- responsivity ---------------
            baseline, response, stat = get_responsivity(\
                    spike_times, ctrials, bwin, rwin)
            cell['baseline_mean'] = numpy.mean(baseline)
            cell['baseline_std'] = numpy.std(baseline)

            cell['driven_mean'] = numpy.mean(response)
            cell['driven_std'] = numpy.std(response)

            cell['ntrials'] = len(ctrials)
            cell['responsivity'] = stat

            ctrials = pylab.rec_append_fields(ctrials, \
                    ['baseline', 'response'], [baseline, response])

            # find all distractor trials
            dtrials = summary.filter_trials(ctrials, \
                    {'name': {'value': 'BlueSquare', 'op': '!='}}, \
                    timeRange=trange)
            if len(dtrials) == 0:
                logging.error("Zero trials for %i %i %s" % \
                        (ch, cl, summary._filename))
                continue
            dstims = summary.get_stimuli({'name': \
                    {'value': 'BlueSquare', 'op': '!='}})

            # --------- selectivity --------------
            cell['selectivity'] = {}
            cell['separability'] = {}
            for attr in attrs:
                sorted_keys, means, stds, ns, stats = \
                        get_selectivity(summary, dtrials, dstims, attr)
                max_key = sorted_keys[0]
                cell['selectivity'][attr] = { \
                        'means': means, 'stds': stds, 'ns': ns,
                        'stats': stats, 'sorted': sorted_keys}
                cell['separability'][attr] = {}

                atrials = summary.filter_trials(dtrials, {attr: max_key})
                for attr2 in attrs:
                    if attr == attr2:
                        continue

                    # this is only for the MAX
                    sorted_keys, means, stds, ns, stats = \
                            get_selectivity(summary, atrials, dstims, attr2)
                    max_key = sorted_keys[0]
                    cell['selectivity'][attr][attr2] = { \
                            'means': means, 'stds': stds, 'ns': ns,
                            'stats': stats, 'sorted': sorted_keys}

                    # ----------- separability --------------
                    # this is for all
                    M, S, N, L, stats = get_separability(summary, dtrials, \
                            dstims, attr, attr2)
                    cell['separability'][attr][attr2] = \
                            {'M': M, 'S': S, 'N': N, 'stats': stats}

            # --------- tolerance ------------

            write_cell(cell)
            continue
proj_dir = "D:/Projects/" + project + "/" if (os.environ["OS"] == "Windows_NT") else "/shared/projects/" + project + "/"


### setup the data
# load in the csv
data = pl.csv2rec(proj_dir + "data/model inputs/state_random_effects_input.csv")

# keep just males aged 60-74 for now
data = data[(data.sex == 1) & (data.age_group == "60to74")]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0]

# center and standardize year
data = pl.rec_append_fields(
    rec=data, names="year_std", arrs=np.array((data.year - np.mean(data.year)) / np.std(data.year))
)


### find indices to speed up later insertion of random effects
# list of states
state_list = dict([(s, i) for i, s in enumerate(np.unique(data.statefips))])

# indices of observations for each state
state_indices = np.array([data.statefips == s for s in state_list])

# list of causes
cause_list = dict([(c, i) for i, c in enumerate(np.unique(data.underlying))])

# indices of observations for each cause
cause_indices = np.array([data.underlying == c for c in cause_list])
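
These index arrays are built, per the comment above, "to speed up later insertion of random effects"; a minimal sketch of that use with placeholder effect vectors (alpha and beta are hypothetical, not in the snippet):

# hypothetical usage: broadcast per-state and per-cause effects onto the rows
alpha = np.zeros(len(state_list))  # per-state random effects (placeholders)
beta = np.zeros(len(cause_list))   # per-cause random effects (placeholders)
row_effects = state_indices.T.dot(alpha) + cause_indices.T.dot(beta)
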
Example #7
        wshape = [1]*sorted.ndim
        wshape[axis] = 2
        weights.shape = wshape
        sumval = weights.sum()
    return np.add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval

import time
print 'Finished at %s' % time.ctime()

# save basic predictions
predictions =       model.trace('predicted')[:]
mean_prediction =   predictions.mean(axis=0)
lower_prediction =  percentile(predictions, 2.5, axis=0)
upper_prediction =  percentile(predictions, 97.5, axis=0)
output =            pl.rec_append_fields(  rec =   data, 
                        names = ['mean', 'lower', 'upper'], 
                        arrs =  [mean_prediction, lower_prediction, upper_prediction])
pl.rec2csv(output, proj_dir + 'outputs/model results/epi transition by state/all_cause_males.csv')
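
The percentile function called above is shown only as a fragment at the top of this example (the tail of a linear-interpolation percentile helper); assuming numpy >= 1.5, np.percentile computes the same summaries:

# equivalent calls, assuming numpy >= 1.5
lower_prediction = np.percentile(predictions, 2.5, axis=0)
upper_prediction = np.percentile(predictions, 97.5, axis=0)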

# plot surfaces
from    mpl_toolkits.mplot3d    import axes3d
import  matplotlib.pyplot       as plt
from    matplotlib.backends.backend_pdf import PdfPages
pp =    PdfPages(proj_dir + 'outputs/model results/epi transition by state/surfaces.pdf')
fig =   plt.figure()
ax =    fig.gca(projection='3d')
X,Y =   np.meshgrid(years, ages)
Z =     model.trace('alpha_surf')[:].mean(axis=0)
ax.plot_wireframe(X, Y, Z, color='#315B7E')
ax.set_title('National')
pp.savefig()

### setup the data
# load in the csv
data =      pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')

# keep just males aged 60-74 for now
data =      data[(data.sex == 1) & (data.age_group == '60to74')]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data =      data[data.pop > 0.]

# center and standardize year
data = pl.rec_append_fields(
            rec =   data, 
            names = 'year_std', 
            arrs =  np.array((data.year - np.mean(data.year)) / np.std(data.year))
       )



### find indices to speed up later insertion of random effects
# list of states
state_list =    dict([(s, i) for i, s in enumerate(np.unique(data.statefips))])

# indices of observations for each state
state_indices = np.array([data.statefips == s for s in state_list])

# list of causes
cause_list =    dict([(c, i) for i, c in enumerate(np.unique(data.underlying))])