def main(hdf, name):
    """
    Tabulate histograms of design matrix columns from <hdf> into
    <name>.csv.

    For each condition of interest ("acc", "value", "rpe", "p", "rand",
    "box") the first model in <hdf> whose 'dm' metadata lists that
    condition is used.  The matching column is sliced out of that
    model's '/<model>/dm' data, histogrammed with a RHist (2 decimals)
    and written out as one [x, count, cond] row per item in the
    histogram's <h> mapping.

    Conditions that no model lists are left out of the table.
    Returns None.
    """

    conds = set(["acc", "value", "rpe", "p", "rand", "box"])

    # The with block closes the csv even if reading the hdf or
    # histogramming fails part way through.
    with open('{0}.csv'.format(name), 'w') as f:
        csvw = csv.writer(f)

        # Make a header for the table
        csvw.writerow(["x", "count", "cond"])

        # Find a dm column for each cond: take the first model
        # whose dm lists it.  Conds across models are the same.
        locations = {}
        for model in get_model_names(hdf):
            meta = get_model_meta(hdf, model)
            for ii, dmname in enumerate(meta["dm"]):
                if dmname in conds:
                    locations[dmname] = (model, ii)
                    # conds shrinks as matches are found, so a cond
                    # is only ever located once.
                    conds.remove(dmname)

            # Every cond has a location, no need to look further.
            if not conds:
                print("All conds found. {0}".format(model))
                break

        # Get each dm's data and pick a column using pos;
        # pos matches cond.
        dmdata = {}
        for cond, (model, pos) in locations.items():
            print("Getting {0}".format(cond))
            dm = np.array(read_hdf(hdf, '/' + model + '/dm'))
            # dm is indexed as a 3-d array; pos selects along
            # the last axis.
            dmdata[cond] = dm[:, :, pos].flatten()

        # Create a histogram for each cond then write it out.
        for cond, data in dmdata.items():
            print("Histogramming {0}".format(cond))

            hist = RHist(name=cond, decimals=2)
            for x in data:
                hist.add(x)

            for k, v in hist.h.items():
                csvw.writerow([k, v, cond])
def create_hist_list(hdf, model, stat):
    """
    Create a list of RHist (histogram) objects for <model> and <stat> in
    the given <hdf>.

    If <stat> has only one entry (as is the case for 'aic') the list will
    have only one entry, named <stat>.  If however <stat> has n entries
    per model (like 't') the list will have n-1 entries, named after the
    matching design matrix columns.  As n matches the number of columns
    in the design matrix, the rightmost will always correspond to the
    dummy predictor and is therefore discarded.

    Returns the list of RHist objects (empty if <stat> holds no data).
    """

    hist_list = []  ## A list of RHist objects.
    meta = get_model_meta(hdf, model)  ## metadata for naming

    # A handle on the hdf data
    hdfdata = read_hdf(hdf, '/' + model + '/' + stat)

    # Loop over the nodes, adding the data for each to a RHist.
    for node in hdfdata:
        # Some data will be list-like, some a single number.  Only
        # the len() probe is guarded: a TypeError raised later (for
        # example inside RHist.add) is a real error and must not be
        # mistaken for "node is a number".
        try:
            n_entries = len(node)
        except TypeError:
            # A single number, so hist_list has only one
            # entry (i.e. 0); create it on first use.
            if not hist_list:
                hist_list.append(RHist(name=stat, decimals=2))
            hist_list[0].add(node)
            continue

        # Skip the last entry (see the docstring).
        for ii in range(n_entries - 1):
            # Init entries in hist_list as needed
            if ii >= len(hist_list):
                hist_list.append(RHist(name=meta['dm'][ii], decimals=2))
            hist_list[ii].add(node[ii])

    return hist_list
def hist_t(hdf, model, name=None):
    """
    Plot histograms of the t values in <hdf> for each condition in
    <model>.

    One RHist (1 decimal) is created per design matrix column listed in
    the model's metadata.  All of them are drawn, normalized, on a single
    axis together with vertical lines marking significance thresholds.

    If <name> is not None the figure is also written to <name> in pdf
    format (<name> is handed to savefig as given).  Returns None.
    """

    meta = get_model_meta(hdf, model)
    hist_list = [RHist(name=dm_col, decimals=1) for dm_col in meta['dm']]
    # NOTE(review): a RHist is made for every dm column, but the loop
    # below never adds the last t.  If len(ts) equals the number of dm
    # columns the last histogram stays empty yet is still plotted --
    # confirm this is intended.

    # read_hdf_inc is consumed one entry at a time.
    for ts in read_hdf_inc(hdf, '/' + model + '/t'):
        # Get the tvals for each instance of model and add them to
        # the matching histogram.  The last t in ts is the constant,
        # which we do not want to plot, so it is skipped.
        for ii in range(len(ts) - 1):
            hist_list[ii].add(ts[ii])

    # Create a fig and plot every histogram on its single axis.
    fig = plt.figure()
    fig.add_subplot(111)

    ## Using html colors...
    colors = itertools.cycle(
        ['DarkGray', 'DarkBlue', 'DarkGreen', 'MediumSeaGreen'])
    for hist in hist_list:
        # next() rather than colors.next(): the method form only
        # exists on Python 2 iterators.
        hist.plot(fig=fig, color=next(colors), norm=True)

    # Prettify the plot
    ax = fig.axes[0]
    ax.set_xlabel('t-values')
    ax.set_ylabel('P(t)')

    # Add vertical lines representing significance thresholds;
    # thinner lines mark stricter thresholds.
    ## tval lines assume N=12 subjects
    # NOTE(review): the values look like one-tailed critical t values
    # for 12 degrees of freedom -- confirm against the study design.
    thresholds = (
        (1.7822, 'p < 0.05', 4),
        (2.6810, 'p < 0.01', 3),
        (3.0545, 'p < 0.005', 2),
        (4.3178, 'p < 0.0005', 1),
    )
    for tval, label, width in thresholds:
        ax.axvline(x=tval, label=label, color='red', linewidth=width)

    plt.xlim(-10, 15)
    plt.legend()
    plt.title('{0} -- BOLD: {1}'.format(model, meta['bold']))

    if name is not None:
        plt.savefig(name, format="pdf")
def test_RHist():
    """
    Smoke test for RHist.

    Fills a RHist (1 decimal) with 10,000 draws from scipy's normal
    distribution, prints its summary statistics next to the value each
    label expects, then plots the normalized histogram.  Nothing is
    asserted; the printed output is meant to be checked by eye.
    """
    import numpy as np
    import scipy.stats as stats
    from bigstats.hist import RHist

    samples = stats.norm.rvs(size=10000)
    rhist = RHist(name='test', decimals=1)
    # NOTE(review): values are added with incr() here -- confirm that
    # RHist still provides this method.
    for sample in samples:
        rhist.incr(sample)

    print('The normal distribution was sampled 10,000 times....')

    # (label template, RHist method) pairs.  Each method is looked up
    # and called only when its line is printed.
    summary = (
        ('Est. Mean (0): {0}', 'mean'),
        ('Est. Var (1): {0}', 'var'),
        ('Est. Stdev (0.01): {0}', 'stdev'),
        ('N (10000): {0}', 'n'),
    )
    for template, method in summary:
        print(template.format(getattr(rhist, method)()))

    print('** Decimals should be precise to 1 place.**')
    rhist.plot(fig=None, norm=True)

    # TODO: Add a uniform and some asymmetric dist too.