def table4simav(dirpath, title): """Return None. Provides a summary table for one simulation. """ table = [title] table_data = dict() #map eps -> y2pp (a dict) years = range(249) for eps in range(11): sheetname = 'eps{0:02d}simav'.format(eps) fname = os.path.join(dirpath , sheetname + '.csv') fname = os.path.normpath(fname) y2pp = dict() #map year -> data for this eps table_data[eps] = y2pp with open(fname, 'r') as fin: next(fin) #discard header line for rownum, line in enumerate(fin): year = rownum if year in years: line = line.split(',') male_partner_dist = np.array([map(float, line[8:20])]) female_partner_dist = np.array([map(float, line[20:32])]) all_tab = DegreeTab(male_partner_dist + female_partner_dist) #get point prevalence (ok to compute from averaged data) concurrency_point_prevalence = all_tab.point_prevalence() total_inf = float(line[0]) + float(line[1]) pp = total_inf / 200 #200=20000/100 -> point prevalance as pct y2pp[year] = pp, round(100*concurrency_point_prevalence[0]) header = 'year ' for eps in range(11): header += '{0:10}'.format('eps{0:02d}'.format(eps) ) table.append(header) for year in years: data = '{0:4d}'.format(year) for eps in range(11): data += '{0:6.2f}({1:2.0f})'.format(*table_data[eps][year]) table.append(data) return '\n'.join(table)
def simav2csv(dirpath, fpattern, outname, force=False): """Compute average of the replicates (where the replicates are in a given `dirpath` and share `fpattern`). The results files are assumed to have a single header line. Past computation of simav used if possible, unless `force` is True. """ assert outname.endswith('csv') #full path to results files patternpath = os.path.join(dirpath, fpattern) outpath = os.path.join(dirpath, outname) #get all completed replicates of this simulation fnames = glob.glob(patternpath) n_sims = len(fnames) if n_sims == 0: print 'WARN: no data for ', patternpath return None elif n_sims < 100: print 'WARN: incomplete data for ', patternpath print 'Only {0:d} simulations run. (Computing average anyway!)'.format(n_sims) #check if simav needs updating ftimes = [os.path.getmtime(fname) for fname in fnames] use_existing = False try: csvtime = os.path.getmtime(outpath) if any((csvtime < ftime) for ftime in ftimes): print 'Updating ' + outpath else: use_existing = not force except os.error: print 'Creating ' + outpath #compute average across simulations if use_existing: print 'reuse existing simav' simav = np.loadtxt(outpath, delimiter=',', skiprows=1) point_prevalence = DegreeTab(simav[:,8:20] + simav[:,20:32]).point_prevalence() kappa3 = None #must compute within loop (see below) with open(outpath, 'r') as temp: csv_header = next(temp).strip() headers = tuple(hdr for hdr in csv_header.split(',')) else: # compute the average of the replicates asum = 0 kappa3 = 0 #must be computed in the loop! point_prevalence = 0 #compute in the loop *if* population changes #get the header from the first file (assumed to be identical across results files) with open(fnames[0], 'r') as temp: csv_header = next(temp).strip() headers = tuple(hdr for hdr in csv_header.split(',')) for fname in fnames: #get the individual replicates one at a time anew = np.loadtxt(fname, delimiter=',', skiprows=1) asum += anew male_partner_dist = anew[:,8:20] female_partner_dist = anew[:,20:32] all_tab = DegreeTab(male_partner_dist + female_partner_dist) assert (all_tab._data==(anew[:,8:20]+anew[:,20:32])).all() #get kappa3 for every period: array shape (t,) kappa3 += all_tab.kappa3 #get point prevalence for every period: array shape (t,) point_prevalence += all_tab.point_prevalence() assert asum.shape in [(250, 2+6+4*12), (250, 2+6+4*12+2)] pptest = DegreeTab(asum[:,8:20]+asum[:,20:32]).point_prevalence() simav = asum / float(n_sims) #deflate sum to average """ Write `a` to dirpath, where `a` is simulation average. (Average of 100 replications of simulation for this `eps`.) """ #this data set is not huge, so make and write a string representing all of it output = [csv_header] output.extend((','.join(str(d) for d in row) for row in simav)) print 'writing ' + outpath with open(outpath, 'w') as fout: fout.write('\n'.join(output)) #create other data used for graphs kappa3 /= float(n_sims) #deflate sum to average point_prevalence /= float(n_sims) #deflate sum to average assert np.allclose(point_prevalence, pptest) return dict(headers=headers, simav=simav, point_prevalence=point_prevalence, kappa3=kappa3)