def process(h5file, ratio):
    """Aggregate the ``/pp_nonpolar`` contact time series for one ratio.

    For each isomer in ("scyllo", "chiro", "water") every table under the
    ``/pp_nonpolar`` group whose name matches the isomer, the ratio (not
    required for water) and the ``pp_nonpolar_vs_t.xvg`` suffix is read.
    Column 1 of each table (first ``config.LASTFRAME`` rows) becomes one
    column of a rows-by-tables matrix, from which are derived:

    * a per-row average and spread via ``utils.summary_statistics``, both
      divided by ``config.NMOLECULES`` and smoothed with ``utils.smooth``;
    * the mean and standard deviation (``numpy.std``) across tables of each
      table's average over rows ``config.STARTFRAME:config.LASTFRAME``.

    Side effects: prints progress/debug output and writes
    ``<ratio>_pp_nonpolar_smoothed.txt.gz`` and
    ``<ratio>_avg_pp_nonpolar_contact.txt`` to the working directory.

    Args:
        h5file: open HDF5 handle providing ``listNodes(where=...)``; it is
            also passed through to ``myh5.getTableAsMatrix``.
        ratio: string used both in the table-name pattern and as the prefix
            of the output file names.

    Returns:
        The horizontally stacked smoothed series (average then spread, for
        each isomer in order), as returned by ``numpy.hstack``.

    Raises:
        ValueError: if no table could be read for one of the isomers.
    """
    group = "/pp_nonpolar"
    suffix = "pp_nonpolar_vs_t.xvg"
    isomerlist = ["scyllo", "chiro", "water"]
    plot_data = []
    mean_contact_list = []
    std_contact_list = []

    # read in files for each system and aggregate
    for iso in isomerlist:
        print("processing %s" % (iso,))
        # Water tables are not tagged with a ratio, so only the isomer name
        # and the file suffix are matched for them.
        if iso == "water":
            pattern = re.compile(r"%s.*%s" % (iso, suffix))
        else:
            pattern = re.compile(r"%s.*%s.*%s" % (iso, ratio, suffix))

        datalist = []
        for table in h5file.listNodes(where=group):
            if not pattern.search(table.name):
                continue
            # HDF5 node paths always use '/', independent of the host OS,
            # so the path is built explicitly instead of with os.path.join.
            table_path = "%s/%s" % (group, table.name)
            data = myh5.getTableAsMatrix(h5file, table_path)
            if data is None:
                print("no data was read in")
                continue
            data = data.astype('float')
            datalist.append(data[0:config.LASTFRAME, 1])
        print("datalist %s" % (datalist,))

        if not datalist:
            # numpy.vstack([]) would fail with an error that says nothing
            # about which system is missing; fail with a useful message.
            raise ValueError(
                "no %s tables matching %r could be read for isomer %r "
                "(ratio %r)" % (group, pattern.pattern, iso, ratio))

        # One column per table, one row per table row.
        data_matrix = numpy.transpose(numpy.vstack(datalist))
        print("data_matrix %s %s" % (data_matrix, data_matrix.shape))

        avg, std = utils.summary_statistics(data_matrix, sum_across="columns")

        # Average of each table over the analysis window, then the mean and
        # standard deviation of those per-table averages.
        window = data_matrix[config.STARTFRAME:config.LASTFRAME]
        avg_contacts = numpy.average(window, axis=0)
        mean_contact = numpy.average(avg_contacts)
        std_contact = numpy.std(avg_contacts)
        print(mean_contact)
        print(std_contact)
        mean_contact_list.append(mean_contact)
        std_contact_list.append(std_contact)

        # NOTE(review): the two calls pass different time_present values
        # (False for the average, True for the spread) although both inputs
        # come from the same summary_statistics call -- confirm against
        # utils.smooth that this is intentional.
        avg_smoothed = utils.smooth(avg / config.NMOLECULES, 500,
                                    time_present=False, timestep=2)
        std_smoothed = utils.smooth(std / config.NMOLECULES, 500,
                                    time_present=True, timestep=2)
        plot_data.append(avg_smoothed)
        plot_data.append(std_smoothed)

    timeseries_matrix = numpy.hstack(plot_data)
    print("timeseries_matrix %s %s"
          % (timeseries_matrix, timeseries_matrix.shape))
    print("time %s" % (timeseries_matrix[:, 0],))

    numpy.savetxt(ratio + "_pp_nonpolar_smoothed.txt.gz", timeseries_matrix,
                  fmt='%0.3f')
    utils.savetxt(ratio + "_avg_pp_nonpolar_contact.txt",
                  "#scyllo chiro water",
                  numpy.vstack([mean_contact_list, std_contact_list]),
                  fmt='%0.3f')
    return timeseries_matrix
def process(h5file, ratio, format="p2p_vs_t.dat"):
    """Build per-isomer contact time series from the ``/polar`` group.

    Given an h5file, return a list of data to be plotted as line plots and
    a corresponding list of labels.

    For each isomer in ("scyllo", "chiro", "water") every table under
    ``/polar`` whose name matches the isomer, *ratio* (not required for
    water) and *format* is read. Column 1 is collected as the "inter"
    series and column 2 as the "intra" series (first ``config.LASTFRAME``
    rows); column 0 of the last table read is used as the time axis.
    ``utils.summary_statistics`` supplies the average/spread of each
    series, and the combined matrix is smoothed with ``utils.smooth``.

    Side effects: prints progress/debug output and writes
    ``<ratio>_p2p_vs_t.txt``, ``<ratio>_p2p_vs_t_smoothed.txt`` and
    ``<ratio>_avg_contacts_w_err.txt`` via ``utils.savetxt``.

    Args:
        h5file: open HDF5 handle providing ``listNodes(where=...)``; it is
            also passed through to ``myh5.getTableAsMatrix``.
        ratio: key into ``config.RATIO``; also interpolated into the
            table-name pattern and the output file names.
        format: table-name suffix, interpolated into the regular
            expression as-is (not escaped).

    Returns:
        ``(datalist, labellist)``: the smoothed matrices, one per isomer,
        and the matching plot labels.

    Raises:
        ValueError: if no table could be read for one of the isomers.
    """
    group = "/polar"
    header = "# time average_inter std_inter average_intra std_intra"
    datalist = []
    labellist = []
    isomerlist = ["scyllo", "chiro", "water"]
    mean_contact_list = []
    std_contact_list = []
    # Loop-invariant; looking it up first also fails fast on an unknown ratio.
    ratiolabel = config.RATIO[ratio]

    for iso in isomerlist:
        print("processing %s" % (iso,))
        # Water tables are not tagged with a ratio, so only the isomer name
        # and the suffix are matched for them.
        if iso == "water":
            pattern = re.compile(r"%s.*%s" % (iso, format))
        else:
            pattern = re.compile(r"%s.*%s.*%s" % (iso, ratio, format))

        data_inter = []
        data_intra = []
        # Reset per isomer so a time axis left over from the previous
        # isomer can never be reused silently.
        time = None
        for table in h5file.listNodes(where=group):
            if not pattern.search(table.name):
                continue
            print("processing %s" % (table.name,))
            # HDF5 node paths always use '/', independent of the host OS.
            table_path = "%s/%s" % (group, table.name)
            data = myh5.getTableAsMatrix(h5file, table_path,
                                         dtype=numpy.int32)
            if data is None:
                print("no data was read in")
                continue
            data = data.astype('float')
            print("converted to float32 %s" % (data,))
            nrows, ncols = data.shape
            # Sanity check that the table is oriented one row per sample.
            assert nrows > ncols
            print("Test data read in dimensions %s %s"
                  % (data.shape, data.dtype))
            data_inter.append(data[0:config.LASTFRAME, 1])
            data_intra.append(data[0:config.LASTFRAME, 2])
            time = data[0:config.LASTFRAME, 0]

        if time is None:
            raise ValueError(
                "no %s tables matching %r could be read for isomer %r "
                "(ratio %r)" % (group, pattern.pattern, iso, ratio))

        # compute summary statistics
        print("summarizing statistics ... ")
        inter_matrix = utils.array_list_to_matrix(data_inter)
        intra_matrix = utils.array_list_to_matrix(data_intra)
        average_inter, std_inter = utils.summary_statistics(inter_matrix)
        average_intra, std_intra = utils.summary_statistics(intra_matrix)

        # Time-averaged number of inter contacts per table, then the mean
        # and standard deviation (numpy.std) of those averages.
        avg_contacts = numpy.average(inter_matrix, axis=0)
        mean_contact = numpy.average(avg_contacts)
        std_contact = numpy.std(avg_contacts)
        mean_contact_list.append(mean_contact)
        std_contact_list.append(std_contact)
        print("%s %s" % (mean_contact, std_contact))

        plotdata = utils.array_list_to_matrix(
            [time, average_inter, std_inter, average_intra, std_intra])
        print("plotdata %s" % (plotdata,))
        print("Test: dimensions of plotdata for %s %s %s"
              % (iso, ratio, plotdata.shape))

        plotdata_smoothed = utils.smooth(plotdata, 500, time_present=True,
                                         timestep=2)
        print(plotdata_smoothed)
        datalist.append(plotdata_smoothed)
        print("smoothed data %s %s"
              % (plotdata_smoothed, plotdata_smoothed.shape))

        if iso == "water":
            labellist.append("water")
        else:
            labellist.append("%s (%s)" % (iso, ratiolabel))

    # NOTE(review): these two file names do not include the isomer, so they
    # only ever hold the series of the last isomer processed (assuming
    # utils.savetxt overwrites). Confirm whether per-isomer files were
    # intended before changing the names; downstream readers may depend on
    # them.
    utils.savetxt("%s_p2p_vs_t.txt" % (ratio,), header, plotdata,
                  fmt='%0.2f')
    utils.savetxt("%s_p2p_vs_t_smoothed.txt" % (ratio,), header,
                  plotdata_smoothed, fmt='%0.2f')
    utils.savetxt("%s_avg_contacts_w_err.txt" % (ratio,),
                  "#scyllo chiro water",
                  numpy.vstack([mean_contact_list, std_contact_list]),
                  fmt='%0.2f')
    return (datalist, labellist)