Пример #1
0
def process(h5file, ratio):
	"""Aggregate the '/pp_nonpolar' tables per isomer and write summary files.

	For each of "scyllo", "chiro" and "water", every node under
	'/pp_nonpolar' whose name matches the isomer (and, except for water,
	``ratio``) followed by "pp_nonpolar_vs_t.xvg" is read with
	``myh5.getTableAsMatrix``. Column 1 of each table, truncated to
	``config.LASTFRAME`` rows, becomes one column of a per-isomer matrix.

	Side effects (both in the current working directory):
	  * ``<ratio>_pp_nonpolar_smoothed.txt.gz`` -- the returned matrix.
	  * ``<ratio>_avg_pp_nonpolar_contact.txt`` -- a 2 x 3 array: row 0 is the
	    mean and row 1 the standard deviation of the per-table time averages,
	    one column per isomer in the order scyllo, chiro, water.

	Args:
		h5file: open HDF5 file handle exposing ``listNodes(where=...)``.
		ratio: string used both in the table-name pattern and as the output
			file prefix. It is inserted into the regular expression
			unescaped, exactly as before.

	Returns:
		``numpy.hstack`` of the smoothed average and smoothed standard
		deviation produced by ``utils.smooth`` for each isomer, in order.

	Raises:
		ValueError: if no table yields data for one of the isomers.
	"""
	group = '/pp_nonpolar'
	# Escaped so that the '.' in the file suffix only matches a literal dot.
	suffix = re.escape("pp_nonpolar_vs_t.xvg")
	isomerlist = ["scyllo", "chiro", "water"]
	plot_data = []
	mean_contact_list = []
	std_contact_list = []

	# read in files for each system and aggregate
	for iso in isomerlist:
		print("processing %s" % (iso,))
		if iso == "water":
			# Water table names are matched without the ratio.
			pattern = re.compile(r"%s.*%s" % (iso, suffix))
		else:
			pattern = re.compile(r"%s.*%s.*%s" % (iso, ratio, suffix))

		datalist = []
		for table in h5file.listNodes(where=group):
			if not pattern.search(table.name):
				continue
			# HDF5 node paths always use '/', independent of the host OS,
			# so the path is not built with os.path.join.
			table_path = group + '/' + table.name
			data = myh5.getTableAsMatrix(h5file, table_path)
			if data is None:
				print("no data was read in")
				continue
			data = data.astype('float')
			datalist.append(data[0:config.LASTFRAME, 1])

		print("datalist %s" % (datalist,))
		if not datalist:
			# numpy.vstack([]) would fail with an unhelpful message.
			raise ValueError(
				"no %s tables with data found for isomer %r (ratio %r)"
				% (group, iso, ratio))

		# One column per table, one row per frame.
		data_matrix = numpy.transpose(numpy.vstack(datalist))
		print("data_matrix %s %s" % (data_matrix, data_matrix.shape))

		avg, std = utils.summary_statistics(data_matrix, sum_across="columns")

		# Time average of each table over the analysis window, then the
		# mean and spread of those averages across tables.
		window = data_matrix[config.STARTFRAME:config.LASTFRAME]
		avg_contacts = numpy.average(window, axis=0)
		mean_contact = numpy.average(avg_contacts)
		std_contact = numpy.std(avg_contacts)
		print(mean_contact)
		print(std_contact)
		mean_contact_list.append(mean_contact)
		std_contact_list.append(std_contact)

		# NOTE(review): the two calls pass different time_present values
		# (False for the average, True for the std). Kept as in the original;
		# confirm against utils.smooth that this asymmetry is intended.
		avg_smoothed = utils.smooth(avg/config.NMOLECULES, 500, time_present=False, timestep=2)
		std_smoothed = utils.smooth(std/config.NMOLECULES, 500, time_present=True, timestep=2)
		plot_data.append(avg_smoothed)
		plot_data.append(std_smoothed)

	timeseries_matrix = numpy.hstack(plot_data)
	print("timeseries_matrix %s %s" % (timeseries_matrix, timeseries_matrix.shape))
	print("time %s" % (timeseries_matrix[:,0],))
	numpy.savetxt(ratio + "_pp_nonpolar_smoothed.txt.gz", timeseries_matrix, fmt='%0.3f')
	utils.savetxt(ratio + "_avg_pp_nonpolar_contact.txt", "#scyllo chiro water", numpy.vstack([mean_contact_list, std_contact_list]), fmt='%0.3f')

	return timeseries_matrix
Пример #2
0
def process(h5file, ratio, format="p2p_vs_t.dat"):
	"""Build smoothed line-plot data and labels from the '/polar' tables.

	For each of "scyllo", "chiro" and "water", every node under '/polar'
	whose name matches the isomer (and, except for water, ``ratio``)
	followed by ``format`` is read with ``myh5.getTableAsMatrix``. Columns 1
	and 2 of each table, truncated to ``config.LASTFRAME`` rows, are collected
	as the "inter" and "intra" series respectively; column 0 of the last
	matching table is used as the time axis.

	Side effects (all in the current working directory):
	  * ``<ratio>_p2p_vs_t.txt`` and ``<ratio>_p2p_vs_t_smoothed.txt`` --
	    the unsmoothed and smoothed plot matrix of the LAST isomer processed.
	  * ``<ratio>_avg_contacts_w_err.txt`` -- a 2 x 3 array: row 0 is the mean
	    and row 1 the standard deviation of the per-table time averages of
	    the "inter" series, one column per isomer (scyllo, chiro, water).

	Args:
		h5file: open HDF5 file handle exposing ``listNodes(where=...)``.
		ratio: key into ``config.RATIO``; also used in the table-name pattern
			and as the output file prefix.
		format: table-name suffix to match. Like ``ratio`` it is inserted
			into the regular expression unescaped, as before.

	Returns:
		A tuple ``(datalist, labellist)``: the ``utils.smooth`` output for
		each isomer and the matching label strings.

	Raises:
		KeyError: if ``ratio`` is not a key of ``config.RATIO``.
		ValueError: if no table yields data for one of the isomers.
	"""
	header = "# time average_inter std_inter average_intra std_intra"
	group = '/polar'
	datalist = []
	labellist = []
	isomerlist = ["scyllo", "chiro", "water"]
	mean_contact_list = []
	std_contact_list = []
	# Loop-invariant, so looked up once.
	ratiolabel = config.RATIO[ratio]

	for iso in isomerlist:
		print("processing %s" % (iso,))
		if iso == "water":
			# Water table names are matched without the ratio.
			pattern = re.compile(r"%s.*%s" % (iso, format))
		else:
			pattern = re.compile(r"%s.*%s.*%s" % (iso, ratio, format))

		data_inter = []
		data_intra = []
		# Tracked explicitly so a stale table from a previous isomer can
		# never supply the time axis.
		time_column = None
		for table in h5file.listNodes(where=group):
			if not pattern.search(table.name):
				continue
			print("processing %s" % (table.name,))
			# HDF5 node paths always use '/', independent of the host OS,
			# so the path is not built with os.path.join.
			table_path = group + '/' + table.name
			data = myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.int32)
			if data is None:
				print("no data was read in")
				continue
			data = data.astype('float')
			# The message text is kept verbatim from the original.
			print("converted to float32 %s" % (data,))

			nrows, ncols = data.shape
			# Sanity check that frames run along rows, not columns.
			assert nrows > ncols
			print("Test data read in dimensions %s %s" % (data.shape, data.dtype))
			data_inter.append(data[0:config.LASTFRAME,1])
			data_intra.append(data[0:config.LASTFRAME,2])
			time_column = data[0:config.LASTFRAME,0]

		if time_column is None:
			raise ValueError(
				"no %s tables with data found for isomer %r (ratio %r, format %r)"
				% (group, iso, ratio, format))

		# compute summary statistics
		print("summarizing statistics ... ")
		inter_matrix = utils.array_list_to_matrix(data_inter)
		intra_matrix = utils.array_list_to_matrix(data_intra)
		average_inter, std_inter = utils.summary_statistics(inter_matrix)
		average_intra, std_intra = utils.summary_statistics(intra_matrix)

		# compute the time average number of contacts and its spread
		avg_contacts = numpy.average(inter_matrix, axis=0)
		mean_contact = numpy.average(avg_contacts)
		std_contact = numpy.std(avg_contacts)

		mean_contact_list.append(mean_contact)
		std_contact_list.append(std_contact)
		print("%s %s" % (mean_contact, std_contact))

		plotdata = utils.array_list_to_matrix([ time_column, average_inter, std_inter, average_intra, std_intra ])
		print("plotdata %s" % (plotdata,))
		print("Test: dimensions of plotdata for %s %s %s" % (iso, ratio, plotdata.shape))
		plotdata_smoothed = utils.smooth(plotdata, 500, time_present=True, timestep=2)
		print(plotdata_smoothed)

		datalist.append(plotdata_smoothed)
		print("smoothed data %s %s" % (plotdata_smoothed, plotdata_smoothed.shape))

		if iso == "water":
			labellist.append("water")
		else:
			labellist.append("%s (%s)" % (iso, ratiolabel))

	# NOTE(review): only the last isomer's plotdata / plotdata_smoothed are
	# written here, as in the original. Confirm whether one file per isomer
	# was intended.
	utils.savetxt('%s_p2p_vs_t.txt' % (ratio,), header, plotdata, fmt='%0.2f')
	utils.savetxt('%s_p2p_vs_t_smoothed.txt' % (ratio,), header, plotdata_smoothed, fmt='%0.2f')
	utils.savetxt('%s_avg_contacts_w_err.txt' % (ratio,), "#scyllo chiro water", numpy.vstack([mean_contact_list, std_contact_list]), fmt='%0.2f')

	return (datalist, labellist)