Пример #1
0
def save_helper(h5file, data_array_list, paths_list):
	"""Save each array of data_array_list under the matching entry of paths_list.

	Arrays and paths are paired by position and every pair is written with
	myh5.save using format="float". The number of writes is driven by
	paths_list, so data_array_list must hold at least as many entries.
	"""
	for position, table_path in enumerate(paths_list):
		myh5.save(h5file, data_array_list[position], table_path, format="float")
Пример #2
0
def load_into_tables():
    """docstring for load_into_tables"""
    # Initialize a pytable for saving data into

    function_list = {'rmsd' : rmsd } 

    print "initializing pytables data file"
    
    group_name = 'analysis'

    h5file = myh5.initialize('analysis.h5', group_name)

    root = '/' + group_name

    print "reading in flat files"    
    for ratio in [15, 64]:
        for isomer in ["chiro", "scyllo", "glycerol"]:
            for analysis in ["rmsd"]:
                for sys_idx in range(0, 10):
                    flat_file_path = generate_file_name(ratio, isomer, sys_idx, analysis)
                    print "loading in file at", flat_file_path

                    flat_file_name = flat_file_path.replace('/','_')
                    print "loading in", flat_file_name
                    if os.path.exists(flat_file_path):
                        data_file = numpy.genfromtxt(flat_file_path)
                    else:
                        print flat_file_path, "was not found!"
                    
                    data_cleaned = preprocess(function_list[analysis], data=data_file, keep_time=True)
                    #kwargs={'data': data_file, 'keep_time': True})
                    myh5.save(h5file, data_cleaned, os.path.join(root, os.path.splitext(flat_file_name)[0]))
Пример #3
0
def analysis(saveto_h5, max_num_dataset=10):
	""" A bad way to organize a sequence of analysis """
	
	# h5 files to read, tables, and paths to tables are encoded inside the analysis
	# ideally they would be refactored into a configuration file
	polar_h5 = tables.openFile('GA4_mon_polar_analysis.h5', mode='a')
	nonpolar_h5 = tables.openFile('GA4_mon_nonpolar_analysis.h5', mode='a')

	# analyze and aggregate all data for each iso and store each in a separate table 
	for system in ["mon"]:
		for iso in ["scyllo", "chiro"]:
			# clear the results
			analysis_results = []
			for i in range(0, max_num_dataset):
				table_path = '/%(system)s/%(iso)s%(i)d' % vars()
				print "analyzing", table_path
				polar_table = myh5.getTable(polar_h5, table_path)
				nonpolar_table = myh5.getTable(nonpolar_h5, table_path)
				if polar_table != None and nonpolar_table != None:
					polar_array = utils.convert_to_numpy(polar_table)
					nonpolar_array = utils.convert_to_numpy(nonpolar_table)
					s = stoichiometry(polar_array[0:5001, 1:], nonpolar_array[0:5001,1:])
					analysis_results.append(s)

			myh5.save(saveto_h5, numpy.vstack(analysis_results), "/mon_analysis/stoichiometry_%(iso)s" % vars())
Пример #4
0
def parse(datfile, h5file_name):
	"""Read the analysis file datfile into the '/test' table of an h5 file.

	datfile     -- path of the flat analysis file, parsed by read_analysis_file
	h5file_name -- name of the h5 file, set up through myh5.initialize

	The table layout is built by create_description from the seven column
	names replica, sequence, w, w_nominal, rg, sas1 and sas2.
	"""
	column_names = ['replica', 'sequence', 'w', 'w_nominal', 'rg', 'sas1', 'sas2']
	descr = create_description(column_names, 7)
	h5file = myh5.initialize(h5file_name)

	# The with-block closes the input file even if parsing raises.
	with open(datfile) as f:
		data = read_analysis_file(f)

	myh5.save(h5file, numpy.array(data), '/test', table_struct=descr)
Пример #5
0
def read_nonpolar(h5file):
	""" reads in the flat files containing nonpolar contact analysis into a 
		h5 file
	"""
	nonpolar = glob.glob("nonpolar_all/*per_inositol_contacts.dat")
	for file in nonpolar:
		# print file
		data = numpy.genfromtxt(file, dtype=numpy.int32)
		# Nasty file name string parsing
		path, filename = os.path.split(file)
		parts = filename.split('_')
		sys_number = parts[2][-1]
		table_name = 'inf_' + '_'.join(parts[1:3])[:-1] + '_sys' + sys_number
		group_name = 'nonpolar_per_inositol'
		table_path = os.path.join(os.path.join('/', group_name), table_name)
		print "saving %(file)s to" % vars(), table_path
		myh5.save(h5file, data, table_path)
Пример #6
0
def read_polar(h5file):
	""" reads in the flat files contain polar contact analysis into a
		h5 file
	"""
	# Contact Analysis (per inositol)
	# Polar contact
	polar = glob.glob("polar/*.dat")
	for file in polar:
		# print file
		data = numpy.genfromtxt(file, dtype=numpy.int32)
		# construct table path from filename
		discard, rest = os.path.split(file)
		parts = rest.split('_')
		group_name, ext = parts[-1].split('.')
		table_name = '_'.join(parts[0:4])
		table_path = os.path.join('/', os.path.join(group_name, table_name))
		
		print "saving %(file)s to" % vars(), table_path
		
		myh5.save(h5file, data, table_path)
Пример #7
0
def process_dssp(filename, totalResidue, correction_factor, h5file='analysis_results.h5'):
	"""Average the per-frame structure counts in filename and store them in h5file.

	Parsing rules:
	  - lines starting with '#' and blank lines are ignored;
	  - lines starting with '@' are scanned for series legends (second field
	    starts with 's' and is not 'subtitle'); the fourth field without its
	    first and last character names the next data column;
	  - every other line is one frame of whitespace-separated values, where
	    column i belongs to legend i.

	For each frame every legend column is divided by totalResidue; the
	column whose legend is "Coil" has correction_factor subtracted first.
	The per-column averages over all frames are saved as a single row under
	/dssp/<basename>, and the unprocessed frames under /dssp_data/<basename>,
	where <basename> is filename without its extension.

	filename          -- path of the file to read
	totalResidue      -- divisor applied to every structure count
	correction_factor -- amount subtracted from the "Coil" count of each frame
	h5file            -- name of the h5 file handed to myh5.initialize

	Raises ValueError when the file contains no data frames.
	"""
	# initialize structure lists
	legend = {}
	averageStruct = {}
	columnTotal = 0
	columnIndex = 0
	totalFramesProcessed = 0
	raw_data = []

	# The with-block closes the file even if a malformed line raises below.
	with open(filename) as fp:
		for line in fp:
			if not line.strip():
				# a blank line carries no frame
				continue
			if line[0] == "#":
				continue
			elif line[0] == "@":
				columns = line.split()
				if columns[1][0] == "s" and columns[1] != "subtitle":
					# drop the first and last character of the legend field
					structureType = columns[3][1:len(columns[3])-1]
					legend[columnIndex+1] = structureType
					columnIndex += 1

				columnTotal = columnIndex
				# (re)initialize the running sums for every known column
				for i in range(1, columnTotal+1):
					averageStruct[i] = 0
			else:
				# should all be data now
				cols = line.split()
				raw_data.append(cols)
				for i in range(1, columnTotal+1):
					# correct for the 3 extra residues are counted in the GA4 system by dssp
					if legend[i] == "Coil":
						averageStruct[i] += (float(cols[i]) - correction_factor)/totalResidue
					else:
						averageStruct[i] += float(cols[i])/totalResidue
				totalFramesProcessed += 1

	if totalFramesProcessed == 0:
		# Without frames the averages would divide by zero and the raw data
		# array would have no columns; fail before anything is written.
		raise ValueError("no data frames found in %s" % filename)

	# Single summary row: file name, one average per legend, frame count.
	table = [filename]
	table_descr = {'filename': tables.StringCol(256, pos=0)}

	for i in range(1, columnTotal+1):
		table.append(averageStruct[i]/totalFramesProcessed)
		table_descr[legend[i]] = tables.Float32Col(pos=i)

	table.append(totalFramesProcessed)
	table_descr['num_frames'] = tables.Float32Col(pos=columnTotal+1)

	h5 = myh5.initialize(h5file)

	basename, ext = os.path.splitext(filename)
	myh5.save(h5, [tuple(table),], '/dssp/%s' % basename, table_descr)

	# numpy.array is the array constructor; numpy has no attribute "Array".
	# NOTE(review): the elements are still the strings read from the file
	# while the description below uses Int32Col -- confirm that myh5.save
	# converts them, or pass an explicit dtype here.
	raw_data_array = numpy.array(raw_data)
	_, ncols = raw_data_array.shape
	myh5.save(h5, raw_data_array, '/dssp_data/%s' % basename, myh5.create_description('col', ncols, format=tables.Int32Col(dflt=0)))