def save_helper(h5file, data_array_list, paths_list):
    """Save every array in ``data_array_list`` under its matching path.

    The two lists are paired by position: the array at index ``k`` is written
    to ``paths_list[k]`` through ``myh5.save`` with ``format="float"``.
    ``paths_list`` drives the iteration, so any surplus entries in
    ``data_array_list`` are left untouched.
    """
    for idx, path in enumerate(paths_list):
        myh5.save(h5file, data_array_list[idx], path, format="float")
def load_into_tables():
    """Load the RMSD flat files into the 'analysis' group of 'analysis.h5'.

    For every combination of ratio (15, 64), isomer ("chiro", "scyllo",
    "glycerol") and system index (0-9), the flat file named by
    ``generate_file_name`` is read with ``numpy.genfromtxt``, passed through
    ``preprocess`` and saved under ``/analysis/<flattened file name without
    extension>``.

    Files that do not exist are reported and skipped.
    """
    import posixpath  # HDF5 node paths always use '/', whatever the host OS

    # Maps an analysis name to the function handed to preprocess().
    function_list = {'rmsd': rmsd}

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("initializing pytables data file")
    group_name = 'analysis'
    h5file = myh5.initialize('analysis.h5', group_name)
    root = '/' + group_name

    print("reading in flat files")
    for ratio in [15, 64]:
        for isomer in ["chiro", "scyllo", "glycerol"]:
            for analysis in ["rmsd"]:
                for sys_idx in range(0, 10):
                    flat_file_path = generate_file_name(ratio, isomer, sys_idx, analysis)
                    print("loading in file at %s" % flat_file_path)
                    # Flatten the directory structure into a single node name.
                    flat_file_name = flat_file_path.replace('/', '_')
                    print("loading in %s" % flat_file_name)

                    if not os.path.exists(flat_file_path):
                        print("%s was not found!" % flat_file_path)
                        # Skip: otherwise data_file is either undefined or
                        # still holds the previous file's data, which would
                        # be saved under this file's name.
                        continue

                    data_file = numpy.genfromtxt(flat_file_path)
                    data_cleaned = preprocess(function_list[analysis],
                                              data=data_file, keep_time=True)
                    node_name = os.path.splitext(flat_file_name)[0]
                    myh5.save(h5file, data_cleaned, posixpath.join(root, node_name))
def _save_stoichiometry(polar_h5, nonpolar_h5, saveto_h5, max_num_dataset):
    """Stack the per-dataset stoichiometry of each isomer and save it."""
    for system in ["mon"]:
        for iso in ["scyllo", "chiro"]:
            # Results are collected per isomer, so start from an empty list.
            analysis_results = []
            for i in range(0, max_num_dataset):
                table_path = '/%s/%s%d' % (system, iso, i)
                print("analyzing %s" % table_path)
                polar_table = myh5.getTable(polar_h5, table_path)
                nonpolar_table = myh5.getTable(nonpolar_h5, table_path)
                # A dataset is only usable when both files contain it.
                if polar_table is None or nonpolar_table is None:
                    continue
                polar_array = utils.convert_to_numpy(polar_table)
                nonpolar_array = utils.convert_to_numpy(nonpolar_table)
                # Only rows 0..5000 are used and column 0 is dropped
                # (presumably a time/index column -- TODO confirm).
                s = stoichiometry(polar_array[0:5001, 1:],
                                  nonpolar_array[0:5001, 1:])
                analysis_results.append(s)

            if not analysis_results:
                # numpy.vstack raises an opaque ValueError on an empty list.
                print("no tables found for %s; nothing saved" % iso)
                continue
            myh5.save(saveto_h5, numpy.vstack(analysis_results),
                      "/mon_analysis/stoichiometry_%s" % iso)


def analysis(saveto_h5, max_num_dataset=10):
    """Compute stoichiometry for each isomer and save the stacked results.

    The input files ('GA4_mon_polar_analysis.h5' and
    'GA4_mon_nonpolar_analysis.h5'), the table layout ``/mon/<iso><i>`` and
    the output paths ``/mon_analysis/stoichiometry_<iso>`` are hard-coded;
    ideally they would be moved into a configuration file.

    Parameters:
        saveto_h5: handle passed to ``myh5.save`` for the results. It is owned
            by the caller and is not closed here.
        max_num_dataset: number of dataset indices (0 .. max_num_dataset - 1)
            to look up for each isomer.

    Datasets missing from either input file are skipped. If no dataset is
    found for an isomer, a message is printed and nothing is saved for it.
    """
    polar_h5 = tables.openFile('GA4_mon_polar_analysis.h5', mode='a')
    try:
        nonpolar_h5 = tables.openFile('GA4_mon_nonpolar_analysis.h5', mode='a')
        try:
            _save_stoichiometry(polar_h5, nonpolar_h5, saveto_h5, max_num_dataset)
        finally:
            nonpolar_h5.close()
    finally:
        # Close the locally opened files even when the analysis fails.
        polar_h5.close()
def parse(datfile, h5file_name):
    """Read the analysis file ``datfile`` into table '/test' of an h5 file.

    Parameters:
        datfile: path of the text file handed to ``read_analysis_file``.
        h5file_name: name passed to ``myh5.initialize`` to obtain the
            destination file handle.
    """
    column_names = ['replica', 'sequence', 'w', 'w_nominal', 'rg', 'sas1', 'sas2']
    # NOTE(review): 7 matches len(column_names); presumably the column count.
    descr = create_description(column_names, 7)
    h5file = myh5.initialize(h5file_name)
    # The with-block closes the input file even if the reader raises.
    with open(datfile) as f:
        data = read_analysis_file(f)
    myh5.save(h5file, numpy.array(data), '/test', table_struct=descr)
def read_nonpolar(h5file):
    """Read the nonpolar per-inositol contact flat files into ``h5file``.

    Every file matching 'nonpolar_all/*per_inositol_contacts.dat' is loaded
    as an int32 array and saved under
    ``/nonpolar_per_inositol/inf_<part1>_<part2 minus last char>_sys<last char>``,
    where the parts come from splitting the file name on '_'. For example
    'x_15_scyllo3_per_inositol_contacts.dat' is saved to
    '/nonpolar_per_inositol/inf_15_scyllo_sys3'.
    """
    import posixpath  # HDF5 node paths always use '/', whatever the host OS

    group_name = 'nonpolar_per_inositol'
    for dat_path in glob.glob("nonpolar_all/*per_inositol_contacts.dat"):
        data = numpy.genfromtxt(dat_path, dtype=numpy.int32)

        # Derive the table name from the underscore-separated file name.
        filename = os.path.basename(dat_path)
        parts = filename.split('_')
        # NOTE(review): only the final character is taken as the system
        # number, so a multi-digit index would be split incorrectly --
        # confirm file names never go beyond one digit.
        sys_number = parts[2][-1]
        table_name = 'inf_' + '_'.join(parts[1:3])[:-1] + '_sys' + sys_number
        table_path = posixpath.join('/', group_name, table_name)

        # Single-argument print(...) emits the same text under Python 2 and 3.
        print("saving %s to %s" % (dat_path, table_path))
        myh5.save(h5file, data, table_path)
def read_polar(h5file):
    """Read the polar contact (per inositol) flat files into ``h5file``.

    Every file matching 'polar/*.dat' is loaded as an int32 array. The file
    name is split on '_': the last part, minus its extension, becomes the
    group name and the first four parts joined by '_' become the table name.
    For example 'a_b_c_d_polar.dat' is saved to '/polar/a_b_c_d'.

    Raises:
        ValueError: if the last '_'-separated part of a file name does not
            contain exactly one '.'.
    """
    import posixpath  # HDF5 node paths always use '/', whatever the host OS

    for dat_path in glob.glob("polar/*.dat"):
        data = numpy.genfromtxt(dat_path, dtype=numpy.int32)

        # Construct the table path from the file name.
        filename = os.path.basename(dat_path)
        parts = filename.split('_')
        group_name, _ext = parts[-1].split('.')
        table_name = '_'.join(parts[0:4])
        table_path = posixpath.join('/', group_name, table_name)

        # Single-argument print(...) emits the same text under Python 2 and 3.
        print("saving %s to %s" % (dat_path, table_path))
        myh5.save(h5file, data, table_path)
def _parse_dssp_lines(lines, totalResidue, correction_factor):
    """Parse DSSP output lines into (legend, column_sums, num_frames, raw_rows)."""
    legend = {}        # data column index (1-based) -> structure type name
    column_sums = {}   # data column index -> sum of per-frame fractions
    raw_rows = []      # every data line, split into string fields
    num_frames = 0

    for line in lines:
        if not line.strip():
            # Blank lines carry no data; indexing their columns would fail.
            continue
        if line[0] == "#":
            continue
        if line[0] == "@":
            columns = line.split()
            # Series lines look like: @ s0 legend "Coil"
            if columns[1][0] == "s" and columns[1] != "subtitle":
                structure_type = columns[3][1:len(columns[3]) - 1]  # drop the quotes
                index = len(legend) + 1
                legend[index] = structure_type
                column_sums[index] = 0.0
            continue

        # Anything else is a data line.
        cols = line.split()
        raw_rows.append(cols)
        for i in range(1, len(legend) + 1):
            value = float(cols[i])
            if legend[i] == "Coil":
                # Correct for the extra residues dssp counts as coil in the
                # GA4 system.
                value -= correction_factor
            column_sums[i] += value / totalResidue
        num_frames += 1

    return legend, column_sums, num_frames, raw_rows


def process_dssp(filename, totalResidue, correction_factor, h5file='analysis_results.h5'):
    """Average the DSSP structure counts in ``filename`` and save them.

    Lines starting with '#' are ignored, '@' lines supply the series legends,
    and every other non-blank line is treated as one frame of data whose
    column ``i`` belongs to legend ``i``.

    Two tables are written to ``h5file``: ``/dssp/<basename>`` holds one row
    with the file name, the per-structure averages and the frame count;
    ``/dssp_data/<basename>`` holds the raw data rows.

    Parameters:
        filename: path of the DSSP output file to read.
        totalResidue: divisor applied to every per-frame count.
        correction_factor: amount subtracted from each "Coil" count before
            it is divided by ``totalResidue``.
        h5file: name passed to ``myh5.initialize`` for the output file.

    Raises:
        ZeroDivisionError: if the file defines series but has no data lines.
    """
    # The with-block closes the input file even if parsing raises.
    with open(filename) as fp:
        legend, averageStruct, totalFramesProcessed, raw_data = _parse_dssp_lines(
            fp, totalResidue, correction_factor)
    columnTotal = len(legend)

    # One summary row: file name, average per structure type, frame count.
    table = [filename]
    table_descr = {'filename': tables.StringCol(256, pos=0)}
    for i in range(1, columnTotal + 1):
        table.append(averageStruct[i] / totalFramesProcessed)
        table_descr[legend[i]] = tables.Float32Col(pos=i)
    table.append(totalFramesProcessed)
    table_descr['num_frames'] = tables.Float32Col(pos=columnTotal + 1)

    h5 = myh5.initialize(h5file)
    basename, _ext = os.path.splitext(filename)
    myh5.save(h5, [tuple(table)], '/dssp/%s' % basename, table_descr)

    # numpy.Array does not exist; numpy.array is the constructor.
    # NOTE(review): the rows are still strings here while the description
    # uses Int32Col -- confirm myh5.save converts them as intended.
    raw_data_array = numpy.array(raw_data)
    (nrows, ncols) = raw_data_array.shape
    myh5.save(h5, raw_data_array, '/dssp_data/%s' % basename,
              myh5.create_description('col', ncols, format=tables.Int32Col(dflt=0)))