def load_into_tables():
    """Load the flat rmsd analysis files into the 'analysis.h5' pytables file.

    For every combination of ratio (15, 64), isomer ("chiro", "scyllo",
    "glycerol"), analysis ("rmsd") and system index (0-9) the path returned by
    generate_file_name() is read with numpy.genfromtxt, passed through
    preprocess() and saved below the '/analysis' group.  The node name is the
    file path with every '/' replaced by '_' and the extension removed.

    Files that do not exist are reported on stdout and skipped.
    """
    # Maps an analysis name to the function handed to preprocess().
    function_list = {'rmsd': rmsd}

    # Initialize a pytable for saving data into.
    # The single-argument print(...) form used below emits the same text
    # whether it is parsed as a Python 2 print statement or a Python 3 call.
    print("initializing pytables data file")
    group_name = 'analysis'
    h5file = myh5.initialize('analysis.h5', group_name)
    root = '/' + group_name
    # NOTE(review): h5file is never closed here; confirm whether myh5 expects
    # the caller to close it.

    print("reading in flat files")
    for ratio in [15, 64]:
        for isomer in ["chiro", "scyllo", "glycerol"]:
            for analysis in ["rmsd"]:
                for sys_idx in range(0, 10):
                    flat_file_path = generate_file_name(ratio, isomer, sys_idx, analysis)
                    print("loading in file at %s" % flat_file_path)
                    flat_file_name = flat_file_path.replace('/', '_')
                    print("loading in %s" % flat_file_name)

                    if not os.path.exists(flat_file_path):
                        print("%s was not found!" % flat_file_path)
                        # Skip this system: falling through would either hit
                        # an undefined data_file or re-save the previous
                        # file's data under this file's node name.
                        continue

                    data_file = numpy.genfromtxt(flat_file_path)
                    data_cleaned = preprocess(function_list[analysis],
                                              data=data_file, keep_time=True)
                    node_name = os.path.splitext(flat_file_name)[0]
                    myh5.save(h5file, data_cleaned, os.path.join(root, node_name))
def parse(datfile, h5file_name):
    """Read the analysis file *datfile* and save it as a table in an h5 file.

    Parameters:
        datfile: path of the analysis file; it is opened for reading and
            handed to read_analysis_file().
        h5file_name: file name passed to myh5.initialize().

    The rows returned by read_analysis_file() are converted to a numpy array
    and saved at '/test' using a table description built from the column
    names replica, sequence, w, w_nominal, rg, sas1 and sas2.
    """
    column_names = ['replica', 'sequence', 'w', 'w_nominal', 'rg', 'sas1', 'sas2']
    # Same value as the previous hard-coded 7; kept in step with the list.
    descr = create_description(column_names, len(column_names))
    h5file = myh5.initialize(h5file_name)

    # 'with' closes the input file even if read_analysis_file() raises.
    with open(datfile) as f:
        data = read_analysis_file(f)

    myh5.save(h5file, numpy.array(data), '/test', table_struct=descr)
def load():
    """Populate 'GA4_beta_analysis.h5' via read_polar() and read_nonpolar().

    Initializes an h5 file to store all the analysis relating to GA4-beta
    protofibrils, hands it to read_polar() and then read_nonpolar(), and
    closes it afterwards.
    """
    h5file = myh5.initialize('GA4_beta_analysis.h5')
    try:
        read_polar(h5file)
        read_nonpolar(h5file)
    finally:
        # Close the file even when one of the readers raises, so the handle
        # is not left open on failure.
        h5file.close()
def process_dssp(filename, totalResidue, correction_factor, h5file='analysis_results.h5'):
    """Average the per-frame structure counts in *filename* and store them.

    Input format, as handled by this function:
      * blank lines and lines starting with '#' are ignored;
      * a line starting with '@' whose second token starts with 's' (and is
        not 'subtitle') declares the next data column; its name is the fourth
        token with the first and last character (the quotes) stripped;
      * every other line is a data row of whitespace-separated values.
        Column 0 is not averaged (presumably the time/frame stamp -- verify).

    Parameters:
        filename: path of the file to read.  It is also stored in the
            'filename' column and, minus its extension, used as the node name.
        totalResidue: divisor applied to every per-frame count.
        correction_factor: subtracted from the "Coil" count of each frame
            before dividing (corrects for extra residues counted by dssp).
        h5file: file name passed to myh5.initialize().

    Two tables are saved: '/dssp/<basename>' holds one row with the file
    name, the per-column averages and the number of frames, and
    '/dssp_data/<basename>' holds the raw data rows.

    Raises:
        ValueError: if the file contains no data rows.
    """
    legend = {}         # 1-based data column index -> structure name
    averageStruct = {}  # 1-based data column index -> running sum of fractions
    raw_data = []
    totalFramesProcessed = 0

    # 'with' guarantees the input file is closed, also on parse errors.
    with open(filename) as fp:
        for line in fp:
            if not line.strip():
                # Blank lines carry no data and would break column indexing.
                continue
            if line[0] == "#":
                continue

            if line[0] == "@":
                columns = line.split()
                is_legend = (len(columns) > 3
                             and columns[1][0] == "s"
                             and columns[1] != "subtitle")
                if is_legend:
                    columnIndex = len(legend) + 1
                    legend[columnIndex] = columns[3][1:-1]
                    averageStruct[columnIndex] = 0
                continue

            # should all be data now
            cols = line.split()
            raw_data.append(cols)
            for i in range(1, len(legend) + 1):
                value = float(cols[i])
                # correct for the extra residues that are counted in the GA4
                # system by dssp
                if legend[i] == "Coil":
                    value -= correction_factor
                averageStruct[i] += value / totalResidue
            totalFramesProcessed += 1

    if totalFramesProcessed == 0:
        # Fail early with a clear message instead of a ZeroDivisionError or a
        # shape-unpacking error further down.
        raise ValueError("no data frames found in %s" % filename)

    columnTotal = len(legend)

    # Summary row: file name, one average per structure type, frame count.
    table = [filename]
    table_descr = {'filename': tables.StringCol(256, pos=0)}
    for i in range(1, columnTotal + 1):
        table.append(averageStruct[i] / totalFramesProcessed)
        table_descr[legend[i]] = tables.Float32Col(pos=i)
    table.append(totalFramesProcessed)
    table_descr['num_frames'] = tables.Float32Col(pos=columnTotal + 1)

    h5 = myh5.initialize(h5file)
    basename = os.path.splitext(filename)[0]
    myh5.save(h5, [tuple(table)], '/dssp/%s' % basename, table_descr)

    # numpy exposes array(), not Array(); the old spelling raised
    # AttributeError before the raw data could be saved.
    # NOTE(review): the rows are still the string tokens read from the file
    # while the description below declares Int32 columns -- confirm that
    # myh5.save converts them, or cast here.
    raw_data_array = numpy.array(raw_data)
    nrows, ncols = raw_data_array.shape
    myh5.save(h5, raw_data_array, '/dssp_data/%s' % basename,
              myh5.create_description('col', ncols,
                                      format=tables.Int32Col(dflt=0)))