def filter_h5(input_path, output_path, condition): print "filtering for '%s'" % condition input_file = tables.openFile(input_path, mode="r") output_file = tables.openFile(output_path, mode="w") # copy globals input_file.root.globals._f_copy(output_file.root, recursive=True) output_entities = output_file.createGroup("/", "entities", "Entities") for table in input_file.iterNodes(input_file.root.entities): print table._v_name, "..." copyTable(table, output_entities, condition=condition) input_file.close() output_file.close()
def filter_h5(input_path, output_path, condition): print "filtering for '%s'" % condition input_file = tables.openFile(input_path, mode="r") output_file = tables.openFile(output_path, mode="w") output_globals = output_file.createGroup("/", "globals", "Globals") copyTable(input_file.root.globals.periodic, output_file, output_globals) output_entities = output_file.createGroup("/", "entities", "Entities") for table in input_file.iterNodes(input_file.root.entities): print table._v_name, "..." copyTable(table, output_file, output_entities, condition=condition) input_file.close() output_file.close()
def dropfields(input_path, output_path, todrop): input_file = tables.openFile(input_path, mode="r") input_root = input_file.root output_file = tables.openFile(output_path, mode="w") output_globals = output_file.createGroup("/", "globals", "Globals") print " * copying globals ...", copyTable(input_root.globals.periodic, output_file, output_globals) print "done." output_entities = output_file.createGroup("/", "entities", "Entities") for table in input_file.iterNodes(input_root.entities): table_fields = get_fields(table) table_fields = [(fname, ftype) for fname, ftype in table_fields if fname not in todrop] print " * copying table %s (%.2f Mb) ..." % (table._v_name, table_size(table)), copyTable(table, output_file, output_entities, table_fields) print "done." input_file.close() output_file.close()
def merge_h5(input1_path, input2_path, output_path): input1_file = tables.openFile(input1_path, mode="r") input2_file = tables.openFile(input2_path, mode="r") output_file = tables.openFile(output_path, mode="w") output_globals = output_file.createGroup("/", "globals", "Globals") print "copying globals from", input1_path, copyTable(input1_file.root.globals.periodic, output_file, output_globals) print "done." input1_entities = input1_file.root.entities input2_entities = input2_file.root.entities fields1 = get_h5_fields(input1_file) fields2 = get_h5_fields(input2_file) ent_names1 = set(fields1.keys()) ent_names2 = set(fields2.keys()) output_entities = output_file.createGroup("/", "entities", "Entities") for ent_name in sorted(ent_names1 | ent_names2): print print ent_name ent_fields1 = fields1.get(ent_name, []) ent_fields2 = fields2.get(ent_name, []) output_fields = merge_items(ent_fields1, ent_fields2) output_table = output_file.createTable(output_entities, ent_name, np.dtype(output_fields)) if ent_name in ent_names1: table1 = getattr(input1_entities, ent_name) print " * indexing table from %s ..." % input1_path, input1_rows = index_table_light(table1) print "done." else: table1 = None input1_rows = {} if ent_name in ent_names2: table2 = getattr(input2_entities, ent_name) print " * indexing table from %s ..." % input2_path, input2_rows = index_table_light(table2) print "done." else: table2 = None input2_rows = {} print " * merging: ", input1_periods = input1_rows.keys() input2_periods = input2_rows.keys() output_periods = sorted(set(input1_periods) | set(input2_periods)) def merge_period(period_idx, period): if ent_name in ent_names1: start, stop = input1_rows.get(period, (0, 0)) input1_array = table1.read(start, stop) else: input1_array = None if ent_name in ent_names2: start, stop = input2_rows.get(period, (0, 0)) input2_array = table2.read(start, stop) else: input2_array = None if ent_name in ent_names1 and ent_name in ent_names2: output_array, _ = mergeArrays(input1_array, input2_array) elif ent_name in ent_names1: output_array = input1_array elif ent_name in ent_names2: output_array = input2_array else: raise Exception("this shouldn't have happened") output_table.append(output_array) output_table.flush() loop_wh_progress(merge_period, output_periods) print " done." input1_file.close() input2_file.close() output_file.close()