def merge_results(self, label, full_merge=False, force=False):
    """Merge the different process result files together.

    Parameters:
        label (str): The name of the subdirectory containing folders where
            each process computes results. For example, if label="states",
            then self.records_dir/states/ should exist and contain at least
            one subfolder named states_s1/ (or similar), which in turn
            contains .h5 files from each process (states_s1_p0.h5, etc.).
        full_merge (bool): If True, merge the process files AND merge the
            resulting files into one large file. For example, states_s1_p0.h5
            and states_s1_p1.h5 are merged into states_s1.h5 as usual, and
            then states_s1.h5 and states_s2.h5 are merged into one large
            states.h5.
        force (bool): If True, merge the files even if there is an existing
            merged file.
    """
    # Check that the relevant folder exists.
    subdir = os.path.join(self.records_dir, label)
    if not os.path.isdir(subdir):
        raise NotADirectoryError(subdir)

    # Check for existing folders and missing files before merging.
    work_todo = False
    if full_merge:
        work_todo = not os.path.isfile(os.path.join(subdir, label + ".h5"))
    else:
        for d in os.listdir(subdir):
            target = os.path.join(subdir, d)
            if os.path.isdir(target) and not os.path.isfile(target + ".h5"):
                work_todo = True
                break

    if work_todo or force:
        self.logger.info("Merging {} files...".format(label))
        post.merge_process_files(subdir, cleanup=False, comm=MPI.COMM_WORLD)
        if full_merge:
            # Wait for other processes to finish.
            MPI.COMM_WORLD.Barrier()
            # Do the last merge.
            set_paths = glob(os.path.join(subdir, label + "_s*.h5"))
            post.merge_sets(os.path.join(subdir, label + ".h5"), set_paths,
                            cleanup=True, comm=MPI.COMM_WORLD)
        self.logger.info("\t{} files now {}merged".format(
            label, "fully " if full_merge else ""))
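# A minimal usage sketch for merge_results above. The owning object is
# hypothetical (any class exposing records_dir, logger, and this method);
# only the directory layout follows from the docstring:
#   sim.merge_results("states")                   # states_s1_p*.h5 -> states_s1.h5, ...
#   sim.merge_results("states", full_merge=True)  # ... then states_s*.h5 -> states.h5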
def join_temporal(base_path, data_types=None, cleanup=False):
    logger.info("joining data in time from Dedalus run {:s}".format(base_path))
    if data_types is None:
        data_types = ['scalar', 'profiles']
    for data_type in data_types:
        logger.info("merging {}".format(data_type))
        try:
            path = os.path.join(base_path, data_type)
            files = glob.glob(os.path.join(path, "*.h5"))
            joined_filename = os.path.join(path, "{}_joined.h5".format(data_type))
            post.merge_sets(joined_filename, files, cleanup=cleanup)
        except:
            logger.info("missing {}".format(data_type))
            raise
    logger.info("done temporal join operation for {:s}".format(base_path))
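# A usage sketch for join_temporal, assuming the run directory contains
# already process-merged handler folders (the path below is hypothetical):
#   join_temporal("runs/my_run/", data_types=['scalar', 'profiles'], cleanup=True)
# This produces runs/my_run/scalar/scalar_joined.h5 and
# runs/my_run/profiles/profiles_joined.h5.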
def merge_move(rundir, outdir):
    # NOTE: outdir is accepted but not used in this snippet; rundir is assumed
    # to end with a path separator, since paths are built by concatenation.
    # Merge snapshots from different processes:
    post.merge_process_files(rundir + "snapshots", cleanup=True)
    set_paths = list(
        pathlib.Path(rundir + "snapshots").glob("snapshots_s*.h5"))
    post.merge_sets(rundir + "snapshots/snapshots.h5", set_paths, cleanup=True)
    # ifields
    post.merge_process_files(rundir + "ifields", cleanup=True)
    set_paths = list(pathlib.Path(rundir + "ifields").glob("ifields_s*.h5"))
    post.merge_sets(rundir + "ifields/ifields.h5", set_paths, cleanup=True)
    # moments
    post.merge_process_files(rundir + "moments", cleanup=True)
    set_paths = list(pathlib.Path(rundir + "moments").glob("moments_s*.h5"))
    post.merge_sets(rundir + "moments/moments.h5", set_paths, cleanup=True)
import pathlib

from dedalus.tools import post

#############################  These strings need editing  #############################
#-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----#
run_name = "test1"
direc_folder = "raw_data2/"
#-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----#
#######################################################################################

#-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----# #-----#
subfolder = "snapshots"
direc = direc_folder + subfolder
print(direc_folder)
# name of the resulting output file
output_name = subfolder + "_" + run_name
set_paths = list(pathlib.Path(direc).glob(subfolder + "_s*.h5"))
post.merge_sets(direc + "/" + output_name + ".h5", set_paths, cleanup=True)

# direc is the folder in which the various data folders are located
subfolder = "analysis"
direc = direc_folder + subfolder
# name of the resulting output file
output_name = subfolder + "_" + run_name
set_paths = list(pathlib.Path(direc).glob(subfolder + "_s*.h5"))
post.merge_sets(direc + "/" + output_name + ".h5", set_paths, cleanup=True)

# direc is the folder in which the various data folders are located
subfolder = "run_parameters"
direc = direc_folder + subfolder
# name of the resulting output file
output_name = subfolder + "_" + run_name
set_paths = list(pathlib.Path(direc).glob(subfolder + "_s*.h5"))
post.merge_sets(direc + "/" + output_name + ".h5", set_paths, cleanup=True)
"""
Usage:
    merge.py <base_path> [--cleanup]

Options:
    --cleanup    Delete distributed files after merging

"""
import os
import glob

from docopt import docopt
from dedalus.tools import post

args = docopt(__doc__)
base = args['<base_path>']
iter_paths = os.listdir(args['<base_path>'])

# First pass: merge the per-process files inside each handler folder.
for path in iter_paths:
    print(base + "/" + path)
    folder = base + "/" + path
    post.merge_analysis(folder)

# Second pass: merge each handler's set files into a single .h5 file.
for path in iter_paths:
    print(base + "/" + path)
    set_paths = list(glob.glob(base + "/" + path + "/*h5"))
    print(set_paths)
    post.merge_sets(base + "/" + path + ".h5", set_paths, cleanup=False)
            print(os.path.join(r, file))

# Useful for error hunting if the loop above doesn't appear to work as it should
#print(list_of_run_parameter_files)

# If there are 0 or 1 files, nothing needs to be merged, but it is useful to
# know how many files there are.
if len(list_of_run_parameter_files) == 1:
    final_file = list_of_run_parameter_files[0]
    print(final_file)
    # are we just going to pass?
elif len(list_of_run_parameter_files) == 0:
    print("No Run Parameter files found")
else:
    # Do the merge for run_parameters here (merge_sets writes the joint file
    # and returns None, so record the output path explicitly).
    final_file = results_direc + "/raw_data2/" + section + ".h5"
    post.merge_sets(final_file, list_of_run_parameter_files, cleanup=True)

section = "analysis"
specificdir = os.path.join(direc, section)

# r=root, d=directories, f=files
list_of_analysis_files = []
for r, d, f in os.walk(specificdir):  # you need thisdir, not specificdir
    for file in f:
        if file.endswith(".h5"):
            list_of_analysis_files.append(os.path.join(r, file))
            print(os.path.join(r, file))

# Useful for error hunting if the loop above doesn't appear to work as it should
#print(list_of_analysis_files)
# Analysis handler
analysis = solver.evaluator.add_file_handler('analysis', iter=5, max_writes=100)
analysis.add_task("integ(u,'x')", layout='g', name='<u>')
analysis.add_task("integ(u**2,'x')", layout='g', name='<uu>')
analysis.add_system(solver.state, layout='g')

# Simulation
import time

# Main loop
dt = 1e-2
start_time = time.time()
while solver.ok:
    solver.step(dt)
    if solver.iteration % 100 == 0:
        print('Completed iteration {}'.format(solver.iteration))
end_time = time.time()
print('Runtime:', end_time - start_time)

# Merge process files
from dedalus.tools import post
post.merge_process_files("analysis", cleanup=True)

# Merge files into one
import pathlib
set_paths = list(pathlib.Path("analysis").glob("analysis_s*.h5"))
post.merge_sets("analysis/1d_kdv_analysis.h5", set_paths, cleanup=True)
import pathlib
from dedalus.tools import post

set_paths = list(pathlib.Path("ugm_28_1hr/").glob("ugm_28_1hr_s*.h5"))
post.merge_sets("ugm_28_1hr/ugm_28_1hr.h5", set_paths, cleanup=True)
""" Merge h5 data into one file. Usage: merge2.py <joint_path> <set_path> [--cleanup] Options: --cleanup Delete distributed files after merging """ if __name__ == "__main__": import pathlib from docopt import docopt from dedalus.tools import logging from dedalus.tools import post print('0') args = docopt(__doc__) set_paths = list(pathlib.Path(args['<set_path>']).glob("*.h5")) #print(set_paths) post.merge_sets(args['<joint_path>'], set_paths, args['--cleanup']) print('2')
# will take approximately forever***

from dedalus.tools import post
# post.merge_process_files("slices", cleanup=True)
# post.merge_process_files("dump", cleanup=True)
post.merge_process_files("checkpoint", cleanup=True)

import subprocess
# print(subprocess.check_output("find slices", shell=True).decode())
# print(subprocess.check_output("find dump", shell=True).decode())
print(subprocess.check_output("find checkpoint", shell=True).decode())

import pathlib
# set_paths = list(pathlib.Path("slices").glob("slices_s*.h5"))
# post.merge_sets("slices/slices.h5", set_paths, cleanup=True)
# set_paths = list(pathlib.Path("dump").glob("dump_s*.h5"))
# post.merge_sets("dump/dump.h5", set_paths, cleanup=True)
set_paths = list(pathlib.Path("checkpoint").glob("checkpoint_s*.h5"))
post.merge_sets("checkpoint/checkpoint.h5", set_paths, cleanup=True)

# Rename folders
import os
# os.rename("dump", "dump_old")
# os.rename("slices", "slices_old")
os.rename("checkpoint", "checkpoint_old")
from dedalus.tools import post
import pathlib

print('Merging data files...')
post.merge_process_files("analysis_tasks", cleanup=True)
set_paths = list(pathlib.Path("analysis_tasks").glob("analysis_tasks_s*.h5"))
post.merge_sets("analysis_tasks/analysis.h5", set_paths, cleanup=True)
if __name__ == "__main__":

    import pathlib
    import shutil
    import tarfile
    from docopt import docopt
    from dedalus.tools import logging
    from dedalus.tools import post
    from dedalus.tests import test, bench, cov

    args = docopt(__doc__)
    if args['test']:
        test()
    elif args['bench']:
        bench()
    elif args['cov']:
        cov()
    elif args['get_config']:
        config_path = pathlib.Path(__file__).parent.joinpath('dedalus.cfg')
        shutil.copy(str(config_path), '.')
    elif args['get_examples']:
        example_path = pathlib.Path(__file__).parent.joinpath('examples.tar.gz')
        with tarfile.open(str(example_path), mode='r:gz') as archive:
            archive.extractall('dedalus_examples')
    elif args['merge_procs']:
        post.merge_process_files(args['<base_path>'], cleanup=args['--cleanup'])
    elif args['merge_sets']:
        post.merge_sets(args['<joint_path>'], args['<set_paths>'], cleanup=args['--cleanup'])
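# Example invocations of the merge subcommands, assuming this is the standard
# Dedalus v2 command-line dispatcher (invoked as python3 -m dedalus):
#   python3 -m dedalus merge_procs snapshots --cleanup
#   python3 -m dedalus merge_sets snapshots/snapshots.h5 snapshots/snapshots_s1.h5 snapshots/snapshots_s2.h5 --cleanup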
""" Perform after execution of the merge.py script to combine all .h5 data files into a singular .h5 file. WARNING: This script may produce an extremely large output file. """ import pathlib import sys from dedalus.tools import post direc = sys.argv[1] run_name = sys.argv[2] for subfolder in ['snapshots', 'analysis', 'run_parameters']: # print(f"Creating file {direc}{subfolder}/{subfolder}_{run_name}.h5") set_paths = list(pathlib.Path(f"{direc}{subfolder}").glob(f"{subfolder}_s*.h5")) post.merge_sets(f"{direc}{subfolder}/{subfolder}_{run_name}.h5", set_paths, cleanup=True)
""" Merge analysis sets from a FileHandler. Usage: merge_sets.py <joint_path> <set_paths>... """ if __name__ == "__main__": from docopt import docopt from mpi4py import MPI from dedalus.tools import logging from dedalus.tools import post args = docopt(__doc__) if MPI.COMM_WORLD.rank == 0: post.merge_sets(args['<joint_path>'], args['<set_paths>'])
# Main loop ------------------------------------------------------------------|
try:
    logger.info('Starting loop')
    start_time = time.time()
    while solver.ok:
        # We start with a very small dt and increase it.
        dt = dtm + (dtM - dtm) * (1 - np.exp(-sigma_dt * solver.sim_time))
        solver.step(dt)
        if (solver.iteration - 1) % 100 == 0:
            logger.info('Iteration #{0:d}, Time: {1:.2e} IPs, dt: {2:e}'.format(
                solver.iteration, solver.sim_time / TF, dt))
except NameError:
    logger.error('Exception raised, triggering end of main loop.')
    raise
finally:
    end_time = time.time()
    logger.info('Iterations: {0:d}'.format(solver.iteration))
    logger.info('Sim end time: {0:f}'.format(solver.sim_time))
    logger.info('Run time: {0:.2f} sec'.format(end_time - start_time))
    logger.info('Run time: {0:f} cpu-min'.format(
        (end_time - start_time) / 60 * domain.dist.comm_cart.size))

# Post-processing ------------------------------------------------------------|
print(subprocess.check_output("find anim", shell=True).decode())
post.merge_process_files("anim", cleanup=True)  # merges the sub-domain files, if any
set_paths = list(
    pathlib.Path("anim").glob("anim_s*.h5"))  # finds all of the time series
# merges the time series
post.merge_sets("anim/anim.h5", set_paths, cleanup=True)
print(subprocess.check_output("find anim", shell=True).decode())