def crop_images(file_index_fn, table_name="hdf5_proc", dataset="data",
                roi={"top": 26, "bottom": 24, "left": 21, "right": 19},
                date=None, sample=None, energy=None, cores=-2, query=None):
    """Crop images of one experiment.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are cropped.
    The crop of the different images is done in parallel: all cores but
    one are used (value=-2). Each file contains a single image to be
    cropped.
    """
    start_time = time.time()
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)

    if date or sample or energy:
        file_index_db = filter_file_index(file_index_db, date=date,
                                          sample=sample, energy=energy,
                                          query=query)

    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    if query is not None:
        file_records = file_index_db.search(query)
    else:
        file_records = file_index_db.all()
    files = get_file_paths(file_records, root_path)

    if files:
        # Crop each single-image hdf5 file in parallel
        Parallel(n_jobs=cores, backend="multiprocessing")(
            delayed(crop_and_store)(h5_file, dataset=dataset, roi=roi)
            for h5_file in files)

    n_files = len(files)
    print("--- Crop %d files took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))
    db.close()
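
# Usage sketch for crop_images (hedged example: the index file name and the
# date/sample/energy values below are hypothetical):
def _example_crop_images():
    # Crop only the processed images of one acquisition, trimming a custom
    # number of pixels from each border of every image.
    crop_images("index.json",
                roi={"top": 10, "bottom": 10, "left": 10, "right": 10},
                date=20160101, sample="sample_a", energy=520.0)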
def _get_couples_to_align(couples_to_align, h5_records, root_path):
    # Pair the first file of the group (the reference) with each of the
    # remaining files, appending the couples to the given list
    files = get_file_paths(h5_records, root_path)
    ref_file = files[0]
    files.pop(0)
    for file in files:
        couple_to_align = (ref_file, file)
        couples_to_align.append(couple_to_align)
def multiple_xrm_2_hdf5(file_index_db, subfolders=False, cores=-2,
                        update_db=True, query=None):
    """Convert many xrm files to hdf5 files, using all cores but one
    for the computations."""
    start_time = time.time()
    db = TinyDB(file_index_db, storage=CachingMiddleware(JSONStorage))
    if query is not None:
        file_records = db.search(query)
    else:
        file_records = db.all()

    root_path = os.path.dirname(os.path.abspath(file_index_db))
    files = get_file_paths(file_records, root_path,
                           use_subfolders=subfolders)

    # The backend parameter can be either "threading" or "multiprocessing"
    Parallel(n_jobs=cores, backend="multiprocessing")(
        delayed(convert_xrm2h5)(xrm_file) for xrm_file in files)

    if update_db:
        util.update_db_func(db, "hdf5_raw", file_records)
    db.close()

    n_files = len(files)
    print("--- Convert from xrm to hdf5 %d files took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))
    return db
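
# Usage sketch for multiple_xrm_2_hdf5 (hypothetical index file name;
# assumes the TinyDB index lists the xrm files relative to its own folder):
def _example_multiple_xrm_2_hdf5():
    # Convert every indexed xrm file to hdf5, searching subfolders for the
    # raw files, and register the results in the "hdf5_raw" table.
    multiple_xrm_2_hdf5("index.json", subfolders=True, update_db=True)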
def copy2proc_multiple(file_index_db, table_in_name="hdf5_raw",
                       table_out_name="hdf5_proc", suffix="_proc",
                       use_subfolders=False, cores=-1, update_db=True,
                       query=None, purge=False, magnetism_partial=False):
    """Copy many files to processed files"""
    start_time = time.time()
    db = TinyDB(file_index_db, storage=CachingMiddleware(JSONStorage))
    files_query = Query()
    if table_in_name == "default":
        query_cmd = (files_query.extension == ".hdf5")
        if query is not None:
            query_cmd &= query
        hdf5_records = db.search(query_cmd)
    else:
        table_in = db.table(table_in_name)
        hdf5_records = table_in.all()

    if magnetism_partial:
        query_cmd = (files_query.extension == ".hdf5")
        if query is not None:
            query_cmd &= query
        table_proc = db.table(table_out_name)
        table_proc.remove(query_cmd)

    root_path = os.path.dirname(os.path.abspath(file_index_db))
    files = get_file_paths(hdf5_records, root_path,
                           use_subfolders=use_subfolders)

    # The backend parameter can be either "threading" or "multiprocessing"
    Parallel(n_jobs=cores, backend="multiprocessing")(
        delayed(copy_2_proc)(h5_file, suffix) for h5_file in files)

    if update_db:
        update_db_func(db, table_out_name, hdf5_records, suffix,
                       purge=purge)
    n_files = len(files)
    print("--- Copy for processing %d files took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))
    db.close()
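
# Usage sketch for copy2proc_multiple (hypothetical index file name):
def _example_copy2proc_multiple():
    # Duplicate the raw hdf5 files into "_proc" copies so the raw data
    # stays untouched, and fill the "hdf5_proc" table accordingly.
    copy2proc_multiple("index.json", table_in_name="hdf5_raw",
                       table_out_name="hdf5_proc", suffix="_proc")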
def average_image_groups(file_index_fn, table_name="hdf5_proc",
                         dataset_for_averaging="data", variable="zpz",
                         description="", dataset_store="data",
                         date=None, sample=None, energy=None, cores=-2,
                         jj=True):
    """Average images of one experiment by zpz.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are processed.
    The different groups of images are averaged in parallel: all cores
    but one are used (value=-2). All data images at the same angle, for
    the different ZPz positions, are averaged.
    """
    # TODO: In the future, averaging by variable == "repetition" followed
    # by variable == "zpz" should be made available. Finally, these three
    # features should exist:
    # - average by same angle and different zpz positions (DONE)
    # - average by same angle, same zpz and different repetition (ONGOING)
    # - average by same angle, first by same zpz and different repetition,
    #   and afterwards by same angle and different zpz positions (TODO)

    start_time = time.time()
    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)

    files_query = Query()
    file_index_db = filter_file_index(file_index_db, files_query,
                                      date=date, sample=sample,
                                      energy=energy, ff=False)

    all_file_records = file_index_db.all()
    n_files = len(all_file_records)

    averages_table = db.table("hdf5_averages")
    averages_table.purge()

    groups_to_average = []
    if variable == "zpz":
        dates_samples_energies_angles = []
        for record in all_file_records:
            dates_samples_energies_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["angle"]))
        dates_samples_energies_angles = list(
            set(dates_samples_energies_angles))
        for date_sample_energy_angle in dates_samples_energies_angles:
            date = date_sample_energy_angle[0]
            sample = date_sample_energy_angle[1]
            energy = date_sample_energy_angle[2]
            angle = date_sample_energy_angle[3]

            # Raw image records by given date, sample, energy and angle
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_zpz = len(img_records)
            central_zpz = 0
            for img_record in img_records:
                central_zpz += img_record["zpz"]
            central_zpz /= float(num_zpz)

            files = get_file_paths(img_records, root_path)
            central_zpz_with_group_to_average = [central_zpz]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            central_zpz_with_group_to_average.append(group_to_average)
            central_zpz_with_group_to_average.append(
                date_sample_energy_angle)
            groups_to_average.append(central_zpz_with_group_to_average)
    elif variable == "repetition" and jj:
        dates_samples_energies_jjs_angles = []
        for record in all_file_records:
            dates_samples_energies_jjs_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_u"], record["jj_d"], record["angle"]))
        dates_samples_energies_jjs_angles = list(
            set(dates_samples_energies_jjs_angles))
        for date_sample_energy_jj_angle in dates_samples_energies_jjs_angles:
            date = date_sample_energy_jj_angle[0]
            sample = date_sample_energy_jj_angle[1]
            energy = date_sample_energy_jj_angle[2]
            jj_u = date_sample_energy_jj_angle[3]
            jj_d = date_sample_energy_jj_angle[4]
            angle = date_sample_energy_jj_angle[5]

            # Raw image records by given date, sample, energy, jj and angle
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_u == jj_u) &
                         (files_query.jj_d == jj_d) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)
            files = get_file_paths(img_records, root_path)
            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy_jj_angle)
            groups_to_average.append(complete_group_to_average)
    elif variable == "repetition" and not jj:
        dates_samples_energies = []
        for record in all_file_records:
            dates_samples_energies.append(
                (record["date"], record["sample"], record["energy"]))
        dates_samples_energies = list(set(dates_samples_energies))
        for date_sample_energy in dates_samples_energies:
            date = date_sample_energy[0]
            sample = date_sample_energy[1]
            energy = date_sample_energy[2]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)
            files = get_file_paths(img_records, root_path)
            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy)
            groups_to_average.append(complete_group_to_average)

    if groups_to_average[0][1]:
        records = Parallel(n_jobs=cores, backend="multiprocessing")(
            delayed(average_and_store)(
                group_to_average,
                dataset_for_averaging=dataset_for_averaging,
                variable=variable, description=description,
                dataset_store=dataset_store, jj=jj)
            for group_to_average in groups_to_average)
        averages_table.insert_multiple(records)

    print("--- Average %d files by groups, took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))
    db.close()
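
# Usage sketch for average_image_groups (hypothetical index file name):
def _example_average_image_groups():
    # Average, angle by angle, the images recorded at different zone-plate
    # z positions (zpz); the averages are recorded in "hdf5_averages".
    average_image_groups("index.json", variable="zpz")

    # Alternatively, average the repetitions of each jj position per angle.
    average_image_groups("index.json", variable="repetition", jj=True)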
def average_image_group_by_angle(file_index_fn, table_name="hdf5_proc",
                                 angle=0.0, dataset_for_averaging="data",
                                 variable="repetition", description="",
                                 dataset_store="data", date=None,
                                 sample=None, energy=None):
    """Average images by repetition for a single angle.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are processed.
    All data images at the same angle, for the different repetitions,
    are averaged.
    """
    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)
    files_query = Query()
    file_index_db = filter_file_index(file_index_db, files_query,
                                      date=date, sample=sample,
                                      energy=energy, angle=angle, ff=False)
    all_file_records = file_index_db.all()
    averages_table = db.table("hdf5_averages")

    # We only have files for a single angle
    if variable == "repetition":
        dates_samples_energies_jjs_angles = []
        for record in all_file_records:
            dates_samples_energies_jjs_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_u"], record["jj_d"], record["angle"]))
        dates_samples_energies_jjs_angles = list(
            set(dates_samples_energies_jjs_angles))
        for date_sample_energy_jj_angle in dates_samples_energies_jjs_angles:
            date = date_sample_energy_jj_angle[0]
            sample = date_sample_energy_jj_angle[1]
            energy = date_sample_energy_jj_angle[2]
            jj_u = date_sample_energy_jj_angle[3]
            jj_d = date_sample_energy_jj_angle[4]
            angle = date_sample_energy_jj_angle[5]

            # Raw image records by given date, sample, energy, jj and angle
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_u == jj_u) &
                         (files_query.jj_d == jj_d) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)
            files = get_file_paths(img_records, root_path)
            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy_jj_angle)
            record = average_and_store(
                complete_group_to_average,
                dataset_for_averaging=dataset_for_averaging,
                variable=variable, description=description,
                dataset_store=dataset_store)
            if record not in averages_table.all():
                averages_table.insert(record)
    db.close()
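
# Usage sketch for average_image_group_by_angle (hypothetical index file
# name and angle value):
def _example_average_image_group_by_angle():
    # Average the repeated acquisitions of a single projection angle.
    average_image_group_by_angle("index.json", angle=0.0,
                                 variable="repetition")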
def get_samples(self, txm_txt_script, use_existing_db=False,
                use_subfolders=True, organize_by_repetitions=False):
    """Organize the files by samples"""
    if use_subfolders:
        print("Using subfolders for finding the files")
    else:
        print("Searching files through the whole root path")
    root_path = os.path.dirname(os.path.abspath(txm_txt_script))
    db = get_db(txm_txt_script, use_existing_db=use_existing_db)
    all_file_records = db.all()

    dates_samples_energies = []
    for record in all_file_records:
        dates_samples_energies.append(
            (record["date"], record["sample"], record["energy"]))
    dates_samples_energies = list(set(dates_samples_energies))

    samples = {}
    files_query = Query()
    for date_sample_energy in dates_samples_energies:
        files_raw_data = {}
        files_for_sample_subdict = {}
        date = date_sample_energy[0]
        sample = date_sample_energy[1]
        energy = date_sample_energy[2]
        query_impl = ((files_query.date == date) &
                      (files_query.sample == sample) &
                      (files_query.energy == energy) &
                      (files_query.FF == False))
        records_by_sample_and_energy = db.search(query_impl)
        if not organize_by_repetitions:
            zps_by_sample_and_e = [record["zpz"] for record
                                   in records_by_sample_and_energy]
            zpz_positions_by_sample_e = sorted(set(zps_by_sample_and_e))
            for zpz in zpz_positions_by_sample_e:
                query_impl = ((files_query.date == date) &
                              (files_query.sample == sample) &
                              (files_query.energy == energy) &
                              (files_query.zpz == zpz) &
                              (files_query.FF == False))
                fn_by_zpz_query = db.search(query_impl)
                sorted_fn_by_zpz_query = sorted(fn_by_zpz_query,
                                                key=itemgetter('angle'))
                files = get_file_paths(sorted_fn_by_zpz_query, root_path,
                                       use_subfolders=use_subfolders)
                files_raw_data[zpz] = files
        else:
            repetitions_by_sample_and_e = [
                record["repetition"] for record
                in records_by_sample_and_energy]
            repetitions_by_sample_and_e = sorted(
                set(repetitions_by_sample_and_e))
            for repetition in repetitions_by_sample_and_e:
                query_impl = ((files_query.date == date) &
                              (files_query.sample == sample) &
                              (files_query.energy == energy) &
                              (files_query.repetition == repetition) &
                              (files_query.FF == False))
                fn_by_repetition_query = db.search(query_impl)
                sorted_fn_by_repetition_query = sorted(
                    fn_by_repetition_query, key=itemgetter('angle'))
                files = get_file_paths(sorted_fn_by_repetition_query,
                                       root_path,
                                       use_subfolders=use_subfolders)
                files_raw_data[repetition] = files

        # Get FF image records
        fn_ff_query_by_energy = ((files_query.date == date) &
                                 (files_query.sample == sample) &
                                 (files_query.energy == energy) &
                                 (files_query.FF == True))
        query_output = db.search(fn_ff_query_by_energy)
        files_FF = get_file_paths(query_output, root_path,
                                  use_subfolders=use_subfolders)
        files_for_sample_subdict['tomos'] = files_raw_data
        files_for_sample_subdict['ff'] = files_FF
        samples[date_sample_energy] = files_for_sample_subdict
    return samples
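
# Sketch of how the structure returned by get_samples can be consumed
# (hypothetical: 'organizer' stands for whatever object exposes
# get_samples, and "txm_script.txt" for the TXM script file):
def _example_get_samples(organizer):
    samples = organizer.get_samples("txm_script.txt", use_subfolders=True)
    # Keys are (date, sample, energy) tuples; each value holds the tomo
    # projection files grouped by zpz (or repetition) plus the FF files.
    for (date, sample, energy), groups in samples.items():
        print(date, sample, energy,
              "groups:", len(groups["tomos"]),
              "FF files:", len(groups["ff"]))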
def average_ff(file_index_fn, table_name="hdf5_proc", date=None,
               sample=None, energy=None, cores=-2, query=None, jj=False):
    """Average the flat-field (FF) images, group by group."""
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)
    files_query = Query()
    if date or sample or energy:
        # Narrow down the index successively with an in-memory DB
        temp_db = TinyDB(storage=MemoryStorage)
        temp_db.insert_multiple(file_index_db.all())
        if date:
            records = temp_db.search(files_query.date == date)
            temp_db.purge()
            temp_db.insert_multiple(records)
        if sample:
            records = temp_db.search(files_query.sample == sample)
            temp_db.purge()
            temp_db.insert_multiple(records)
        if energy:
            records = temp_db.search(files_query.energy == energy)
            temp_db.purge()
            temp_db.insert_multiple(records)
        file_index_db = temp_db

    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    file_records = file_index_db.all()
    dates_samples_energies = []
    for record in file_records:
        data = (record["date"], record["sample"], record["energy"])
        if jj is True:
            data += (record["jj_u"], record["jj_d"])
        dates_samples_energies.append(data)
    dates_samples_energies = list(set(dates_samples_energies))

    for date_sample_energy in dates_samples_energies:
        date = date_sample_energy[0]
        sample = date_sample_energy[1]
        energy = date_sample_energy[2]

        # FF records by given date, sample and energy
        query_cmd_ff = ((files_query.date == date) &
                        (files_query.sample == sample) &
                        (files_query.energy == energy) &
                        (files_query.FF == True))
        if jj is True:
            jj_u = date_sample_energy[3]
            jj_d = date_sample_energy[4]
            query_cmd_ff &= ((files_query.jj_u == jj_u) &
                             (files_query.jj_d == jj_d))
        h5_ff_records = file_index_db.search(query_cmd_ff)
        files_ff = get_file_paths(h5_ff_records, root_path)
        normalize_ff(files_ff)
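
# Usage sketch for average_ff (hypothetical index file name):
def _example_average_ff():
    # Average the flat-field images of each date/sample/energy group
    # (and of each jj pair, when jj=True) before normalization.
    average_ff("index.json", jj=True)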
def normalize_images(file_index_fn, table_name="hdf5_proc", date=None,
                     sample=None, energy=None, average_ff=True, cores=-2,
                     query=None, jj=False, read_norm_ff=False):
    """Normalize images of one experiment.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are normalized.
    The normalization of the different images is done in parallel.
    Each file contains a single image to be normalized.

    .. todo: This method should be divided in two. One should calculate
        the average FF, and the other (normalize_images) should receive
        the averaged FF image (or the single FF image) as input argument.
    """
    start_time = time.time()
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)
    files_query = Query()
    if date or sample or energy:
        # Narrow down the index successively with an in-memory DB
        temp_db = TinyDB(storage=MemoryStorage)
        temp_db.insert_multiple(file_index_db.all())
        if date:
            records = temp_db.search(files_query.date == date)
            temp_db.purge()
            temp_db.insert_multiple(records)
        if sample:
            records = temp_db.search(files_query.sample == sample)
            temp_db.purge()
            temp_db.insert_multiple(records)
        if energy:
            records = temp_db.search(files_query.energy == energy)
            temp_db.purge()
            temp_db.insert_multiple(records)
        file_index_db = temp_db

    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    file_records = file_index_db.all()

    dates_samples_energies = []
    for record in file_records:
        data = (record["date"], record["sample"], record["energy"])
        if jj is True:
            data += (record["jj_u"], record["jj_d"])
        dates_samples_energies.append(data)
    dates_samples_energies = list(set(dates_samples_energies))

    num_files_total = 0
    for date_sample_energy in dates_samples_energies:
        date = date_sample_energy[0]
        sample = date_sample_energy[1]
        energy = date_sample_energy[2]

        # Raw image records by given date, sample and energy
        query_cmd = ((files_query.date == date) &
                     (files_query.sample == sample) &
                     (files_query.energy == energy) &
                     (files_query.FF == False))
        if jj is True:
            jj_u = date_sample_energy[3]
            jj_d = date_sample_energy[4]
            query_cmd &= ((files_query.jj_u == jj_u) &
                          (files_query.jj_d == jj_d))
        if query is not None:
            query_cmd &= query
        h5_records = file_index_db.search(query_cmd)

        # FF records by given date, sample and energy
        query_cmd_ff = ((files_query.date == date) &
                        (files_query.sample == sample) &
                        (files_query.energy == energy) &
                        (files_query.FF == True))
        if jj is True:
            query_cmd_ff &= ((files_query.jj_u == jj_u) &
                             (files_query.jj_d == jj_d))
        h5_ff_records = file_index_db.search(query_cmd_ff)

        files = get_file_paths(h5_records, root_path)
        n_files = len(files)
        num_files_total += n_files
        files_ff = get_file_paths(h5_ff_records, root_path)
        if not files_ff:
            msg = "FlatFields are not present, images cannot be normalized"
            raise Exception(msg)

        if average_ff:
            # Average the FF files and always use the same average (for a
            # given date, sample, energy and jj's). Normally the case of
            # magnetism experiments.
            if read_norm_ff is True:
                ff_norm_image = get_normalized_ff(files_ff)
            else:
                _, ff_norm_image = normalize_image(
                    files[0], ff_img_filenames=files_ff)
                files.pop(0)
            if len(files):
                Parallel(n_jobs=cores, backend="multiprocessing")(
                    delayed(normalize_image)(
                        h5_file, average_normalized_ff_img=ff_norm_image)
                    for h5_file in files)
        else:
            # Same number of FF as sample data files: normalize each
            # single sample data image with a single FF image. Normally
            # the case of spectroscopies.
            # TODO
            pass

    print("--- Normalize %d files took %s seconds ---\n" %
          (num_files_total, (time.time() - start_time)))
    db.close()
def many_images_to_h5_stack(file_index_fn, table_name="hdf5_proc",
                            type_struct="normalized", suffix="_stack",
                            date=None, sample=None, energy=None, zpz=None,
                            ff=None, subfolders=False, cores=-2):
    """Go from many single-image hdf5 files to hdf5 stacks of images,
    using all cores but one for the computations."""
    # TODO: spectroscopy normalized not implemented (no Avg FF, etc.)
    print("--- Individual images to stacks ---")
    start_time = time.time()
    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)
    files_query = Query()
    if (date is not None or sample is not None or energy is not None
            or zpz is not None or ff is not None):
        file_index_db = filter_file_index(file_index_db, files_query,
                                          date=date, sample=sample,
                                          energy=energy, zpz=zpz, ff=ff)

    root_path = os.path.dirname(os.path.abspath(file_index_fn))
    all_file_records = file_index_db.all()
    stack_table = db.table("hdf5_stacks")
    stack_table.purge()

    files_list = []
    if type_struct == "normalized" or type_struct == "aligned":
        dates_samples_energies_zpzs = []
        for record in all_file_records:
            dates_samples_energies_zpzs.append(
                (record["date"], record["sample"], record["energy"],
                 record["zpz"]))
        dates_samples_energies_zpzs = list(set(dates_samples_energies_zpzs))
        for date_sample_energy_zpz in dates_samples_energies_zpzs:
            date = date_sample_energy_zpz[0]
            sample = date_sample_energy_zpz[1]
            energy = date_sample_energy_zpz[2]
            zpz = date_sample_energy_zpz[3]

            # Query building parts
            da = (files_query.date == date)
            sa = (files_query.sample == sample)
            en = (files_query.energy == energy)
            zp = (files_query.zpz == zpz)
            ff_false = (files_query.FF == False)
            ff_true = (files_query.FF == True)

            data_files_ff = []
            if file_index_db.search(files_query.FF.exists()):
                # Records carry an FF flag: fetch the FF files and
                # restrict the data query to non-FF records
                query_cmd_ff = (da & sa & en & ff_true)
                h5_ff_records = file_index_db.search(query_cmd_ff)
                data_files_ff = get_file_paths(h5_ff_records, root_path,
                                               use_subfolders=subfolders)
                query_cmd = (da & sa & en & zp & ff_false)
            else:
                query_cmd = (da & sa & en & zp)
            h5_records = file_index_db.search(query_cmd)
            h5_records = sorted(h5_records, key=itemgetter('angle'))
            data_files = get_file_paths(h5_records, root_path,
                                        use_subfolders=subfolders)
            files_dict = {"data": data_files,
                          "ff": data_files_ff,
                          "date": date,
                          "sample": sample,
                          "energy": energy,
                          "zpz": zpz}
            files_list.append(files_dict)
    elif (type_struct == "normalized_multifocus" or
          type_struct == "normalized_simple" or
          type_struct == "aligned_multifocus"):
        dates_samples_energies = []
        for record in all_file_records:
            dates_samples_energies.append(
                (record["date"], record["sample"], record["energy"]))
        dates_samples_energies = list(set(dates_samples_energies))
        for date_sample_energy in dates_samples_energies:
            date = date_sample_energy[0]
            sample = date_sample_energy[1]
            energy = date_sample_energy[2]

            # Query building parts
            da = (files_query.date == date)
            sa = (files_query.sample == sample)
            en = (files_query.energy == energy)

            # Query command
            query_cmd = (da & sa & en)
            h5_records = file_index_db.search(query_cmd)
            h5_records = sorted(h5_records, key=itemgetter('angle'))
            data_files = get_file_paths(h5_records, root_path,
                                        use_subfolders=subfolders)
            files_dict = {"data": data_files,
                          "date": date,
                          "sample": sample,
                          "energy": energy}
            files_list.append(files_dict)
    elif type_struct == "normalized_magnetism_many_repetitions":
        dates_samples_energies_jjs = []
        for record in all_file_records:
            dates_samples_energies_jjs.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_offset"]))
        dates_samples_energies_jjs = list(set(dates_samples_energies_jjs))
        for date_sample_energy_jj in dates_samples_energies_jjs:
            date = date_sample_energy_jj[0]
            sample = date_sample_energy_jj[1]
            energy = date_sample_energy_jj[2]
            jj_offset = date_sample_energy_jj[3]

            # Raw image records by given date, sample, energy and jj offset
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_offset == jj_offset))
            h5_records = file_index_db.search(query_cmd)
            h5_records = sorted(h5_records, key=itemgetter('angle'))
            data_files = get_file_paths(h5_records, root_path,
                                        use_subfolders=subfolders)
            files_dict = {"data": data_files,
                          "date": date,
                          "sample": sample,
                          "energy": energy,
                          "jj_offset": jj_offset}
            files_list.append(files_dict)
    elif type_struct == "normalized_spectroscopy":
        dates_samples = []
        for record in all_file_records:
            dates_samples.append((record["date"], record["sample"]))
        dates_samples = list(set(dates_samples))
        for date_sample in dates_samples:
            date = date_sample[0]
            sample = date_sample[1]

            # Query command
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample))
            h5_records = file_index_db.search(query_cmd)
            h5_records = sorted(h5_records, key=itemgetter('energy'))
            data_files = get_file_paths(h5_records, root_path,
                                        use_subfolders=subfolders)
            files_dict = {"data": data_files,
                          "date": date,
                          "sample": sample}
            files_list.append(files_dict)

    # Parallelization of making the stacks
    records = Parallel(n_jobs=cores, backend="multiprocessing")(
        delayed(make_stack)(files_for_stack, root_path,
                            type_struct=type_struct, suffix=suffix)
        for files_for_stack in files_list)
    stack_table.insert_multiple(records)

    pretty_printer = pprint.PrettyPrinter(indent=4)
    print("Created stacks:")
    for record in stack_table.all():
        pretty_printer.pprint(record["filename"])
    db.close()
    print("--- Individual images to stacks took %s seconds ---\n" %
          (time.time() - start_time))
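
# Usage sketch for many_images_to_h5_stack (hypothetical index file name):
def _example_many_images_to_h5_stack():
    # Pack the normalized single-image files into one hdf5 stack per
    # date/sample/energy/zpz group, sorted by angle.
    many_images_to_h5_stack("index.json", table_name="hdf5_proc",
                            type_struct="normalized")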