def worker(k): print "Converting %s" % k data = h5["/entry/data"][k][:] data, u, l = software_binning(data, params.bin, params.dead_area_treatment) dfile = os.path.splitext(h5["/entry/data"].get(k, getlink=True).filename)[0]+"_bin%d.h5"%params.bin eiger.create_data_file(os.path.join(os.path.dirname(outfile), dfile), data, (1, data.shape[1], data.shape[2]), h5["/entry/data"][k].attrs["image_nr_low"], h5["/entry/data"][k].attrs["image_nr_high"]) return u, l
def worker(k): print "Converting %s" % k data = h5["/entry/data"][k][:] data, u, l = software_binning(data, params.bin, params.dead_area_treatment) dfile = os.path.splitext(h5["/entry/data"].get( k, getlink=True).filename)[0] + "_bin%d.h5" % params.bin eiger.create_data_file(os.path.join(os.path.dirname(outfile), dfile), data, (1, data.shape[1], data.shape[2]), h5["/entry/data"][k].attrs["image_nr_low"], h5["/entry/data"][k].attrs["image_nr_high"]) return u, l
def run_safe(infile, check_data=True): startt = time.time() h5in = h5py.File(infile, "r") if is_bslz4_applied(h5in, "/entry/data/data"): print "SKIPPING. Already bslz4'd: %s" % infile return outfile = tempfile.mkstemp(prefix=os.path.basename(infile), dir="/dev/shm")[1] # copy and compress data data = h5in["/entry/data/data"] eiger.create_data_file(outfile, data, data.chunks, h5in["/entry/data/data"].attrs["image_nr_low"], h5in["/entry/data/data"].attrs["image_nr_high"]) h5in.close() eltime = time.time() - startt size1 = os.path.getsize(infile) / 1024**2 size2 = os.path.getsize(outfile) / 1024**2 if check_data: # Check data and overwrite if ok h5in = h5py.File(infile, "r") h5out = h5py.File(outfile, "r") if (h5in["/entry/data/data"][:] == h5out["/entry/data/data"][:]).all(): print "OK. overwriting with compressed file: %s # %.3f sec %.2f MB -> %.2f MB (%.1f %%)" % ( infile, eltime, size1, size2, size2 / size1 * 100.) shutil.move(outfile, infile) else: print "Error! data not match: %s # %.3f sec" % (infile, eltime) else: print "Overwriting with compressed file: %s # %.3f sec %.2f MB -> %.2f MB (%.1f %%)" % ( infile, eltime, size1, size2, size2 / size1 * 100.) shutil.move(outfile, infile) if os.path.isfile(outfile): print " temporary file removed: %s" % outfile os.remove(outfile)
def run_safe(infile, check_data=True): startt = time.time() h5in = h5py.File(infile, "r") if is_bslz4_applied(h5in, "/entry/data/data"): print "SKIPPING. Already bslz4'd: %s" % infile return outfile = tempfile.mkstemp(prefix=os.path.basename(infile), dir="/dev/shm")[1] # copy and compress data data = h5in["/entry/data/data"] eiger.create_data_file(outfile, data, data.chunks, h5in["/entry/data/data"].attrs["image_nr_low"], h5in["/entry/data/data"].attrs["image_nr_high"]) h5in.close() eltime = time.time() - startt size1 = os.path.getsize(infile) / 1024**2 size2 = os.path.getsize(outfile) / 1024**2 if check_data: # Check data and overwrite if ok h5in = h5py.File(infile, "r") h5out = h5py.File(outfile, "r") if (h5in["/entry/data/data"][:] == h5out["/entry/data/data"][:]).all(): print "OK. overwriting with compressed file: %s # %.3f sec %.2f MB -> %.2f MB (%.1f %%)" % (infile, eltime, size1, size2, size2/size1*100.) shutil.move(outfile, infile) else: print "Error! data not match: %s # %.3f sec" % (infile, eltime) else: print "Overwriting with compressed file: %s # %.3f sec %.2f MB -> %.2f MB (%.1f %%)" % (infile, eltime, size1, size2, size2/size1*100.) shutil.move(outfile, infile) if os.path.isfile(outfile): print " temporary file removed: %s" % outfile os.remove(outfile)
def run(infile, nframes, tmpdir="/dev/shm"):
    """Re-chunk the data files of an EIGER master HDF5 into *nframes*-frame files.

    infile  -- path to the master .h5 whose /entry/data external links are split.
    nframes -- number of frames per new data file.
    tmpdir  -- scratch directory where the new files are assembled.

    The original master and data files are moved to a split_org_* backup
    directory next to *infile*, and the rebuilt set is moved into their place.
    """
    wdir = tempfile.mkdtemp(prefix="h5split", dir=tmpdir)
    orgdir = os.path.normpath(os.path.dirname(infile))
    print "Workdir: %s" % wdir
    # Copy original master file to wdir; old data links are deleted from this
    # copy and replaced with links to the new chunking below.
    infile_tmp = os.path.join(wdir, os.path.basename(infile))
    shutil.copyfile(infile, infile_tmp)
    h5in = h5py.File(infile_tmp, "a")
    h5org = h5py.File(infile, "r")
    datasets = []
    # Total frame count = largest image_nr_high over all linked data blocks.
    n_all = max(map(lambda k: h5org["/entry/data"][k].attrs["image_nr_high"], h5org["/entry/data"].keys()))
    # lookup[frame-1] -> index into `datasets` of the block holding that frame
    # (frame numbers are 1-based).
    lookup = map(lambda x: 0, xrange(n_all))
    org_files = [infile]
    print "Reading original data"
    for i, k in enumerate(sorted(h5org["/entry/data"].keys())):
        print " %s %s" % (k, h5org["/entry/data"][k].shape)
        datasets.append(h5org["/entry/data"][k])
        # image_nr_low/high: 1-based inclusive frame range of this block.
        low, high = h5org["/entry/data"][k].attrs["image_nr_low"], h5org["/entry/data"][k].attrs["image_nr_high"]
        for j in xrange(low, high+1):
            lookup[j-1] = i
        del h5in["/entry/data"][k]  # remove the old external link from the copy
        org_files.append(os.path.join(orgdir, h5org["/entry/data"].get(k, getlink=True).filename))
    # Write data
    cur_idx = 0  # NOTE(review): unused
    for i in xrange(int(math.ceil(n_all/float(nframes)))):
        outname = "data_%.6d" % (i+1)
        print "preparing", outname
        # 1-based inclusive frame range of the new chunk.
        newlow, newhigh = i*nframes+1, min((i+1)*nframes, n_all)
        if lookup[newlow-1] == lookup[newhigh-1]:
            # Entire chunk lies within one original block: take a single slice.
            # lidx = number of that block's frames preceding newlow.
            lidx = len(filter(lambda x: x==lookup[newlow-1], lookup[:newlow-1]))
            ridx = lidx + (newhigh - newlow + 1)
            data = datasets[lookup[newlow-1]][lidx:ridx]
            print " data_%.6d [%6d, %6d)" % (lookup[newlow-1]+1, lidx, ridx)
        else:
            # Chunk spans multiple original blocks: concatenate the pieces.
            data = None
            for j in xrange(lookup[newlow-1], lookup[newhigh-1]+1):
                if j == lookup[newlow-1]:
                    # First block: start at newlow's offset inside the block.
                    lidx = len(filter(lambda x: x==j, lookup[:newlow])) - 1
                    ridx = None # till end
                elif j == lookup[newhigh-1]:
                    # Last block: stop right after newhigh.
                    lidx = 0
                    ridx = len(filter(lambda x: x==j, lookup[:newhigh]))
                else:
                    # Interior block: take it whole.
                    lidx = 0
                    ridx = None # till end
                print " data_%.6d [%6s, %6s)" % (j+1, lidx, ridx)
                if data is None: data = datasets[j][lidx:ridx]
                else: data = numpy.concatenate((data, datasets[j][lidx:ridx]))
        eiger.create_data_file(os.path.join(wdir, outname+".h5"), data,
                               datasets[0].chunks, newlow, newhigh)
        h5in["/entry/data/%s"%outname] = h5py.ExternalLink(outname+".h5", "/entry/data/data")
        print " wrote %s %s" % (outname+".h5", data.shape)
    h5in.close()
    # Move originals into a timestamped backup dir, then install the new files.
    bdir = os.path.join(orgdir, "split_org_%s"%time.strftime("%y%m%d-%H%M%S"))
    os.mkdir(bdir)
    print "Moving old files"
    for f in org_files:
        print " %s to %s" % (f, bdir)
        shutil.move(f, bdir)
    print "Moving new files"
    for f in glob.glob(os.path.join(wdir, "*")):
        print " %s to %s" % (f, orgdir)
        shutil.move(f, orgdir)
    os.rmdir(wdir)
def run(infile, nframes, tmpdir="/dev/shm"):
    """Split an EIGER master's linked data files into *nframes*-frame files.

    infile  -- master .h5 file; its /entry/data external links are rebuilt.
    nframes -- frames per new data file.
    tmpdir  -- scratch directory used while assembling the new files.

    Originals are preserved in a split_org_* directory beside *infile*;
    the new master copy and data files replace them.
    """
    wdir = tempfile.mkdtemp(prefix="h5split", dir=tmpdir)
    orgdir = os.path.normpath(os.path.dirname(infile))
    print "Workdir: %s" % wdir
    # Copy original master file to wdir; the copy gets its data links
    # replaced with links to the newly chunked files.
    infile_tmp = os.path.join(wdir, os.path.basename(infile))
    shutil.copyfile(infile, infile_tmp)
    h5in = h5py.File(infile_tmp, "a")
    h5org = h5py.File(infile, "r")
    datasets = []
    # Total number of frames = max image_nr_high over all data blocks.
    n_all = max(
        map(lambda k: h5org["/entry/data"][k].attrs["image_nr_high"],
            h5org["/entry/data"].keys()))
    # lookup maps (1-based frame number - 1) -> index of the owning block
    # in `datasets`.
    lookup = map(lambda x: 0, xrange(n_all))
    org_files = [infile]
    print "Reading original data"
    for i, k in enumerate(sorted(h5org["/entry/data"].keys())):
        print " %s %s" % (k, h5org["/entry/data"][k].shape)
        datasets.append(h5org["/entry/data"][k])
        # 1-based inclusive frame range covered by this block.
        low, high = h5org["/entry/data"][k].attrs["image_nr_low"], h5org[
            "/entry/data"][k].attrs["image_nr_high"]
        for j in xrange(low, high + 1):
            lookup[j - 1] = i
        del h5in["/entry/data"][k]  # drop old external link from the copy
        org_files.append(
            os.path.join(orgdir,
                         h5org["/entry/data"].get(k, getlink=True).filename))
    # Write data
    cur_idx = 0  # NOTE(review): unused
    for i in xrange(int(math.ceil(n_all / float(nframes)))):
        outname = "data_%.6d" % (i + 1)
        print "preparing", outname
        # 1-based inclusive frame range of the chunk being written.
        newlow, newhigh = i * nframes + 1, min((i + 1) * nframes, n_all)
        if lookup[newlow - 1] == lookup[newhigh - 1]:
            # Single source block: one direct slice suffices.
            # lidx counts that block's frames before newlow.
            lidx = len(
                filter(lambda x: x == lookup[newlow - 1],
                       lookup[:newlow - 1]))
            ridx = lidx + (newhigh - newlow + 1)
            data = datasets[lookup[newlow - 1]][lidx:ridx]
            print " data_%.6d [%6d, %6d)" % (lookup[newlow - 1] + 1, lidx,
                                             ridx)
        else:
            # Several source blocks: stitch the pieces together.
            data = None
            for j in xrange(lookup[newlow - 1], lookup[newhigh - 1] + 1):
                if j == lookup[newlow - 1]:
                    # First block: begin at newlow's offset within it.
                    lidx = len(filter(lambda x: x == j, lookup[:newlow])) - 1
                    ridx = None  # till end
                elif j == lookup[newhigh - 1]:
                    # Last block: cut off just past newhigh.
                    lidx = 0
                    ridx = len(filter(lambda x: x == j, lookup[:newhigh]))
                else:
                    # Middle block: used in full.
                    lidx = 0
                    ridx = None  # till end
                print " data_%.6d [%6s, %6s)" % (j + 1, lidx, ridx)
                if data is None:
                    data = datasets[j][lidx:ridx]
                else:
                    data = numpy.concatenate((data, datasets[j][lidx:ridx]))
        eiger.create_data_file(os.path.join(wdir, outname + ".h5"), data,
                               datasets[0].chunks, newlow, newhigh)
        h5in["/entry/data/%s" % outname] = h5py.ExternalLink(
            outname + ".h5", "/entry/data/data")
        print " wrote %s %s" % (outname + ".h5", data.shape)
    h5in.close()
    # Back up originals, then move the new files into the data directory.
    bdir = os.path.join(orgdir,
                        "split_org_%s" % time.strftime("%y%m%d-%H%M%S"))
    os.mkdir(bdir)
    print "Moving old files"
    for f in org_files:
        print " %s to %s" % (f, bdir)
        shutil.move(f, bdir)
    print "Moving new files"
    for f in glob.glob(os.path.join(wdir, "*")):
        print " %s to %s" % (f, orgdir)
        shutil.move(f, orgdir)
    os.rmdir(wdir)