import os
import time

import numpy as np

os.environ['NUMEXPR_MAX_THREADS'] = '32'
os.environ['NUMEXPR_NUM_THREADS'] = '16'
os.environ["TMPDIR"] = '/mnt/data0/tmp/'

import tables.parameters
tables.parameters.MAX_BLOSC_THREADS = 4

import mkidpipeline as pipe

datafile = 'data_HD1160.yml'
cfgfile = 'pipe_HD1160.yml'

pipe.logtoconsole()
pcfg = pipe.configure_pipeline(cfgfile)
pipe.getLogger('mkidpipeline.calibration.wavecal').setLevel('INFO')
pipe.getLogger('mkidpipeline.badpix').setLevel('INFO')
pipe.getLogger('mkidpipeline.hdf.photontable').setLevel('INFO')

ncpu = 7


def run_stage1(dataset):
    times = []
    times.append(time.time())
    pipe.bin2hdf.buildtables(dataset.timeranges, ncpu=ncpu, remake=False, chunkshape=250)
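# Illustrative usage sketch (an assumption, not part of the original listing):
# load the data description named in `datafile` and run the stage-1 table build.
# `pipe.load_data_description` is the loader used elsewhere in these scripts.
if __name__ == '__main__':
    dataset = pipe.load_data_description(datafile)
    run_stage1(dataset)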
writer.grab_frame()


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(description="Temporal drizzle animation utility")
    parser.add_argument('cfg', type=str, help='YML config file for the drizzle animator.')
    args = parser.parse_args()

    pipe.configure_pipeline(args.cfg)
    config = pipe.config.config

    if config.animation.power:
        VALID_STRETCHES['power'] = PowerStretch(config.animation.power)

    if not config.animation.target:
        log.warning('Target missing! Must be specified in the config for metadata.')

    if not config.data.startt:
        start = 0
    else:
        start = config.data.startt

    if config.data.duration and config.data.stopt:
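# Example invocation (illustrative; the script and config filenames are hypothetical):
#   python drizzle_animate.py animate_config.yml
# The positional `cfg` argument is the YML file handed to pipe.configure_pipeline above;
# start/stop/duration behavior is then driven by the config.data block.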
def report(timesort=False):
    matplotlib.use('Qt5Agg')
    plt.ion()
    pipe.logtoconsole()
    pipe.configure_pipeline(resource_filename('mkidpipeline', os.path.join('tests', 'h5speed_pipe.yml')))

    results = SpeedResults('/scratch/baileyji/mec/speedtest/recovery.pickle')
    phot_per_RID = results.determine_mean_photperRID()
    all_chunkshapes = results.all_chunkshapes

    mymin = lambda x: np.min(x) if len(x) else np.nan
    mymax = lambda x: np.max(x) if len(x) else np.nan
    array_range = lambda x: (min(x), max(x))

    summary = ('The {dset_name} dataset file ranges from {gblo:.1f}-{gbhi:.1f} GB and contains '
               '{nphot:.0f}E6 photons. ({ppg:.0f} Mp/GB)')

    SERIES_NAMES = ['F9', 'UL3', 'UL9', 'M3', 'M9', 'BShufM9', 'NShufM9']
    series_settings = [
        dict(index=('full', 9), timesort=timesort),
        dict(index=('ultralight', 3), timesort=timesort),
        dict(index=('ultralight', 9), timesort=timesort),
        dict(index=('medium', 3), timesort=timesort),
        dict(index=('medium', 9), timesort=timesort),
        dict(index=('medium', 9), timesort=timesort, bitshuffle=True),
        dict(index=('medium', 9), timesort=timesort, shuffle=False, bitshuffle=False)
    ]

    # Generate the plot scaffold
    fig, axes = plt.subplots(nrows=len(DATASET_NAMES), ncols=len(QUERY_NAMES), figsize=(18, 9))

    # Row & column labels
    pad = 5  # in points
    for ax, col in zip(axes[0], QUERY_NAMES):
        ax.annotate(col, xy=(0.5, 1), xytext=(0, pad), xycoords='axes fraction',
                    textcoords='offset points', size='large', ha='center', va='baseline')
    for ax in axes[-1]:
        plt.setp(ax, xlabel='Chunkshape (rows)')
    for ax, row in zip(axes[:, 0], DATASET_NAMES):
        ax.annotate(row, xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                    xycoords=ax.yaxis.label, textcoords='offset points',
                    size='large', ha='right', va='center', rotation=90)
        plt.setp(ax, ylabel=r'Query Time ($s/\gamma$)')

    for i, d in enumerate(results.datasets):
        file_sizes = [v['size'] for k, v in results._r.items() if str(d) in k]
        duration = DURATIONS[d]

        # Fetch the total number of photons
        nphot = 0
        for k in results._r:
            if str(d) in k:
                nphot = results._r[k]['nphotons']
                break

        # Print a summary
        print(summary.format(dset_name=DATASET_NAMES[i], gblo=min(file_sizes) / 1024,
                             gbhi=max(file_sizes) / 1024, nphot=nphot / 1e6,
                             ppg=nphot / 1e6 / (min(file_sizes) / 1024)))

        nqphot = 0
        for j, q in enumerate(TEST_QUERIES):
            plt.sca(axes[i, j])
            for s, name in zip(series_settings, SERIES_NAMES):
                res = list(results.query_iter(q, settings=s, dataset=d))
                if not res:
                    continue
                res.sort(key=lambda x: x.chunkshape)

                print('{} - {}: {}'.format(DATASET_NAMES[i], QUERY_NAMES[j], name))
                for r in res:
                    print(r)

                nqphot = res[0].queryn
                chunkshapes = sorted(set(r.chunkshape for r in res))
                queryts = [[r.queryt for r in res if r.chunkshape == cs] for cs in chunkshapes]
                norm = max(nqphot, 1)
                min_queryts = np.array(list(map(mymin, queryts))) / norm
                max_queryts = np.array(list(map(mymax, queryts))) / norm
                mean_queryts = np.array(list(map(np.mean, queryts))) / norm
                n_queryts = np.array(list(map(len, queryts)))

                plt.errorbar(chunkshapes, mean_queryts, yerr=max_queryts - min_queryts,
                             label=name, marker='.')
                # plt.plot(chunkshapes, mean_queryts, label=name, marker='.')

            # plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), useOffset=True)
            plt.ylim(1e-8, 1e-2)
            plt.semilogy()
            plt.xlim(10, None)
            plt.semilogx()
            plt.legend(framealpha=1, frameon=False)
            for n, x, y in zip(n_queryts, chunkshapes, mean_queryts):
                if n > 1:
                    plt.annotate(str(n), (x, y))
            plt.annotate(r'${:.4g} \gamma$'.format(nqphot), (15, 5e-3))

        # minimum of nqphot/chunkshape chunks
        print(('Timesort: ~{0[0]:.0f} - {0[1]:.0f} chunks/s.\n'
               'ResID Sort: ~{1[0]:.0f} - {1[1]:.0f} chunks/rid').format(
            array_range(nphot / all_chunkshapes / duration),
            array_range(phot_per_RID[d] / all_chunkshapes)))

    fig.suptitle('ResID Sort' if not timesort else 'Time Sort')
    fig.subplots_adjust(top=0.94, bottom=0.07, left=0.06, right=0.985, hspace=0.140, wspace=0.215)
    return results
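# Illustrative calls (assumed usage, not from the original listing): build the
# query-time grid for each sort order; the figure title switches accordingly.
# results = report(timesort=False)  # ResID-sorted tables
# results = report(timesort=True)   # time-sorted tables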
from mkidpipeline.config import *
import mkidpipeline as pipe

df = '/scratch/baileyji/mec/data.yml'
pf = '/scratch/baileyji/mec/pipe.yml'
of = '/scratch/baileyji/mec/out.yml'

pipe.logtoconsole()
pcfg = pipe.configure_pipeline(pf)
dataset = pipe.load_data_description(df)
out = MKIDOutputCollection(of, df)

import json
from datetime import datetime

import mkidcore.config
from mkidcore.config import ConfigThing


def parse_obslog(file):
    """Parse a JSON-lines observing log into a list of ConfigThing records,
    converting each record's `utc` string into a datetime."""
    with open(file, 'r') as f:
        lines = f.readlines()
    ret = []
    for l in lines:
        ct = ConfigThing(json.loads(l).items())
        ct.register('utc', datetime.strptime(ct.utc, "%Y%m%d%H%M%S"), update=True)
        ret.append(ct)
    return ret
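# Illustrative use of parse_obslog (the log path is hypothetical): each line of
# the obslog file is one JSON record; entries come back as ConfigThing objects
# whose `utc` key has been parsed into a datetime.
# for entry in parse_obslog('/scratch/baileyji/mec/obslog.json'):
#     print(entry.utc, entry)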
if __name__ == '__main__':
    # from mkidpipeline.tests.h5speed import *
    pipe.logtoconsole(file='/scratch/baileyji/mec/speedtest/lastrun-pold.log')
    pipe.configure_pipeline(resource_filename('mkidpipeline', os.path.join('tests', 'h5speed_pipe.yml')))
    d = pipe.load_data_description(resource_filename('mkidpipeline', os.path.join('tests', 'h5speed_data.yml')))

    # Basic checks
    print(numexpr.get_vml_version())

    b2h_configs = pipe.bin2hdf.gen_configs(d.timeranges)

    # Summary as of 9/27/19:
    # Plain shuffling of the indices seemed the clear winner for a CSI (completely
    # sorted index) on a large 25 MB/s, 900 s dataset.
    # Bit shuffling of the data has no advantage.
    # Plain shuffling of the data reduces file size a bit at a penalty to query time.
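# Sketch (an assumption about how the shuffle variants tested above map onto
# PyTables filter settings; not taken from this file). PyTables exposes both
# byte-shuffle and blosc bit-shuffle through tables.Filters:
import tables

shuffled = tables.Filters(complevel=9, complib='blosc', shuffle=True, bitshuffle=False)
bitshuffled = tables.Filters(complevel=9, complib='blosc', shuffle=False, bitshuffle=True)
unshuffled = tables.Filters(complevel=9, complib='blosc', shuffle=False, bitshuffle=False)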