def _freeze_script(fns, freeze_dict, python_cmd):
    """Freeze every script in ``fns`` that passes hadoopy's script check.

    Each script is first handed to ``hadoopy._runner._check_script`` together
    with the ``*.py`` files found in the script's own directory. A script for
    which that check raises ``ValueError`` is logged and skipped.

    Args:
        fns: Iterable of script paths to consider.
        freeze_dict: Mapping updated in place with
            ``hadoopy._freeze.FREEZE_CACHE`` after each freeze. Scripts that
            are already keys of this mapping are not frozen again.
            NOTE(review): presumably keyed by script path -- confirm against
            ``FREEZE_CACHE``.
        python_cmd: Forwarded unchanged to ``_check_script``.
    """
    for fn in fns:
        # os.path.join keeps the pattern relative when ``fn`` has no directory
        # component ('' + '*.py' -> '*.py'); plain string concatenation would
        # produce '/*.py' and glob the filesystem root instead.
        sibling_pattern = os.path.join(os.path.dirname(fn), '*.py')
        try:
            hadoopy._runner._check_script(fn,
                                          files=glob.glob(sibling_pattern),
                                          python_cmd=python_cmd)
        except ValueError:
            logging.warning('prefreeze: Skipping script[%s] as it could not be executed', fn)
            continue
        logging.info('prefreeze: Freezing script[%s]', fn)
        if fn in freeze_dict:
            # Already frozen earlier; nothing more to do for this script.
            continue
        hadoopy.freeze_script(fn)
        freeze_dict.update(hadoopy._freeze.FREEZE_CACHE)
def throughput_test(launcher):
    """Measure job throughput in key/value pairs per second and print it.

    Writes ``num_kvs`` copies of one small key/value pair to a typed-bytes
    input file under a timestamped ``_hadoopy_bench/`` directory, copies that
    file until there are ``num_files`` inputs, runs ``time_job.py`` through
    ``launcher`` and prints ``(num_kvs * num_files) / elapsed_seconds``.

    The benchmark directory is removed afterwards, including when copying,
    freezing or the launched job raises.

    Args:
        launcher: Callable invoked as
            ``launcher(input_path, output_path, script_path)``.
    """
    import itertools

    output_path = '_hadoopy_bench/%f' % time.time()
    v = 'blah'
    kv = (v, {'client_time': time.time(), 'value_len': len(v), 'count': 0})
    num_files = 3
    num_kvs = 10000000
    # itertools.repeat yields the same pair lazily on both Python 2 and 3,
    # so no ``xrange`` and no 10M-element list is needed.
    hadoopy.writetb(output_path + '/input/0', itertools.repeat(kv, num_kvs))
    try:
        for x in range(1, num_files):
            hadoopy.cp(output_path + '/input/0', output_path + '/input/%d' % x)
        hadoopy.freeze_script('time_job.py')  # Factor out Pyinstaller time
        st = time.time()
        launcher(output_path + '/input', output_path + '/output', 'time_job.py')
        print((num_kvs * num_files) / (time.time() - st))
    finally:
        # Always drop the benchmark data so a failed run does not leave
        # the generated input files behind.
        hadoopy.rmr(output_path)
def _freeze_script(script_path):
    """Return the frozen form of ``script_path``, freezing it on first use.

    Results are memoized in the module-level ``_FROZEN_PATHS`` mapping, keyed
    by ``script_path``, so ``hadoopy.freeze_script`` is only called for paths
    that are not yet in that mapping.

    Args:
        script_path: Path of the script to freeze; also the cache key.

    Returns:
        Whatever ``hadoopy.freeze_script(script_path)`` returned when the
        path was first frozen.
    """
    # Imported here rather than at module import time, as in the original.
    import hadoopy

    if script_path in _FROZEN_PATHS:
        return _FROZEN_PATHS[script_path]

    frozen = hadoopy.freeze_script(script_path)
    _FROZEN_PATHS[script_path] = frozen
    return frozen