def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc,
                                      target_looptime=1.0, serial_cheat=1,
                                      buffer_size=2, show_serial=True):
    """
    Benchmark harness: produce ``bgfunc`` results in the background while
    ``fgfunc`` consumes them in the foreground, then compare a serial
    baseline against ``ut.buffered_generator`` and ``ut.generate``.

    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    nargs = len(bgargs)
    # Per-item cost of the background producer.
    with ut.Timer('One* call to bgfunc') as t_bgfunc:
        sample_results = [bgfunc(arg) for arg in bgargs]
    bgfunctime = t_bgfunc.ellapsed / nargs
    #fgfunc = ut.is_prime
    # Per-item cost of the foreground consumer.
    with ut.Timer('One* call to fgfunc') as t_fgfunc:
        for item in sample_results:
            fgfunc(item)
    fgfunctime = t_fgfunc.ellapsed / nargs
    # Scale the workload so the whole benchmark takes ~target_looptime.
    est_looptime = bgfunctime + fgfunctime
    _num_loops = round(target_looptime // est_looptime)
    num_data = int(_num_loops // nargs)
    num_loops = int(num_data * nargs)
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([bgargs] * num_data)
    est_tfg = fgfunctime * num_loops
    est_tbg = bgfunctime * num_loops
    est_needed_buffers = fgfunctime / bgfunctime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops', 'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg',
        'serial_cheat', 'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for item in map(bgfunc, data[:len(data) // serial_cheat]):
                fgfunc(item)
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        for item in ut.buffered_generator(map(bgfunc, data),
                                          buffer_size=buffer_size):
            fgfunc(item)
    with ut.Timer('ut.generate') as t3:
        for item in ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1,
                                verbose=0):
            fgfunc(item)

    def parallel_efficiency(ellapsed, est_tfg, est_tbg):
        # 100% would mean every second of background time was overlapped.
        return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100

    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tfg, est_tbg),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),))
    if show_serial:
        prac_tbg = t_serial - est_tfg
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
def _test_buffered_generator_general(func, args, sleepfunc,
                                     target_looptime=1.0, serial_cheat=1,
                                     argmode=False, buffer_size=2):
    """
    Benchmark harness: run ``func`` over ``args`` in the background while
    ``sleepfunc`` occupies the foreground, comparing a serial baseline with
    ``ut.buffered_generator`` and ``ut.generate``.

    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    #serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0
    n_args = len(args)

    # Foreground consumer: forwards the produced value only in argmode.
    if argmode:
        def consume(value):
            sleepfunc(value)
    else:
        def consume(value):
            sleepfunc()

    # Per-item cost of the producer.
    with ut.Timer('One* call to func') as t_fgfunc:
        results = [func(arg) for arg in args]
    functime = t_fgfunc.ellapsed / n_args
    #sleepfunc = ut.is_prime
    # Per-item cost of the consumer.
    with ut.Timer('One* call to sleep func') as t_sleep:
        for value in results:
            consume(value)
    sleeptime = t_sleep.ellapsed / n_args
    # Scale the data so the full benchmark takes roughly target_looptime.
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // n_args)
    num_loops = int(num_data * n_args)
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    est_needed_buffers = sleeptime / functime
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc',
        'serial_cheat', 'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t_base:
            # cheat for serial to make it go faster
            for value in map(func, data[:len(data) // serial_cheat]):
                consume(value)
        t_serial = serial_cheat * t_base.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t_buf:
        for value in ut.buffered_generator(map(func, data),
                                           buffer_size=buffer_size):
            consume(value)
    with ut.Timer('ut.generate') as t_gen:
        for value in ut.generate(func, data, chunksize=buffer_size, quiet=1,
                                 verbose=0):
            consume(value)

    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        # 100% means all producer time was hidden behind the foreground work.
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100

    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t_buf.ellapsed, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t_gen.ellapsed, est_tsleep, est_tfunc),))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_base.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t_buf.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t_gen.ellapsed, est_tsleep, prac_tfunc),))
def _test_buffered_generator_general2(bgfunc, bgargs, fgfunc,
                                      target_looptime=1.0, serial_cheat=1,
                                      buffer_size=2, show_serial=True):
    """
    Benchmark helper: time how well background production overlaps with
    foreground consumption.

    # We are going to generate output of bgfunc in the background while
    # fgfunc is running in the foreground. fgfunc takes results of bgfunc as
    # args.
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    # Estimate per-item cost of the background producer.
    with ut.Timer('One* call to bgfunc') as t_bgfunc:
        results = [bgfunc(arg) for arg in bgargs]
    bgfunctime = t_bgfunc.ellapsed / len(bgargs)
    #fgfunc = ut.is_prime
    # Estimate per-item cost of the foreground consumer.
    with ut.Timer('One* call to fgfunc') as t_fgfunc:
        [fgfunc(x) for x in results]
    fgfunctime = t_fgfunc.ellapsed / len(bgargs)
    # compute amount of loops to run
    est_looptime = (bgfunctime + fgfunctime)
    _num_loops = round(target_looptime // est_looptime)
    num_data = int(_num_loops // len(bgargs))
    num_loops = int(num_data * len(bgargs))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([bgargs] * num_data)
    est_tfg = fgfunctime * num_loops
    est_tbg = bgfunctime * num_loops
    est_needed_buffers = fgfunctime / bgfunctime
    # CONSISTENCY: use ut.repr4 like the other copies of this benchmark in
    # this file (was ut.dict_str, the older name for the same formatting).
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops', 'bgfunctime', 'fgfunctime', 'est_tfg', 'est_tbg',
        'serial_cheat', 'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(bgfunc, data[:len(data) // serial_cheat]):
                fgfunc(x)
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(bgfunc, data),
                                     buffer_size=buffer_size)
        for x in gen_:
            fgfunc(x)
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(bgfunc, data, chunksize=buffer_size, quiet=1,
                           verbose=0)
        for x in gen_:
            fgfunc(x)
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')

    def parallel_efficiency(ellapsed, est_tfg, est_tbg):
        # 100% would mean the background time was fully hidden.
        return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100

    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tfg, est_tbg),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),))
    if show_serial:
        prac_tbg = t_serial - est_tfg
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),))
def process_batch(model, X, y, theano_fn, fix_output=False, buffered=False,
                  show=False, spatial=False, showprog=True, **kwargs):
    """
    Compute the loss over all training batches.

    Passes data to function that splits it into batches and appropriately
    preprocesses the data. Then this function sends that data to theano. Then
    the results are packaged up nicely before returning.

    CommandLine:
        python -m ibeis_cnn --tf process_batch --verbose
        python -m ibeis_cnn --tf process_batch:0 --verbose
        python -m ibeis_cnn --tf process_batch:1 --verbose

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.batch_processing import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.DummyModel(batch_size=128)
        >>> X, y = model.make_random_testdata(num=2000, seed=None)
        >>> model.init_arch()
        >>> theano_fn = model.build_predict_func()
        >>> kwargs = {'X_is_cv2_native': False, 'showprog': True,
        ...           'randomize_batch_order': True}
        >>> outputs_ = process_batch(model, X, y, theano_fn, **kwargs)
        >>> result = ut.dict_str(outputs_)
        >>> print(result)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis_cnn.batch_processing import *  # NOQA
        >>> from ibeis_cnn import models
        >>> model = models.SiameseL2(batch_size=128, data_shape=(32, 32, 1),
        ...                          strict_batch_size=True)
        >>> X, y = model.make_random_testdata(num=2000, seed=None)
        >>> model.init_arch()
        >>> theano_fn = model.build_predict_func()
        >>> kwargs = {'X_is_cv2_native': False, 'showprog': True,
        ...           'randomize_batch_order': True}
        >>> outputs_ = process_batch(model, X, y, theano_fn, **kwargs)
        >>> result = ut.dict_str(outputs_)
        >>> print(result)

    Ignore:
        Xb, yb = batch_iter.next()
        assert Xb.shape == (8, 1, 4, 4)
        yb.shape == (8,)

    Ignore:
        X, y = model.make_random_testdata(num=2000, seed=None)
        kwargs = {'X_is_cv2_native': False, 'showprog': True,
                  'randomize_batch_order': True, 'time_thresh': .5,
                  }
        print('Testing Unbuffered')
        batch_iter = batch_iterator(model, X, y, lbl=theano_fn.name, **kwargs)
        for Xb, yb in ut.ProgressIter(batch_iter, lbl=':EXEC FG'):
            [ut.is_prime(346373) for _ in range(2)]

        # Notice how the progress iters are not interlaced like
        # they are in the unbuffered version
        import sys
        sys.stdout.flush()
        print('Testing Buffered')
        sys.stdout.flush()
        batch_iter2 = batch_iterator(model, X, y, lbl=theano_fn.name, **kwargs)
        batch_iter2 = ut.buffered_generator(batch_iter2, buffer_size=4)
        print('Iterating')
        for Xb, yb in ut.ProgressIter(batch_iter2, lbl=':EXEC FG'):
            [ut.is_prime(346373) for _ in range(2)]
    """
    import vtool as vt
    batch_output_list = []
    # Theano output names; fall back to the variable's repr when unnamed.
    output_names = [
        str(outexpr.variable)
        if outexpr.variable.name is None else
        outexpr.variable.name
        for outexpr in theano_fn.outputs
    ]
    # augmented label list
    batch_target_list = []
    show = VERBOSE_BATCH or show

    # Break data into generated batches
    # generated data with explicit iteration
    batch_iter = batch_iterator(model, X, y, **kwargs)
    if buffered:
        batch_iter = ut.buffered_generator(batch_iter)
    if showprog:
        bs = VERBOSE_BATCH < 1
        # ceil(X.shape[0] / batch_size) without importing math
        num_batches = (X.shape[0] + model.batch_size - 1) // model.batch_size
        # progress iterator should be outside of this function
        batch_iter = ut.ProgressIter(batch_iter, nTotal=num_batches,
                                     lbl=theano_fn.name, freq=10, bs=bs,
                                     adjust=True)
    if y is None:
        # Labels are not known, only one argument
        # NOTE(review): removed a stray `pass` here; the theano call belongs
        # inside the loop so every batch is evaluated — confirm against the
        # original (un-flattened) source.
        for Xb, yb in batch_iter:
            batch_output = theano_fn(Xb)
            batch_output_list.append(batch_output)
    else:
        # TODO: sliced batches
        for Xb, yb in batch_iter:
            # Runs a batch through the network and updates the weights. Just
            # returns what it did
            batch_output = theano_fn(Xb, yb)
            batch_output_list.append(batch_output)
            batch_target_list.append(yb)

            if show:
                # Print the network output for the first batch
                print('--------------')
                # BUGFIX: materialize zip; on Python 3 the lazy zip object
                # would print uselessly instead of the (name, value) pairs.
                print(ut.list_str(list(zip(output_names, batch_output))))
                print('Correct: ', yb)
                print('--------------')
                show = False

    # get outputs of each type
    unstacked_output_gen = ([bop[count] for bop in batch_output_list]
                            for count, name in enumerate(output_names))

    if spatial:
        unstacked_output_gen = list(unstacked_output_gen)
        stacked_output_list = [[] for _ in range(len(unstacked_output_gen))]
        for index, output in enumerate(unstacked_output_gen):
            output = np.vstack(output)
            stacked_output_list[index] = output
    else:
        stacked_output_list = [
            vt.safe_cat(_output_unstacked, axis=0)
            # concatenate_hack(_output_unstacked, axis=0)
            for _output_unstacked in unstacked_output_gen
        ]

    outputs_ = dict(zip(output_names, stacked_output_list))

    if y is not None:
        auglbl_list = np.hstack(batch_target_list)
        outputs_['auglbl_list'] = auglbl_list

    if fix_output:
        # batch iteration may wrap-around returned data. slice off the padding
        # BUGFIX: use floor division; true division yields a float on
        # Python 3, which is not a valid slice index.
        num_inputs = X.shape[0] // model.data_per_label_input
        num_outputs = num_inputs * model.data_per_label_output
        for key in six.iterkeys(outputs_):
            outputs_[key] = outputs_[key][0:num_outputs]

    encoder = getattr(model, 'encoder', None)
    if encoder is not None and 'predictions' in outputs_:
        pred = outputs_['predictions']
        outputs_['labeled_predictions'] = encoder.inverse_transform(pred)
    return outputs_
def _test_buffered_generator_general(func, args, sleepfunc,
                                     target_looptime=1.0, serial_cheat=1,
                                     argmode=False, buffer_size=2):
    """
    Benchmark helper: time background generation of ``func`` output while
    ``sleepfunc`` runs in the foreground.

    # We are going to generate output of func in the background while sleep
    # func is running in the foreground
    # --- Hyperparams
    target_looptime = 1.5  # maximum time to run all loops
    """
    import utool as ut
    #serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0
    # Estimate per-item cost of the producer.
    with ut.Timer('One* call to func') as t_fgfunc:
        results = [func(arg) for arg in args]
    functime = t_fgfunc.ellapsed / len(args)
    #sleepfunc = ut.is_prime
    # Estimate per-item cost of the foreground work (argmode decides whether
    # the produced value is passed along).
    with ut.Timer('One* call to sleep func') as t_sleep:
        if argmode:
            [sleepfunc(x) for x in results]
        else:
            [sleepfunc() for x in results]
    sleeptime = t_sleep.ellapsed / len(args)
    # compute amount of loops to run
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // len(args))
    num_loops = int(num_data * len(args))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    est_needed_buffers = sleeptime / functime
    # CONSISTENCY: use ut.repr4 like the other copy of this benchmark in
    # this file (was ut.dict_str, the older name for the same formatting).
    print('Estimated stats' + ut.repr4(ut.dict_subset(locals(), [
        'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc',
        'serial_cheat', 'buffer_size', 'est_needed_buffers',
    ])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(func, data[:len(data) // serial_cheat]):
                if argmode:
                    sleepfunc(x)
                else:
                    sleepfunc()
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(func, data),
                                     buffer_size=buffer_size)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(func, data, chunksize=buffer_size, quiet=1,
                           verbose=0)
        for x in gen_:
            if argmode:
                sleepfunc(x)
            else:
                sleepfunc()
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')

    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        # 100% would mean the producer time was fully hidden.
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100

    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))