def test2():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl])
    rp = con.execute(s)
    results = rp.fetchall()
    print(results)
def test2(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl])
    rp = con.execute(s)
    results = rp.fetchall()
    print(results)
def test_make_file_of_unique_runs_and_subs(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    s = select([tbl.c.run, tbl.c.subrun])
    rp = con.execute(s)
    results = rp.fetchall()
    # deduplicate the (run, subrun) pairs, then sort by subrun
    runs_subs = {tuple(row) for row in results}
    sorted_runs_subs = sorted(runs_subs, key=lambda x: x[1])
    with open('runs_subs.txt', 'w') as f:
        for run_sub in sorted_runs_subs:
            f.write(str(run_sub) + '\n')
def test1():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    ins = tbl.insert().values(
        run=1, subrun=1, gate=1, phys_evt=1, segment=0,
        prob00=0.95, prob01=0.05, prob02=0.00, prob03=0.00,
        prob04=0.00, prob05=0.00, prob06=0.00, prob07=0.00,
        prob08=0.00, prob09=0.00, prob10=0.00)
    result = con.execute(ins)
    return result
def test1(dbname):
    metadata = MetaData()
    eng = predictiondb.get_engine(dbname)
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(metadata, eng)
    ins = tbl.insert().values(
        run=1, subrun=1, gate=1, phys_evt=1, segment=0,
        prob00=0.95, prob01=0.05, prob02=0.00, prob03=0.00,
        prob04=0.00, prob05=0.00, prob06=0.00, prob07=0.00,
        prob08=0.00, prob09=0.00, prob10=0.00)
    result = con.execute(ins)
    return result
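# Hypothetical usage sketch (not part of the original source): a quick round
# trip with the helpers above, assuming a local 'prediction' database exists
# and `predictiondb` is importable.
def _demo_roundtrip():
    result = test1('prediction')        # insert one 11-segment row
    print(result.inserted_primary_key)  # SQLAlchemy exposes the new key
    test2('prediction')                 # select and print every row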
def categorical_predict(build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
                        get_eventids_hits_and_targets_fn,
                        get_id_tagged_inputlist_fn):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction DBs quickly, or to run over data).

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice
    a tuple of (eventids, [inputs], targets), where `[inputs]` might hold
    a single view or all three, etc.
    """
    logger.info("Loading data for prediction...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data'])

    metadata = None

    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        logger.info("Cannot import sqlalchemy...")
        # update the run options, not a local flag, so the `write_db`
        # checks below actually see that the DB is unavailable
        runopts['write_db'] = False

    if runopts['write_db']:
        db_tbl_fun = None
        if networkstr['noutputs'] == 67:
            db_tbl_fun = predictiondb.get_67segment_prediction_table
        elif networkstr['noutputs'] == 11:
            db_tbl_fun = predictiondb.get_11segment_prediction_table
        else:
            raise Exception('Invalid number of outputs for DB tables.')
        tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata, eng,
                                            get_table_fn=db_tbl_fun)

    # Prepare Theano variables for inputs
    inputlist = networkstr['input_list']

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'],
                           imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    logger.info("Starting prediction...")
    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    verbose_evt_print_freq = hyperpars['batchsize'] * 4
    for i, data_file in enumerate(runopts['data_file_list']):
        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set,
                                                     hyperpars['batchsize'],
                                                     shuffle=False)
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            logger.debug("   dset sources: {}".format(
                test_set.provides_sources))

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = get_id_tagged_inputlist_fn(data)
                probs, pred = pred_fn(*hits_list)
                evtcounter += hyperpars['batchsize']
                if runopts['write_db']:
                    # `j`, not `i`, to avoid shadowing the file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info("processed {}/{}".format(
                            evtcounter, used_data_size))
            t1 = time.time()
            logger.info(
                "  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    logger.info("Finished producing predictions!")
def categorical_predict(build_cnn_fn, hyperpars, imgdat, runopts, networkstr,
                        get_eventids_hits_and_targets_fn,
                        get_id_tagged_inputlist_fn):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction DBs quickly, or to run over data).

    `get_eventids_hits_and_targets_fn` needs to extract from a data slice
    a tuple of (eventids, [inputs], targets), where `[inputs]` might hold
    a single view or all three, etc.
    """
    logger.info("Loading data for prediction...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(runopts['data_file_list'])
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, runopts['test_all_data'])

    metadata = None

    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        logger.info("Cannot import sqlalchemy...")
        # update the run options, not a local flag, so the `write_db`
        # checks below actually see that the DB is unavailable
        runopts['write_db'] = False

    if runopts['write_db']:
        tstamp = get_tstamp_from_model_name(runopts['save_model_file'])
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata, eng)

    # Prepare Theano variables for inputs
    inputlist = networkstr['input_list']

    # Build the model
    network = build_cnn_fn(inputlist=inputlist,
                           imgw=imgdat['imgw'],
                           imgh=imgdat['imgh'],
                           convpooldictlist=networkstr['topology'],
                           nhidden=networkstr['nhidden'],
                           dropoutp=networkstr['dropoutp'],
                           noutputs=networkstr['noutputs'],
                           depth=networkstr['img_depth'])
    with np.load(runopts['save_model_file']) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    logger.info("Starting prediction...")
    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    verbose_evt_print_freq = hyperpars['batchsize'] * 4
    for i, data_file in enumerate(runopts['data_file_list']):
        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if runopts['test_all_data']:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set,
                                                     hyperpars['batchsize'],
                                                     shuffle=False)
            t1 = time.time()
            logger.info("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            logger.debug("   dset sources: {}".format(
                test_set.provides_sources))

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = get_id_tagged_inputlist_fn(data)
                probs, pred = pred_fn(*hits_list)
                evtcounter += hyperpars['batchsize']
                if runopts['write_db']:
                    # `j`, not `i`, to avoid shadowing the file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if runopts['be_verbose']:
                    if evtcounter % verbose_evt_print_freq == 0:
                        logger.info("processed {}/{}".format(
                            evtcounter, used_data_size))
            t1 = time.time()
            logger.info(
                "  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    logger.info("Finished producing predictions!")
def categorical_predict(build_cnn=None, data_file_list=None, views='xuv',
                        imgw=50, imgh=50, target_idx=5,
                        save_model_file='./params_file.npz',
                        be_verbose=False, convpooldictlist=None,
                        nhidden=None, dropoutp=None, write_db=True,
                        test_all_data=False, debug_print=False, noutputs=11):
    """
    Make predictions based on the model _only_ (e.g., this routine should
    be used to produce prediction DBs quickly, or to run over data).

    noutputs=11 for z-segments; other values are for planecodes, etc.
    """
    print("Loading data for testing...")
    train_sizes, valid_sizes, test_sizes = \
        get_and_print_dataset_subsizes(data_file_list)
    used_sizes, used_data_size = get_used_data_sizes_for_testing(
        train_sizes, valid_sizes, test_sizes, test_all_data)

    metadata = None

    try:
        import predictiondb
        from sqlalchemy import MetaData
    except ImportError:
        print("Cannot import sqlalchemy...")
        write_db = False

    if write_db:
        tstamp = get_tstamp_from_model_name(save_model_file)
        metadata = MetaData()
        dbname = 'prediction' + tstamp
        eng = predictiondb.get_engine(dbname)
        con = predictiondb.get_connection(eng)
        tbl = predictiondb.get_active_table(metadata, eng)

    # Prepare Theano variables for inputs
    input_var_x = T.tensor4('inputs')
    input_var_u = T.tensor4('inputs')
    input_var_v = T.tensor4('inputs')
    inputlist = build_inputlist(input_var_x, input_var_u, input_var_v, views)

    # Build the model
    network = build_cnn(inputlist=inputlist, imgw=imgw, imgh=imgh,
                        convpooldictlist=convpooldictlist,
                        nhidden=nhidden, dropoutp=dropoutp,
                        noutputs=noutputs)
    with np.load(save_model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Compute the prediction
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction_values = T.argmax(test_prediction, axis=1)
    pred_fn = theano.function(inputlist,
                              [test_prediction, test_prediction_values],
                              allow_input_downcast=True)

    print("Starting prediction...")
    test_slices = []
    for tsize in used_sizes:
        test_slices.append(slices_maker(tsize, slice_size=50000))
    test_set = None

    evtcounter = 0
    batch_size = 500
    evt_print_freq = batch_size * 4
    for i, data_file in enumerate(data_file_list):
        for tslice in test_slices[i]:
            t0 = time.time()
            test_set = None
            if test_all_data:
                test_set = load_all_datasubsets(data_file, tslice)
            else:
                test_set = load_datasubset(data_file, 'test', tslice)
            _, test_dstream = make_scheme_and_stream(test_set, batch_size,
                                                     shuffle=False)
            t1 = time.time()
            print("  Loading slice {} from {} took {:.3f}s.".format(
                tslice, data_file, t1 - t0))
            if debug_print:
                print("   dset sources:", test_set.provides_sources)

            t0 = time.time()
            for data in test_dstream.get_epoch_iterator():
                eventids, hits_list = \
                    get_id_tagged_inputlist_from_data(data, views)
                probs, pred = pred_fn(*hits_list)
                evtcounter += batch_size
                if write_db:
                    # `j`, not `i`, to avoid shadowing the file index above
                    for j, evtid in enumerate(eventids):
                        filldb(tbl, con, evtid, pred[j], probs[j])
                if be_verbose:
                    if evtcounter % evt_print_freq == 0:
                        print("processed {}/{}".format(
                            evtcounter, used_data_size))
            t1 = time.time()
            print("  -Iterating over the slice took {:.3f}s.".format(t1 - t0))

            del test_set
            del test_dstream

    print("Finished producing predictions!")
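# `filldb` is defined elsewhere; a minimal sketch consistent with the insert
# patterns in `test1`/`test3`, assuming a hypothetical `decode_eventid`
# helper that unpacks an encoded event id into its components:
def filldb_sketch(tbl, con, evtid, pred, probs):
    """Hypothetical: insert one prediction row keyed by event id."""
    run, subrun, gate, phys_evt = decode_eventid(evtid)  # assumed helper
    vals = {'run': run, 'subrun': subrun, 'gate': gate,
            'phys_evt': phys_evt, 'segment': int(pred)}
    for k, p in enumerate(probs):
        vals['prob%02d' % k] = float(p)  # prob00, prob01, ...
    ins = tbl.insert().values(**vals)
    return con.execute(ins)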
def test3():
    metadata = MetaData()
    eng = predictiondb.get_engine('prediction')
    con = predictiondb.get_connection(eng)
    tbl = predictiondb.get_active_table(
        metadata, eng, predictiondb.get_67segment_prediction_table)
    ins = tbl.insert().values(
        run=1, subrun=1, gate=1, phys_evt=1, segment=0,
        prob00=0.80, prob01=0.05, prob02=0.00, prob03=0.00, prob04=0.00,
        prob05=0.01, prob06=0.00, prob07=0.00, prob08=0.00, prob09=0.00,
        prob10=0.01, prob11=0.01, prob12=0.00, prob13=0.00, prob14=0.00,
        prob15=0.01, prob16=0.01, prob17=0.00, prob18=0.00, prob19=0.00,
        prob20=0.01, prob21=0.00, prob22=0.00, prob23=0.00, prob24=0.00,
        prob25=0.01, prob26=0.00, prob27=0.00, prob28=0.00, prob29=0.00,
        prob30=0.01, prob31=0.00, prob32=0.00, prob33=0.00, prob34=0.00,
        prob35=0.01, prob36=0.00, prob37=0.00, prob38=0.00, prob39=0.00,
        prob40=0.01, prob41=0.00, prob42=0.00, prob43=0.00, prob44=0.00,
        prob45=0.01, prob46=0.00, prob47=0.00, prob48=0.00, prob49=0.00,
        prob50=0.01, prob51=0.00, prob52=0.00, prob53=0.00, prob54=0.00,
        prob55=0.01, prob56=0.00, prob57=0.00, prob58=0.00, prob59=0.00,
        prob60=0.01, prob61=0.00, prob62=0.00, prob63=0.00, prob64=0.00,
        prob65=0.01, prob66=0.00)
    result = con.execute(ins)
    return result