def Main():
    """Load trained checkpoints, build estimators, and run the evaluation.

    Selects checkpoints under ./models/ for ``args.dataset`` (minus any
    blacklisted paths), reconstructs one model per checkpoint, then either
    runs Bayes-net estimators in parallel (``args.run_bn``) or
    progressive-sampling estimators built from the loaded models, finally
    saving per-query errors to ``args.err_csv``.
    """
    # Pin evaluation to a single GPU.  (The original set this env var twice;
    # once is sufficient.)
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # NOTE(review): the original also globbed ``args.glob`` first, but that
    # result was immediately overwritten by this dataset-based glob, so only
    # the dataset glob is kept.
    all_ckpts = glob.glob('./models/' + str(args.dataset) + '*.pt')
    if args.blacklist:
        all_ckpts = [ckpt for ckpt in all_ckpts if args.blacklist not in ckpt]

    selected_ckpts = all_ckpts
    oracle_cards = LoadOracleCardinalities()
    print('ckpts', selected_ckpts)

    if not args.run_bn:
        # OK to load tables now.
        table, train_data, oracle_est = MakeTable()
        cols_to_train = table.columns

    Ckpt = collections.namedtuple(
        'Ckpt', 'epoch model_bits bits_gap path loaded_model seed')
    parsed_ckpts = []

    for s in selected_ckpts:
        # Checkpoint filenames encode model bits, data bits and the training
        # seed, e.g. "...model12.3-data11.2...seed0.pt".  Raw strings keep
        # the '\d' escapes explicit (value-identical to the original
        # non-raw patterns, but without the invalid-escape warning).
        if args.order is None:
            z = re.match(r'.+model([\d\.]+)-data([\d\.]+).+seed([\d\.]+).*.pt',
                         s)
        else:
            z = re.match(
                r'.+model([\d\.]+)-data([\d\.]+).+seed([\d\.]+)-order.*.pt', s)
        assert z
        model_bits = float(z.group(1))
        data_bits = float(z.group(2))
        seed = int(z.group(3))
        bits_gap = model_bits - data_bits

        order = None
        if args.order is not None:
            order = list(args.order)

        if args.heads > 0:
            model = MakeTransformer(cols_to_train=table.columns,
                                    fixed_ordering=order,
                                    seed=seed)
        else:
            if args.dataset in ['dmv-tiny', 'dmv', 'forest', 'power']:
                model = MakeMade(
                    scale=args.fc_hiddens,
                    cols_to_train=table.columns,
                    seed=seed,
                    fixed_ordering=order,
                )
            else:
                assert False, args.dataset

        assert order is None or len(order) == model.nin, order
        ReportModel(model)
        print('Loading ckpt:', s)
        model.load_state_dict(torch.load(s))
        model.eval()
        print(s, bits_gap, seed)

        parsed_ckpts.append(
            Ckpt(path=s,
                 epoch=None,
                 model_bits=model_bits,
                 bits_gap=bits_gap,
                 loaded_model=model,
                 seed=seed))

    # Estimators to run.
    if args.run_bn:
        estimators = RunNParallel(estimator_factory=MakeBnEstimators,
                                  parallelism=50,
                                  rng=np.random.RandomState(1234),
                                  num=args.num_queries,
                                  num_filters=None,
                                  oracle_cards=oracle_cards)
    else:
        estimators = [
            estimators_lib.ProgressiveSampling(
                c.loaded_model,
                table,
                args.psample,
                device=DEVICE,
                shortcircuit=args.column_masking) for c in parsed_ckpts
        ]
        for est, ckpt in zip(estimators, parsed_ckpts):
            est.name = str(est) + '_{}_{:.3f}'.format(ckpt.seed, ckpt.bits_gap)

        if args.inference_opts:
            print('Tracing forward_with_encoded_input()...')
            for est in estimators:
                encoded_input = est.model.EncodeInput(
                    torch.zeros(args.psample, est.model.nin, device=DEVICE))
                # NOTE: this line works with torch 1.0.1.post2 (but not 1.2).
                # The 1.2 version changes the API to
                # torch.jit.script(est.model) and requires an annotation --
                # which was found to be slower.
                est.traced_fwd = torch.jit.trace(
                    est.model.forward_with_encoded_input, encoded_input)

        if args.run_sampling:
            SAMPLE_RATIO = {'dmv': [0.0013]}  # ~1.3MB.
            for p in SAMPLE_RATIO.get(args.dataset, [0.01]):
                estimators.append(estimators_lib.Sampling(table, p=p))

        if args.run_maxdiff:
            estimators.append(
                estimators_lib.MaxDiffHistogram(table, args.maxdiff_limit))

        # Other estimators can be appended as well.

        if len(estimators):
            RunN(table,
                 cols_to_train,
                 estimators,
                 rng=np.random.RandomState(1234),
                 num=args.num_queries,
                 log_every=1,
                 num_filters=None,
                 oracle_cards=oracle_cards,
                 oracle_est=oracle_est)

    SaveEstimators(args.err_csv, estimators)
    print('...Done, result:', args.err_csv)
def Main():
    """Evaluate cardinality estimators over checkpoints matching ``args.glob``.

    Either runs Bayes-net estimators in parallel (``args.run_bn``) and
    prints q-error / MSE / MAPE / Pearson metrics, or builds one
    progressive-sampling estimator per loaded checkpoint and evaluates
    with ``RunN``.

    NOTE(review): another ``Main`` definition appears earlier in this
    source; if both live in the same module, this later definition
    shadows the earlier one at import time -- confirm which is intended.
    """
    all_ckpts = glob.glob('./models/{}'.format(args.glob))
    if args.blacklist:
        all_ckpts = [ckpt for ckpt in all_ckpts if args.blacklist not in ckpt]

    selected_ckpts = all_ckpts
    oracle_cards = LoadOracleCardinalities()
    print('ckpts', selected_ckpts)

    if not args.run_bn:
        # OK to load tables now
        table, train_data, oracle_est = MakeTable()
        cols_to_train = table.columns

    # One record per successfully parsed/loaded checkpoint.
    Ckpt = collections.namedtuple(
        'Ckpt', 'epoch model_bits bits_gap path loaded_model seed')
    parsed_ckpts = []

    for s in selected_ckpts:
        # Checkpoint filenames encode model bits, data bits and seed.
        # NOTE(review): patterns are not raw strings; '\d' works only
        # because Python leaves unknown escapes intact (DeprecationWarning
        # on newer versions).
        if args.order is None:
            z = re.match('.+model([\d\.]+)-data([\d\.]+).+seed([\d\.]+).*.pt',
                         s)
        else:
            z = re.match(
                '.+model([\d\.]+)-data([\d\.]+).+seed([\d\.]+)-order.*.pt', s)
        assert z
        model_bits = float(z.group(1))
        data_bits = float(z.group(2))
        seed = int(z.group(3))
        bits_gap = model_bits - data_bits

        order = None
        if args.order is not None:
            order = list(args.order)

        if args.heads > 0:
            model = MakeTransformer(cols_to_train=table.columns,
                                    fixed_ordering=order,
                                    seed=seed)
        else:
            if args.dataset in ['dmv-tiny', 'dmv']:
                model = MakeMade(
                    scale=args.fc_hiddens,
                    cols_to_train=table.columns,
                    seed=seed,
                    fixed_ordering=order,
                )
            else:
                assert False, args.dataset

        assert order is None or len(order) == model.nin, order
        ReportModel(model)
        print('Loading ckpt:', s)
        model.load_state_dict(torch.load(s))
        model.eval()
        print(s, bits_gap, seed)

        parsed_ckpts.append(
            Ckpt(path=s,
                 epoch=None,
                 model_bits=model_bits,
                 bits_gap=bits_gap,
                 loaded_model=model,
                 seed=seed))

    # Estimators to run.
    if args.run_bn:
        # Parallel Bayes-net evaluation; returns raw estimates and truths
        # plus timing figures and the estimator objects.
        ests, truths, training_time, test_time, estimators = RunNParallelNew(
            estimator_factory=MakeBnEstimatorsNew,
            parallelism=50,
            rng=np.random.RandomState(1234),
            num=args.num_queries,
            num_filters=None,
            oracle_cards=oracle_cards,
            test_file_path=args.test_file_path,
            single_data_path=args.single_data_path,
            join_data_path=args.join_data_path,
            join_num_path=args.join_num_path,
            join_sample_size=args.join_sample_size)
        # with open(args.test_file_path+'.bayesian.model', 'wb') as f:
        #     dill.dump(models, f)
        # pickle.dump(models)
        # Drop queries whose estimate is negative (presumably a failure
        # sentinel from the estimator -- verify against RunNParallelNew)
        # before computing metrics.
        ests_new = []
        truths_new = []
        for e, t in zip(ests, truths):
            if e >= 0:
                ests_new.append(e)
                truths_new.append(t)
        print('Training Time {}s'.format(training_time))
        print('Testing Time Per Query {}ms'.format(test_time))
        print_qerror(np.array(ests_new), np.array(truths_new))
        print_mse(np.array(ests_new), np.array(truths_new))
        print_mape(np.array(ests_new), np.array(truths_new))
        print_pearson_correlation(np.array(ests_new), np.array(truths_new))
    else:
        estimators = [
            estimators_lib.ProgressiveSampling(c.loaded_model,
                                               table,
                                               args.psample,
                                               device=DEVICE,
                                               shortcircuit=args.column_masking)
            for c in parsed_ckpts
        ]
        for est, ckpt in zip(estimators, parsed_ckpts):
            est.name = str(est) + '_{}_{:.3f}'.format(ckpt.seed, ckpt.bits_gap)

        if args.inference_opts:
            print('Tracing forward_with_encoded_input()...')
            for est in estimators:
                encoded_input = est.model.EncodeInput(
                    torch.zeros(args.psample, est.model.nin, device=DEVICE))
                # NOTE: this line works with torch 1.0.1.post2 (but not 1.2).
                # The 1.2 version changes the API to
                # torch.jit.script(est.model) and requires an annotation --
                # which was found to be slower.
                est.traced_fwd = torch.jit.trace(
                    est.model.forward_with_encoded_input, encoded_input)

        if args.run_sampling:
            SAMPLE_RATIO = {'dmv': [0.0013]}  # ~1.3MB.
            for p in SAMPLE_RATIO.get(args.dataset, [0.01]):
                estimators.append(estimators_lib.Sampling(table, p=p))

        if args.run_maxdiff:
            estimators.append(
                estimators_lib.MaxDiffHistogram(table, args.maxdiff_limit))

        # Other estimators can be appended as well.

        if len(estimators):
            RunN(table,
                 cols_to_train,
                 estimators,
                 rng=np.random.RandomState(1234),
                 num=args.num_queries,
                 log_every=1,
                 num_filters=None,
                 oracle_cards=oracle_cards,
                 oracle_est=oracle_est)