def evaluate(self, num_queries, done, estimators=None):
    model = self.model
    if isinstance(model, DataParallelPassthrough):
        model = model.module
    model.eval()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    results = {}
    if num_queries:
        if estimators is None:
            estimators = self.MakeProgressiveSamplers(
                model,
                self.train_data if self.factorize else self.table,
                do_fanout_scaling=(self.dataset == 'tpcds'))
            if self.eval_join_sampling:  # None or an int.
                estimators = [
                    estimators_lib.JoinSampling(self.train_data, self.table,
                                                self.eval_join_sampling)
                ]

        assert self.loaded_queries is not None
        num_queries = min(len(self.loaded_queries), num_queries)
        for i in range(num_queries):
            print('Query {}:'.format(i), end=' ')
            query = self.loaded_queries[i]
            self.Query(estimators,
                       oracle_card=None if self.oracle_cards is None else
                       self.oracle_cards[i],
                       query=query,
                       table=self.table,
                       oracle_est=self.oracle)
            if i % 100 == 0:
                for est in estimators:
                    est.report()

        for est in estimators:
            results[str(est) + '_max'] = np.max(est.errs)
            results[str(est) + '_p99'] = np.quantile(est.errs, 0.99)
            results[str(est) + '_p95'] = np.quantile(est.errs, 0.95)
            results[str(est) + '_median'] = np.median(est.errs)
            est.report()

            series = pd.Series(est.query_dur_ms)
            print(series.describe())
            series.to_csv(str(est) + '.csv', index=False, header=False)

    return results
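
# A minimal, standalone sketch of the per-estimator error aggregation done
# above, handy for quick testing outside the trainer. `summarize_errs` is an
# illustrative helper (not part of this codebase), and the example input is
# synthetic.
import numpy as np

def summarize_errs(errs):
    """Compute the same summary stats evaluate() stores per estimator."""
    errs = np.asarray(errs)
    return {
        'max': np.max(errs),
        'p99': np.quantile(errs, 0.99),
        'p95': np.quantile(errs, 0.95),
        'median': np.median(errs),
    }

# e.g. summarize_errs([1.0, 1.1, 2.4, 9.8])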
def evaluate(self, num_queries, done, estimators=None):
    # Global accumulators for true (met*) and estimated (mee*) cardinalities,
    # populated during the self.Query() calls below.
    global met0, mee0, met1, mee1
    met0 = []
    mee0 = []
    met1 = []
    mee1 = []
    model = self.model
    if isinstance(model, DataParallelPassthrough):
        model = model.module
    model.eval()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    results = {}
    if num_queries:
        if estimators is None:
            estimators = self.MakeProgressiveSamplers(
                model,
                self.train_data if self.factorize else self.table,
                do_fanout_scaling=(self.dataset == 'imdb'))
            if self.eval_join_sampling:  # None or an int.
                estimators = [
                    estimators_lib.JoinSampling(self.train_data, self.table,
                                                self.eval_join_sampling)
                ]

        assert self.loaded_queries is not None
        num_queries = min(len(self.loaded_queries), num_queries)
        for i in range(num_queries):
            print('Query {}:'.format(i), end=' ')
            query = self.loaded_queries[i]
            self.Query(estimators,
                       oracle_card=None if self.oracle_cards is None else
                       self.oracle_cards[i],
                       query=query,
                       table=self.table,
                       oracle_est=self.oracle)
            if i % 100 == 0:
                for est in estimators:
                    est.report()

        # for est in estimators:  # temporarily commented out

        # MSE, MAPE, PCCs between estimated and true cardinalities.
        print('len0: ', len(mee0))
        print('len1: ', len(mee1))
        mse0 = mean_squared_error(mee0, met0)
        if len(mee1) != 0:
            mse1 = mean_squared_error(mee1, met1)
        met0 = np.array(met0)
        mee0 = np.array(mee0)
        met1 = np.array(met1)
        mee1 = np.array(mee1)
        PCCs0 = sc.stats.pearsonr(mee0, met0)  # Pearson correlation coefficient
        print('PCCs0:', PCCs0[0])
        if len(mee1) != 0:
            PCCs1 = sc.stats.pearsonr(mee1, met1)  # Pearson correlation coefficient
            print('PCCs1:', PCCs1[0])
        # mse = sum(np.square(met - mee)) / len(met)
        mape0 = sum(np.abs((met0 - mee0) / met0)) / len(met0) * 100
        if len(mee1) != 0:
            mape1 = sum(np.abs((met1 - mee1) / met1)) / len(met1) * 100
        print('MSE0: ', mse0)
        print('MAPE0: ', mape0)
        if len(mee1) != 0:
            print('MSE1: ', mse1)
            print('MAPE1: ', mape1)

        # Dump (estimate, true) pairs for offline analysis.
        dictest = {'est': mee0, 'tr': met0}
        dfest = pd.DataFrame(dictest)
        dfest.to_csv('result' + str(args.update) + '.csv',
                     index=False,
                     header=False)

        for est in estimators:
            results[str(est) + '_max'] = np.max(est.errs)
            results[str(est) + '_p99'] = np.quantile(est.errs, 0.99)
            results[str(est) + '_p95'] = np.quantile(est.errs, 0.95)
            results[str(est) + '_p90'] = np.quantile(est.errs, 0.90)
            results[str(est) + '_mean'] = np.mean(est.errs)
            results[str(est) + '_median'] = np.median(est.errs)
            est.report()

            series = pd.Series(est.query_dur_ms)
            print(series.describe())
            series.to_csv(str(est) + '.csv', index=False, header=False)

    return results
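
# A self-contained sketch of the accuracy metrics computed above, decoupled
# from the met*/mee* globals. The names `accuracy_metrics`, `est_cards`, and
# `true_cards` are illustrative (not from this codebase); the calls mirror
# evaluate()'s use of scikit-learn's mean_squared_error and SciPy's pearsonr.
import numpy as np
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error

def accuracy_metrics(est_cards, true_cards):
    """MSE, MAPE (percent), and Pearson correlation for one query group."""
    est_cards = np.asarray(est_cards, dtype=np.float64)
    true_cards = np.asarray(true_cards, dtype=np.float64)
    mse = mean_squared_error(est_cards, true_cards)
    # MAPE in percent; assumes no true cardinality is zero.
    mape = np.mean(np.abs((true_cards - est_cards) / true_cards)) * 100
    pcc, _ = pearsonr(est_cards, true_cards)
    return {'mse': mse, 'mape': mape, 'pcc': pcc}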