def mass_query_avg(self, table, x="ss_list_price", y="ss_wholesale_cost", percent=5, number=default_mass_query_number, b_random_queries=True): q_range_half_length = (self.q_max - self.q_min) * percent / 100.0 / 2.0 self.logger.logger.info("Start generating queries") if b_random_queries: random.seed(1.0) query_centres = [] for i in range(number): query_centres.append(random.uniform(self.q_min, self.q_max)[0]) else: query_centres = generate_random.make_user_distribution(self.qe.kde, self.q_min, self.q_max, n=number) self.logger.logger.info("Finish generating " + str(number) + " queries, the center points are:") self.logger.logger.info(str(query_centres)) exact_results = [] exact_times = [] approx_results = [] approx_times = [] for i in range(number): self.logger.logger.info("start query No." + str(i + 1) + " out of " + str(number)) # random.uniform(self.q_min, self.q_max) q_centre = query_centres[i] q_left = q_centre - q_range_half_length q_right = q_centre + q_range_half_length sqlStr = "SELECT AVG(" + y + ") FROM " + str(table) + " WHERE " + \ x + " BETWEEN " + str(q_left[0]) + " AND " + str(q_right[0]) self.logger.logger.info(sqlStr) approx_result, approx_time = self.query_2d_avg(l=q_left, h=q_right) self.logger.logger.info(approx_result) exact_result, exact_time = self.query2mysql(sql=sqlStr) self.logger.logger.info(exact_result) if (exact_result is not None) and (exact_result is not 0): exact_results.append(exact_result) exact_times.append(exact_time) approx_results.append(approx_result) approx_times.append(approx_time) else: self.logger.logger.warning( "MYSQL returns None, so this record is ignored.") self.logger.logger.warning("MYSQL query results: " + str(exact_results)) self.logger.logger.warning("MYSQL query time cost: " + str(exact_times)) self.logger.logger.warning("Approximate query results: " + str(approx_results)) self.logger.logger.warning("Approximate query time cost: " + str(approx_times)) return exact_results, approx_results, exact_times, approx_times
def mass_query_correlation(self,table,x="ss_list_price",y="ss_wholesale_cost",percent=5,number=default_mass_query_number): q_range_half_length = (self.q_max-self.q_min)*percent/100.0/2.0 # random.seed(1.0) random_left_boundary = self.q_min + q_range_half_length random_right_boundary = self.q_max - q_range_half_length query_centres = generate_random.make_user_distribution(self.qe.kde, 30, 100,n=number) exact_results = [] exact_times = [] approx_results = [] approx_times = [] for i in range(number): self.logger.logger.info("start query No."+str(i+1) +" out of "+str(number)) q_centre = query_centres[i] #random.uniform(self.q_min,self.q_max) q_left = q_centre - q_range_half_length q_right = q_centre + q_range_half_length sqlStr = "SELECT CORR("+x+", "+y+") FROM " + str(table) +" WHERE "+x+" BETWEEN " + str(q_left[0]) +" AND " +str(q_right[0]) self.logger.logger.info(sqlStr) approx_result, approx_time = self.query_2d_correlation(l=q_left,h=q_right) self.logger.logger.info(approx_result) exact_result, exact_time = self.query2mysql(sql=sqlStr) self.logger.logger.info(exact_result) if (exact_result is not None) and (exact_result is not 0): exact_results.append(exact_result) exact_times.append(exact_time) approx_results.append(approx_result) approx_times.append(approx_time) else: self.logger.logger.warning("MYSQL returns None, so this record is ignored.") self.logger.logger.warning("MYSQL query results: " + str(exact_results)) self.logger.logger.warning("MYSQL query time cost: " + str(exact_times)) self.logger.logger.warning("Approximate query results: " + str(approx_results)) self.logger.logger.warning("Approximate query time cost: " + str(approx_times)) self.logger.logger.info("") return exact_results, approx_results, exact_times, approx_times
def generate_queries(self, table, x="ss_list_price", y="ss_wholesale_cost", percent=5, number=default_mass_query_number, b_random_queries=False, mode=2, file_pre="etrade_"): """Summary Args: table (TYPE): TABLE name x (str, optional): x y (str, optional): y percent (int, optional): query width, or query domain number (TYPE, optional): number of queries generated b_random_queries (bool, optional): Description mode (str, optional): 1 for percentile queries, QRegQL; 2 for other queries, QRegQL; 1 for percentile queries, SQL or HIVEQL; 2 for other queries, SQL or HIVEQL; """ if mode is 1: file_name_qreg = file_pre + "percentile" + str(percent) + ".qreg" file_name_hiveql = file_pre + \ "percentile" + str(percent) + ".hiveql" elif mode is 2: file_name_qreg = file_pre + "queries" + str(percent) + ".qreg" file_name_hiveql = file_pre + "queries" + str(percent) + ".hiveql" else: self.logger.logger.error( "Failed to generate queries, no mode selected!") sys.exit(errno.EPERM) q_range_half_length = (self.q_max - self.q_min) * percent / 100.0 / 2.0 self.logger.logger.info("Start generating queries") if b_random_queries: random.seed(1.0) query_centres = [] for i in range(number): if mode is 1: # _start = (self.q_max - self.q_min) * 0.4 # query_centres.append(random.uniform( # self.q_min + _start, self.q_max)[0]) query_centres.append(random.uniform(0.4, 1)) else: query_centres.append( random.uniform(self.q_min, self.q_max)[0]) else: query_centres = generate_random.make_user_distribution(self.qe.kde, self.q_min, self.q_max, n=number) self.logger.logger.info("Finish generating " + str(number) + " queries, the center points are:") self.logger.logger.info(str(query_centres)) exact_results = [] exact_times = [] approx_results = [] approx_times = [] if mode is 1: with open(file_name_hiveql, 'w+') as f_hiveql: with open(file_name_qreg, 'w+') as f_qreg: for i in range(number): q_centre = query_centres[i] qregStr = str(q_centre) sqlStr = "SELECT percentile_cont(" + x + \ ", " + str(q_centre) + ") FROM " + str(table) f_hiveql.write(sqlStr + "\n") f_qreg.write(qregStr + "\n") else: with open(file_name_hiveql, 'w+') as f_hiveql: with open(file_name_qreg, 'w+') as f_qreg: aggregates = ["COUNT", "SUM", "AVG", "MIN", "MAX"] for aggregate in aggregates: for i in range(number): q_centre = query_centres[i] q_left = q_centre - q_range_half_length q_right = q_centre + q_range_half_length qregStr = str(q_left[0]) + ", " + str(q_right[0]) sqlStr = "SELECT " + aggregate + "(" + y + ") FROM " + str(table) + " WHERE " + \ x + " BETWEEN " + \ str(q_left[0]) + " AND " + str(q_right[0]) f_hiveql.write(sqlStr + "\n") if aggregate is "COUNT": f_qreg.write(qregStr + "\n") for i in range(number): q_centre = query_centres[i] q_left = q_centre - q_range_half_length q_right = q_centre + q_range_half_length qregStr = str(q_left[0]) + ", " + str(q_right[0]) sqlStr = "SELECT " + "VARIANCE" + "(" + x + ") FROM " + str(table) + " WHERE " + \ x + " BETWEEN " + \ str(q_left[0]) + " AND " + str(q_right[0]) f_hiveql.write(sqlStr + "\n") self.logger.logger.info("Queries are written to file " + file_name_qreg + " and " + file_name_hiveql)