def emp_de_two(tables, cols, col_funcs):
    '''
    Project every table onto the requested columns, then merge the
    projections into one table.

    :param tables: a list of tables, each a dict of col_name (key) and
        entries (value)
    :param cols: a list of the col_names to keep
    :param col_funcs: a dict of column functions; kept in the signature for
        callers, but not read here (column functions are applied by the caller)
    :return: the result of man.combine over the reduced tables, i.e. a single
        dict with col_name (key) and entries (value)
    '''
    # Step 1: cut each table down to the requested columns.
    trimmed_tables = [man.reduce_tb(table, cols) for table in tables]
    # Step 2: merge the trimmed tables into a single table.
    return man.combine(trimmed_tables)
def test_reduce_table(self):
    # reduce_tb on tb6 must keep exactly the requested columns (col3, col5).
    expected = {'col3': self.col3, 'col5': self.col5}
    reduced = man.reduce_tb(self.tb6, ['col3', 'col5'])

    # Columns that were not requested must be gone ...
    for dropped in ('col1', 'col2', 'col4'):
        self.assertIsNone(reduced.get(dropped))
    # ... the requested ones must still be present ...
    for kept in ('col3', 'col5'):
        self.assertIsNotNone(reduced.get(kept))
    # ... and the whole table must match the expected one.
    self.assertEqual(expected, reduced)
def atomic_query(query_str, tables):
    '''
    Does an atomic query using the provided tables.

    The clauses are applied in this order: ONDE (row filter),
    ORDENATELOS X (ordering), AGRUPATELOS X / TENIENDO (grouping and group
    filter), the EMPRESTA column functions, DIVERGENTE (duplicate removal)
    and finally SOLO (row cut-off).

    :param query_str: an atomic query is a valid query with one EMPRESTA and
        one DE
    :param tables: a dict of tb_name (keys): tables (values). Each table is a
        dict of col_names (keys): col_entries (values)
    :return: table (a dict), col_order (a list of col_names for printing)
    :raises IndexError: if the TENIENDO clause applies none of the functions
        named in eva.FUNC
    '''
    # Get list of intervals (start, end) to mark the start and end of
    # sub-exprns, then cut out and label them: "keyword + [variables]"
    intervals = get_exprn_intervals(query_str)
    sub_exprns = [query_str[itv[0]: itv[1]] for itv in intervals]
    l_sub_exprns = label_exprns(sub_exprns)

    # Get the queried table names and columns
    tnames = de(l_sub_exprns[' DE '])
    cols, col_funcs = emp(l_sub_exprns['EMPRESTA '])

    # ONDE: eval_onde returns a dict {tname: table} holding the filtered
    # tables. Without a condition the queried tables are used unfiltered.
    onde = l_sub_exprns.get(' ONDE ')
    if onde:
        filtered_tbs = eval_onde(onde.replace('ONDE', '', 1).strip(), tables)
        tb_list = [filtered_tbs[tname] for tname in tnames]
    else:
        tb_list = [tables.get(tname) for tname in tnames]

    # ORDENATELOS X: order the tables if the clause is present
    order = l_sub_exprns.get(' ORDENATELOS X ')
    if order is not None:
        tb_list = order_cols(order.replace(' ORDENATELOS X ', ''), tb_list)

    # AGRUPATELOS X / TENIENDO: parse each clause once; both the column
    # collection and the grouping step below need the same text.
    grp_line = l_sub_exprns.get(' AGRUPATELOS X ')
    grp_fil_line = l_sub_exprns.get(' TENIENDO ')
    grp_col = None
    if grp_line:
        grp_col = grp_line.replace(' AGRUPATELOS X ', '').strip()
    grp_cond = None
    if grp_fil_line:
        # NOTE(review): strip('() ') also removes a ')' that closes a
        # function call at the very end of the condition - confirm that
        # split_clauses / strip_func tolerate that.
        grp_cond = grp_fil_line.replace(' TENIENDO ', '').strip('() ')

    # Add on the columns needed for TENIENDO and AGRUPATELOS if they are not
    # in the column list. Work on a copy so that `cols` keeps the order the
    # query asked for.
    new_cols = cols.copy()
    if grp_line and grp_col not in new_cols:
        new_cols.append(grp_col)
    if grp_fil_line:
        # split the condition into var1, operator, var2; the operator is not
        # needed to find the column
        var1, __, var2 = split_clauses(grp_cond)
        # The operand that contains a call is taken to be the column function
        col_str = var1 if '(' in var1 else var2
        if not any(func + '(' in col_str for func in eva.FUNC):
            # Same exception type as the former `[...][0]` lookup, but with
            # a message that points at the offending clause.
            raise IndexError(
                'TENIENDO clause does not apply a known column function: '
                '{0!r}'.format(grp_cond))
        col_str = strip_func(col_str)
        if col_str not in new_cols:
            new_cols.append(col_str)

    # Table list is reduced to a single table with all the columns stated in
    # new_cols
    result_tb = emp_de_two(tb_list, new_cols, col_funcs)

    # Build the grouping (stays None when there is no AGRUPATELOS X)
    grouping = None
    if grp_line:
        grouping = get_grouping(grp_col, result_tb)
    # NOTE(review): TENIENDO is applied independently of AGRUPATELOS X here,
    # mirroring the column handling above - confirm that filter_grps is meant
    # to run when no grouping was built.
    if grp_fil_line:
        grouping = filter_grps(grp_cond, result_tb, grouping)

    # Remove columns added for AGRUPATELOS and TENIENDO
    result_tb = man.reduce_tb(result_tb, cols)
    # Do the column functions on the result table
    result_tb = do_col_funcs_two(result_tb, col_funcs, grouping)
    # If DIVERGENTE is true, remove duplicates
    if col_funcs['DIVERGENTE']:
        result_tb = man.rmv_dups(result_tb)

    # SOLO: cut off the rows if the clause is present.
    # str.strip(' SOLO ') would strip the *characters* ' ', 'S', 'O', 'L'
    # from both ends rather than the keyword, so remove the keyword
    # explicitly, the same way ONDE is handled above.
    solo = l_sub_exprns.get(' SOLO ')
    if solo is not None:
        result_tb = cutoff_rows(solo.replace('SOLO', '', 1).strip(), result_tb)

    # return result table and the cols order of the query
    return result_tb, cols