def gen_combos(tables, colname, val):
    '''Generate the logical conditions needed to optionally join tables.

    The tables considered are those returned by ``tables_by_col(tables,
    colname)``.

    * Two or more tables: for every pair of tables one OR-condition is
      produced that accepts a row when both tables match the requested
      values, or when one table matches and the other table's column is
      NULL.
    * Exactly one table: a single equality comparison against ``val``.
    * No table: an empty list.

    When ``colname`` is ``consts.YEAR`` and ``val`` is ``consts.LATEST`` or
    ``consts.OLDEST``, each table of a pair is matched against its own year
    looked up in ``TableManager.table_years`` (keyed by ``full_name()``).

    tables  -- candidate tables, forwarded to ``tables_by_col``
    colname -- name of the column being filtered
    val     -- raw value from the query args; split with ``splitter`` for
               the pairwise case
    Returns a list of SQL conditions.
    '''
    combos = []
    relevant_tables = tables_by_col(tables, colname)
    table_pairs = list(itertools.combinations(relevant_tables, 2))

    if table_pairs:
        # Both values are the same for every pair, so compute them once
        # instead of twice per iteration.
        split_vals = splitter(val)
        relative_year = (colname == consts.YEAR and
                         val in (consts.LATEST, consts.OLDEST))

        for table1, table2 in table_pairs:
            if relative_year:
                # "latest"/"oldest" resolve to a different year per table.
                vals1 = [TableManager.table_years[table1.full_name()][val]]
                vals2 = [TableManager.table_years[table2.full_name()][val]]
            else:
                vals1 = vals2 = split_vals

            col1 = getattr(table1, colname)
            col2 = getattr(table2, colname)

            # "== None" is intentional: these are SQL expressions, so the
            # comparison must go through the column's overloaded operator.
            both_match = and_(col1.in_(vals1), col2.in_(vals2))
            only_first = and_(col1.in_(vals1), col2 == None)  # noqa: E711
            only_second = and_(col1 == None, col2.in_(vals2))  # noqa: E711
            combos.append(or_(both_match, only_first, only_second))
    elif len(relevant_tables) == 1:
        # if we're just referencing a single table
        # NOTE(review): unlike the pairwise branch this compares against the
        # raw ``val`` (no splitter(), no latest/oldest lookup) -- confirm
        # that callers never reach here with such values.
        safe_colname = colname.rsplit(".", 1)[-1]
        combos.append(getattr(relevant_tables[0], safe_colname) == val)

    return combos
def where_filters(table, where_str):
    '''Turn a ``where`` query string into filter expressions for ``table``.

    ``where_str`` is split into clauses with ``splitter``; every clause has
    the form ``<colname>:<cond>``. ``<colname>`` is either one attribute of
    ``table`` or two attributes joined by ``/`` (numerator/denominator).
    ``<cond>`` is decoded by ``parse_method_and_val`` into
    ``(method, value, negate)``.

    Supported methods:

    * ``ne`` / ``gt`` / ``lt`` -- ``!=``, ``>``, ``<`` on a single column
    * ``rt`` / ``rg``          -- ratio of the two ``/`` columns ``<`` or
                                  ``>`` value, guarded by denominator != 0
    * anything else            -- called as a method of the single column,
                                  i.e. ``getattr(col, method)(value)``

    When ``negate`` is truthy the resulting expression is inverted with
    ``~``.

    Returns a list of expressions (empty when ``where_str`` is falsy).
    Raises ``ValueError`` when a ratio method is used without a ``/``
    column pair, or a single-column method is used with one.
    '''
    if not where_str:
        return []

    filts = []
    for where in splitter(where_str):
        colname, cond = where.split(":")

        # Resolve the column(s) freshly for every clause so that nothing
        # leaks in from a previous iteration.
        is_ratio = "/" in colname
        col = None
        cols = None
        if is_ratio:
            cols = [getattr(table, c) for c in colname.split("/")]
        else:
            col = getattr(table, colname)

        method, value, negate = parse_method_and_val(cond)

        if method in ("rt", "rg"):
            if not is_ratio:
                raise ValueError(
                    "where method '{}' needs a 'numerator/denominator' "
                    "column pair, got '{}'".format(method, colname))
            numerator, denominator = cols[0], cols[1]
            ratio = numerator / denominator
            comparison = ratio < value if method == "rt" else ratio > value
            # Exclude rows that would divide by zero.
            expr = and_(denominator != 0, comparison)
        else:
            if is_ratio:
                raise ValueError(
                    "where method '{}' needs a single column, "
                    "got '{}'".format(method, colname))
            if method == "ne":
                expr = col != value
            elif method == "gt":
                expr = col > value
            elif method == "lt":
                expr = col < value
            else:
                # Any other method name is looked up on the column itself.
                expr = getattr(col, method)(value)

        if negate:
            expr = ~expr
        filts.append(expr)

    return filts
def multitable_value_filters(tables, api_obj):
    '''Build the value filters for a query spanning several tables.

    Looks at every column/value pair passed in the query args (e.g.
    year=2014 or geo=04000US25, held in ``api_obj.vars_and_vals``) and
    applies logic that depends on the crosswalk mode:

    * auto-crosswalk disabled: ``gen_combos`` builds the conditions, which
      keep NULL values so the user can see that no value is available.
    * auto-crosswalk enabled: one filter per related table is produced and
      all of them are meant to be combined as an AND conjunction.

    Returns the list of filters to be applied.
    '''
    collected = []

    for colname, val in api_obj.vars_and_vals.items():
        candidates = tables_by_col(tables, colname)

        if not api_obj.auto_crosswalk:
            collected.extend(gen_combos(candidates, colname, val))
            continue

        for table in candidates:
            if colname == consts.YEAR and val in [consts.LATEST,
                                                  consts.OLDEST]:
                # Resolve latest/oldest to this table's concrete year and
                # record it on the API object.
                resolved_year = TableManager.table_years[table_name(table)][val]
                clause = or_(table.year == resolved_year,
                             table.year == None)  # noqa: E711
                api_obj.set_year(resolved_year)
            else:
                # Crosswalk a throw-away API object that carries only this
                # one column, then filter on the values it ends up with.
                probe = ApiObject(vars_and_vals={colname: val},
                                  limit=None, exclude=None)
                walked = crosswalk(table, probe)
                mapped_vals = splitter(walked.vars_and_vals[colname])
                clause = getattr(table, colname).in_(mapped_vals)
            collected.append(clause)

    return collected
def where_filters(tables, api_obj):
    '''Process the where query argument from an API call.

    ``api_obj.where`` is split into clauses with ``splitter``. Each clause
    has the form ``<target_var>.<filt_col>:<cond>``.

    * ``filt_col == 'sumlevel'``: the first key of
      ``api_obj.shows_and_levels`` is used to look for a
      ``<key>_filter`` callable on the first table that has
      ``target_var``; when present it is called with ``cond``, otherwise
      the clause contributes nothing.
    * otherwise: for every column that ``get_column_from_tables`` returns
      for ``target_var``, ``filt_col`` is looked up on that column's class
      and turned into a filter with ``make_filter``.

    Returns a list of filters (empty when ``api_obj.where`` is falsy).
    '''
    if not api_obj.where:
        return []

    filts = []
    for where in splitter(api_obj.where):
        colname, cond = where.split(":")
        target_var, filt_col = colname.rsplit(".", 1)

        if filt_col == 'sumlevel':
            # list(...)[0] instead of .keys()[0]: dict views cannot be
            # indexed on Python 3; this form works on both 2 and 3.
            level_name = list(api_obj.shows_and_levels.keys())[0]
            # Result is not needed here; the call is kept as-is.
            # NOTE(review): drop it if the helper is confirmed to be a pure
            # lookup with no validation side effects.
            get_column_from_tables(tables, target_var, False)
            table = tables_by_col(tables, target_var, return_first=True)
            filter_name = "{}_filter".format(level_name)
            if hasattr(table, filter_name):
                filts.append(getattr(table, filter_name)(cond))
        else:
            for col in get_column_from_tables(tables, target_var, False):
                # Use a separate name for the resolved attribute: reusing
                # ``filt_col`` would replace the attribute *name* with the
                # attribute itself and break the lookup for the next column.
                filter_attr = getattr(col.class_, filt_col)
                filts.append(make_filter(filter_attr, cond))

    return filts
def copy_where_literals(api_obj):
    '''Copy ``where`` clauses into ``api_obj.vars_and_vals``.

    Every ``<colname>:<cond>`` clause of ``api_obj.where`` (split with
    ``splitter``) is stored as ``vars_and_vals[colname] = cond`` unless
    that key is already present. Objects without a ``where`` attribute, or
    with a falsy one, are left untouched.

    Returns the same ``api_obj`` that was passed in.
    '''
    if not hasattr(api_obj, "where") or not api_obj.where:
        return api_obj

    for clause in splitter(api_obj.where):
        key, literal = clause.split(":")
        # Existing entries win; only missing keys are filled in.
        api_obj.vars_and_vals.setdefault(key, literal)

    return api_obj
def crosswalk(table, api_obj):
    '''Given a table and an API object, apply any crosswalks that match.

    A crosswalk entry applies when its ``column`` is one of
    ``api_obj.vars_and_vals`` and its ``schema`` equals the table's
    ``__table_args__['schema']``. Entries are processed in registry order.

    * An entry with an ``avoid`` table is skipped when ``table`` has the
      same ``full_name()`` as that table.
    * A table that is named as ``table`` by any entry only accepts entries
      whose ``table`` has the same ``__tablename__``.
    * ``mapping`` is either a dict (values missing from it pass through
      unchanged) or a callable invoked as ``mapping(val, api_obj=api_obj)``.

    The mapped values are joined with ``OR`` and written back to
    ``api_obj.vars_and_vals``; when the joined string differs from the
    previous one it is also recorded in ``api_obj.subs``.

    Returns the (mutated) ``api_obj``.
    '''
    pums_schema = BasePums.get_schema_name()
    pums5_schema = BasePums5.get_schema_name()

    registry = [
        {"column": "industry_iocode", "schema": "bea", "mapping": industry_iocode_func},
        {"column": "commodity_iocode", "schema": "bea", "mapping": iocode_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_growth_map,
         "table": GrowthI, "avoid": CesYi},
        {"column": "soc", "schema": "bls", "mapping": pums_to_bls_soc_map},
        {"column": "soc", "schema": "onet", "mapping": onet_parents},
        {"column": "cip", "schema": "onet", "mapping": onet_cip_parents},
        # cbp uses same naics coding as bls
        {"column": "naics", "schema": "cbp", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": pums_schema, "mapping": naics_map},
        {"column": "cip", "schema": pums_schema, "mapping": truncate_cip},
        {"column": "geo", "schema": pums_schema, "mapping": pums_parent_puma},
        {"column": "naics", "schema": pums5_schema, "mapping": naics_map},
        {"column": "cip", "schema": pums5_schema, "mapping": truncate_cip},
        {"column": "geo", "schema": pums5_schema, "mapping": pums_parent_puma},
        {"column": "geo", "schema": "chr", "mapping": chr_parents},
    ]

    # Tables that some entry explicitly targets via its "table" key.
    exclusive_tables = {entry["table"] for entry in registry if "table" in entry}

    for entry in registry:
        column = entry['column']
        schema = entry['schema']
        mapping = entry['mapping']
        target_table = entry.get('table')
        avoid = entry.get('avoid')

        if avoid and table.full_name() == avoid.full_name():
            continue

        if column not in api_obj.vars_and_vals:
            continue
        if not table.__table_args__['schema'] == schema:
            continue

        if table in exclusive_tables:
            # An exclusive table only takes the entries aimed at it.
            aimed_here = (target_table and
                          target_table.__tablename__ == table.__tablename__)
            if not aimed_here:
                continue

        old_val_str = api_obj.vars_and_vals[column]
        old_vals = splitter(old_val_str)

        if isinstance(mapping, dict):
            mapped = [mapping.get(item, item) for item in old_vals]
        else:
            mapped = [mapping(item, api_obj=api_obj) for item in old_vals]

        joined = OR.join(mapped)
        api_obj.vars_and_vals[column] = joined

        # Record a substitution only when the value really changed.
        if old_val_str != joined:
            api_obj.subs[column] = joined

    return api_obj
def process_value_filters(table, vars_and_vals, api_obj):
    '''Build one filter on ``table`` per entry of ``vars_and_vals``.

    * ``consts.YEAR`` with value ``consts.LATEST`` / ``consts.OLDEST``: the
      concrete year is looked up in ``TableManager.table_years`` for this
      table, filtered with equality on ``table.year`` and stored on
      ``api_obj`` through ``set_year``.
    * value containing ``consts.OR``: membership (``in_``) in the values
      produced by ``splitter``.
    * anything else: plain equality on the column named by the key.

    Returns the filters as a list, in iteration order of ``vars_and_vals``.
    '''
    conditions = []

    for name, raw in vars_and_vals.items():
        if name == consts.YEAR and raw in [consts.LATEST, consts.OLDEST]:
            resolved_year = TableManager.table_years[table_name(table)][raw]
            condition = table.year == resolved_year
            api_obj.set_year(resolved_year)
        else:
            multi_valued = consts.OR in raw
            column = getattr(table, name)
            condition = column.in_(splitter(raw)) if multi_valued else column == raw
        conditions.append(condition)

    return conditions