def summarize_attributes(
    in_fc, group_fields, agg_cols, consolidations=None, melt_col=None
):
    """
    Helper function to perform summarizations of input feature class defined
    by the group, agg, consolidate, and melt columns/objects provided.

    The flow is: validate specs -> dump the needed fields from `in_fc` to a
    DataFrame -> row-wise consolidate -> melt -> apply domains -> group-by
    aggregate -> rename output columns.

    Args:
        in_fc (str): path to feature class, typically this will be the result
            of an intersection of a summary fc and disaggregated fc
        group_fields (list): list of Column objects with optional rename
            attribute
        agg_cols (list): list of AggColumn objects with optional agg_method
            and rename attributes
        consolidations (list): list of Consolidation objects with optional
            consolidation method attribute
        melt_col (list): list of MeltColumn objects with optional agg_method,
            default value, and DomainColumn object; only the FIRST element is
            used (see below)

    Returns:
        pandas.DataFrame: all data summarized according to specs

    Raises:
        Exception: if any requested field is missing from `in_fc`.
    """
    # Validation (listify inputs, validate values)
    # - Group fields (domain possible)
    group_fields = _validateAggSpecs(group_fields, Column)
    # gb_fields drives the group-by keys; dump_fields accumulates every
    # column that must be read from in_fc. They start identical but diverge.
    gb_fields = [gf.name for gf in group_fields]
    dump_fields = [gf.name for gf in group_fields]
    keep_cols = []
    # null_dict maps field name -> fill value for nulls when dumping to df
    null_dict = dict([(gf.name, gf.default) for gf in group_fields])
    # renames collected as (old, new) pairs; applied once at the very end
    renames = [(gf.name, gf.rename) for gf in group_fields if gf.rename is not None]
    # - Agg columns (no domain expected)
    agg_cols = _validateAggSpecs(agg_cols, AggColumn)
    agg_methods = {}  # field name -> aggregation method for the final groupby
    for ac in agg_cols:
        dump_fields.append(ac.name)
        keep_cols.append(ac.name)
        null_dict[ac.name] = ac.default
        agg_methods[ac.name] = ac.agg_method
        if ac.rename is not None:
            renames.append((ac.name, ac.rename))
    # - Consolidations (no domain expected)
    if consolidations:
        consolidations = _validateAggSpecs(consolidations, Consolidation)
        for c in consolidations:
            # Only consolidations that define input_cols contribute columns;
            # others are silently skipped here and in the row-wise agg below.
            if hasattr(c, "input_cols"):
                dump_fields += [ic for ic in c.input_cols]
                keep_cols.append(c.name)
                null_dict.update(c.defaultsDict())
                # NOTE(review): groupby uses c.agg_method while the row-wise
                # consolidation below uses c.cons_method — presumably two
                # distinct attributes on Consolidation; confirm upstream.
                agg_methods[c.name] = c.agg_method
    else:
        consolidations = []  # normalize so the later loop is unconditional
    # - Melt columns (domain possible)
    if melt_col:
        # Only the first MeltColumn spec is honored — [0] after validation.
        melt_col = _validateAggSpecs(melt_col, MeltColumn)[0]
        dump_fields += [ic for ic in melt_col.input_cols]
        gb_fields.append(melt_col.label_col)
        keep_cols.append(melt_col.val_col)
        null_dict.update(melt_col.defaultsDict())
        agg_methods[melt_col.val_col] = melt_col.agg_method
    # Dump the intersect table to df
    dump_fields = list(
        set(dump_fields)
    )  # remove duplicated fields used in multiple consolidations/melts
    missing = PMT.which_missing(table=in_fc, field_list=dump_fields)
    if not missing:
        int_df = PMT.table_to_df(
            in_tbl=in_fc, keep_fields=dump_fields, null_val=null_dict
        )
    else:
        raise Exception(
            f"\t\tthese cols were missing from the intersected FC: {missing}"
        )
    # Consolidate columns: row-wise aggregation of each input_cols group
    for c in consolidations:
        if hasattr(c, "input_cols"):
            int_df[c.name] = int_df[c.input_cols].agg(c.cons_method, axis=1)
    # Melt columns: wide -> long on the melt spec's input_cols
    if melt_col:
        # id_vars = all group-by and kept fields except the melt outputs
        id_fields = [f for f in gb_fields if f != melt_col.label_col]
        id_fields += [f for f in keep_cols if f != melt_col.val_col]
        # NOTE(review): reset_index() after melt adds a spurious 'index'
        # column (melt already returns a fresh RangeIndex) — it is never
        # selected downstream, so it appears harmless; confirm intent.
        int_df = int_df.melt(
            id_vars=id_fields,
            value_vars=melt_col.input_cols,
            var_name=melt_col.label_col,
            value_name=melt_col.val_col,
        ).reset_index()
    # Domains: derived columns become additional group-by keys
    for group_field in group_fields:
        if group_field.domain is not None:
            group_field.apply_domain(int_df)
            gb_fields.append(group_field.domain.name)
    if melt_col:
        if melt_col.domain is not None:
            melt_col.apply_domain(int_df)
            gb_fields.append(melt_col.domain.name)
    # Group by - summarize
    all_fields = gb_fields + keep_cols
    sum_df = int_df[all_fields].groupby(gb_fields).agg(agg_methods).reset_index()
    # Apply renames
    if renames:
        sum_df.rename(columns=dict(renames), inplace=True)
    return sum_df
def join_attributes(
    to_table,
    to_id_field,
    from_table,
    from_id_field,
    join_fields="*",
    null_value=0.0,
    renames=None,
    drop_dup_cols=False,
):
    """
    Helper function to join attributes of one table to another.

    Dumps `from_table` to a DataFrame, optionally renames and drops columns
    that already exist on `to_table`, then extends `to_table` in place.

    Args:
        to_table (str): path to table being extended
        to_id_field (str): primary key
        from_table (str): path to table being joined
        from_id_field (str): foreign key
        join_fields (list/str): list of fields to be added to to_table;
            Default: "*", indicates all fields are to be joined
        null_value (int/str): value to insert for nulls
        renames (dict): key/value pairs of existing field names/ new field names
        drop_dup_cols (bool): flag to eliminate duplicated fields

    Returns:
        None
    """
    renames = renames if renames is not None else {}
    # Expand the wildcard to every non-required field except the join key
    if join_fields == "*":
        join_fields = [
            fld.name
            for fld in arcpy.ListFields(from_table)
            if not fld.required and fld.name != from_id_field
        ]
    # Names the joined columns will carry once renames are applied
    expected_fields = [renames.get(jf, jf) for jf in join_fields]

    drop_fields = []
    if drop_dup_cols:
        # Fields already present on the target (join key excluded)
        existing = {
            fld.name for fld in arcpy.ListFields(to_table) if fld.name != to_id_field
        }
        drop_fields = [name for name in expected_fields if name in existing]
        # Everything collides -> nothing new to add, bail out early
        if len(drop_fields) == len(join_fields):
            print("--- --- no new fields")
            return  # TODO: what if we want to update these fields?

    # Dump from_table to df (key column first, then the requested fields)
    df = PMT.table_to_df(
        in_tbl=from_table,
        keep_fields=[from_id_field] + join_fields,
        null_val=null_value,
    )
    # Rename columns and drop columns as needed
    if renames:
        df.rename(columns=renames, inplace=True)
    if drop_fields:
        df.drop(columns=drop_fields, inplace=True)

    # Join cols from df to to_table
    print(f"--- --- {list(df.columns)} to {to_table}")
    PMT.extend_table_df(
        in_table=to_table,
        table_match_field=to_id_field,
        df=df,
        df_match_field=from_id_field,
    )