def plantden_pk(dimapath, tablename):
    """ DEPRECATED

    Returns a dataframe with tblPlots, tblLines, tblPlantDenHeader and
    tblPlantDenDetail joined. The PrimaryKey field is made using FormDate
    and PlotKey.
    """
    arc = arcno()
    ins = arcno(dimapath)
    denhead = arcno.MakeTableView('tblPlantDenHeader', dimapath)
    dendet = arcno.MakeTableView('tblPlantDenDetail', dimapath)
    plantden = pd.merge(denhead, dendet, how="inner", on="RecKey")

    # borrow PrimaryKeys from whichever detail table this DIMA happens to have
    if 'tblLPIDetail' in ins.actual_list:
        allpks = lpi_pk(dimapath)
    elif 'tblGapDetail' in ins.actual_list:
        allpks = gap_pk(dimapath)
    else:
        print("a difficult one! no easy source of PK's in this dima!")
        # where to pull them from will depend on which table needs them, and
        # what that table has in terms of fields (PlotKey, RecKey, LineKey etc.)

    pks = allpks.loc[:, ["PrimaryKey", "EstablishDate", "FormDate",
                         "RecKey", "LineKey"]].copy()
    iso = arc.isolateFields(pks, 'LineKey', 'PrimaryKey').copy()
    premerge = pd.merge(plantden, iso, how="inner",
                        on="LineKey").drop_duplicates().copy()

    if tablename == "tblPlantDenHeader":
        iso = arc.isolateFields(premerge, 'LineKey', 'PrimaryKey').copy()
        merge = pd.merge(denhead, iso, how="inner", on="LineKey")
        return merge
    elif tablename == "tblPlantDenDetail":
        iso = arc.isolateFields(premerge, 'RecKey', 'PrimaryKey').copy()
        merge = pd.merge(dendet, iso, how="inner", on="RecKey")
        return merge
def gap_pk(dimapath):
    """
    Returns a dataframe with tblPlots, tblLines, tblGapHeader and
    tblGapDetail joined. The PrimaryKey field is made using FormDate
    and PlotKey.
    """
    arc = arcno()
    gap_header = arcno.MakeTableView('tblGapHeader', dimapath)
    gap_detail = arcno.MakeTableView('tblGapDetail', dimapath)
    lines = arcno.MakeTableView('tblLines', dimapath)
    plots = arcno.MakeTableView('tblPlots', dimapath)

    # joins
    plot_line = pd.merge(plots, lines, how="inner", on="PlotKey")
    gaphead_detail = pd.merge(gap_header, gap_detail, how="inner", on="RecKey")
    plot_line_det = pd.merge(plot_line, gaphead_detail, how="inner",
                             on="LineKey")

    # fixing dup fields
    # tmp1 = fix_fields(plot_line_det, 'DateModified')
    # tmp2 = fix_fields(tmp1, 'ElevationType')
    plot_line_det.FormDate = pd.to_datetime(
        plot_line_det.FormDate
    ) if platform.system() == 'Linux' else plot_line_det.FormDate
    plot_pk = arc.CalculateField(plot_line_det, "PrimaryKey", "PlotKey",
                                 "FormDate")
    return plot_pk
def sperich_pk(dimapath, tablename):
    """
    Returns a dataframe with tblPlots, tblLines, tblSpecRichHeader and
    tblSpecRichDetail joined. The PrimaryKey field is made using FormDate
    and PlotKey.
    """
    arc = arcno()
    spe_header = arcno.MakeTableView('tblSpecRichHeader', dimapath)
    spe_detail = arcno.MakeTableView('tblSpecRichDetail', dimapath)
    lines = arcno.MakeTableView('tblLines', dimapath)
    plots = arcno.MakeTableView('tblPlots', dimapath)

    # joins
    plot_line = pd.merge(plots, lines, how="inner", on="PlotKey")
    spehead_detail = pd.merge(spe_header, spe_detail, how="inner", on="RecKey")
    plot_line_det = pd.merge(plot_line, spehead_detail, how="inner",
                             on="LineKey")

    plot_line_det.FormDate = pd.to_datetime(
        plot_line_det.FormDate
    ) if platform.system() == 'Linux' else plot_line_det.FormDate
    plot_pk = arc.CalculateField(plot_line_det, "PrimaryKey", "PlotKey",
                                 "FormDate")

    if tablename == "tblSpecRichHeader":
        iso = arc.isolateFields(plot_pk, "LineKey", "PrimaryKey")
        merge = pd.merge(spe_header, iso, how="inner", on="LineKey")
        return merge
    elif tablename == "tblSpecRichDetail":
        iso = arc.isolateFields(plot_pk, "RecKey", "PrimaryKey")
        merge = pd.merge(spe_detail, iso, how="inner", on="RecKey")
        return merge
def table_check(tablename, path):
    """
    Returns True if the table is present in a particular DIMA.

    Uses "arcno.actual_list" to compile a list of tables, then checks
    whether the chosen table is present in that list.
    """
    instance = arcno(path)
    tablelist = [i for i, j in instance.actual_list.items()]
    return tablename in tablelist
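# Usage sketch (illustrative only, not part of the original module; the .mdb
# path below is a hypothetical placeholder):
#
#   if table_check('tblLPIDetail', 'c://Folder/dima_file.mdb'):
#       lpi = lpi_pk('c://Folder/dima_file.mdb')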
def bsne_pk(dimapath):
    """
    Returns a dataframe with tblPlots, tblBSNE_Box, tblBSNE_Stack,
    tblBSNE_BoxCollection and tblBSNE_TrapCollection joined.

    If tblBSNE_TrapCollection does not exist in the network DIMA, skip it
    and join box, stack and boxcollection; if it exists, join
    trapcollection and stack. The PrimaryKey field is made using
    collectDate and PlotKey.
    """
    ddt = arcno.MakeTableView("tblBSNE_TrapCollection", dimapath)
    arc = arcno()

    if ddt.shape[0] > 0:
        stack = arcno.MakeTableView("tblBSNE_Stack", dimapath)
        df = pd.merge(stack, ddt, how="inner", on="StackID")
        if "collectDate" in df.columns:
            df.collectDate = pd.to_datetime(df.collectDate)
        df2 = arc.CalculateField(df, "PrimaryKey", "PlotKey", "collectDate")
        df2tmp = fix_fields(df2, "Notes")
        df2tmp2 = fix_fields(df2tmp, "DateModified")
        df2tmp3 = fix_fields(df2tmp2, "DateEstablished")
        return df2tmp3
    else:
        box = arcno.MakeTableView("tblBSNE_Box", dimapath)
        stack = arcno.MakeTableView("tblBSNE_Stack", dimapath)
        boxcol = arcno.MakeTableView('tblBSNE_BoxCollection', dimapath)
        df = pd.merge(box, stack, how="inner", on="StackID")
        df2 = pd.merge(df, boxcol, how="inner", on="BoxID")
        df2.collectDate = pd.to_datetime(
            df2.collectDate
        ) if platform.system() == 'Linux' else df2.collectDate
        df2 = arc.CalculateField(df2, "PrimaryKey", "PlotKey", "collectDate")
        df2tmp = fix_fields(df2, "Notes")
        df2tmp2 = fix_fields(df2tmp, "DateModified")
        df2tmp3 = fix_fields(df2tmp2, "DateEstablished")
        return df2tmp3
def set_model(self, fields=None, tablename=None, pg=None):
    if self.projectswitch == 'tall':
        self.fields = fields
        self.tablename = tablename
        self.modelhandler = model_handler(self.path, self.fields,
                                          self.tablename)
    elif self.projectswitch == 'dima':
        arc = arcno(self.path)
        for i, j in arc.actual_list.items():
            if 'Box' not in i:
                pg_send(i, self.path) if pg is None else pg_send(i, self.path, 1)
            else:
                print('handling not implemented')
def table_collector(path2mdbs):
    """
    Returns a list of all tables present in a folder of DIMAs.

    Because each DIMA may have a different set of tables, this function
    goes through the list of tables per DIMA and appends any table not
    previously seen into an internal list, which is ultimately returned.
    """
    containing_folder = path2mdbs
    contained_files = os.listdir(containing_folder)
    table_list = []
    for mdb_path in contained_files:
        if os.path.splitext(mdb_path)[1] in ('.mdb', '.accdb'):
            instance = arcno(os.path.join(containing_folder, mdb_path))
            for tablename, size in instance.actual_list.items():
                if tablename not in table_list:
                    table_list.append(tablename)
    return table_list
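# Usage sketch (illustrative only, not part of the original module; the folder
# path is a hypothetical placeholder):
#
#   tables = table_collector('c://Folder_of_dimas')
#   print(tables)  # e.g. ['tblPlots', 'tblLines', 'tblLPIHeader', ...]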
def qual_pk(dimapath):
    """
    Returns a dataframe with tblQualHeader and tblQualDetail joined.
    The PrimaryKey field is made using FormDate and PlotKey.
    No dup fields.
    """
    arc = arcno()
    header = arcno.MakeTableView('tblQualHeader', dimapath)
    detail = arcno.MakeTableView('tblQualDetail', dimapath)

    # joins
    head_detail = pd.merge(header, detail, how="inner", on="RecKey")
    head_detail.FormDate = pd.to_datetime(
        head_detail.FormDate
    ) if platform.system() == 'Linux' else head_detail.FormDate
    plot_pk = arc.CalculateField(head_detail, "PrimaryKey", "PlotKey",
                                 "FormDate")
    return plot_pk
def lpi_pk(dimapath):
    """
    Returns a dataframe with tblPlots, tblLines, tblLPIHeader and
    tblLPIDetail joined. The PrimaryKey field is made using FormDate
    and PlotKey.
    """
    lpi_header = arcno.MakeTableView('tblLPIHeader', dimapath)
    lpi_detail = arcno.MakeTableView('tblLPIDetail', dimapath)
    lines = arcno.MakeTableView('tblLines', dimapath)
    plots = arcno.MakeTableView('tblPlots', dimapath)

    # joins
    plot_line = pd.merge(plots, lines, how="inner", on="PlotKey")
    lpihead_detail = pd.merge(lpi_header, lpi_detail, how="inner", on="RecKey")
    plot_line_det = pd.merge(plot_line, lpihead_detail, how="inner",
                             on="LineKey")

    arc = arcno()
    plot_line_det.FormDate = pd.to_datetime(
        plot_line_det.FormDate
    ) if platform.system() == 'Linux' else plot_line_det.FormDate
    plot_pk = arc.CalculateField(plot_line_det, "PrimaryKey", "PlotKey",
                                 "FormDate")
    # plot_pk.drop_duplicates(["PrimaryKey", "PlotKey", "FormDate"])
    return plot_pk
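# Usage sketch (illustrative only, not part of the original module; the .mdb
# path is a hypothetical placeholder):
#
#   lpi = lpi_pk('c://Folder/dima_file.mdb')
#   lpi[['PlotKey', 'FormDate', 'PrimaryKey']].head()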
def main_translate(tablename: str, dimapath: str, debug=None):
    """
    Translates between tables and different function argument strategies.

    This function maps the tablename to an appropriate tuple of arguments.
    The first branch (tables without a primary key) checks whether the DIMA
    contains 'BSNE' tables; it could still be a vegetation DIMA, however,
    and that is checked further along in the no_pk function.

    Parameters
    ----------
    tablename : str
        Name of the table in DIMA. example: 'tblLines'

    dimapath : str
        Physical path to DIMA Access file. example: 'c://Folder/dima_file.mdb'

    debug : any character or None (default value: None)
        Prints out the trajectory of the mapping process throughout the
        function.
    """
    no_primary_key = ['tblPlots', 'tblLines', 'tblSpecies',
                      'tblSpeciesGeneric', 'tblSites', 'tblPlotNotes']
    soil_stab_primary_key = ['tblSoilStabDetail', 'tblSoilStabHeader']
    soil_pit_primary_key = ['tblSoilPits', 'tblSoilPitHorizons']
    plant_prod_primary_key = ['tblPlantProdDetail', 'tblPlantProdHeader']
    bsne_primary_keys = ['tblBSNE_Box', 'tblBSNE_Stack',
                         'tblBSNE_BoxCollection', 'tblBSNE_TrapCollection']

    switcher_arguments = {
        'no_pk': (None, dimapath, tablename),
        'no_pk_soilstab': ('soilstab', dimapath, None),
        'no_pk_soilpits': ('soilpits', dimapath, None),
        'no_pk_plantprod': ('plantprod', dimapath, None),
        'yes_pk': dimapath,
        'f': ('fake', dimapath, tablename)
    }

    # first check if tablename exists in the particular dima
    if table_check(tablename, dimapath):
        if tablename in no_primary_key:  # no_pk branch
            inst = arcno(dimapath)
            # network_check is 2 when the DIMA carries BSNE tables, 1 otherwise
            network_check = 2 if any(
                'BSNE' in i for i in inst.actual_list) else 1

            if network_check == 1:
                print('no_pk; no BSNE tables; line or plot') if debug else None
                df = switcher[tablename](*switcher_arguments['f'])
            else:
                print('no_pk; netdima in path; line or plot') if debug else None
                df = switcher[tablename](*switcher_arguments['no_pk'])
            df = blank_fixer(df)
            df = significant_digits_fix_pandas(df)
            return df

        elif tablename in soil_stab_primary_key:  # no_pk + soilstab branch
            print('no_pk; soilstab') if debug else None
            df = switcher[tablename](*switcher_arguments['no_pk_soilstab'])
            df = blank_fixer(df)
            df = significant_digits_fix_pandas(df)
            return df

        elif tablename in soil_pit_primary_key:  # no_pk + soilpits branch
            print('no_pk; soilpits') if debug else None
            df = switcher[tablename](*switcher_arguments['no_pk_soilpits'])
            df = blank_fixer(df)
            df = significant_digits_fix_pandas(df)
            return df

        elif tablename in plant_prod_primary_key:  # no_pk + plantprod branch
            print('no_pk; plantprod') if debug else None
            df = switcher[tablename](*switcher_arguments['no_pk_plantprod'])
            df = blank_fixer(df)
            df = significant_digits_fix_pandas(df)
            return df

        else:  # lpi_pk, gap_pk, sperich_pk, plantden_pk, bsne_pk branch
            if tablename in bsne_primary_keys:
                print('bsne collection') if debug else None
                retdf = switcher[tablename](switcher_arguments['yes_pk'])
                retdf = blank_fixer(retdf)
                retdf = significant_digits_fix_pandas(retdf)
                retdf = openingsize_fixer(retdf)
                return retdf
            else:
                print('hmmm?') if debug else None
                df = switcher[tablename](switcher_arguments['yes_pk'])
                arc = arcno()
                iso = arc.isolateFields(df, tableswitch[tablename],
                                        "PrimaryKey").copy()
                iso.drop_duplicates([tableswitch[tablename]], inplace=True)
                target_table = arcno.MakeTableView(tablename, dimapath)
                retdf = pd.merge(target_table, iso, how="inner",
                                 on=tableswitch[tablename])
                retdf = blank_fixer(retdf)
                retdf = significant_digits_fix_pandas(retdf)
                retdf = openingsize_fixer(retdf)
                return retdf
    else:
        print(f'table not in {os.path.basename(dimapath)}')
        pass
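# Usage sketch (illustrative only, not part of the original module; the .mdb
# path is a hypothetical placeholder, and the module-level `switcher` and
# `tableswitch` mappings are assumed to be defined elsewhere in the file):
#
#   plots = main_translate('tblPlots', 'c://Folder/dima_file.mdb', debug=1)
#   gaps = main_translate('tblGapDetail', 'c://Folder/dima_file.mdb')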
def no_pk(tablefam: str = None, dimapath: str = None, tablename: str = None):
    """
    Builds a PrimaryKey for tables that do not carry one of their own.

    When a table family is given ('plantprod', 'soilstab', 'soilpit' or
    'plantden'), the family's header/detail tables are joined and the key is
    calculated there; otherwise the key is borrowed from lpi_pk or bsne_pk
    for tblPlots, tblLines and the other key-less tables.
    """
    arc = arcno()
    ins = arcno(dimapath)
    fam = {
        'plantprod': ['tblPlantProdDetail', 'tblPlantProdHeader'],
        'soilstab': ['tblSoilStabDetail', 'tblSoilStabHeader'],
        'soilpit': ['tblSoilPits', 'tblSoilPitHorizons'],
        'plantden': ['tblPlantDenDetail', 'tblPlantDenHeader'],
    }
    try:
        if tablefam is not None and ('plantprod' in tablefam):
            header = arcno.MakeTableView(fam['plantprod'][1], dimapath)
            detail = arcno.MakeTableView(fam['plantprod'][0], dimapath)
            head_det = pd.merge(header, detail, how="inner", on="RecKey")
            head_det.FormDate = pd.to_datetime(
                head_det.FormDate
            ) if platform.system() == 'Linux' else head_det.FormDate
            head_det = arc.CalculateField(head_det, "PrimaryKey", "PlotKey",
                                          "FormDate")
            if tablename == "tblPlantProdHeader":
                iso = arc.isolateFields(head_det, 'PlotKey', 'PrimaryKey').copy()
                merge = pd.merge(header, iso, how="inner", on="PlotKey")
                return merge
            elif tablename == "tblPlantProdDetail":
                iso = arc.isolateFields(head_det, 'RecKey', 'PrimaryKey').copy()
                merge = pd.merge(detail, iso, how="inner", on="RecKey")
                return merge

        elif tablefam is not None and ('soilstab' in tablefam):
            header = arcno.MakeTableView(fam['soilstab'][1], dimapath)
            detail = arcno.MakeTableView(fam['soilstab'][0], dimapath)
            head_det = pd.merge(header, detail, how="inner", on="RecKey")
            head_det.FormDate = pd.to_datetime(
                head_det.FormDate
            ) if platform.system() == 'Linux' else head_det.FormDate
            head_det = arc.CalculateField(head_det, "PrimaryKey", "PlotKey",
                                          "FormDate")
            if tablename == "tblSoilStabHeader":
                iso = arc.isolateFields(head_det, 'PlotKey', 'PrimaryKey').copy()
                merge = pd.merge(header, iso, how="inner",
                                 on="PlotKey").drop_duplicates()
                return merge
            elif tablename == "tblSoilStabDetail":
                iso = arc.isolateFields(head_det, 'RecKey', 'PrimaryKey').copy()
                merge = pd.merge(detail, iso, how="inner",
                                 on="RecKey").drop_duplicates(subset=["BoxNum"])
                return merge

        elif tablefam is not None and ('soilpit' in tablefam):
            pits = arcno.MakeTableView(fam['soilpit'][0], dimapath)
            horizons = arcno.MakeTableView(fam['soilpit'][1], dimapath)
            pits_horizons = pd.merge(pits, horizons, how="inner", on="SoilKey")

            # borrow PrimaryKeys from whichever detail table this DIMA has
            if 'tblLPIDetail' in ins.actual_list:
                allpks = lpi_pk(dimapath)
            elif 'tblGapDetail' in ins.actual_list:
                allpks = gap_pk(dimapath)
            else:
                print("a difficult one! no easy source of PK's in this dima!")
                # where to pull them from will depend on which table needs
                # them, and what that table has in terms of fields
                # (PlotKey, RecKey, LineKey etc.)

            pks = allpks.loc[:, ["PrimaryKey", "EstablishDate", "FormDate",
                                 "DateModified", "PlotKey"]].copy()
            iso = arc.isolateFields(pks, 'PlotKey', 'PrimaryKey').copy()
            premerge = pd.merge(pits_horizons, iso, how="inner",
                                on="PlotKey").drop_duplicates().copy()
            if tablename == "tblSoilPits":
                iso = arc.isolateFields(premerge, 'PlotKey',
                                        'PrimaryKey').drop_duplicates().copy()
                merge = pd.merge(pits, iso, how="inner", on="PlotKey")
                return merge
            elif tablename == "tblSoilPitHorizons":
                iso = arc.isolateFields(premerge, 'HorizonKey',
                                        'PrimaryKey').drop_duplicates().copy()
                merge = pd.merge(horizons, iso, how="inner", on="HorizonKey")
                return merge

        elif tablefam is not None and ('plantden' in tablefam):
            dendet = arcno.MakeTableView(fam['plantden'][0], dimapath)
            denhead = arcno.MakeTableView(fam['plantden'][1], dimapath)
            plantden = pd.merge(denhead, dendet, how="inner", on="RecKey")
            plantden = arc.CalculateField(plantden, "PrimaryKey", "LineKey",
                                          "FormDate")
            if tablename == "tblPlantDenHeader":
                iso = arc.isolateFields(plantden, 'LineKey', 'PrimaryKey').copy()
                merge = pd.merge(denhead, iso, how="inner", on="LineKey")
                return merge
            elif tablename == "tblPlantDenDetail":
                iso = arc.isolateFields(plantden, 'RecKey', 'PrimaryKey').copy()
                merge = pd.merge(dendet, iso, how="inner", on="RecKey")
                return merge

        else:
            no_pk_df = arcno.MakeTableView(tablename, dimapath)

            if ('Network_DIMAs' in dimapath) and (tablefam is None):
                if ('tblPlots' in tablename) or ('tblLines' in tablename):
                    print("lines,plots; networkdima in the path")
                    fulldf = bsne_pk(dimapath)
                    iso = arc.isolateFields(fulldf, 'PlotKey', 'PrimaryKey').copy()
                    no_pk_df = pd.merge(
                        no_pk_df, iso, how="inner",
                        on=["PlotKey"]).drop_duplicates([
                            "LineKey", "PrimaryKey"
                        ]) if "tblLines" in tablename else pd.merge(
                            no_pk_df, iso, how="inner",
                            on=["PlotKey"]).drop_duplicates(["PrimaryKey"])
                    return no_pk_df
                else:
                    print("network, but not line or plot, no pk")
                    if 'Sites' in tablename:
                        no_pk_df = no_pk_df[(no_pk_df.SiteKey != '888888888') &
                                            (no_pk_df.SiteKey != '999999999')]
                        return no_pk_df
                    else:
                        return no_pk_df

            elif ('Network_DIMAs' in dimapath) and ('fake' in tablefam):
                if ('tblPlots' in tablename) or ('tblLines' in tablename):
                    fulldf = lpi_pk(dimapath)
                    iso = arc.isolateFields(fulldf, 'PlotKey', 'PrimaryKey').copy()
                    no_pk_df = pd.merge(
                        no_pk_df, iso, how="inner",
                        on=["PlotKey"]).drop_duplicates([
                            "LineKey", "PrimaryKey"
                        ]) if "tblLines" in tablename else pd.merge(
                            no_pk_df, iso, how="inner",
                            on=["PlotKey"]).drop_duplicates(["PrimaryKey"])
                    return no_pk_df
                else:
                    print("network, but not line or plot, no pk --fakebranch")
                    if 'Sites' in tablename:
                        no_pk_df = no_pk_df[(no_pk_df.SiteKey != '888888888') &
                                            (no_pk_df.SiteKey != '999999999')]
                        return no_pk_df
                    else:
                        return no_pk_df

            else:
                if ('tblPlots' in tablename) or ('tblLines' in tablename):
                    print('not network, no tablefam')
                    fulldf = lpi_pk(dimapath)
                    iso = arc.isolateFields(fulldf, 'PlotKey', 'PrimaryKey').copy()
                    no_pk_df = pd.merge(
                        no_pk_df, iso, how="inner",
                        on=["PlotKey"]).drop_duplicates([
                            "LineKey", "PrimaryKey"
                        ]) if "tblLines" in tablename else pd.merge(
                            no_pk_df, iso, how="inner",
                            on=["PlotKey"]).drop_duplicates(["PrimaryKey"])
                    return no_pk_df
                else:
                    print("not network, not line or plot, no pk")
                    if 'Sites' in tablename:
                        no_pk_df = no_pk_df[(no_pk_df.SiteKey != '888888888') &
                                            (no_pk_df.SiteKey != '999999999')]
                        return no_pk_df
                    else:
                        return no_pk_df

    except Exception as e:
        print(e)
def looper(path2mdbs, tablename, projk=None, csv=False):
    """
    Goes through all the files (.mdb or .accdb extensions) inside a folder,
    creates a dataframe of the chosen table using the 'main_translate'
    function, adds each dataframe to a dictionary, and finally concatenates
    all the dataframes and returns the appended result.
    """
    containing_folder = path2mdbs
    contained_files = os.listdir(containing_folder)
    df_dictionary = {}
    count = 1
    basestring = 'file_'

    for i in contained_files:
        if os.path.splitext(os.path.join(containing_folder,
                                         i))[1] in ('.mdb', '.accdb'):
            countup = basestring + str(count)
            # df creation/manipulation starts here
            arc = arcno(os.path.join(containing_folder, i))
            print(i)
            df = main_translate(tablename, os.path.join(containing_folder, i))

            # if it's gapheader, this deals with different versions
            # (via alt_gapheader_check)
            if "tblGapHeader" in tablename:
                if tablename in arc.actual_list:
                    df = alt_gapheader_check(df)
                else:
                    df = None

            if df is not None:
                if 'DateLoadedInDB' in df.columns:
                    df['DateLoadedInDB'] = df['DateLoadedInDB'].astype(
                        'datetime64')
                    df['DateLoadedInDB'] = datetime.now().strftime(
                        "%d-%m-%Y %H:%M:%S")
                else:
                    df['DateLoadedInDB'] = datetime.now().strftime(
                        "%d-%m-%Y %H:%M:%S")
                df['DBKey'] = os.path.split(
                    os.path.splitext(i)[0])[1].replace(" ", "")
                # add df to the dictionary
                df_dictionary[countup] = df.copy()
            else:
                pass
        count += 1

    if len(df_dictionary) > 0:
        final_df = pd.concat([j for i, j in df_dictionary.items()],
                             ignore_index=True).drop_duplicates()
        final_df = dateloadedcheck(final_df)
        # final_df = northing_round(final_df)
        if (tablename == 'tblPlots') and (projk is not None):
            final_df["ProjectKey"] = projk
        if "tblLines" in tablename:
            for col in final_df.columns:
                if "PrimaryKey" in col:
                    final_df[col] = final_df[col].astype("object")
        return final_df if csv == False else final_df.to_csv(
            os.path.join(containing_folder, tablename + '.csv'))
    else:
        print(f"table '{tablename}' not found within this dima batch")
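# Minimal driver sketch (not part of the original module): the folder path,
# table name and project key below are hypothetical placeholders, and the
# module-level imports and switcher mappings are assumed to be defined
# further up in the file.
if __name__ == "__main__":
    # collect every table name seen across the batch of DIMAs
    available = table_collector('c://Folder_of_dimas')
    print(available)

    # translate one table across the whole folder and write it out as csv
    looper('c://Folder_of_dimas', 'tblPlots', projk='EXAMPLE_PROJECT', csv=True)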