def __init__(self, runid, model):
    """Store the run id and model, then (re)create this run's save folder.

    The folder is ``save/<runid>/``. If it already exists it is deleted
    first, so the folder is empty when this returns successfully. A failure
    while deleting/creating the folder is logged, not raised.

    :param runid: run identifier; concatenated into the folder path, so it
        must be a ``str``
    :param model: model object kept on ``self.model``
    """
    self.runid = runid
    self.model = model

    # set output folder
    save_folder = "save/" + self.runid + "/"
    print('created save folder')
    self.save_folder = save_folder

    try:
        # start from an empty folder: drop whatever an earlier run left behind
        if os.path.exists(save_folder):
            shutil.rmtree(save_folder)
        os.makedirs(save_folder)
    except Exception as ex:
        # ``Exception`` rather than ``BaseException`` so Ctrl-C / SystemExit
        # are not swallowed. The exception parts are passed positionally
        # because the ``etype`` keyword was removed in Python 3.10.
        fullStackInfo = ''.join(
            traceback.format_exception(type(ex), ex, ex.__traceback__))
        message = "An error occurred while running a facility:\n" + fullStackInfo
        Logger.logMessage(message)
    else:
        message = "created " + self.save_folder
        Logger.logMessage(message)
def save_model(self, facid):
    """Pickle the list of facility ids that are still left to process.

    Every entry of ``self.model.facids`` except the first is written to
    ``remaining_facs.pkl`` inside ``self.save_folder``.

    :param facid: id of the facility being removed; used only in the log
        message, so it must be a ``str``
    """
    Logger.logMessage("removing fac id:" + facid)

    # everything after the first entry of the model's facility ids
    remaining_facs = self.model.facids.tolist()[1:]

    remaining_loc = self.save_folder + "/remaining_facs.pkl"
    # context manager: the handle is flushed and closed even if pickling fails
    with open(remaining_loc, 'wb') as fachandler:
        pickle.dump(remaining_facs, fachandler)

    print("Saved facs")
def createReport(self, categoryFolder, reportName, arguments=None):
    """Instantiate one report class, write it, and record the outcome.

    ``self.status`` is reset to False on entry and set to True only when the
    report (and its optional post-report action) finished without raising.
    Any exception is logged and printed; it is not propagated.

    :param categoryFolder: folder handed to the report class and to any
        post-report action
    :param reportName: key into ``self.availableReports``; also the name of
        the class looked up on the report module
    :param arguments: passed through to the report class as the second
        element of its argument list
    """
    # Multipathway is the only summary with two implementation classes: one
    # for the standard case and one for alternate receptors. That split is
    # not exposed to users, so the alternate-receptor class is substituted
    # here when needed.

    # reset status
    self.status = False

    try:
        # First determine if alternate receptors were used
        altrecFlag = self.determineAltRec(categoryFolder)
        if reportName == 'MultiPathway' and altrecFlag == 'Y':
            reportName = "MultiPathwayNonCensus"

        reportModule = self.availableReports[reportName]
        if reportModule is None:
            Logger.logMessage("Oops. HEM4 couldn't find your report module.")
            # returning from inside the try skips the else clause below, so
            # status stays False
            return

        # the class carries the same name as the report
        reportFactory = getattr(reportModule, reportName)
        factoryArgs = [self.grpname, arguments]
        report = reportFactory(categoryFolder, self.facilityIds, factoryArgs)
        report.writeWithTimestamp()

        # optional follow-up registered for this report
        if reportName in self.afterReportRun:
            Logger.logMessage("Running post-report action for " + reportName)
            followUp = self.afterReportRun[reportName]
            followUp(categoryFolder)
            Logger.logMessage("Finished post-report action for " + reportName)

    except Exception as e:
        stackText = traceback.format_exc()
        Logger.logMessage("An error occured while creating report: " + reportName)
        Logger.logMessage(stackText)
        print(e)
        self.status = False

    else:
        self.status = True
def validate(self, df):
    """Reject a Dose Response table that contains duplicated pollutants.

    :param df: the uploaded Dose Response DataFrame
    :return: ``df`` unchanged when ``self.duplicates`` reports nothing for
        the pollutant key, otherwise ``None`` after logging each duplicate
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    repeated = self.duplicates(df, [pollutant])
    if len(repeated) == 0:
        return df

    Logger.logMessage(
        "One or more records are duplicated in the Dose Response file (key=pollutant):")
    for record in repeated:
        Logger.logMessage(record)
    Logger.logMessage("Please remove the duplicate records and restart HEM4.")
    return None
def validate(self, df):
    """Validate the Facility List Options table and default its invalid values.

    Strict checks -- a failure is logged, shown in a message box, and ends
    validation with ``None``:
      * every row has a non-blank facility id;
      * every non-blank met station appears, compared in upper case, in the
        ``surffile`` column of ``self.metlib.dataframe``;
      * ``self.duplicates`` reports no duplicated facility id.

    Defaulted checks -- every row is then inspected field by field. An
    invalid value is logged and replaced by a default, and the repaired row
    is written back with ``df.loc[index] = row``.

    Side effects: ``self.period_start_components`` and
    ``self.period_end_components`` are reset for every row, so after the
    call they describe the last row only.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed listing; confirm the period start/end section
    against the intended layout.

    :param df: Facility List Options DataFrame; modified in place
    :return: ``df`` when the strict checks pass, otherwise ``None``
    """

    def default_flag(row, facility, column, label, allowed, default):
        # Log and replace row[column] with `default` when it is not allowed.
        if row[column] not in allowed:
            Logger.logMessage("Facility " + facility + ": Invalid value for "
                              + label + ". Defaulting to '" + default + "'.")
            row[column] = default

    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    if len(df.loc[(df[fac_id] == '')]) > 0:
        Logger.logMessage(
            "One or more facility IDs are missing in the Facility List.")
        messagebox.showinfo(
            "Missing facility IDs",
            "One or more facility IDs are missing in the Facility List.")
        return None

    # Blank met stations are allowed; the others are compared in upper case.
    files = {file.upper() for file in df[met_station].values.tolist()
             if file != ''}
    if not files.issubset(
            set(self.metlib.dataframe[surffile].str.upper())):
        Logger.logMessage(
            "One or more met stations referenced in the Facility List are invalid.")
        messagebox.showinfo(
            "Invalid met station",
            "One or more met stations referenced in the Facility List are invalid.")
        return None

    duplicates = self.duplicates(df, [fac_id])
    if len(duplicates) > 0:
        Logger.logMessage(
            "One or more records are duplicated in the Facility List (key=fac_id):")
        messagebox.showinfo(
            "Duplicates",
            "One or more records are duplicated in the Facility List (key=fac_id)")
        for d in duplicates:
            Logger.logMessage(d)
        return None

    # ----------------------------------------------------------------------------------
    # Defaulted: Invalid values in these columns will be replaced with a default.
    # ----------------------------------------------------------------------------------
    for index, row in df.iterrows():

        facility = row[fac_id]

        # urban and urban_pop..note that the rural_urban value can be blank, and in
        # this case we will leave it blank here (it will be defaulted in Runstream
        # based on census data)
        if row[rural_urban] not in ['U', 'R', '']:
            Logger.logMessage(
                "Facility " + facility +
                ": rural_urban value invalid. Will be defaulted based on census data.")
            row[rural_urban] = ""

        if row[rural_urban] == 'U':
            if row[urban_pop] <= 0:
                Logger.logMessage(
                    "Facility " + facility +
                    ": Invalid value (urban_pop): Defaulting to 50,000.")
                row[urban_pop] = 50000

        # Modeled Distance of Receptors
        if row[model_dist] > 50000 or row[model_dist] <= 0:
            Logger.logMessage("Facility " + facility + ": model distance value " +
                              str(row[model_dist]) +
                              " out of range. Defaulting to 3000.")
            row[model_dist] = 3000

        # maximum distance and modeled distance are related...
        if row[max_dist] > 50000 or row[max_dist] <= 0:
            Logger.logMessage("Facility " + facility + ": max distance value " +
                              str(row[max_dist]) +
                              " out of range. Defaulting to 50000.")
            row[max_dist] = 50000
        elif row[model_dist] > row[max_dist]:
            Logger.logMessage(
                "Facility " + facility + ": model distance value " +
                str(row[model_dist]) +
                " is larger than maximum distance. Defaulting max_dist to 50000.")
            row[max_dist] = 50000

        # Radials: default is 16, minimum number is 4
        if row[radial] == 0:
            Logger.logMessage("Facility " + facility + ": radial value " +
                              str(row[radial]) +
                              " out of range. Defaulting to 16.")
            row[radial] = 16
        if row[radial] < 4:
            Logger.logMessage("Facility " + facility + ": radial value " +
                              str(row[radial]) +
                              " out of range. Defaulting to 4.")
            row[radial] = 4

        # Circles: default is 13, minimum number is 3
        if row[circles] == 0:
            Logger.logMessage("Facility " + facility + ": circles value " +
                              str(row[circles]) +
                              " out of range. Defaulting to 13.")
            row[circles] = 13
        if row[circles] < 3:
            Logger.logMessage("Facility " + facility + ": circles value " +
                              str(row[circles]) +
                              " out of range. Defaulting to 3.")
            row[circles] = 3

        # Overlap Distance: anything below 1 (including 0) or above 500 is
        # replaced by 30
        if row[overlap_dist] < 1 or row[overlap_dist] > 500:
            Logger.logMessage("Facility " + facility + ": overlap distance value " +
                              str(row[overlap_dist]) +
                              " out of range. Defaulting to 30.")
            row[overlap_dist] = 30

        # ring1
        if row[ring1] < 100 or row[ring1] > row[max_dist]:
            Logger.logMessage("Facility " + facility + ": ring1 value " +
                              str(row[ring1]) +
                              " out of range. Defaulting to 100.")
            row[ring1] = 100

        # Facility center...comma separated list whose first item starts with "U"
        # (UTM: 4 items in total) or "L" (lat/lon: 3 items in total)
        center_spec = row[fac_center]
        center_kind = center_spec.upper()
        if center_kind.startswith("U"):
            spec_valid = len(center_spec.split(',')) == 4
        elif center_kind.startswith("L"):
            spec_valid = len(center_spec.split(',')) == 3
        else:
            spec_valid = False

        if center_spec != "" and not spec_valid:
            Logger.logMessage("Facility " + facility +
                              ": Invalid facility center specified: " +
                              center_spec)
            Logger.logMessage(
                "Facility " + facility +
                ": Using default (calculated) center instead.")
            row[fac_center] = ""

        # Ring distances...comma separated list that contains at least 3 values,
        # all must be > 0 and <= 50000, and values must be increasing
        distance_spec = row['ring_distances']
        spec_valid = True
        distances = distance_spec.split(',')
        if len(distances) < 3:
            spec_valid = False
        else:
            try:
                rings = [int(float(d)) for d in distances]
            except (ValueError, OverflowError):
                # non-numeric text is an invalid spec, not a crash
                rings = None
                spec_valid = False

            if rings is not None:
                if row[model_dist] < rings[0]:
                    Logger.logMessage(
                        "Facility " + facility +
                        ": Error: First ring is greater than modeling distance!")
                    messagebox.showinfo(
                        "Modeling distance error",
                        "Facility " + facility +
                        ": Error: First ring is greater than modeling distance!")
                    spec_valid = False

                # Walk ALL rings, the first included, so that the first ring
                # is range-checked and compared with the second one.
                prev = 0
                for ring_distance in rings:
                    if ring_distance <= prev or ring_distance > 50000:
                        spec_valid = False
                    prev = ring_distance

        if distance_spec != "" and not spec_valid:
            Logger.logMessage("Facility " + facility +
                              ": Invalid ring distances specified: " +
                              distance_spec)
            Logger.logMessage(
                "Facility " + facility +
                ": Using default (calculated) distances instead.")
            row['ring_distances'] = ""

        # If there are user supplied ring distances then the last one must equal
        # max distance for correct outer block interpolation
        if row['ring_distances'] != "":
            distlist = row['ring_distances'].split(",")
            if float(distlist[-1]) != row[max_dist]:
                row['ring_distances'] += "," + str(row[max_dist])

        # Acute
        default_flag(row, facility, acute, "acute", ['Y', 'N'], 'N')

        # Hours
        if row[hours] not in [1, 2, 3, 4, 6, 8, 12, 24]:
            Logger.logMessage(
                "Facility " + facility +
                ": Invalid value for hours. Defaulting to 1.")
            row[hours] = 1

        if row[acute] == 'Y':
            if row[multiplier] <= 0:
                Logger.logMessage(
                    "Facility " + facility +
                    ": Invalid value for multiplier. Defaulting to 10.")
                row[multiplier] = 10

            if row[hivalu] <= 0:
                Logger.logMessage(
                    "Facility " + facility +
                    ": Invalid value for high value. Defaulting to 1.")
                # default the high value itself (not the multiplier)
                row[hivalu] = 1

        # pdep, pdepl, vdep, vdepl: compared in upper case
        for column, label in ((vdep, "vdep"), (vdepl, "vdepl"),
                              (pdep, "pdep"), (pdepl, "pdepl")):
            row[column] = row[column].upper()
            default_flag(row, facility, column, label,
                         ['NO', 'WO', 'DO', 'WD'], 'NO')

        # elev, user_rcpt, bldg_dw, fastall, emis_var
        for column, label, default in ((elev, "elev", 'Y'),
                                       (user_rcpt, "user_rcpt", 'N'),
                                       (bldg_dw, "bldg_dw", 'N'),
                                       (fastall, "fastall", 'N'),
                                       (emis_var, "emis_var", 'N')):
            default_flag(row, facility, column, label, ['Y', 'N'], default)

        # Annual and period start/end
        met_annual = row[annual]

        start_spec_valid = True
        self.period_start_components = ""
        period_start_spec = row[period_start].replace(" ", "")
        if met_annual == "Y":
            if period_start_spec != "":
                Logger.logMessage(
                    "Facility " + facility +
                    ": Period start specified but ignored because annual = 'Y'")
                row[period_start] = ""
                start_spec_valid = False
            else:
                Logger.logMessage("Facility " + facility +
                                  ": Using annual met option.")
        else:
            # 3 or 4 comma separated, all-digit components
            starts = period_start_spec.split(',')
            if not all(s.isdigit() for s in starts):
                start_spec_valid = False

            if len(starts) < 3 or len(starts) > 4:
                start_spec_valid = False
            else:
                self.period_start_components = "".join(
                    c + " " for c in starts)

            if period_start_spec != "" and not start_spec_valid:
                Logger.logMessage("Facility " + facility +
                                  ": Invalid period start specified: " +
                                  period_start_spec)
                Logger.logMessage("Facility " + facility +
                                  ": Aermod will use default.")
                row[period_start] = ""
            else:
                Logger.logMessage("Facility " + facility +
                                  ": Using period start = " +
                                  self.period_start_components)
                if period_start_spec == '':
                    Logger.logMessage(
                        "Aermod will use default in place of blank period start value.")
                row[period_start] = self.period_start_components

        end_spec_valid = True
        self.period_end_components = ""
        period_end_spec = row[period_end].replace(" ", "")
        if met_annual == "Y":
            if period_end_spec != "":
                Logger.logMessage(
                    "Facility " + facility +
                    ": Period end specified but ignored because annual = 'Y'")
                row[period_end] = ""
                end_spec_valid = False
        else:
            ends = period_end_spec.split(',')
            if not all(e.isdigit() for e in ends):
                end_spec_valid = False

            if len(ends) < 3 or len(ends) > 4:
                end_spec_valid = False
            else:
                self.period_end_components = "".join(c + " " for c in ends)

            if period_end_spec != "" and not end_spec_valid:
                Logger.logMessage("Facility " + facility +
                                  ": Invalid period end specified: " +
                                  period_end_spec)
                Logger.logMessage("Facility " + facility +
                                  ": Aermod will use default.")
                row[period_end] = ""
            else:
                Logger.logMessage("Facility " + facility +
                                  ": Using period end = " +
                                  self.period_end_components)
                if period_end_spec == '':
                    Logger.logMessage(
                        "Aermod will use default in place of blank period end value.")
                row[period_end] = self.period_end_components

        # Both specs are non-blank and valid only on the non-annual path, so
        # `starts` and `ends` are bound whenever this branch runs.
        if (period_start_spec != "" and start_spec_valid
                and period_end_spec != "" and end_spec_valid):
            if len(starts) != len(ends):
                Logger.logMessage(
                    "Facility " + facility +
                    ": Inconsistent period start and end specified (both must include hours, or neither): "
                    + period_start_spec + " : " + period_end_spec)
                Logger.logMessage("Facility " + facility +
                                  ": Aermod will use defaults.")
                row[period_start] = ""
                row[period_end] = ""

            start_time = self.get_timestamp(starts)
            end_time = self.get_timestamp(ends)
            if start_time >= end_time:
                Logger.logMessage(
                    "Facility " + facility +
                    ": Inconsistent period start and end specified (start must be before end): "
                    + period_start_spec + " : " + period_end_spec)
                Logger.logMessage("Facility " + facility +
                                  ": Aermod will use defaults.")
                row[period_start] = ""
                row[period_end] = ""

        # write the repaired row back
        df.loc[index] = row

    Logger.logMessage("Uploaded facilities options list file for " +
                      str(len(df)) + " facilities.\n")
    return df
def validate(self, df):
    """Validate the Land Use table; any problem fails the upload.

    Checks, in order: no blank facility id, every facility in
    ``self.gasDryFacs`` is present, no duplicated facility id (per
    ``self.duplicates``), and each of the fields ``D01`` .. ``D36`` holds one
    of the values 1-9. Each failure is logged and shown in a message box.

    :param df: the uploaded Land Use DataFrame
    :return: ``df`` when every check passes, otherwise ``None``
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    blank_rows = df.loc[(df[fac_id] == '')]
    if len(blank_rows) > 0:
        Logger.logMessage("One or more facility IDs are missing in the Land Use List.")
        messagebox.showinfo("Missing facility IDs",
                            "One or more facility IDs are missing in the Land Use List.")
        return None

    # every facility named in gasDryFacs must be present in the upload
    uploaded_ids = set(df[fac_id])
    required_ids = set(self.gasDryFacs)
    if not required_ids.issubset(uploaded_ids):
        Logger.logMessage("Based on your Facility List Options file, the Land Use List is missing " +
                          "one or more facilities. Please correct one or both files and upload again.")
        messagebox.showinfo("Land use list missing",
                            "Based on your Facility List Options file, the Land Use List is missing " +
                            "one or more facilities. Please correct one or both files and upload again.")
        return None

    repeated = self.duplicates(df, [fac_id])
    if len(repeated) > 0:
        Logger.logMessage("One or more records are duplicated in the Land Use List (key=fac_id):")
        messagebox.showinfo("Duplicate records",
                            "One or more records are duplicated in the Land Use List (key=fac_id)")
        for record in repeated:
            Logger.logMessage(record)
        return None

    # field names D01 .. D36, each of which must hold a value from 1 to 9
    sector_fields = ["D%02d" % sector for sector in range(1, 37)]
    allowed_codes = (1, 2, 3, 4, 5, 6, 7, 8, 9)
    for _, record in df.iterrows():
        facility = record[fac_id]
        for field in sector_fields:
            if record[field] in allowed_codes:
                continue
            Logger.logMessage("Facility " + facility + ": Field " + field +
                              " contains invalid value.")
            messagebox.showinfo("Invalid value",
                                "Facility " + facility + ": Field " + field +
                                " contains invalid value.")
            return None

    # figure out how to get fac ids that have landuse based on flag or index
    # TODO

    # check for unassigned landuse
    assigned_ids = set(df[fac_id])
    Logger.logMessage("Uploaded land use data for [" + ",".join(assigned_ids) + "]\n")
    return df
def validate(self, df):
    """Validate the Particle size table; any problem fails the upload.

    Checks, in order:
      * no blank facility id and no blank source id;
      * no duplicated (fac_id, source_id, part_diam) key per
        ``self.duplicates``;
      * every (fac_id, source_id) pair of ``self.hapemis_df`` that belongs
        to a facility in ``self.particleFacilities`` and has
        ``part_frac`` > 0 is present in ``df``;
      * per row: particle diameter > 0, mass fraction within 0..100,
        particle density >= 0;
      * per facility/source: the mass fractions sum to 1;
      * every facility in ``self.particleFacilities`` appears in ``df``.

    Each failure is logged and shown in a message box.

    :param df: the uploaded Particle size DataFrame
    :return: ``df`` when every check passes, otherwise ``None``
    """
    import math  # local import: only this method needs it

    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    if len(df.loc[(df[fac_id] == '')]) > 0:
        Logger.logMessage("One or more facility IDs are missing in the Particle List.")
        messagebox.showinfo("Missing facility IDs",
                            "One or more facility IDs are missing in the Particle List.")
        return None

    if len(df.loc[(df[source_id] == '')]) > 0:
        Logger.logMessage("One or more source IDs are missing in the Particle List.")
        messagebox.showinfo("Missing source IDs",
                            "One or more source IDs are missing in the Particle List.")
        return None

    duplicates = self.duplicates(df, [fac_id, source_id, part_diam])
    if len(duplicates) > 0:
        Logger.logMessage("One or more records are duplicated in the Particle List (key=fac_id, source_id, part_diam):")
        messagebox.showinfo("Duplicate records",
                            "One or more records are duplicated in the Particle List (key=fac_id, source_id, part_diam)")
        for d in duplicates:
            Logger.logMessage(d)
        return None

    # Verify that all particle source id's from hapemis are present in the particle file.
    # The comparison is parenthesised on purpose: `&` binds tighter than `>`,
    # so without the parentheses the mask would be `(isin & part_frac) > 0`.
    needs_particles = (self.hapemis_df[fac_id].isin(self.particleFacilities)
                       & (self.hapemis_df['part_frac'] > 0))
    hapemis_srcs = self.hapemis_df[needs_particles][[fac_id, source_id]].drop_duplicates()
    part_srcs = df[[fac_id, source_id]].drop_duplicates()
    if len(hapemis_srcs.merge(part_srcs)) != len(hapemis_srcs):
        Logger.logMessage("There are some source id's that need particle data that are not in the particle file. " +
                          "Please correct the particle file")
        messagebox.showinfo("Missing source id's",
                            "There are some source id's that need particle data that are not in the particle file. " +
                            "Please correct the particle file")
        return None

    for index, row in df.iterrows():

        facility = row[fac_id]

        if row[part_diam] <= 0:
            Logger.logMessage("Facility " + facility + ": particle diameter value " + str(row[part_diam]) +
                              " out of range.")
            messagebox.showinfo("Value out of range",
                                "Facility " + facility + ": particle diameter value " + str(row[part_diam]) +
                                " out of range.")
            return None

        if row[mass_frac] < 0 or row[mass_frac] > 100:
            Logger.logMessage("Facility " + facility + ": mass fraction value " + str(row[mass_frac]) +
                              " out of range.")
            messagebox.showinfo("Value out of range",
                                "Facility " + facility + ": mass fraction value " + str(row[mass_frac]) +
                                " out of range.")
            return None

        if row[part_dens] < 0:
            Logger.logMessage("Facility " + facility + ": particle density value " + str(row[part_dens]) +
                              " out of range.")
            messagebox.showinfo("Value out of range",
                                "Facility " + facility + ": particle density value " + str(row[part_dens]) +
                                " out of range.")
            return None

    # check for mass frac sum to 1
    # NOTE(review): the range check above accepts 0..100 and the message below
    # says "100%", yet the sum is compared with 1 -- confirm the unit of
    # mass_frac. The target is left unchanged here.
    incomplete = []
    for fac in set(df[fac_id].tolist()):
        fac_search = df[df[fac_id] == fac]
        for s in set(fac_search[source_id].tolist()):
            mass_fracs = fac_search[fac_search[source_id] == s][mass_frac].tolist()
            # tolerant comparison: e.g. 0.1 + 0.2 + 0.7 is not exactly 1.0
            if not math.isclose(sum(mass_fracs), 1):
                incomplete.append(str(fac) + ': ' + str(s))

    if len(incomplete) > 0:
        Logger.logMessage("The mass fraction for " + ", ".join(incomplete) +
                          " does not sum to 100%. Please correct them in your " +
                          "particle size file.")
        messagebox.showinfo("Mass fraction error",
                            "The mass fraction for " + ", ".join(incomplete) +
                            " does not sum to 100%. Please correct them in your " +
                            "particle size file.")
        return None

    # check for unassigned particle
    check_particle_assignment = set(df[fac_id])

    # Particle size file can have extra facilities.
    # Coerced to a set first so any iterable of facility ids works.
    particle_unassigned = set(self.particleFacilities) - check_particle_assignment
    if particle_unassigned:
        Logger.logMessage("Particle size data for facilities: " +
                          ", ".join(particle_unassigned) + " have not been assigned. " +
                          "Please edit the particle size file.")
        messagebox.showinfo("Particle size data",
                            "Particle size data for facilities, " +
                            ", ".join(particle_unassigned) + " have not been assigned. " +
                            "Please edit the particle size file.")
        return None

    Logger.logMessage("Uploaded particle data for [" + ",".join(check_particle_assignment) + "]\n")
    return df
def validate(self, df):
    """Validate the Buoyant Line Parameters table; any problem fails the upload.

    Checks, in order: no blank facility id, no duplicated facility id (per
    ``self.duplicates``), per-row range checks on the averaged building/line
    values, and finally that the facilities in ``df`` are exactly the
    facilities having a source of type 'B' in ``self.emisloc_df``.

    :param df: the uploaded Buoyant Line Parameters DataFrame
    :return: ``df`` when every check passes, otherwise ``None``
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    blank_rows = df.loc[(df[fac_id] == '')]
    if len(blank_rows) > 0:
        Logger.logMessage(
            "One or more facility IDs are missing in the Buoyant Line List.")
        messagebox.showinfo(
            "Missing facility IDs",
            "One or more facility IDs are missing in the Buoyant Line List.")
        return None

    repeated = self.duplicates(df, [fac_id])
    if len(repeated) > 0:
        Logger.logMessage(
            "One or more records are duplicated in the Buoyant Line Parameters List (key=fac_id):")
        messagebox.showinfo(
            "Duplicate records",
            "One or more records are duplicated in the Buoyant Line Parameters List (key=fac_id):")
        for record in repeated:
            Logger.logMessage(record)
        return None

    def non_positive(value):
        return value <= 0

    def negative(value):
        return value < 0

    # (column, wording used in the message, test that flags a bad value),
    # applied to every row in this order
    range_checks = (
        (avgbld_len, "avg building length", non_positive),
        (avgbld_hgt, "avg building height", non_positive),
        (avgbld_wid, "avg building width", non_positive),
        (avglin_wid, "avg line width", non_positive),
        (avgbld_sep, "avg building separation", negative),
        (avgbuoy, "avg buoyancy", non_positive),
    )

    for _, record in df.iterrows():
        facility = record[fac_id]
        for column, wording, out_of_range in range_checks:
            if not out_of_range(record[column]):
                continue
            Logger.logMessage("Facility " + facility + ": " + wording + " " +
                              str(record[column]) + " out of range.")
            messagebox.showinfo(
                "Out of Range",
                "Facility " + facility + ": " + wording + " " +
                str(record[column]) + " out of range.")
            return None

    # check for unassigned buoyant line
    uploaded_facs = set(df[fac_id])

    # get buoyant line facility list
    buoyant_rows = self.emisloc_df[self.emisloc_df[source_type] == 'B']
    buoyant_facs = set(buoyant_rows[fac_id])

    if uploaded_facs == buoyant_facs:
        Logger.logMessage("Uploaded buoyant line parameters for [" +
                          ",".join(uploaded_facs) + "]\n")
        return df

    # only facilities present in the upload but not buoyant in emisloc are listed
    unassigned = set(uploaded_facs - buoyant_facs)
    messagebox.showinfo(
        "Unassigned buoyant Line parameters",
        "buoyant Line parameters for " + ", ".join(unassigned) +
        " have not been assigned. Please edit the 'source_type' column" +
        " in the Emissions Locations file.")
    return None
def validate(self, df):
    """Validate the HAP Emissions table, defaulting or dropping what it can.

    Strict checks (logged, shown in a message box, return ``None``): no
    blank facility id, no duplicated (fac_id, source_id, pollutant) key per
    ``self.duplicates``, every id in ``self.fac_ids`` present, no blank
    source id, no blank pollutant.

    Defaulted values: negative emissions become 0; a particulate fraction
    outside 0..1 becomes 0.

    Pollutants that are not found (case-insensitively) in
    ``self.haplib.dataframe`` trigger a yes/no dialog. "Yes" aborts the
    upload with ``None``. "No" removes those pollutants from the returned
    frame, records the removal in ``self.log`` and writes the removed rows
    to ``output/DR_HAP_ignored.log``.

    Side effect: ``self.log`` is reset to an empty list.

    :param df: the uploaded HAP Emissions DataFrame
    :return: the validated (possibly filtered) DataFrame, or ``None``
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    if len(df.loc[(df[fac_id] == '')]) > 0:
        Logger.logMessage(
            "One or more facility IDs are missing in the HAP Emissions List.")
        messagebox.showinfo(
            "Missing facility IDs",
            "One or more facility IDs are missing in the HAP Emissions List.")
        return None

    duplicates = self.duplicates(df, [fac_id, source_id, pollutant])
    if len(duplicates) > 0:
        Logger.logMessage(
            "One or more records are duplicated in the HAP Emissions List (key=fac_id, source_id, pollutant):")
        messagebox.showinfo(
            "Duplicate records",
            "One or more records are duplicated in the HAP Emissions List (key=fac_id, source_id, pollutant):")
        for d in duplicates:
            Logger.logMessage(d)
        return None

    hapfids = set(df[fac_id])
    if self.fac_ids.intersection(hapfids) != self.fac_ids:
        Logger.logMessage(
            "Based on your Facility List Options file, the HAP Emissions List is missing " +
            "one or more facilities. Please correct one or both files and upload again.")
        messagebox.showinfo(
            "Missing facilities",
            "Based on your Facility List Options file, the HAP Emissions List is missing " +
            "one or more facilities. Please correct one or both files and upload again.")
        return None

    if len(df.loc[(df[source_id] == '')]) > 0:
        Logger.logMessage(
            "One or more source IDs are missing in the HAP Emissions List.")
        messagebox.showinfo(
            "Missing source IDs",
            "One or more source IDs are missing in the HAP Emissions List.")
        return None

    if len(df.loc[(df[pollutant] == '')]) > 0:
        Logger.logMessage(
            "One or more pollutants are missing in the HAP Emissions List.")
        messagebox.showinfo(
            "Missing pollutants",
            "One or more pollutants are missing in the HAP Emissions List.")
        return None

    # ----------------------------------------------------------------------------------
    # Defaulted: Invalid values in these columns will be replaced with a default.
    # ----------------------------------------------------------------------------------
    for index, row in df.iterrows():

        facility = row[fac_id]

        if row[emis_tpy] < 0:
            Logger.logMessage("Facility " + facility + ": emissions value " +
                              str(row[emis_tpy]) +
                              " out of range. Defaulting to 0.")
            row[emis_tpy] = 0

        if row[part_frac] < 0 or row[part_frac] > 1:
            # the offending fraction is reported multiplied by 100
            Logger.logMessage("Facility " + facility +
                              ": particulate fraction value " +
                              str(row[part_frac] * 100) +
                              " out of range. Defaulting to 0.")
            row[part_frac] = 0

        df.loc[index] = row

    # verify pollutants are present in dose library (case-insensitive);
    # a set makes each lookup O(1)
    known_lower = {x.lower() for x in self.haplib.dataframe[pollutant]}
    missing_pollutants = [hap for hap in set(df[pollutant])
                          if hap.lower() not in known_lower]

    self.log = []

    # if there are any missing pollutants...
    if len(missing_pollutants) > 0:
        fix_pollutants = messagebox.askyesno(
            "Missing Pollutants in Dose Response Library",
            "The following pollutants were not found in HEM4's Dose " +
            "Response Library: " + ', '.join(missing_pollutants) +
            ".\n Would you like to amend your HAP Emissions file?" +
            "(they will be removed otherwise). ")

        if fix_pollutants:
            Logger.logMessage(
                "Aborting upload of HAP emissions pending resolution of missing pollutants.")
            messagebox.showinfo(
                "Aborting upload",
                "Aborting upload of HAP emissions pending resolution of missing pollutants.")
            return None

        Logger.logMessage(
            "Removing these pollutants, which were not found: " +
            "[{0}]".format(", ".join(str(i) for i in missing_pollutants)))

        # remove them from data frame
        # to separate log file the non-modeled HAP Emissions
        # NOTE: '__file__' is a string literal here, so this resolves against
        # the current working directory, not this module's location.
        fileDir = os.path.dirname(os.path.realpath('__file__'))
        outputDir = os.path.join(fileDir, "output")
        os.makedirs(outputDir, exist_ok=True)
        filename = os.path.join(outputDir, "DR_HAP_ignored.log")

        with open(filename, 'w') as logfile:
            logfile.write(str(datetime.now()) + ":\n")

            # missing_pollutants has no repeats: it was built from a set
            for p in missing_pollutants:
                # capture the rows BEFORE dropping them so the log holds
                # the facility and other details of what was ignored
                ignored = df[df[pollutant] == p]
                df = df[df[pollutant] != p]

                # record upload in log
                self.log.append("Removed " + p + " from hap emissions file\n")
                logfile.write("Removed: " + str(ignored))

    Logger.logMessage("Uploaded HAP emissions file for " + str(len(df)) +
                      " source-HAP combinations.\n")
    return df
def validate(self, df):
    """Validate the Emissions Variation table; any problem fails the upload.

    Checks, in order:
      * no blank facility id and no blank source id;
      * every row's 'variation' is one of the supported keywords;
      * every facility flagged ``emis_var == 'Y'`` in ``self.faclist_df``
        appears in ``df``;
      * every facility/source pair of ``df`` exists in ``self.emisloc_df``;
      * SEASON / WSPEED / MONTH rows carry exactly 4 / 6 / 12 non-null
        values after the first three columns;
      * when any of HROFDY / SEASHR / SHRDOW / SHRDOW7 is used, the width
        check on the remaining variation types.

    :param df: the uploaded Emissions Variation DataFrame
    :return: ``df`` when every check passes, otherwise ``None``
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    if len(df.loc[(df[fac_id] == '')]) > 0:
        Logger.logMessage(
            "One or more facility IDs are missing in the Emissions Variations List.")
        messagebox.showinfo(
            "Missing facility IDs",
            "One or more facility IDs are missing in the Emissions Variations List.")
        return None

    if len(df.loc[(df[source_id] == '')]) > 0:
        Logger.logMessage(
            "One or more source IDs are missing in the Emissions Variations List.")
        messagebox.showinfo(
            "Missing source IDs",
            "One or more source IDs are missing in the Emissions Variations List.")
        return None

    # supported variation keywords (built once, not per row)
    valid = [
        'SEASON', 'MONTH', 'HROFDY', 'WSPEED', 'SEASHR', 'HRDOW', 'HRDOW7',
        'SHRDOW', 'SHRDOW7', 'MHRDOW', 'MHRDOW7'
    ]
    for index, row in df.iterrows():
        facility = row[fac_id]
        if row['variation'] not in valid:
            Logger.logMessage("Facility " + facility +
                              ": variation value invalid.")
            messagebox.showinfo(
                "Variation invalid",
                "Facility " + facility + ": variation value invalid.")
            return None

    #-----------------------------------------------------------------------------------------------------
    # Confirm that all facilities needing emission variation according to the Facility List
    # are in the emission variation file.

    # facilities in emission variation file
    var_facs = set(df[fac_id])

    # facilities needing emission variation
    faclist_facs = set(
        self.faclist_df[self.faclist_df['emis_var'] == 'Y'][fac_id])

    missing = faclist_facs - var_facs
    if missing:
        Logger.logMessage(
            "One or more facilities in the Facility List file that need " +
            "emission variation are not in the emission variation file. These facilities are: "
            + ", ".join(missing) +
            ". Please edit the emission variation file or Facility List file.")
        messagebox.showinfo(
            "Missing facilities in Emission Variation",
            "One or more facilities in the Facility List file that need " +
            "emission variation are not in the emission variation file. These facilities are: "
            + ", ".join(missing) +
            ". Please edit the emission variation file or Facility List file.")
        return None

    #-----------------------------------------------------------------------------------------------------
    # Make sure all facility/source ids from emission variation file are also in
    # the emission location file

    # facility/source ids from emission variation file, as "fac,source" strings
    var_ids = set(df[[fac_id, source_id]].apply(lambda x: ','.join(x),
                                                axis=1).tolist())

    # facility/source ids from emission location file
    model_ids = set(self.emisloc_df[[fac_id, source_id]].apply(
        lambda x: ','.join(x), axis=1).tolist())

    missing = var_ids.difference(model_ids)
    if len(missing) > 0:
        messagebox.showinfo(
            "Missing Emission Location", "The emission " +
            "variation file indicates variation for facility/source ids " +
            ", ".join(missing) + " which are not in the " +
            "emissions location file. Please edit " +
            "the emissions variation or emissions location " + " file.")
        return None

    vtype = df['variation'].tolist()

    # Variation types with a fixed number of values:
    # (keyword, required count, dialog title, start of the dialog text)
    fixed_counts = (
        ('SEASON', 4, "Seasonal Emissions Variation",
         "Seasonal emissions variations require 4 "),
        ('WSPEED', 6, "Wind Speed Emissions Variation",
         "Wind speed emissions variations require 6 "),
        ('MONTH', 12, "Monthly Emissions Variation",
         "Monthly emissions variations require 12 "),
    )
    for keyword_name, required, title, lead in fixed_counts:
        if keyword_name not in vtype:
            continue

        selected = df[df['variation'].str.upper() == keyword_name]
        # non-null cells after the first three columns are the values
        wrong = [record[source_id] for _, record in selected.iterrows()
                 if len(record.dropna().values[3:]) != required]

        if len(wrong) > 0:
            messagebox.showinfo(
                title,
                lead + "values. Sources: " + ", ".join(wrong) +
                " do not have the correct number of values. " +
                "Please update your Emission Variation File.")
            return None

    if 'HROFDY' in vtype or 'SEASHR' in vtype or 'SHRDOW' in vtype or 'SHRDOW7' in vtype:

        other = df[~df['variation'].isin(['MONTH', 'WSPEED', 'SEASON'])]
        variation = other[other.columns[3:]].values

        # NOTE(review): len(row) is the number of value columns, which is the
        # same for every row and ignores blanks -- confirm this is intended.
        o_wrong = sum(1 for row in variation if len(row) != 12)

        if o_wrong > 0:
            messagebox.showinfo(
                "Emissions Variation Error",
                "One of the emissions variations type does " +
                "not have the correct number of values. " +
                "Please check your input file to make all " +
                "values are either a multiple or factor " + "of 12.")
            return None

    Logger.logMessage("Uploaded emissions variations for [" +
                      ",".join(var_ids) + "]\n")
    return df
def validate(self, df):
    """Validate an uploaded building downwash table.

    The checks run in this order and stop at the first failure:

    1. no empty-string values in the ``fac_id`` or ``source_id`` columns;
    2. no duplicated (fac_id, source_id, keyword) records, as reported by
       ``self.duplicates``;
    3. per row: ``section`` equals 'SO', ``keyword`` is a recognised
       downwash keyword, and for the building height/width/length keywords
       none of ``value_1`` .. ``value_36`` is negative;
    4. every facility whose ``bldg_dw`` is "Y" in ``self.faclist_df``
       appears in the downwash table;
    5. the downwash sources that also appear in ``self.emisloc_df`` do not
       include any of the source types V, B, A, N or I, and at least one
       such source exists.

    Each failure is written to the log and shown in a message box.

    :param df: downwash DataFrame to check.
    :return: ``df`` unchanged when every check passes, otherwise ``None``.
    """

    def reject(title, message):
        # Every failure is reported twice: to the log and to the user.
        Logger.logMessage(message)
        messagebox.showinfo(title, message)
        return None

    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    if len(df.loc[df[fac_id] == '']) > 0:
        return reject(
            "Missing Facility IDs",
            "One or more facility IDs are missing in the Downwash List.")

    if len(df.loc[df[source_id] == '']) > 0:
        return reject(
            "Missing source IDs",
            "One or more source IDs are missing in the Downwash List.")

    duplicates = self.duplicates(df, [fac_id, source_id, keyword])
    if len(duplicates) > 0:
        # The log line ends with a colon because the offending records
        # are listed right after it; the dialog text does not.
        Logger.logMessage(
            "One or more records are duplicated in the Downwash List "
            "(key=fac_id, source_id, keyword):")
        messagebox.showinfo(
            "Duplicate records",
            "One or more records are duplicated in the Downwash List "
            "(key=fac_id, source_id, keyword)")
        for d in duplicates:
            Logger.logMessage(d)
        return None

    # Loop invariants, built once instead of once per row.
    valid_keywords = ('BUILDHGT', 'BUILDWID', 'BUILDLEN', 'XBADJ', 'YBADJ')
    constrained_keywords = ('BUILDHGT', 'BUILDWID', 'BUILDLEN')
    value_fields = ["value_" + str(num) for num in range(1, 37)]

    for _, row in df.iterrows():
        if row[section] != 'SO':
            return reject(
                "Invalid section",
                "Invalid section " + str(row[section]) + ".")

        if row[keyword] not in valid_keywords:
            return reject(
                "Invalid keyword",
                "Invalid keyword " + str(row[keyword]) + ".")

        # Only the building dimension keywords are required to be
        # non-negative; the value columns of other keywords are not read.
        if row[keyword] in constrained_keywords:
            for field in value_fields:
                value = row[field]
                if value < 0:
                    return reject(
                        "Invalid values",
                        "Invalid down wash value " + str(value) + ".")

    # check for unassigned downwash
    downwash_facs = set(df[fac_id])
    flagged = self.faclist_df[self.faclist_df[bldg_dw] == "Y"]
    flagged_facs = set(flagged[fac_id])

    unassigned = flagged_facs - downwash_facs
    if unassigned:
        # Sorted so the log and dialog text is the same on every run.
        return reject(
            "Unassigned building downwash",
            "Building downwash parameters for facilities, "
            + ", ".join(sorted(unassigned))
            + ", have not been assigned. Please edit the"
            + " 'bldgdw' column in the Facilities List Option"
            + " file or add these facilities to the downwash file.")

    # Downwash can only be used on point sources
    facs_to_use = list(downwash_facs.intersection(flagged_facs))
    downwash_sources = df[df[fac_id].isin(facs_to_use)][[fac_id, source_id]]
    d_in_e = pd.merge(self.emisloc_df, downwash_sources,
                      how="inner", on=[fac_id, source_id])
    found_srctypes = set(d_in_e['source_type'])
    invalid_srctypes = {'V', 'B', 'A', 'N', 'I'}
    if not invalid_srctypes.isdisjoint(found_srctypes):
        return reject(
            "Invalid sources",
            "AERMOD models building downwash from point sources only "
            "(i.e., vertical P, horizontal H, or capped C point sources). "
            "Your building dimensions file includes non-point sources. "
            "Please edit your building dimensions file to remove all non-point sources.")

    # Also check that if downwash was called for, then the downwash file
    # contains point source sourceids found in the emisloc file.
    # NOTE(review): this branch is also reached when no facility has
    # bldg_dw == "Y" -- confirm validate is only called in that situation
    # when downwash was actually requested.
    if d_in_e.empty:
        return reject(
            "No downwash sources",
            "Building downwash is called for, but the downwash file does not contain any point source type "
            "source id's. Please add appropriate source id's to the downwash file or turn off downwash.")

    Logger.logMessage("Uploaded building downwash parameters for ["
                      + ",".join(sorted(downwash_facs)) + "]\n")
    return df
def validate(self, df):
    """Validate an uploaded Target Organs table.

    Two checks are made, in order:

    1. ``self.duplicates`` reports no records duplicated on the
       ``pollutant`` column;
    2. every pollutant whose ``rfc`` value in ``self.haplib_df`` is greater
       than zero is present in the ``pollutant`` column of ``df``.

    Problems are written to the log, one offending entry per line.

    :param df: Target Organs DataFrame to check.
    :return: ``df`` unchanged when both checks pass, otherwise ``None``.
    """
    # ----------------------------------------------------------------------------------
    # Strict: Invalid values in these columns will cause the upload to fail immediately.
    # ----------------------------------------------------------------------------------
    repeated = self.duplicates(df, [pollutant])
    if len(repeated) > 0:
        Logger.logMessage(
            "One or more records are duplicated in the Target Organs file (key=pollutant):")
        for entry in repeated:
            Logger.logMessage(entry)
        Logger.logMessage("Please remove the duplicate records and restart HEM4.")
        return None

    # Verify that no non-cancer causing pollutants are missing
    noncancer_rows = self.haplib_df.loc[self.haplib_df[rfc] > 0]
    required = set(noncancer_rows[pollutant])
    provided = set(df[pollutant].unique())

    absent = required - provided
    if not absent:
        return df

    Logger.logMessage(
        "There are non-cancer causing pollutants in the Dose Response file that are not "
        "present in the Target Organs file:")
    for name in absent:
        Logger.logMessage(name)
    Logger.logMessage(
        "Please augment the Target Organs file with these pollutants and restart HEM4.")
    return None