    def __init__(self, runid, model):
        # Store the run id and model on the instance and derive the per-run
        # save folder path "save/<runid>/", kept in self.save_folder.
        #
        # runid -- concatenated into the folder path, so it must be a str.
        # model -- kept as self.model.
        #
        # NOTE(review): from the "if os.path.exists" line onward this method
        # does not parse as it stands: the `if` has no body, the `except` has
        # no matching `try:` and the ''.join( call is never closed. Lines
        # appear to have been lost; restore them from version control before
        # relying on this method.

        self.runid = runid
        self.model = model

        #create folder to save files in
        #set output folder
        save_folder = "save/" + self.runid + "/"
        # NOTE(review): this message is printed before any folder creation
        # that is visible in this block.
        print('created save folder')
        self.save_folder = save_folder


            # NOTE(review): a `try:` presumably opened here, and both branches
            # of this existence check are missing.
            if os.path.exists(save_folder):


        except BaseException as ex:

            exception = ex
            # NOTE(review): the argument of join( is missing; the variable
            # name suggests a formatted traceback -- confirm.
            fullStackInfo = ''.join(

            message = "An error occurred while running a facility:\n" + fullStackInfo

            # NOTE(review): as written this overwrites the error message built
            # above and neither message is emitted anywhere; presumably an
            # `else:` and the logging calls were lost.
            message = "created " + self.save_folder
    def save_model(self, facid):
        # Pickle the facility ids that are still pending into
        # "<save_folder>/remaining_facs.pkl".
        #
        # facid -- str, the facility id being removed; it is only used in the
        #          log message. The list that is saved is model.facids minus
        #          its first entry, whatever facid is.

        Logger.logMessage("removing fac id:" + facid)

        # Everything after the first facility id is what remains.
        remaining_facs = self.model.facids.tolist()[1:]
        remaining_loc = self.save_folder + "/remaining_facs.pkl"

        # The context manager closes the file even if pickling raises; the
        # previous version never closed the handle.
        with open(remaining_loc, 'wb') as fachandler:
            pickle.dump(remaining_facs, fachandler)

        print("Saved facs")
    def createReport(self, categoryFolder, reportName, arguments=None):
        # Look up the report class named reportName in self.availableReports
        # and instantiate it for categoryFolder, recording the outcome in
        # self.status.
        #
        # categoryFolder -- passed to determineAltRec() and to the report class.
        # reportName     -- key into self.availableReports / self.afterReportRun
        #                   and the attribute name fetched from the module.
        # arguments      -- forwarded to the report class as the second element
        #                   of reportArgs.
        #
        # NOTE(review): this block does not parse as it stands -- the indented
        # section below has no `try:` to match the `except`, and the final
        # `self.status = True` has lost whatever introduced it (presumably an
        # `else:`). Restore the missing lines from version control.

        # Multipathway is the only summary that has two implementation classes, one for the standard
        # case and one for when alternate receptors are used. But we don't expose that split
        # to users, therefore we run the alt rec summary when needed and determine that here. Since we can
        # assume that all facilities run in the same category used alternate receptors (or not...)
        # we only need to check the first one to decide.

        #reset status
        self.status = False
            # First determine if alternate receptors were used
            altrec = self.determineAltRec(categoryFolder)
            if altrec == 'Y' and reportName == 'MultiPathway':
                reportName = "MultiPathwayNonCensus"
#            Logger.logMessage("Starting report: " + reportName)
            module = self.availableReports[reportName]
            # NOTE(review): after logging, execution falls through to getattr()
            # on a None module; a `return` may have been lost here -- confirm.
            if module is None:
                Logger.logMessage("Oops. HEM4 couldn't find your report module.")
            reportClass = getattr(module, reportName)
            reportArgs = [self.grpname, arguments]
            # NOTE(review): `instance` is never used in the visible code; the
            # call that actually produces the report appears to be missing.
            instance = reportClass(categoryFolder, self.facilityIds, reportArgs)
#            Logger.logMessage("Finished report: " + reportName)
            if reportName in self.afterReportRun:
                Logger.logMessage("Running post-report action for " + reportName)
                # NOTE(review): `action` is looked up but never invoked in the
                # visible code.
                action = self.afterReportRun[reportName]
                Logger.logMessage("Finished post-report action for " + reportName)
        except Exception as e:
             # NOTE(review): the formatted traceback in `var` is not logged in
             # the visible code.
             var = traceback.format_exc()
             Logger.logMessage("An error occured while creating report: " + reportName)
             self.status = False
            self.status = True
    def validate(self, df):
        # Validate the Dose Response dataframe.
        #
        # Returns df when no pollutant appears more than once, otherwise logs
        # the duplicates and returns None so that the upload fails.
        #
        # NOTE(review): the reviewed copy of this method did not parse (bare
        # string arguments with no call, a `for` with no body, `return df`
        # placed after `return None`). The Logger.logMessage calls and the
        # per-duplicate log line were restored on the assumption that they
        # match the other validators; confirm against version control.

        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        duplicates = self.duplicates(df, [pollutant])
        if len(duplicates) > 0:
            Logger.logMessage(
                "One or more records are duplicated in the Dose Response file (key=pollutant):"
            )
            for d in duplicates:
                Logger.logMessage(str(d))
            Logger.logMessage(
                "Please remove the duplicate records and restart HEM4.")
            return None

        return df
    def validate(self, df):
        # Validate the Facility List Options dataframe in two passes.
        #   * Strict checks (blank fac_id, met station membership, duplicate
        #     fac_id) end the upload by returning None.
        #   * Defaulted checks walk every row, replace out-of-range option
        #     values with defaults, write the row back into df and finally
        #     return df.
        #
        # NOTE(review): this copy of the method does not parse. Many call
        # heads (the other validators in this file use Logger.logMessage( and
        # messagebox.showinfo( in the same positions), several `else:` lines
        # and a few trailing arguments are missing; each spot is flagged
        # below. Restore them from version control rather than by hand.

        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            # NOTE(review): the call heads opening the next lines are missing.
                "One or more facility IDs are missing in the Facility List.")
                "Missing facility IDs",
                "One or more facility IDs are missing in the Facility List.")
            return None

        # Referenced met station names are upper-cased and blank entries are
        # dropped before the membership test.
        files = df[met_station].values.tolist()
        files = [file.upper() for file in files if file != '']
        # NOTE(review): the argument of issubset( (the collection of known met
        # stations) and the reporting call heads are missing here.
        if not set(files).issubset(
                "One or more met stations referenced in the Facility List are invalid."
                "Invalid met station",
                "One or more met stations referenced in the Facility List are invalid."
            return None

        duplicates = self.duplicates(df, [fac_id])
        if len(duplicates) > 0:
            # NOTE(review): call heads missing, and the loop below has no body.
                "One or more records are duplicated in the Facility List (key=fac_id):"
                "One or more records are duplicated in the Facility List (key=fac_id)"
            for d in duplicates:
            return None

        # ----------------------------------------------------------------------------------
        # Defaulted: Invalid values in these columns will be replaced with a default.
        # ----------------------------------------------------------------------------------
        for index, row in df.iterrows():

            facility = row[fac_id]

            # urban and urban_pop..note that the rural_urban value can be blank, and in this case we will
            # leave it blank here (it will be defaulted in Runstream based on census data)
            valid = ['U', 'R', '']
            if not row[rural_urban] in valid:
                # NOTE(review): call head missing.
                    "Facility " + facility +
                    ": rural_urban value invalid. Will be defaulted based on census data."
                row[rural_urban] = ""

            if row[rural_urban] == 'U':
                if row[urban_pop] <= 0:
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Invalid value (urban_pop): Defaulting to 50,000.")
                    row[urban_pop] = 50000

            # Modeled Distance of Receptors
            if row[model_dist] > 50000 or row[model_dist] <= 0:
                Logger.logMessage("Facility " + facility +
                                  ": model distance value " +
                                  str(row[model_dist]) +
                                  " out of range. Defaulting to 3000.")
                row[model_dist] = 3000

            # maximum distance and modeled distance are related...
            if row[max_dist] > 50000 or row[max_dist] <= 0:
                Logger.logMessage("Facility " + facility +
                                  ": max distance value " +
                                  str(row[max_dist]) +
                                  " out of range. Defaulting to 50000.")
                row[max_dist] = 50000
            elif row[model_dist] > row[max_dist]:
                # NOTE(review): call head missing.
                    "Facility " + facility + ": model distance value " +
                    str(row[model_dist]) +
                    " is larger than maximum distance. Defaulting max_dist to 50000."
                row[max_dist] = 50000

            # Radials: default is 16, minimum number is 4
            if row[radial] == 0:
                Logger.logMessage("Facility " + facility + ": radial value " +
                                  str(row[radial]) +
                                  " out of range. Defaulting to 16.")
                row[radial] = 16

            if row[radial] < 4:
                Logger.logMessage("Facility " + facility + ": radial value " +
                                  str(row[radial]) +
                                  " out of range. Defaulting to 4.")
                row[radial] = 4

            # Circles: default is 13, minimum number is 3
            if row[circles] == 0:
                Logger.logMessage("Facility " + facility + ": circles value " +
                                  str(row[circles]) +
                                  " out of range. Defaulting to 13.")
                row[circles] = 13

            if row[circles] < 3:
                Logger.logMessage("Facility " + facility + ": circles value " +
                                  str(row[circles]) +
                                  " out of range. Defaulting to 3.")
                row[circles] = 3

            # Overlap Distance
            # All three branches below log the same message and apply the same
            # default of 30.
            if row[overlap_dist] == 0:
                Logger.logMessage("Facility " + facility +
                                  ": overlap distance value " +
                                  str(row[overlap_dist]) +
                                  " out of range. Defaulting to 30.")
                row[overlap_dist] = 30
            elif row[overlap_dist] < 1:
                Logger.logMessage("Facility " + facility +
                                  ": overlap distance value " +
                                  str(row[overlap_dist]) +
                                  " out of range. Defaulting to 30.")
                row[overlap_dist] = 30
            elif row[overlap_dist] > 500:
                Logger.logMessage("Facility " + facility +
                                  ": overlap distance value " +
                                  str(row[overlap_dist]) +
                                  " out of range. Defaulting to 30.")
                row[overlap_dist] = 30

            # ring1
            if row[ring1] < 100 or row[ring1] > row[max_dist]:
                Logger.logMessage("Facility " + facility + ": ring1 value " +
                                  str(row[ring1]) +
                                  " out of range. Defaulting to 100.")
                row[ring1] = 100

            # Facility center...comma separated list that should start with either "U" (meaning UTM coords) or "L"
            # (meaning lat/lon) and contain two values if lat/lon (lat,lon) or three values if UTM
            # (northing,easting,zone)
            center_spec = row[fac_center]
            spec_valid = True
            # The expected counts below are one higher than the comment above
            # states, presumably because the leading "U"/"L" is its own
            # comma-separated token.
            if center_spec.upper().startswith("U"):
                components = center_spec.split(',')
                if len(components) != 4:
                    spec_valid = False
            elif center_spec.upper().startswith("L"):
                components = center_spec.split(',')
                if len(components) != 3:
                    spec_valid = False
                # NOTE(review): this unconditional reset looks like the body of
                # a lost `else:`; as written every "L" spec is marked invalid.
                spec_valid = False

            if center_spec != "" and not spec_valid:
                # NOTE(review): the tail of the first call and the head of the
                # second call are missing.
                Logger.logMessage("Facility " + facility +
                                  ": Invalid facility center specified: " +
                    "Facility " + facility +
                    ": Using default (calculated) center instead.")
                row[fac_center] = ""

            # Ring distances...comma separated list that contains at least 3 values, all must be > 0 and <= 50000, and
            # values must be increasing
            distance_spec = row['ring_distances']
            spec_valid = True
            distances = distance_spec.split(',')
            if len(distances) < 3:
                spec_valid = False
                # NOTE(review): the lines from here to the end of the `for`
                # loop presumably belonged to a lost `else:`; as written they
                # run only when fewer than 3 distances were given.
                ring_distance = int(float(distances[0]))
                if row[model_dist] < ring_distance:
                    # NOTE(review): call heads missing.
                        "Facility " + facility +
                        ": Error: First ring is greater than modeling distance!"
                        "Modeling distance error", "Facility " + facility +
                        ": Error: First ring is greater than modeling distance!"
                    spec_valid = False
                # Each later ring must exceed the previous one and stay within
                # 50000.
                prev = 0
                for d in distances[1:]:
                    ring_distance = int(float(d))
                    if ring_distance <= prev or ring_distance > 50000:
                        spec_valid = False
                    prev = ring_distance

            if distance_spec != "" and not spec_valid:
                # NOTE(review): the tail of the first call and the head of the
                # second call are missing.
                Logger.logMessage("Facility " + facility +
                                  ": Invalid ring distances specified: " +
                    "Facility " + facility +
                    ": Using default (calculated) distances instead.")
                row['ring_distances'] = ""

            # If there are user supplied ring distances then the last one must equal max distance
            # for correct outer block interpolation
            if row['ring_distances'] != "":
                distlist = row['ring_distances'].split(",")
                if float(distlist[-1]) != row[max_dist]:
                    maxdist_str = "," + str(row[max_dist])
                    row['ring_distances'] += maxdist_str

            # Acute
            valid = ['Y', 'N']
            if row[acute] not in valid:
                # NOTE(review): call head missing.
                    "Facility " + facility +
                    ": Invalid value for acute. Defaulting to 'N'.")
                row[acute] = 'N'

            # Hours
            valid = [1, 2, 3, 4, 6, 8, 12, 24]
            if row[hours] not in valid:
                # NOTE(review): call head missing.
                    "Facility " + facility +
                    ": Invalid value for hours. Defaulting to 1.")
                row[hours] = 1

            if row[acute] == 'Y':
                if row[multiplier] <= 0:
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Invalid value for multiplier. Defaulting to 10.")
                    row[multiplier] = 10
                if row[hivalu] <= 0:
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Invalid value for high value. Defaulting to 1.")
                    # NOTE(review): the message is about hivalu but the
                    # assignment targets multiplier -- likely should be
                    # row[hivalu]; confirm.
                    row[multiplier] = 1

            # pdep, pdepl, vdep, vdepl
            # The four values are upper-cased before being compared.
            # NOTE(review): every message in this group has lost its call head.
            valid = ['NO', 'WO', 'DO', 'WD']
            row[vdep] = row[vdep].upper()
            row[vdepl] = row[vdepl].upper()
            row[pdep] = row[pdep].upper()
            row[pdepl] = row[pdepl].upper()
            if row[vdep] not in valid:
                    "Facility " + facility +
                    ": Invalid value for vdep. Defaulting to 'NO'.")
                row[vdep] = 'NO'
            if row[vdepl] not in valid:
                    "Facility " + facility +
                    ": Invalid value for vdepl. Defaulting to 'NO'.")
                row[vdepl] = 'NO'
            if row[pdep] not in valid:
                    "Facility " + facility +
                    ": Invalid value for pdep. Defaulting to 'NO'.")
                row[pdep] = 'NO'
            if row[pdepl] not in valid:
                    "Facility " + facility +
                    ": Invalid value for pdepl. Defaulting to 'NO'.")
                row[pdepl] = 'NO'

            # elev, user_rcpt, bldg_dw, fastall, emis_var
            # NOTE(review): every message in this group has lost its call head.
            valid = ['Y', 'N']
            if row[elev] not in valid:
                    "Facility " + facility +
                    ": Invalid value for elev. Defaulting to 'Y'.")
                row[elev] = 'Y'
            if row[user_rcpt] not in valid:
                    "Facility " + facility +
                    ": Invalid value for user_rcpt. Defaulting to 'N'.")
                row[user_rcpt] = 'N'
            if row[bldg_dw] not in valid:
                    "Facility " + facility +
                    ": Invalid value for bldg_dw. Defaulting to 'N'.")
                row[bldg_dw] = 'N'
            if row[fastall] not in valid:
                    "Facility " + facility +
                    ": Invalid value for fastall. Defaulting to 'N'.")
                row[fastall] = 'N'
            if row[emis_var] not in valid:
                    "Facility " + facility +
                    ": Invalid value for emis_var. Defaulting to 'N'.")
                row[emis_var] = 'N'

            # Annual and period start/end
            met_annual = row[annual]
            start_spec_valid = True
            self.period_start_components = ""
            # Spaces are stripped so that "2019, 1, 1" and "2019,1,1" split alike.
            period_start_spec = row[period_start].replace(" ", "")
            if met_annual == "Y":
                if period_start_spec != "":
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Period start specified but ignored because annual = 'Y'"
                    row[period_start] = ""
                    start_spec_valid = False
                    # NOTE(review): the indentation from here on suggests two
                    # lost `else:` lines (one for each enclosing `if`).
                    Logger.logMessage("Facility " + facility +
                                      ": Using annual met option.")
                starts = period_start_spec.split(',')
                for s in starts:
                    if not s.isdigit():
                        start_spec_valid = False

                # A period start needs 3 or 4 comma-separated numeric parts.
                if len(starts) < 3 or len(starts) > 4:
                    start_spec_valid = False
                    # NOTE(review): presumably the body of a lost `else:`.
                    for c in starts:
                        self.period_start_components += c + " "

                if period_start_spec != "" and not start_spec_valid:
                    # NOTE(review): the first call below is never closed, and
                    # the lines after `row[period_start] = ""` appear to have
                    # lost an `else:` plus further call heads and arguments.
                    Logger.logMessage("Facility " + facility +
                                      ": Invalid period start specified: " +
                    Logger.logMessage("Facility " + facility +
                                      ": Aermod will use default.")
                    row[period_start] = ""
                    Logger.logMessage("Facility " + facility +
                                      ": Using period start = " +
                    if period_start_spec == '':
                            "Aermod will use default in place of blank period start value."
                    row[period_start] = self.period_start_components

            # The period end is handled the same way as the period start above.
            end_spec_valid = True
            self.period_end_components = ""
            period_end_spec = row[period_end].replace(" ", "")
            if met_annual == "Y":
                if period_end_spec != "":
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Period end specified but ignored because annual = 'Y'"
                    row[period_end] = ""
                    end_spec_valid = False
                # NOTE(review): presumably under a lost `else:` of the annual
                # check, as in the period start section.
                ends = period_end_spec.split(',')
                for e in ends:
                    if not e.isdigit():
                        end_spec_valid = False

                if len(ends) < 3 or len(ends) > 4:
                    end_spec_valid = False
                    # NOTE(review): presumably the body of a lost `else:`.
                    for c in ends:
                        self.period_end_components += c + " "

                if period_end_spec != "" and not end_spec_valid:
                    # NOTE(review): same damage as in the period start section.
                    Logger.logMessage("Facility " + facility +
                                      ": Invalid period end specified: " +
                    Logger.logMessage("Facility " + facility +
                                      ": Aermod will use default.")
                    row[period_end] = ""
                    Logger.logMessage("Facility " + facility +
                                      ": Using period end = " +
                    if period_end_spec == '':
                            "Aermod will use default in place of blank period end value."
                    row[period_end] = self.period_end_components

            # Cross-check start against end only when both were given and valid.
            if period_start_spec != "" and start_spec_valid and period_end_spec != "" and end_spec_valid:
                if len(starts) != len(ends):
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Inconsistent period start and end specified (both must include hours, or neither): "
                        + period_start_spec + " : " + period_end_spec)
                    Logger.logMessage("Facility " + facility +
                                      ": Aermod will use defaults.")
                    row[period_start] = ""
                    row[period_end] = ""

                # NOTE(review): as written the timestamp comparison also runs
                # after a length mismatch was reported; an `else:` may be
                # missing -- confirm.
                start_time = self.get_timestamp(starts)
                end_time = self.get_timestamp(ends)
                if start_time >= end_time:
                    # NOTE(review): call head missing.
                        "Facility " + facility +
                        ": Inconsistent period start and end specified (start must be before end): "
                        + period_start_spec + " : " + period_end_spec)
                    Logger.logMessage("Facility " + facility +
                                      ": Aermod will use defaults.")
                    row[period_start] = ""
                    row[period_end] = ""

            # iterrows() hands out a copy of each row, so the edited values
            # are written back explicitly.
            df.loc[index] = row

        Logger.logMessage("Uploaded facilities options list file for " +
                          str(len(df)) + " facilities.\n")
        return df
    def validate(self, df):
        # Validate the Land Use dataframe.
        #
        # Returns df when every check passes. On the first failed check the
        # problem is logged, shown in a message box and None is returned.
        #
        # Checks, in order: no blank fac_id; every facility in
        # self.gasDryFacs is present; no duplicate fac_id; each of the columns
        # D01..D36 holds one of the values 1 to 9.
        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            message = "One or more facility IDs are missing in the Land Use List."
            Logger.logMessage(message)
            messagebox.showinfo("Missing facility IDs", message)
            return None

        landfids = set(df[fac_id])
        faclistfids = set(self.gasDryFacs)
        if not faclistfids.issubset(landfids):
            message = ("Based on your Facility List Options file, the Land Use List is missing " +
                       "one or more facilities. Please correct one or both files and upload again.")
            Logger.logMessage(message)
            messagebox.showinfo("Land use list missing", message)
            return None

        duplicates = self.duplicates(df, [fac_id])
        if len(duplicates) > 0:
            Logger.logMessage("One or more records are duplicated in the Land Use List (key=fac_id):")
            messagebox.showinfo("Duplicate records", "One or more records are duplicated in the Land Use List (key=fac_id)")
            # NOTE(review): the loop body was missing in the reviewed source
            # (a syntax error). Logging each duplicate under the header line
            # above is the assumed intent; confirm against version control.
            for d in duplicates:
                Logger.logMessage(str(d))
            return None

        # Column names D01 .. D36 and the land use values they may hold.
        fields = ["D" + str(num).zfill(2) for num in range(1, 37)]
        valid_values = tuple(range(1, 10))

        for _, row in df.iterrows():

            facility = row[fac_id]

            for field in fields:
                if row[field] not in valid_values:
                    message = "Facility " + facility + ": Field " + field + " contains invalid value."
                    Logger.logMessage(message)
                    messagebox.showinfo("Invalid value", message)
                    return None

        # figure out how to get fac ids that have landuse based on flag or index
        # TODO

        # check for unassigned landuse
        check_landuse_assignment = set(df[fac_id])

        # Sorted so that the log line is the same on every run (set order is not).
        Logger.logMessage("Uploaded land use data for [" + ",".join(sorted(check_landuse_assignment)) + "]\n")
        return df
    def validate(self, df):
        # Validate the Particle Size dataframe.
        #
        # Returns df when every check passes. On the first failed check the
        # problem is logged, shown in a message box and None is returned.
        #
        # Checks, in order: no blank fac_id or source_id; no duplicate
        # (fac_id, source_id, part_diam); every hapemis source that needs
        # particle data is present; per-row value ranges; mass fractions sum
        # to 1 per (fac_id, source_id); every facility in
        # self.particleFacilities is present.
        #
        # NOTE(review): the reviewed copy had a `for` with no body and its
        # final section sat behind a `return None`, so df could never be
        # returned. Both are repaired below; confirm against version control.
        import math

        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            message = "One or more facility IDs are missing in the Particle List."
            Logger.logMessage(message)
            messagebox.showinfo("Missing facility IDs", message)
            return None

        if len(df.loc[(df[source_id] == '')]) > 0:
            message = "One or more source IDs are missing in the Particle List."
            Logger.logMessage(message)
            messagebox.showinfo("Missing source IDs", message)
            return None

        duplicates = self.duplicates(df, [fac_id, source_id, part_diam])
        if len(duplicates) > 0:
            Logger.logMessage("One or more records are duplicated in the Particle List (key=fac_id, source_id, part_diam):")
            messagebox.showinfo("Duplicate records", "One or more records are duplicated in the Particle List (key=fac_id, source_id, part_diam)")
            # Assumed intent: list each duplicate under the header line above.
            for d in duplicates:
                Logger.logMessage(str(d))
            return None

        # Verify that all particle source id's from hapemis are present in the particle file
        # The comparison is parenthesised on purpose: `&` binds tighter than
        # `>`, so without the parentheses the mask was (isin & part_frac) > 0.
        hapemis = self.hapemis_df
        needs_particle = (hapemis[fac_id].isin(self.particleFacilities)
                          & (hapemis['part_frac'] > 0))
        hapemis_srcs = hapemis[needs_particle][[fac_id, source_id]].drop_duplicates()
        part_srcs = df[[fac_id, source_id]].drop_duplicates()
        if len(hapemis_srcs.merge(part_srcs)) != len(hapemis_srcs):
            message = ("There are some source id's that need particle data that are not in the particle file. " +
                       "Please correct the particle file")
            Logger.logMessage(message)
            messagebox.showinfo("Missing source id's", message)
            return None

        for _, row in df.iterrows():

            facility = row[fac_id]

            # (description, value, out-of-range?) for each checked column.
            range_checks = (
                ("particle diameter", row[part_diam], row[part_diam] <= 0),
                ("mass fraction", row[mass_frac],
                 row[mass_frac] < 0 or row[mass_frac] > 100),
                ("particle density", row[part_dens], row[part_dens] < 0),
            )
            for description, value, out_of_range in range_checks:
                if out_of_range:
                    message = ("Facility " + facility + ": " + description +
                               " value " + str(value) + " out of range.")
                    Logger.logMessage(message)
                    messagebox.showinfo("Value out of range", message)
                    return None

        # check for mass frac sum to 1
        # NOTE(review): the range check above accepts values up to 100 while
        # the sum is compared with 1 -- confirm the unit of mass_frac.
        # A small tolerance is used because fractions such as 0.1 rarely add
        # up to exactly 1.0 in floating point.
        totals = df.groupby([fac_id, source_id], sort=False)[mass_frac].sum()
        incomplete = [
            str(fac) + ': ' + str(src)
            for (fac, src), total in totals.items()
            if not math.isclose(total, 1, abs_tol=1e-6)
        ]

        if len(incomplete) > 0:
            message = ("The mass fraction for " + ", ".join(incomplete) +
                       " does not sum to 100%. Please correct them in your " +
                       "particle size file.")
            Logger.logMessage(message)
            messagebox.showinfo("Mass fraction error", message)
            return None

        # check for unassigned particle
        check_particle_assignment = set(df[fac_id])

        # Particle size file can have extra facilities
        particle_unassigned = set(self.particleFacilities) - check_particle_assignment
        if particle_unassigned:
            missing = ", ".join(sorted(particle_unassigned))
            Logger.logMessage("Particle size data for facilities: " +
                              missing + " have not been assigned. " +
                              "Please edit the particle size file.")
            messagebox.showinfo("Particle size data", "Particle size data for facilities, " +
                              missing + " have not been assigned. " +
                              "Please edit the particle size file.")
            return None

        Logger.logMessage("Uploaded particle data for [" + ",".join(sorted(check_particle_assignment)) + "]\n")
        return df
    def validate(self, df):
        # Validate the Buoyant Line Parameters dataframe.
        #
        # Returns df when every check passes. On the first failed check the
        # problem is logged, shown in a message box and None is returned.
        #
        # Checks, in order: no blank fac_id; no duplicate fac_id; per-row
        # value ranges; the facilities listed here are exactly those that
        # have a source of type 'B' in self.emisloc_df.
        #
        # NOTE(review): the reviewed copy of this method did not parse -- the
        # heads of most reporting calls, the body of the duplicates loop and
        # an `else:` before the success log were missing. They were restored
        # to the Logger.logMessage / messagebox.showinfo pair the land use
        # validator in this file uses; confirm against version control.

        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            message = "One or more facility IDs are missing in the Buoyant Line List."
            Logger.logMessage(message)
            messagebox.showinfo("Missing facility IDs", message)
            return None

        duplicates = self.duplicates(df, [fac_id])
        if len(duplicates) > 0:
            message = "One or more records are duplicated in the Buoyant Line Parameters List (key=fac_id):"
            Logger.logMessage(message)
            messagebox.showinfo("Duplicate records", message)
            # Assumed intent: list each duplicate under the header line above.
            for d in duplicates:
                Logger.logMessage(str(d))
            return None

        # (column, wording used in the message, whether zero is acceptable)
        range_checks = (
            (avgbld_len, "avg building length", False),
            (avgbld_hgt, "avg building height", False),
            (avgbld_wid, "avg building width", False),
            (avglin_wid, "avg line width", False),
            (avgbld_sep, "avg building separation", True),
            (avgbuoy, "avg buoyancy", False),
        )

        for _, row in df.iterrows():
            facility = row[fac_id]

            for column, wording, zero_allowed in range_checks:
                value = row[column]
                if zero_allowed:
                    out_of_range = value < 0
                else:
                    out_of_range = value <= 0
                if out_of_range:
                    message = ("Facility " + facility + ": " + wording + " " +
                               str(value) + " out of range.")
                    Logger.logMessage(message)
                    messagebox.showinfo("Out of Range", message)
                    return None

        # check for unassigned buoyant line
        check_buoyant_assignment = set(df[fac_id])

        # get buoyant line facility list
        find_b = self.emisloc_df[self.emisloc_df[source_type] == 'B']
        buoyant_fac = set(find_b[fac_id])

        if check_buoyant_assignment != buoyant_fac:
            # NOTE(review): only facilities present here but without a 'B'
            # source are listed; when the mismatch is the other way round the
            # list in the message is empty -- confirm that this is intended.
            buoyant_unassigned = check_buoyant_assignment - buoyant_fac
            message = ("buoyant" + " Line parameters for " +
                       ", ".join(sorted(buoyant_unassigned)) + " have not been" +
                       " assigned. Please edit the 'source_type' column" +
                       " in the Emissions Locations file.")
            Logger.logMessage(message)
            messagebox.showinfo("Unassigned buoyant Line parameters", message)
            return None

        Logger.logMessage("Uploaded buoyant line parameters for [" +
                          ",".join(sorted(check_buoyant_assignment)) + "]\n")

        return df
    def validate(self, df):
        """Validate the uploaded HAP Emissions List.

        Strict problems (blank facility / source / pollutant ids, duplicated
        records, facilities in ``self.fac_ids`` that are absent from the file)
        are logged, shown to the user in a message box, and abort the upload.
        Out-of-range emission and particulate-fraction values are logged and
        reset to 0. Pollutants that are not in the dose response library are
        either dropped (and recorded in ``self.log`` and in the
        ``DR_HAP_ignored.log`` file) or abort the upload, at the user's choice.

        :param df: DataFrame holding the HAP emission records.
        :return: the validated (possibly filtered) DataFrame, or ``None`` when
            the upload is rejected.
        """
        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            Logger.logMessage(
                "One or more facility IDs are missing in the HAP Emissions List."
            )
            messagebox.showinfo(
                "Missing facility IDs",
                "One or more facility IDs are missing in the HAP Emissions List."
            )
            return None

        duplicates = self.duplicates(df, [fac_id, source_id, pollutant])
        if len(duplicates) > 0:
            Logger.logMessage(
                "One or more records are duplicated in the HAP Emissions List (key=fac_id, source_id, pollutant):"
            )
            messagebox.showinfo(
                "Duplicate records",
                "One or more records are duplicated in the HAP Emissions List (key=fac_id, source_id, pollutant):"
            )
            # List every duplicated key after the header line logged above.
            for d in duplicates:
                Logger.logMessage(d)
            return None

        hapfids = set(df[fac_id])
        if self.fac_ids.intersection(hapfids) != self.fac_ids:
            Logger.logMessage(
                "Based on your Facility List Options file, the HAP Emissions List is missing "
                "one or more facilities. Please correct one or both files and upload again."
            )
            messagebox.showinfo(
                "Missing facilities",
                "Based on your Facility List Options file, the HAP Emissions List is missing "
                "one or more facilities. Please correct one or both files and upload again."
            )
            return None

        if len(df.loc[(df[source_id] == '')]) > 0:
            Logger.logMessage(
                "One or more source IDs are missing in the HAP Emissions List."
            )
            messagebox.showinfo(
                "Missing source IDs",
                "One or more source IDs are missing in the HAP Emissions List."
            )
            return None

        if len(df.loc[(df[pollutant] == '')]) > 0:
            Logger.logMessage(
                "One or more pollutants are missing in the HAP Emissions List."
            )
            messagebox.showinfo(
                "Missing pollutants",
                "One or more pollutants are missing in the HAP Emissions List."
            )
            return None

        # ----------------------------------------------------------------------------------
        # Defaulted: Invalid values in these columns will be replaced with a default.
        # ----------------------------------------------------------------------------------
        for index, row in df.iterrows():

            facility = row[fac_id]

            if row[emis_tpy] < 0:
                Logger.logMessage("Facility " + facility +
                                  ": emissions value " + str(row[emis_tpy]) +
                                  " out of range. Defaulting to 0.")
                # Write only the offending cell; assigning the whole row back
                # would push the object-dtype row copy into every column.
                df.loc[index, emis_tpy] = 0

            if row[part_frac] < 0 or row[part_frac] > 1:
                Logger.logMessage("Facility " + facility +
                                  ": particulate fraction value " +
                                  str(row[part_frac] * 100) +
                                  " out of range. Defaulting to 0.")
                df.loc[index, part_frac] = 0

        # verify pollutants are present in dose library (case-insensitive);
        # a set makes each lookup O(1)
        known_haps = {x.lower() for x in self.haplib.dataframe[pollutant]}
        missing_pollutants = [
            hap for hap in set(df[pollutant]) if hap.lower() not in known_haps
        ]

        self.log = []
        # if there are any missing pollutants...
        if len(missing_pollutants) > 0:
            fix_pollutants = messagebox.askyesno(
                "Missing Pollutants in Dose " + "Response Library", "The " +
                "following pollutants were " + "not found in HEM4's Dose " +
                "Response Library: " + ', '.join(missing_pollutants) +
                ".\n Would you like to amend " + "your HAP Emissions file?" +
                "(they will be removed " + "otherwise). ")

            if fix_pollutants:
                Logger.logMessage(
                    "Aborting upload of HAP emissions pending resolution of missing pollutants."
                )
                messagebox.showinfo(
                    "Aborting upload",
                    "Aborting upload of HAP emissions pending resolution of missing pollutants."
                )
                return None

            Logger.logMessage(
                "Removing these pollutants, which were not found: " +
                "[{0}]".format(", ".join(
                    str(i) for i in missing_pollutants)))

            # remove them from data frame and record the non-modeled HAP
            # emissions in a separate log file
            # NOTE(review): '__file__' is a quoted literal here, so the path
            # resolves against the current working directory - confirm intent.
            fileDir = os.path.dirname(os.path.realpath('__file__'))
            filename = os.path.join(fileDir, "output", "DR_HAP_ignored.log")

            # the context manager closes the log even if a write fails
            with open(filename, 'w') as logfile:
                logfile.write(str(datetime.now()) + ":\n")

                for p in missing_pollutants:
                    # capture the rows BEFORE dropping them so the log shows
                    # the facility and source information that was ignored
                    ignored = df[df[pollutant] == p]
                    df = df[df[pollutant] != p]

                    # record upload in log
                    self.log.append("Removed " + p +
                                    " from hap emissions file\n")
                    logfile.write("Removed: " + str(ignored))

        Logger.logMessage("Uploaded HAP emissions file for " + str(len(df)) +
                          " source-HAP combinations.\n")
        return df
    def validate(self, df):
        """Validate the uploaded Emissions Variations file.

        Every failed check is reported (log and/or message box, as noted per
        check) and aborts the upload. Checks, in order:

        * no blank facility or source ids;
        * every ``variation`` value is one of the recognised keywords;
        * every facility flagged ``emis_var == 'Y'`` in ``self.faclist_df``
          appears in the file;
        * every facility/source pair also appears in ``self.emisloc_df``;
        * SEASON / WSPEED / MONTH rows carry 4 / 6 / 12 values;
        * for the hourly style variations, the number of value columns is 12.

        :param df: DataFrame holding the emission variation records.
        :return: ``df`` unchanged when valid, otherwise ``None``.
        """
        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            Logger.logMessage(
                "One or more facility IDs are missing in the Emissions Variations List."
            )
            messagebox.showinfo(
                "Missing facility IDs",
                "One or more facility IDs are missing in the Emissions Variations List."
            )
            return None

        if len(df.loc[(df[source_id] == '')]) > 0:
            Logger.logMessage(
                "One or more source IDs are missing in the Emissions Variations List."
            )
            messagebox.showinfo(
                "Missing source IDs",
                "One or more source IDs are missing in the Emissions Variations List."
            )
            return None

        valid = [
            'SEASON', 'MONTH', 'HROFDY', 'WSPEED', 'SEASHR', 'HRDOW',
            'HRDOW7', 'SHRDOW', 'SHRDOW7', 'MHRDOW', 'MHRDOW7'
        ]
        for index, row in df.iterrows():
            facility = row[fac_id]

            if row['variation'] not in valid:
                Logger.logMessage("Facility " + facility +
                                  ": variation value invalid.")
                messagebox.showinfo(
                    "Variation invalid",
                    "Facility " + facility + ": variation value invalid.")
                return None

        # Confirm that all facilities needing emission variation according to the Facility List
        # are in the emission variation file.

        # facilities in emission variation file
        var_facs = set(df[fac_id])

        # facilities needing emission variation
        faclist_facs = set(
            self.faclist_df[self.faclist_df['emis_var'] == 'Y'][fac_id])

        missing = faclist_facs - var_facs
        if missing:
            Logger.logMessage(
                "One or more facilities in the Facility List file that need " +
                "emission variation are not in the emission variation file. These facilities are: "
                + ", ".join(missing) +
                ". Please edit the emission variation file or Facility List file."
            )
            messagebox.showinfo(
                "Missing facilities in Emission Variation",
                "One or more facilities in the Facility List file that need " +
                "emission variation are not in the emission variation file. These facilities are: "
                + ", ".join(missing) +
                ". Please edit the emission variation file or Facility List file."
            )
            return None

        # Make sure all facility/source ids from emission variation file are also in
        # the emission location file. Ids are compared as "facility,source"
        # strings; zipping the two columns also behaves for an empty frame.

        # facility/source ids from emission variation file
        var_ids = {
            ','.join(pair) for pair in zip(df[fac_id], df[source_id])
        }

        # facility/source ids from emission location file
        model_ids = {
            ','.join(pair)
            for pair in zip(self.emisloc_df[fac_id], self.emisloc_df[source_id])
        }

        missing = var_ids - model_ids
        if missing:
            messagebox.showinfo(
                "Missing Emission Location", "The emission " +
                "variation file indicates variation for facility/source ids " +
                ", ".join(missing) + " which are not in the " +
                "emissions location file. Please edit " +
                "the emissions variation or emissions location " + " file.")
            return None

        vtype = set(df['variation'])

        # Variation types with a fixed number of values:
        # (keyword, required count, message box title, message prefix)
        fixed_counts = (
            ('SEASON', 4, "Seasonal Emissions Variation", "Seasonal"),
            ('WSPEED', 6, "Wind Speed Emissions Variation", "Wind speed"),
            ('MONTH', 12, "Monthly Emissions Variation", "Monthly"),
        )
        for keyword_name, required, title, label in fixed_counts:
            if keyword_name not in vtype:
                continue

            subset = df[df['variation'].str.upper() == keyword_name]
            wrong = []
            for _, record in subset.iterrows():
                # values follow the first three columns of each record;
                # blank (NaN) cells are not counted
                if len(record.dropna().values[3:]) != required:
                    # presumably the source id identifies the row for the
                    # user ("Sources: ..." below) - verify against callers
                    wrong.append(record[source_id])

            if len(wrong) > 0:
                messagebox.showinfo(
                    title,
                    label + " emissions variations require " + str(required) +
                    " values. Sources: " + ", ".join(wrong) +
                    " do not have the correct number of values. " +
                    "Please update your Emission Variation File.")
                return None

        if 'HROFDY' in vtype or 'SEASHR' in vtype or 'SHRDOW' in vtype or 'SHRDOW7' in vtype:
            other = df[~df['variation'].isin(['MONTH', 'WSPEED', 'SEASON'])]
            variation = other[other.columns[3:]].values

            # each remaining record must span exactly 12 value columns
            if any(len(values) != 12 for values in variation):
                messagebox.showinfo(
                    "Emissions Variation Error",
                    "One of the emissions variations type does " +
                    "not have the correct number of values. " +
                    "Please check your input file to make all " +
                    "values are either a multiple or factor " + "of 12.")
                return None

        Logger.logMessage("Uploaded emissions variations for [" +
                          ",".join(var_ids) + "]\n")
        return df
    def validate(self, df):
        """Validate the uploaded building downwash file.

        Every failed check is logged, shown in a message box, and aborts the
        upload. Checks, in order:

        * no blank facility or source ids, no duplicated
          (fac_id, source_id, keyword) records;
        * each row has section ``'SO'`` and a recognised keyword, and the
          building height / width / length values are not negative;
        * every facility flagged for building downwash in ``self.faclist_df``
          is present in the file;
        * the downwash sources, matched against ``self.emisloc_df``, contain
          no non-point source types and are not empty.

        :param df: DataFrame holding the downwash records.
        :return: ``df`` unchanged when valid, otherwise ``None``.
        """
        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        if len(df.loc[(df[fac_id] == '')]) > 0:
            Logger.logMessage(
                "One or more facility IDs are missing in the Downwash List.")
            messagebox.showinfo(
                "Missing Facility IDs",
                "One or more facility IDs are missing in the Downwash List.")
            return None

        if len(df.loc[(df[source_id] == '')]) > 0:
            Logger.logMessage(
                "One or more source IDs are missing in the Downwash List.")
            messagebox.showinfo(
                "Missing source IDs",
                "One or more source IDs are missing in the Downwash List.")
            return None

        duplicates = self.duplicates(df, [fac_id, source_id, keyword])
        if len(duplicates) > 0:
            Logger.logMessage(
                "One or more records are duplicated in the Downwash List (key=fac_id, source_id, keyword):"
            )
            messagebox.showinfo(
                "Duplicate records",
                "One or more records are duplicated in the Downwash List (key=fac_id, source_id, keyword)"
            )
            # List every duplicated key after the header line logged above.
            for d in duplicates:
                Logger.logMessage(d)
            return None

        valid = ['BUILDHGT', 'BUILDWID', 'BUILDLEN', 'XBADJ', 'YBADJ']
        # keywords whose values may not be negative
        constrained = ['BUILDHGT', 'BUILDWID', 'BUILDLEN']
        value_fields = ["value_" + str(num) for num in range(1, 37)]

        for index, row in df.iterrows():

            if row[section] != 'SO':
                Logger.logMessage("Invalid section " + str(row[section]) + ".")
                messagebox.showinfo(
                    "Invalid section",
                    "Invalid section " + str(row[section]) + ".")
                return None

            if row[keyword] not in valid:
                Logger.logMessage("Invalid keyword " + str(row[keyword]) + ".")
                messagebox.showinfo(
                    "Invalid keyword",
                    "Invalid keyword " + str(row[keyword]) + ".")
                return None

            if row[keyword] not in constrained:
                continue

            for field in value_fields:
                if row[field] < 0:
                    Logger.logMessage("Invalid down wash value " +
                                      str(row[field]) + ".")
                    messagebox.showinfo(
                        "Invalid values",
                        "Invalid down wash value " + str(row[field]) + ".")
                    return None

        # check for unassigned downwash
        check_downwash_assignment = set(df[fac_id])

        find_d = self.faclist_df[self.faclist_df[bldg_dw] == "Y"]
        d_fac = set(find_d[fac_id])

        downwash_unassigned = d_fac - check_downwash_assignment
        if downwash_unassigned:
            unassigned_msg = (
                "Building downwash parameters for facilities, " +
                ", ".join(downwash_unassigned) + ", have not" +
                " been assigned. Please edit the" +
                " 'bldgdw' column in the Facilities List Option" +
                " file or add these facilities to the downwash file.")
            Logger.logMessage(unassigned_msg)
            messagebox.showinfo("Unassigned building downwash",
                                unassigned_msg)
            return None

        # Downwash can only be used on point sources. Restrict the downwash
        # records to facilities that request downwash, then look up their
        # source types in the emissions location data.
        dfids_2use = list(check_downwash_assignment.intersection(d_fac))
        dsources_df = df[df[fac_id].isin(dfids_2use)][[fac_id, source_id]]
        d_in_e = pd.merge(self.emisloc_df,
                          dsources_df,
                          on=[fac_id, source_id])
        d_in_e_srctypes = set(d_in_e['source_type'])
        invalid_srctypes = ['V', 'B', 'A', 'N', 'I']
        if not d_in_e_srctypes.isdisjoint(invalid_srctypes):
            nonpoint_msg = (
                "AERMOD models building downwash from point sources only " +
                "(i.e., vertical P, horizontal H, or capped C point sources). "
                "Your building dimensions file includes non-point sources. " +
                "Please edit your building dimensions file to remove all non-point sources."
            )
            Logger.logMessage(nonpoint_msg)
            messagebox.showinfo("Invalid sources", nonpoint_msg)
            return None

        # Also check that if downwash was called for, then the downwash file contains point source sourceids found in the emisloc file
        if d_in_e.empty:
            no_sources_msg = (
                "Building downwash is called for, but the downwash file does not contain any point source type "
                "source id's. Please add appropriate source id's to the downwash file or turn off downwash."
            )
            Logger.logMessage(no_sources_msg)
            messagebox.showinfo("No downwash sources", no_sources_msg)
            return None

        Logger.logMessage("Uploaded building downwash parameters for [" +
                          ",".join(check_downwash_assignment) + "]\n")
        return df
    def validate(self, df):
        """Validate the Target Organs file.

        Two strict checks are made; each failure is logged and aborts the
        load:

        * no pollutant may appear more than once in ``df``;
        * every pollutant in ``self.haplib_df`` whose ``rfc`` column is
          greater than 0 must also appear in ``df``.

        :param df: DataFrame holding the target organ records.
        :return: ``df`` unchanged when valid, otherwise ``None``.
        """
        # ----------------------------------------------------------------------------------
        # Strict: Invalid values in these columns will cause the upload to fail immediately.
        # ----------------------------------------------------------------------------------
        duplicates = self.duplicates(df, [pollutant])
        if len(duplicates) > 0:
            Logger.logMessage("One or more records are duplicated in the Target Organs file (key=pollutant):")
            for d in duplicates:
                Logger.logMessage(d)

            Logger.logMessage("Please remove the duplicate records and restart HEM4.")
            return None

        # Verify that no non-cancer causing pollutants are missing. This must
        # run on the no-duplicates path, otherwise it is never reached.
        haplib = self.haplib_df.loc[self.haplib_df[rfc] > 0]
        pollutants = set(haplib[pollutant])
        organ_pollutants = set(df[pollutant].unique())

        if not pollutants.issubset(organ_pollutants):
            Logger.logMessage("There are non-cancer causing pollutants in the Dose Response file that are not " +
                              "present in the Target Organs file:")
            diff = pollutants - organ_pollutants
            for d in diff:
                Logger.logMessage(d)
            Logger.logMessage("Please augment the Target Organs file with these pollutants and restart HEM4.")
            return None

        return df