def update_db(self):
    """Scrape every generated data URL and store the merged readings.

    Each URL returned by ``self.generate_data_url()`` is downloaded and fed
    to a fresh ``WeatherScraper``; the scraper's ``temps_data`` is merged
    into one dictionary, which is handed to ``DBOperations.update_database``
    once all URLs have been processed.
    """
    collected = {}
    for page_url in self.generate_data_url():
        print('Scraping data from: ')
        print(page_url)
        scraper = WeatherScraper()
        with urllib.request.urlopen(page_url) as response:
            raw_page = response.read()
        # NOTE(review): str() on bytes yields the "b'...'" repr rather than
        # decoded text; kept as-is because the scraper is fed this form
        # today -- confirm before switching to .decode().
        scraper.feed(str(raw_page))
        collected.update(scraper.temps_data)
    DBOperations().update_database(collected)
def main(self):
    """Run the interactive text menu until the user picks "4" (exit).

    Menu choices:
      1. Scrape the months around the current month and store them.
      2. Scrape a full history (every year below the current one) and store it.
      3. Ask for a "from-year to-year" pair, query the database and show a
         box plot for that range.
      4. Leave the menu.

    Any error raised while serving a choice is printed and the menu is shown
    again; nothing is returned.
    """

    def scrape_and_store(years, months):
        # Shared by choices 1 and 2: walk the year/month grid, feed every
        # page to one scraper, stop as soon as the scraper reports
        # ``is_equal is False``, then persist whatever it accumulated.
        my_scraper = WeatherScraper()
        stop_requested = False
        for year in years:
            my_scraper.url_year = year
            if stop_requested:
                break
            for month in months:
                my_scraper.url_month = month
                my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
                with urllib.request.urlopen(my_url) as response:
                    page = str(response.read())
                my_scraper.feed(page)
                if my_scraper.is_equal is False:
                    stop_requested = True
                    break
        DBOperations().create_table(my_scraper.dict_outer)

    menu_lines = (
        "1. Update a set of weather data up to today",
        "2. Download a full set of weather data",
        "3. A year range of interest (from year, to year)",
        "4. Exit",
    )

    while True:
        try:
            for line in menu_lines:
                print(line)
            user_selection = input("Please make your choice...")

            if user_selection == '4':
                break

            if user_selection == '1':
                try:
                    today = datetime.datetime.now()
                    # NOTE(review): in January/February this month range
                    # includes 0 or negative values -- confirm the site and
                    # the scraper tolerate that.
                    scrape_and_store(
                        range(today.year, today.year - 1, -1),
                        range(today.month - 2, today.month + 1),
                    )
                except Exception as e:
                    print(
                        "Error in Updating a set of weather data up to today: ",
                        e)
            elif user_selection == '2':
                try:
                    today = datetime.datetime.now()
                    # NOTE(review): this starts at last year (the current
                    # year is excluded) and the months run 0..12 -- confirm
                    # that is intended.
                    scrape_and_store(
                        reversed(range(today.year)),
                        range(0, 13),
                    )
                except Exception as e:
                    print(
                        "Error in downloading a full set of weather data: ",
                        e)
            elif user_selection == '3':
                try:
                    answer = input(
                        "Please select a RANGE of your interest(e.g 2017 2019): "
                    )
                    bounds = answer.split()
                    database = DBOperations()
                    readings = database.query_infos(bounds[0], bounds[1])
                    plotter = PlotOperations()
                    plotter.diplay_box_plot(readings, bounds[0], bounds[1])
                except Exception as e:
                    print(
                        "Error in A year range of interest (from year, to year): ",
                        e)
            else:
                print("Invalid choice")
        except Exception as e:
            print("Error plot_operations.py: ", e)
class WeatherProcessor:
    """Manage the user-facing tasks: downloading, updating and plotting data.

    The class wires together a ``DBOperations`` database, a
    ``WeatherScraper`` and a ``PlotOperations`` helper. Every public method
    logs failures through ``logging.error`` instead of raising.
    """

    # Consecutive failed month downloads tolerated before collect_data()
    # gives up; a success resets the count.
    MAX_CONSECUTIVE_FAILURES = 3

    def __init__(self):
        """Create the database, scraper and plot helpers.

        ``last_updated`` / ``first_updated`` hold the newest / oldest
        ``sample_date`` stored in the database, or "" when nothing is
        stored. They are set before anything that can fail so the
        attributes always exist.
        """
        self.last_updated = ""
        self.first_updated = ""
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()
            if self.db.is_table_exist():
                self.last_updated = self._sample_date(self.db.fetch_last())
                self.first_updated = self._sample_date(self.db.fetch_first())
        except Exception as e:
            logging.error("weatherprocessor:__init__, %s", e)

    @staticmethod
    def _sample_date(rows):
        """Return ``rows[0]["sample_date"]``, or "" when ``rows`` is empty."""
        return rows[0]["sample_date"] if rows else ""

    @staticmethod
    def _days_after(month_data, year, month, recent_date):
        """Split off the days of a month that are newer than ``recent_date``.

        Days are visited in reverse insertion order. Returns a tuple
        ``(newer_days, found)`` where ``found`` is True when a day whose
        ``"{year}-{month:02d}-{day}"`` string equals ``recent_date`` was
        reached (that day and everything after it in the walk are left out).
        """
        newer_days = {}
        for day, reading in reversed(month_data.items()):
            if f"{year}-{month:02d}-{day}" == recent_date:
                return newer_days, True
            newer_days[day] = reading
        return newer_days, False

    def download_data(self):
        """Purge the database, re-create it, then download all the data."""
        try:
            self.db.purge_data()
            self.db.initialize_db()
            self.collect_data()
        except Exception as e:
            logging.error("weatherprocessor:download_data, %s", e)

    def update_data(self):
        """Ensure the database exists, collect new data, refresh last_updated."""
        try:
            self.db.initialize_db()
            self.collect_data()
            self.last_updated = self._sample_date(self.db.fetch_last())
        except Exception as e:
            logging.error("weatherprocessor:update_data, %s", e)

    def get_box_plot(self, start_year, end_year):
        """Fetch the data for a year range and generate a box plot from it.

        The upper bound passed to the database is ``int(end_year) + 1``.
        """
        try:
            weather = self.db.fetch_data(start_year, int(end_year) + 1, False)
            self.pl.generate_box_plot(weather, start_year, end_year)
        except Exception as e:
            logging.error("weatherprocessor:get_box_plot, %s", e)

    def get_line_plot(self, year, month):
        """Fetch the data for one month and generate a line plot from it."""
        try:
            weather = self.db.fetch_data(year, month, True)
            self.pl.generate_line_plot(weather, year, month)
        except Exception as e:
            logging.error("weatherprocessor:get_line_plot, %s", e)

    def collect_data(self):
        """Scrape month by month, newest first, and save each month.

        Starting at today's year/month and walking backwards, each month's
        page is downloaded, parsed by the scraper and saved. Collection
        stops when

        * a month's data equals the following month's data of the same year
          (that month is not saved), or
        * the most recent ``sample_date`` already in the database is
          reached (only newer days of that month are saved), or
        * ``MAX_CONSECUTIVE_FAILURES`` month attempts in a row raise; a
          failed month is retried, so without this limit a persistent error
          would loop forever.

        After each year a ``'update_latest_download'`` message is published.
        """
        try:
            today = date.today()
            year, month = today.year, today.month
            duplicate_month = duplicate_day = False
            recent_date = self._sample_date(self.db.fetch_last())
            failures = 0

            # One pass per year, newest first.
            while not duplicate_month and not duplicate_day:
                month_dict = {}
                # One pass per month of that year, newest first.
                while not duplicate_day and month > 0:
                    try:
                        url = self.ws.get_url(year, month)
                        with urllib.request.urlopen(url) as response:
                            # NOTE(review): str() on bytes gives the
                            # "b'...'" repr, not decoded text; kept because
                            # the scraper is fed this form today.
                            html = str(response.read())
                        self.ws.feed(html)
                        month_dict[month] = self.ws.return_dict()

                        # Same data as the following month: stop, and drop
                        # the entry that was just inserted.
                        if (month + 1 in month_dict
                                and month_dict[month] == month_dict[month + 1]):
                            month_dict.popitem()
                            duplicate_month = True
                            break

                        if recent_date != "":
                            month_dict[month], duplicate_day = self._days_after(
                                month_dict[month], year, month, recent_date)

                        self.db.save_data(month_dict[month], month, year)
                        month -= 1
                        failures = 0
                    except Exception as e:
                        logging.error(
                            "weatherprocessor:collect_data:loop:loop2, %s", e)
                        # The month was not advanced, so the same month is
                        # retried; bound the retries to avoid spinning.
                        failures += 1
                        if failures >= self.MAX_CONSECUTIVE_FAILURES:
                            logging.error(
                                "weatherprocessor:collect_data, giving up "
                                "after %s consecutive failures", failures)
                            return

                # A failed notification must not prevent moving on to the
                # previous year, otherwise the outer loop never ends.
                try:
                    pub.sendMessage('update_latest_download', year=str(year))
                except Exception as e:
                    logging.error("weatherprocessor:collect_data:loop, %s", e)
                month = 12
                year -= 1
        except Exception as e:
            logging.error("weatherprocessor:collect_data, %s", e)

    def get_years_for_dropdown(self, min_year):
        """Return the selectable years as strings, oldest first.

        The list runs from ``min_year`` (or, when ``min_year`` is "", the
        year of the oldest stored sample) to the year of the newest stored
        sample. It is empty when the table does not exist or holds no rows.
        Returns None if an error was logged.
        """
        try:
            years = []
            if self.db.is_table_exist():
                self.last_updated = self._sample_date(self.db.fetch_last())
                self.first_updated = self._sample_date(self.db.fetch_first())
                if not (self.first_updated and self.last_updated):
                    return years
                if min_year == "":
                    first_year = int(self.first_updated[:4])
                else:
                    first_year = int(min_year)
                last_year = int(self.last_updated[:4])
                years = [str(y) for y in range(first_year, last_year + 1)]
            return years
        except Exception as e:
            logging.error("weatherprocessor:get_years_for_dropdown, %s", e)
            return None

    def get_months_for_dropdown(self, year):
        """Return the months stored for ``year``, in reverse fetch order.

        When ``year`` is "", the year of the oldest stored sample is used.
        Each month is the last two characters of a value returned by
        ``self.db.fetch_months``. The list is empty when the table does not
        exist or holds no rows. Returns None if an error was logged.
        """
        try:
            months = []
            if self.db.is_table_exist():
                self.first_updated = self._sample_date(self.db.fetch_first())
                if year == "":
                    if not self.first_updated:
                        return months
                    year = int(self.first_updated[:4])
                for item in self.db.fetch_months(year):
                    # A malformed row or value is logged and skipped so the
                    # remaining months are still returned.
                    try:
                        for value in item.values():
                            try:
                                months.append(str(value[-2:]))
                            except Exception as e:
                                logging.error(
                                    "weatherprocessor:get_months_for_dropdown:loop:loop2, %s",
                                    e)
                    except Exception as e:
                        logging.error(
                            "weatherprocessor:get_months_for_dropdown:loop, %s",
                            e)
            return months[::-1]
        except Exception as e:
            logging.error("weatherprocessor:get_months_for_dropdown, %s", e)
            return None