Пример #1
0
    def receive_and_format_data(self,
                                year: int,
                                specific_month: int = 0) -> dict:
        """
        Fetch one year of rows and shape the mean temperatures for plotting.

        Every fetched row is read positionally: index 1 is sliced as a date
        string (characters 5-6 -> month, last two characters -> day) and the
        last item is taken as the mean temperature.

        :param year: year handed to DBOperations.fetch_data
        :param specific_month: month number to filter on; 0 (the default)
            means "all months"
        :return: without a month: {month: [mean temps, in fetch order]},
                 e.g. {1: [1.1, 5.5], 2: [8.1, 5.4]};
                 with a month: {day: mean temp} for that month only,
                 e.g. {1: 1.1, 2: 8.1}
        """
        rows = DBOperations(self.db_name).fetch_data(self.table_name, year)

        if specific_month:
            temps_by_day = {}
            for row in rows:
                sample_date = row[1]
                if int(sample_date[5:7]) == specific_month:
                    temps_by_day[int(sample_date[-2:])] = row[-1]
            return temps_by_day

        temps_by_month = {}
        for row in rows:
            temps_by_month.setdefault(int(row[1][5:7]), []).append(row[-1])
        return temps_by_month
Пример #2
0
    def __init__(self):
        """Create the database, scraper and plot helpers and cache the stored date range.

        ``last_updated`` / ``first_updated`` receive the ``sample_date`` of the
        first row returned by ``fetch_last`` / ``fetch_first``, or "" when
        ``is_table_exist()`` is false. Any failure is logged, not raised.
        """
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()

            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
            else:
                self.last_updated = ""

            if self.db.is_table_exist():
                self.first_updated = self.db.fetch_first()[0]["sample_date"]
            else:
                self.first_updated = ""

        except Exception as e:
            logging.error(f"weatherprocessor:__init__, {e}")
Пример #3
0
 def update_db(self):
     """Scrape every URL from generate_data_url() and store the merged result.

     Each page is parsed by a fresh WeatherScraper; its ``temps_data`` is
     merged into one dict that is finally passed to
     DBOperations.update_database.
     """
     scraped = {}
     for page_url in self.generate_data_url():
         print('Scraping data from: ')
         print(page_url)
         page_parser = WeatherScraper()
         with urllib.request.urlopen(page_url) as reply:
             # NOTE: str() on bytes yields the b'...' repr, not decoded text;
             # kept as-is because the parser is fed exactly this form.
             page_text = str(reply.read())
         page_parser.feed(page_text)
         scraped.update(page_parser.temps_data)
     DBOperations().update_database(scraped)
    def boxplot(self, year_one, year_two):
        """Show a box plot of temperatures per calendar month for a range of years.

        The data comes from DBOperations.fetch_data_boxplot(year_one, year_two)
        and is iterated as (date string, value) pairs. 'N/A' values are
        skipped and the month is read from characters 5-6 of the date string.
        Errors are logged, never raised.
        """
        try:
            readings = DBOperations().fetch_data_boxplot(year_one, year_two)

            # Slot 0 collects January ... slot 11 collects December.
            monthly = [[] for _ in range(12)]
            slot_of = {'{:02d}'.format(number): number - 1
                       for number in range(1, 13)}

            for sample_date, temperature in readings.items():
                try:
                    if temperature != 'N/A':
                        slot = slot_of.get(sample_date[5:7])
                        # Unknown month codes are silently ignored.
                        if slot is not None:
                            monthly[slot].append(temperature)
                except Exception as e:
                    logging.error(e)

            plt.title('Monthly Temperature Distribution for: {} to {}'.format(
                year_one, year_two))
            plt.ylabel('Temperature(Celsius)')
            plt.xlabel('Month')
            plt.boxplot(monthly)
            plt.show()

        except Exception as e:
            logging.error(e)
Пример #5
0
    def generate_box_plot(self, start_year: int, end_year: int) -> dict:
        """
        Draw, save and show a box plot of temperatures grouped by month.
        :param start_year: first year of the range (inclusive)
        :param end_year: last year of the range (inclusive)
        :return: the instance's dict of saved box plot paths (key 'start-end');
                 None when an error was logged instead
        """
        try:
            print('Generate a BOX PLOT between years[{0}-{1}]...'.format(
                start_year, end_year))

            weather_db = DBOperations('weather.sqlite')
            rows = []
            for year in range(start_year, end_year + 1):
                rows += weather_db.fetch_data(year)

            # month number -> temperatures; keys are inserted 1..12
            temps_by_month = {month: [] for month in range(1, 13)}

            for row in rows:
                # Index 5 is the value plotted; index 1 is sliced for the month.
                if is_number(row[5]):
                    temps_by_month[int(row[1][5:7])].append(float(row[5]))

            plot_title = 'Monthly Temperature Distribution for: {0} to {1}'.format(
                start_year, end_year)
            plt.boxplot(temps_by_month.values(), sym="o", whis=1.5)
            plt.xlabel('Month')
            plt.ylabel('Temperature (Celsius)')
            plt.title(plot_title)

            # Make sure the output directory exists before saving into it.
            output_dir = "images"
            mkdir_p(output_dir)
            file_path = '{0}/{1}_to_{2}.png'.format(output_dir, start_year,
                                                    end_year)
            range_key = '{0}-{1}'.format(start_year, end_year)
            self.box_plot_path_saving_dict[range_key] = file_path

            plt.savefig(file_path)
            plt.show()
            return self.box_plot_path_saving_dict
        except Exception as e:
            self.logger.error(e)
Пример #6
0
 def clear_db_and_install_all_weather_data(self, event):
     """Scrape everything, then empty the table and store the fresh data.

     :param event: event object supplied by the caller; not used here
     """
     scraper = WeatherScraper()
     scraper.start_scraping()
     fresh_weather = scraper.weather

     store = DBOperations(self.db_name)
     store.initialize_db(self.table_name)
     store.purge_data(self.table_name)
     store.save_data(fresh_weather, self.table_name)
Пример #7
0
    def generate_line_plot(self, specific_year: int,
                           specific_month: int) -> dict:
        """
        Draw, save and show a line plot of one month's daily temperatures.
        :param specific_year: the chosen year for line plotting
        :param specific_month: the chosen month (1-12) for line plotting
        :return: the instance's dict of saved line plot paths (key 'year-month');
                 None when an error was logged instead
        """
        try:
            print('Generate a Line PLOT for [{0}-{1}]...'.format(
                specific_year, specific_month))
            month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                           'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
            weather_db = DBOperations('weather.sqlite')
            days = []  # day of month taken from the date string, e.g. 2020-12-01 -> 1.0
            temperatures = []

            for row in weather_db.fetch_data(specific_year, specific_month):
                # Rows whose value at index 5 is not numeric are left out.
                if is_number(row[5]):
                    days.append(float(row[1][-2:]))
                    temperatures.append(float(row[5]))

            plt.plot(days, temperatures)
            plt.xlabel('Day')
            plt.ylabel('Temperature (Celsius)')
            plt.title('Daily Temperature Distribution for: {0} {1}'.format(
                month_names[specific_month - 1], specific_year))

            # Make sure the output directory exists before saving into it.
            output_dir = "images"
            mkdir_p(output_dir)
            month_key = '{0}-{1}'.format(specific_year, specific_month)
            file_path = '{0}/{1}.png'.format(output_dir, month_key)

            self.line_plot_path_saving_dict[month_key] = file_path
            plt.savefig(file_path)
            plt.show()

            return self.line_plot_path_saving_dict
        except Exception as e:
            self.logger.error(e)
Пример #8
0
    def db_selection():
        """Ask whether to download the full data set (1) or update the existing one (2).

        Any other number raises a bare Exception; non-numeric input makes
        int() raise ValueError.
        """
        print('Please select from (1)download a full set of'
              ' weather data or (2)update it')
        choice = int(input())

        if choice not in (1, 2):
            raise Exception()

        if choice == 1:
            DBOperations().create_database()
            return

        # choice == 2: bring the existing database up to date
        processor = WeatherProcessor()
        print('Most recent date in database is', processor.newest_date)
        print("Updating Database to today's date")
        processor.update_db()
        print("Updating Database successed")
Пример #9
0
 def scrape_and_save_weather_data(self, end_year: int = 0, end_month: int = 12):
     """Scrape weather data and replace the stored table contents with it.

     :param end_year: year passed to WeatherScraper.start_scraping; 0 (the
         default) means "use today's year and month", in which case
         end_month is overwritten as well
     :param end_month: month passed to start_scraping when end_year is given
     """
     if not end_year:
         now = date.today()
         end_year, end_month = now.year, now.month

     scraper = WeatherScraper()
     scraper.start_scraping('url', end_year, end_month)
     scraped_weather = scraper.weather

     store = DBOperations(self.db_name)
     store.initialize_db(self.table_name)
     store.purge_data(self.table_name)
     store.save_data(scraped_weather, self.table_name)
 def __init__(self, master=None):
     """Build the main frame, its widgets and the database handle.

     :param master: parent widget; when None, tkinter falls back to its
         default root window
     """
     super().__init__(master)
     # tk.Frame.__init__ has already stored the resolved parent in
     # self.master (the default root when master is None). Re-assigning the
     # raw argument would replace it with None and make the geometry() call
     # below fail, so the attribute is deliberately left untouched.
     self.master.geometry('900x500')
     self.place(relx=0.5, rely=0.5, anchor=tk.CENTER)
     self.db_status_text = tk.StringVar()
     self.line_month = tk.StringVar(self)
     # Widget references; filled in by the create_*_widgets methods.
     self.db_status_label = None
     self.start_year_entry = None
     self.end_year_entry = None
     self.box_plot_error = None
     self.month_entry = None
     self.year_entry = None
     self.line_plot_error = None
     # Build the widgets, then open the database they operate on.
     self.create_widgets()
     self.db_ops = DBOperations()
     self.db_ops.initialize_db()
    def linegraph(self, year, month):
        """Show a line plot of the values stored for one month of one year.

        The data comes from DBOperations.fetch_data_lineplot(year, month) and
        is iterated as (key, value) pairs; 'N/A' values are skipped. Errors
        are logged, never raised.
        """
        try:
            readings = DBOperations().fetch_data_lineplot(year, month)
            daily_means = []

            for _, temperature in readings.items():
                try:
                    if temperature != 'N/A':
                        daily_means.append(temperature)
                except Exception as e:
                    logging.error(e)

            plt.title('Daily Mean Temperatures for: {} - {}'.format(
                year, month))
            plt.plot(daily_means)
            plt.ylabel('Temperature(Celsius)')
            plt.xlabel('Day')
            plt.show()
        except Exception as e:
            logging.error(e)
Пример #12
0
    def update_db(self, event):
        """Scrape starting from the newest stored sample date and save the result.

        :param event: event object supplied by the caller; not used here
        """
        scraper = WeatherScraper()
        with DBOperations(self.db_name) as cursor:
            cursor.execute(f"select max(sample_date) from {self.table_name};")
            newest_rows = cursor.fetchall()
            latest_date = newest_rows[0][0]

        print('latest date in db', latest_date)
        scraper.start_scraping(latest_date)
        scraped_weather = scraper.weather

        store = DBOperations(self.db_name)
        store.initialize_db(self.table_name)
        store.save_data(scraped_weather, self.table_name)
Пример #13
0
 def __init__(self):
     """Open and initialise the weather database and set up shared UI strings."""
     # Database handle shared by the instance; initialize_db() is called right away.
     self.my_db = DBOperations('weather.sqlite')
     self.my_db.initialize_db()
     # Separator line text.
     self.cut_off = '****************************************************************************'
     # Message text for rejected user input.
     self.invalid_input_str = 'Sorry, your input is not validated, please try again.'
     # Root logger.
     self.logger = logging.getLogger()
Пример #14
0
class WeatherProcessor:
    """Console menu front end for scraping, storing and plotting weather data.

    The menu methods call one another directly (a sub menu calls the main
    menu to "go back"), so navigation nests calls instead of returning; the
    session normally ends through ``sys.exit()`` from an "Exit" option.
    Every public method logs unexpected exceptions instead of raising them.
    """

    # Earliest year accepted by the plot prompts (matches the prompt texts).
    _FIRST_YEAR = 1996

    def __init__(self):
        """Open and initialise the weather database and set up shared UI strings."""
        self.my_db = DBOperations('weather.sqlite')
        self.my_db.initialize_db()
        self.cut_off = '****************************************************************************'
        self.invalid_input_str = 'Sorry, your input is not validated, please try again.'
        self.logger = logging.getLogger()

    def exe_welcome(self):
        """
        Welcome menu: print the banner and the stored date range, then open
        the main menu.
        :return:
        """
        try:
            print(self.cut_off)
            print('Welcome to Weather Scraper App!')
            try:
                earliest = self.my_db.fetch_earliest_one()[0][0]
                latest = self.my_db.fetch_last_one()[0][0]
            except (IndexError, TypeError):
                # Assumes an empty table makes the fetch helpers return an
                # empty (or None) result. The menu must still open in that
                # case, otherwise data could never be fetched on a first run.
                print('There is no weather data in the database yet.')
            else:
                print(
                    'There are weather data between [{0}] and [{1}] in the database.'
                    .format(earliest, latest))
            self.exe_menu_0()
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0(self):
        """
        Main menu: list the options, then dispatch on the user's choice in an
        endless loop.
        :return:
        """
        try:
            print(self.cut_off)
            print('What do you want to do?')
            menu = {
                '1': 'Fetch all new data from the website.',
                '2':
                'Update data between today and the latest date in the database.',
                '3': 'Generate a plot.',
                '4': 'Exit.'
            }
            for key, label in menu.items():
                print(key, label)

            actions = {
                '1': self.exe_menu_0_1,
                '2': self.exe_menu_0_2,
                '3': self.exe_menu_0_3,
                '4': sys.exit,
            }
            while True:
                selection = input(
                    'Please input the number of the options[1,2,3,4]: ')
                action = actions.get(selection)
                if action is None:
                    print(self.invalid_input_str)
                else:
                    action()
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0_1(self):
        """
        Fetch-all menu: ask for confirmation before the long full download,
        then go back to the main menu.
        :return:
        """
        try:
            print(self.cut_off)
            print(
                'Are you sure you want to fetch all new data from the website?'
            )

            while True:
                selection = input(
                    'It will take several minutes [Y/N] :').lower()

                if selection == 'y':
                    self.exe_menu_0_1_1()
                    self.exe_menu_0()
                elif selection == 'n':
                    self.exe_menu_0()
                else:
                    print(self.invalid_input_str)
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0_1_1(self):
        """
        Announce and run the full download (see renew_all_data).
        :return:
        """
        try:
            print(self.cut_off)
            print(
                'Fetching all new data from the website. It will take several minutes...'
            )
            self.renew_all_data()
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0_2(self):
        """
        Update menu: show the newest stored date and today's date, fetch the
        gap (see fill_missing_data) and go back to the main menu.
        :return:
        """
        try:
            print(self.cut_off)
            print('The last day in the database is: [{0}]'.format(
                self.my_db.fetch_last_one()[0][0]))
            print('Today is: [{0}]'.format(date.today()))
            print('Fetching the missing data from the website...')
            self.fill_missing_data()
            self.exe_menu_0()
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0_3(self):
        """
        Plot menu: list the plot options, then dispatch on the user's choice
        in an endless loop.
        :return:
        """
        try:
            print(self.cut_off)
            print('What the kind of plots you want?')

            menu = {
                '1': 'Generate a BOX PLOT between a year range.',
                '2': 'Generate a LINE PLOT for an assigned month.',
                '3': 'Return to main menu.',
                '4': 'Exit.'
            }
            for key, label in menu.items():
                print(key, label)

            actions = {
                '1': self.exe_menu_0_3_1,
                '2': self.exe_menu_0_3_2,
                '3': self.exe_menu_0,
                '4': sys.exit,
            }
            while True:
                selection = input(
                    'Please input the number of the options[1,2,3,4]: ')
                action = actions.get(selection)
                if action is None:
                    print(self.invalid_input_str)
                else:
                    action()
        except Exception as e:
            self.logger.error(e)

    def _ask_for_int(self, prompt, lowest, highest):
        """Prompt until the user enters an integer within [lowest, highest].

        Entering 'c' opens the plot menu (cancel); any other rejected input
        prints the "invalid input" message and asks again.
        """
        while True:
            answer = input(prompt).lower()
            if is_int(answer) and lowest <= int(answer) <= highest:
                return int(answer)
            if answer == 'c':
                self.exe_menu_0_3()
            else:
                print(self.invalid_input_str)

    def exe_menu_0_3_1(self):
        """
        Box plot menu: ask for a start and an end year, draw the plot and go
        back to the plot menu.
        :return:
        """
        try:
            print(self.cut_off)
            print(
                'You are trying to generate a BOX PLOT between a year range:')
            this_year = date.today().year
            start_year = self._ask_for_int(
                'Enter the start year[from 1996 to now, c for Cancel]: ',
                self._FIRST_YEAR, this_year)
            end_year = self._ask_for_int(
                'Enter the end year[from 1996 to now, c for Cancel]: ',
                self._FIRST_YEAR, this_year)

            # Accept the two years in either order.
            if start_year > end_year:
                start_year, end_year = end_year, start_year

            self.generate_box_plot(start_year, end_year)
            self.exe_menu_0_3()
        except Exception as e:
            self.logger.error(e)

    def exe_menu_0_3_2(self):
        """
        Line plot menu: ask for a year and a month, draw the plot and go back
        to the plot menu.
        :return:
        """
        try:
            print(self.cut_off)
            print(
                'You are trying to generate a LINE PLOT for a specific month:')
            specific_year = self._ask_for_int(
                'Enter the year[from 1996 to now, c for Cancel]: ',
                self._FIRST_YEAR, date.today().year)
            specific_month = self._ask_for_int(
                'Enter the month[1-12, c for Cancel]: ', 1, 12)

            self.generate_line_plot(specific_year, specific_month)
            self.exe_menu_0_3()
        except Exception as e:
            self.logger.error(e)

    def renew_all_data(self):
        """
        Scrape everything from the website, then purge the database and store
        the freshly scraped data.
        :return:
        """
        try:
            my_scraper = WeatherScraper()
            my_scraper.scrape_now_to_earliest_month_weather()
            self.my_db.purge_data()
            self.my_db.save_data(my_scraper.weather)
        except Exception as e:
            self.logger.error(e)

    @staticmethod
    def _months_to_scrape(last_year, last_month, year, month):
        """Return (year, month) pairs from (year, month) back to the last stored month.

        The list is newest first and includes both ends; it is empty when the
        last stored month lies after (year, month).
        """
        months = []
        # Tuples compare element by element, i.e. chronologically here.
        while (year, month) >= (last_year, last_month):
            months.append((year, month))
            month -= 1
            if month == 0:
                year, month = year - 1, 12
        return months

    def fill_missing_data(self):
        """
        Scrape every month from today back to the month of the newest stored
        date (inclusive) and save the result.
        :return:
        """
        try:
            # The date is sliced as 'YYYY-MM-...'.
            last_one_date = self.my_db.fetch_last_one()[0][0]
            last_one_year = int(last_one_date[:4])
            last_one_month = int(last_one_date[5:7])

            today = date.today()
            my_scraper = WeatherScraper()
            # The last stored month is scraped again on purpose: it may only
            # be partially stored.
            for year, month in self._months_to_scrape(
                    last_one_year, last_one_month, today.year, today.month):
                my_scraper.scrape_month_weather(year, month)

            self.my_db.save_data(my_scraper.weather)
        except Exception as e:
            self.logger.error(e)

    def generate_box_plot(self, start_year: int, end_year: int) -> None:
        """
        Generate a box plot for a year range, or print a warning when either
        boundary year has no data in the database.
        :param start_year: first year of the range
        :param end_year: last year of the range
        :return:
        """
        try:
            no_data_warning = (
                'Warning: there is no data of year[{0}] in the database. Please update first.'
            )
            start_year_data = self.my_db.fetch_data(start_year)
            end_year_data = self.my_db.fetch_data(end_year)
            if not start_year_data:
                print(no_data_warning.format(start_year))
            elif not end_year_data:
                print(no_data_warning.format(end_year))
            else:
                my_plot = PlotOperations()
                my_plot.generate_box_plot(start_year, end_year)
        except Exception as e:
            self.logger.error(e)

    def generate_line_plot(self, specific_year: int,
                           specific_month: int) -> None:
        """
        Generate a line plot for a month, or print a warning when that month
        has no data in the database.
        :param specific_year: year of the month to plot
        :param specific_month: month number to plot
        :return:
        """
        try:
            month_data = self.my_db.fetch_data(specific_year, specific_month)
            if not month_data:
                print(
                    'Warning: there is no data of [{0}-{1}] in the database. Please update first.'
                    .format(specific_year, specific_month))
            else:
                my_plot = PlotOperations()
                my_plot.generate_line_plot(specific_year, specific_month)
        except Exception as e:
            self.logger.error(e)
class Application(tk.Frame):
    """Application for the weather scraper scripts compiled for Programming In Python
       Sends a request to the Government of Canada
       website http://climate.weather.gc.ca/climate_data/
       and gives users the option to display a month
       as a line graph, or a range of years as a box plot
    """
    def __init__(self, master=None):
        """Build the main frame, its widgets and the database handle.

        :param master: parent widget; when None, tkinter falls back to its
            default root window
        """
        super().__init__(master)
        # tk.Frame.__init__ has already stored the resolved parent in
        # self.master (the default root when master is None). Re-assigning the
        # raw argument would replace it with None and make the geometry() call
        # below fail, so the attribute is deliberately left untouched.
        self.master.geometry('900x500')
        self.place(relx=0.5, rely=0.5, anchor=tk.CENTER)
        self.db_status_text = tk.StringVar()
        self.line_month = tk.StringVar(self)
        # Widget references; filled in by the create_*_widgets methods.
        self.db_status_label = None
        self.start_year_entry = None
        self.end_year_entry = None
        self.box_plot_error = None
        self.month_entry = None
        self.year_entry = None
        self.line_plot_error = None
        # Build the widgets, then open the database they operate on.
        self.create_widgets()
        self.db_ops = DBOperations()
        self.db_ops.initialize_db()

    def create_widgets(self):
        """Place the title, then build the database, box plot and line plot sections"""
        title = tk.Label(self, text='Weather Processor', font=('Arial Bold', 22))
        title.grid(row=0, column=0, columnspan=4, pady=(10, 24))

        self.create_db_widgets()
        self.create_bloxplot_widgets()
        self.create_lineplot_widgets()

    def create_db_widgets(self):
        """Build the database section: view / update / purge buttons and a status label"""
        heading = tk.Label(self, text='Database related Actions:', font=('Arial', 16))
        heading.grid(row=1, column=0, columnspan=4, pady=(0, 10), sticky=tk.W)

        # One button per grid column, left to right.
        buttons = (
            ("View All Data", self.view_all_data),
            ("Update Database", self.update_db),
            ("Purge all Data", self.purge_db),
        )
        for column, (caption, handler) in enumerate(buttons):
            tk.Button(self, text=caption, command=handler).grid(row=2, column=column)

        self.db_status_label = tk.Label(self, textvariable=self.db_status_text)
        self.db_status_label.grid(row=3, column=0, columnspan=2)

    def create_bloxplot_widgets(self):
        """Build the box plot section: start and end year
           entries, the generate button and an initially
           blank error label
        """
        heading = tk.Label(self, text="Box Plot:", font=("Arial", 16))
        heading.grid(row=4, column=0, columnspan=4, pady=(50, 10), sticky=tk.W)

        start_caption = tk.Label(self, text="Start Year:")
        start_caption.grid(row=5, column=0, pady=(10, 0), sticky=tk.W)
        self.start_year_entry = tk.Entry(self)
        self.start_year_entry.grid(row=6, column=0, sticky=tk.W)

        end_caption = tk.Label(self, text="End Year:")
        end_caption.grid(row=5, column=1, pady=(10, 0), sticky=tk.W)
        self.end_year_entry = tk.Entry(self)
        self.end_year_entry.grid(row=6, column=1, sticky=tk.W)

        plot_button = tk.Button(self, text="Generate Blox Pot",
                                command=self.generate_boxplot)
        # tk.NSEW is the same 'nsew' string as tk.N+tk.S+tk.E+tk.W
        plot_button.grid(row=7, column=0, columnspan=2, sticky=tk.NSEW)

        self.box_plot_error = tk.Label(self, text=" ", fg="#ff0000")
        self.box_plot_error.grid(row=8, column=0, columnspan=2, sticky=tk.W)

    def create_lineplot_widgets(self):
        """Build the line plot section: month drop-down,
           year entry, the generate button and an initially
           blank error label
        """
        heading = tk.Label(self, text="Line Plot:", font=("Arial", 16))
        heading.grid(row=4, column=2, columnspan=4, pady=(50, 10), sticky=tk.W)

        month_caption = tk.Label(self, text="Month:")
        month_caption.grid(row=5, column=2, pady=(10, 0), sticky=tk.W)

        month_choices = ("jan", "feb", "mar", "apr", "may", "jun", "jul",
                         "aug", "sep", "oct", "nov", "dec")
        self.line_month.set(month_choices[0])  # default value

        self.month_entry = tk.OptionMenu(self, self.line_month, *month_choices)
        self.month_entry.grid(row=6, column=2, sticky=tk.W)

        year_caption = tk.Label(self, text="Year:")
        year_caption.grid(row=5, column=3, pady=(10, 0), sticky=tk.W)
        self.year_entry = tk.Entry(self)
        self.year_entry.grid(row=6, column=3, sticky=tk.W)

        plot_button = tk.Button(self, text="Generate Line Pot",
                                command=self.generate_lineplot)
        # tk.NSEW is the same 'nsew' string as tk.N+tk.S+tk.E+tk.W
        plot_button.grid(row=7, column=2, columnspan=2, sticky=tk.NSEW)

        self.line_plot_error = tk.Label(self, text=" ", fg="#ff0000")
        self.line_plot_error.grid(row=8, column=2, columnspan=2, sticky=tk.W)

    def view_all_data(self):
        """Open a new window with a table of every row returned by get_all_data()"""
        window = tk.Toplevel(self)
        window.title("All Weather Data")
        window.geometry("900x500")
        table = ttk.Treeview(window)

        # (column id, heading text, column options); "#0" is the tree column
        layout = (
            ("#0", "ID",
             dict(width=50, stretch=tk.YES, anchor=tk.W)),
            ("date", "Sample Date",
             dict(width=110, minwidth=100, stretch=tk.YES, anchor=tk.CENTER)),
            ("location", "Location",
             dict(width=80, anchor=tk.W)),
            ("min_temp", "Minimum Temperature",
             dict(width=150, stretch=tk.YES, anchor=tk.E)),
            ("max_temp", "Maximum Temperature",
             dict(width=150, stretch=tk.YES, anchor=tk.E)),
            ("avg_temp", "Average Temperature",
             dict(width=150, stretch=tk.YES, anchor=tk.E)),
        )
        table["columns"] = tuple(name for name, _, _ in layout if name != "#0")
        for name, _, options in layout:
            table.column(name, **options)
        for name, title, _ in layout:
            table.heading(name, text=title)

        try:
            for record in self.db_ops.get_all_data():
                # Item 0 is used as both item id and label; items 1-5 fill the columns.
                row_id = str(record[0])
                cells = tuple(str(record[i]) for i in range(1, 6))
                table.insert("", "end", row_id, text=row_id, values=cells)
        except Exception as e:
            print("ERROR: " + str(e))

        table.pack(expand=1, fill=tk.BOTH)

    def update_db(self):
        """Bring the database up to today's date.

        With no stored row everything is scraped; otherwise only the span
        from the newest stored date to today is fetched. The status label
        shows a progress message while this runs and is cleared afterwards,
        also when an error occurred. Errors are printed, not raised.
        """
        self.db_status_text.set(
            "Fetching the data and Updating the Database")
        # The scrape below blocks the Tk event loop; repaint now, otherwise
        # the progress message would never become visible.
        self.update_idletasks()
        try:
            latest_row = self.db_ops.get_latest_row()
            scraper = WeatherScraper()
            if latest_row is None:
                data = scraper.scrape_all_data()
                self.db_ops.save_data(data)
            else:
                latest_db_date = latest_row[1]
                today = datetime.today().strftime('%Y-%m-%d')
                # Assumes stored dates use the same YYYY-MM-DD text format,
                # so plain string comparison is chronological.
                if today > latest_db_date:
                    data = scraper.scrape_data(latest_db_date, today)
                    self.db_ops.save_data(data)
        except Exception as e:
            print("ERROR: " + str(e))
        finally:
            # Always clear the message so a failure does not leave it stuck.
            self.db_status_text.set(" ")

    def purge_db(self):
        """Ask for confirmation and, when given, delete all data through db_ops.purge_data()"""
        confirmed = messagebox.askokcancel(
            title='Purge Data',
            message='Do you really want to delete all data?',
            icon='error')
        if not confirmed:
            return
        self.db_ops.purge_data()

    def generate_boxplot(self):
        """Validate the two year entries and show the requested boxplot.

        Invalid input (non-numeric or not positive) and a start year after
        the end year are reported in the box plot error label. Errors raised
        while fetching or plotting are printed and leave the label blank.
        """
        # Step 1: input validation. Kept apart from plotting so that a bad
        # entry always produces the "valid Year" message.
        try:
            start_year = int(self.start_year_entry.get())
            end_year = int(self.end_year_entry.get())
            if start_year <= 0 or end_year <= 0:
                raise ValueError('year values must be positive')
        except ValueError as e:
            self.box_plot_error['text'] = 'Please enter valid Year values!'
            print("ERROR :", str(e))
            return

        if start_year > end_year:
            self.box_plot_error[
                'text'] = 'ERROR: Start Year can not be greater than end Year!'
            return

        # Step 2: fetch and plot.
        self.box_plot_error['text'] = ' '
        try:
            data = self.db_ops.fetch_data(
                start_date=(str(start_year) + "-01-01"),
                end_date=(str(end_year) + "-12-31"))
            boxplot_data = self.format_data_for_boxplot(data)
            plot_ops = PlotOperations(data=boxplot_data)
            plot_ops.show_boxplot()
        except Exception as e:
            print("ERROR :", str(e))

    def get_month_index(self, value):
        """Return the 1-based position of a month abbreviation in the year.

        `value` must be one of the lower-case three-letter abbreviations
        listed below; anything else raises ValueError.
        """
        abbreviations = [
            "jan", "feb", "mar", "apr", "may", "jun",
            "jul", "aug", "sep", "oct", "nov", "dec",
        ]
        position = abbreviations.index(value)
        return position + 1

    def get_formatted_month(self, index):
        """Return the month number as text, prefixing "0" when it is below 10"""
        text = str(index)
        return "0" + text if index < 10 else text

    def generate_lineplot(self):
        """Validate the month/year selection and show the matching lineplot.

        The selected month must be a known abbreviation and the year must
        parse as a positive integer; otherwise an error is written to the
        line-plot error label and nothing is plotted.  With valid input the
        label is cleared, that month's rows are fetched, reduced to a list of
        values and handed to the plotter.  Failures while fetching or
        plotting are printed to stdout.
        """
        # The month lookup lives inside the validation block too: an unknown
        # month raises ValueError, which previously escaped uncaught.
        try:
            month_index = self.get_month_index(self.line_month.get())
            formatted_month = self.get_formatted_month(month_index)
            year = int(self.year_entry.get())
            if year <= 0:
                raise ValueError("year must be positive")
        except ValueError as e:
            self.line_plot_error[
                'text'] = 'Please enter valid Month and Year values!'
            print("ERROR :", str(e))
            return

        self.line_plot_error['text'] = ' '
        try:
            month_prefix = str(year) + "-" + formatted_month
            # Day 31 is used as the upper bound for every month.
            # NOTE(review): presumably fetch_data treats the dates as an
            # inclusive range, so shorter months are still covered -- confirm.
            data = self.db_ops.fetch_data(start_date=month_prefix + "-01",
                                          end_date=month_prefix + "-31")
            lineplot_data = self.format_data_for_lineplot(data)
            # passing empty data as this data is not used in generating line-plot
            plot_ops = PlotOperations(data={})
            plot_ops.show_lineplot(lineplot_data, month_index, year)
        except Exception as e:
            print("ERROR :", str(e))

    def format_data_for_boxplot(self, data):
        """Group db rows into ``{year: {month: [values]}}`` for a boxplot.

        Each row is read positionally: index 1 is parsed as a 'YYYY-MM-DD'
        date and index 5 is the value that gets collected.  If a row cannot
        be processed, the error label is updated, the error is printed and
        whatever was collected up to that point is returned.
        """
        return_data = {}
        try:
            for row in data:
                date = datetime.strptime(row[1], '%Y-%m-%d')
                month_values = return_data.setdefault(date.year, {}).setdefault(
                    date.month, [])
                # if value is None, we are setting default value as 0.
                # We tried using None and NaN from numpy library,
                # but it is not currently supported to matplotlib :(
                month_values.append(0 if row[5] is None else row[5])

        except Exception as e:
            self.box_plot_error['text'] = 'Error while processing data'
            print("ERROR: " + str(e))
        # Returned after the try block rather than from a `finally`, so
        # non-Exception signals (e.g. KeyboardInterrupt) are not swallowed.
        return return_data

    def format_data_for_lineplot(self, data):
        """Collect the value at index 5 of every db row for a lineplot.

        If a row cannot be processed, the error label is updated, the error
        is printed and the values collected up to that point are returned.
        """
        return_data = []
        try:
            for row in data:
                # if value is None, we are setting default value as 0.
                # We tried using None and NaN from numpy library,
                # but it is not currently supported to matplotlib :(
                return_data.append(0 if row[5] is None else row[5])

        except Exception as e:
            self.line_plot_error['text'] = 'Error while processing data'
            print("ERROR: " + str(e))
        # Returned after the try block rather than from a `finally`, so
        # non-Exception signals (e.g. KeyboardInterrupt) are not swallowed.
        return return_data

    def say_hi(self):
        """Print a fixed greeting to stdout"""
        greeting = "hi there, everyone!"
        print(greeting)
Пример #16
0
 def OnClickedDownload(self, event):
     """Handle a click on the download control.

     Purges the existing data, re-initializes the database and then starts a
     fresh scraping run.  The `event` argument is not used.
     """
     database = DBOperations()
     database.purge_data()
     database.initialize_db()
     WeatherScraper().start_scraping()
Пример #17
0
 def main(self):
     """
     Run the interactive console menu until the user picks "4" (exit).

     Choices:
       1 - scrape a few recent months of the current year and store them
       2 - scrape year by year going backwards and store the result
       3 - ask for a "from to" year range, query it and show a box plot
       4 - leave the loop
     Every choice reports its own failures on stdout, so a failing action
     brings the user back to the menu instead of ending the program.
     """
     menu_lines = (
         "1. Update a set of weather data up to today",
         "2. Download a full set of weather data",
         "3. A year range of interest (from year, to year)",
         "4. Exit",
     )
     user_selection = ''
     while user_selection != '4':
         try:
             for menu_line in menu_lines:
                 print(menu_line)
             user_selection = input("Please make your choice...")
             if user_selection == '1':
                 try:
                     now_date = datetime.datetime.now()
                     # NOTE(review): in January/February the month range
                     # below starts at -1 or 0 -- confirm this is intended.
                     self._scrape_and_store(
                         range(now_date.year, now_date.year - 1, -1),
                         range(now_date.month - 2, now_date.month + 1))
                 except Exception as e:
                     print(
                         "Error in Updating a set of weather data up to today: ",
                         e)
             elif user_selection == '2':
                 try:
                     now_date = datetime.datetime.now()
                     # NOTE(review): range(now_date.year) stops one short of
                     # the current year, and the months run 0..12 -- confirm.
                     self._scrape_and_store(reversed(range(now_date.year)),
                                            range(0, 13))
                 except Exception as e:
                     print(
                         "Error in downloading a full set of weather data: ",
                         e)
             elif user_selection == '3':
                 try:
                     answer = input(
                         "Please select a RANGE of your interest(e.g 2017 2019): "
                     )
                     bounds = answer.split()
                     my_database = DBOperations()
                     dict_value = my_database.query_infos(bounds[0], bounds[1])
                     PlotOperations().diplay_box_plot(dict_value, bounds[0],
                                                      bounds[1])
                 except Exception as e:
                     print(
                         "Error in A year range of interest (from year, to year): ",
                         e)
             elif user_selection == '4':
                 break
             else:
                 print("Invalid choice")
         except Exception as e:
             print("Error plot_operations.py: ", e)

 def _scrape_and_store(self, years, months):
     """Scrape every (year, month) page in order, then store the result.

     Scraping stops as soon as the scraper's `is_equal` flag is False after
     a page has been fed to it.  `months` is iterated once per year, so it
     must be re-iterable (a range, list or tuple).
     """
     my_scraper = WeatherScraper()
     stop_requested = False
     for wanted_year in years:
         my_scraper.url_year = wanted_year
         if stop_requested:
             break
         for wanted_month in months:
             my_scraper.url_month = wanted_month
             my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
             with urllib.request.urlopen(my_url) as response:
                 html = str(response.read())
             my_scraper.feed(html)
             if my_scraper.is_equal is False:
                 stop_requested = True
                 break
     my_database = DBOperations()
     my_database.create_table(my_scraper.dict_outer)
Пример #18
0
 def full_data(self, event):
     """Event handler for the full data-set download.

     Delegates entirely to DBOperations.create_database(); the `event`
     argument is not used.
     """
     DBOperations().create_database()
Пример #19
0
            output_dir = "images"
            mkdir_p(output_dir)
            file_path = '{0}/{1}'.format(output_dir, file_name)

            self.line_plot_path_saving_dict[str(specific_year) + '-' +
                                            str(specific_month)] = file_path
            plt.savefig(file_path)
            plt.show()

            return self.line_plot_path_saving_dict
        except Exception as e:
            self.logger.error(e)


if __name__ == '__main__':
    # Manual end-to-end run: scrape, store, then draw the plots.
    # (year, month) pairs that are scraped individually and line-plotted.
    line_plot_targets = ((2018, 5), (2020, 12))

    mydb = DBOperations('weather.sqlite')
    mydb.initialize_db()

    my_scraper = WeatherScraper()
    # For testing, range is 1996-1997
    # NOTE(review): the call below passes 1998 and 5, which does not
    # obviously match the range named in the remark above -- confirm.
    my_scraper.scrape_now_to_earliest_month_weather(1998, 5)
    for target_year, target_month in line_plot_targets:
        my_scraper.scrape_month_weather(target_year, target_month)

    # Start from an empty table before saving what was just scraped.
    mydb.purge_data()
    mydb.save_data(my_scraper.weather)

    my_plot = PlotOperations()
    my_plot.generate_box_plot(1996, 1997)
    for target_year, target_month in line_plot_targets:
        my_plot.generate_line_plot(target_year, target_month)
import configparser
from flask import Flask
from flask import request, jsonify
from db_operations import DBOperations

# Settings are read from config.ini in the current working directory.
config = configparser.ConfigParser()
config.read("config.ini")
app = Flask(__name__)

# The database connection is opened once, when the API server module loads.
_mysql_settings = config["mysql"]
db = DBOperations(
    _mysql_settings["host"],
    _mysql_settings["port"],
    _mysql_settings["user"],
    _mysql_settings["password"],
)
db_connection = db.connect(_mysql_settings["db"])


class ResponseMessage(Exception):
    """Exception that carries a response message for the API client.

    Attributes are plain instance values:
      message     -- text placed under the "message" key of the response
      status_code -- HTTP status to answer with
      payload     -- optional mapping of extra fields merged into the response
    """

    # Fallback when the raiser passes no status_code; without it the
    # attribute would not exist at all and reading it would raise
    # AttributeError.
    # NOTE(review): 400 follows the usual Flask API-error pattern -- confirm
    # it suits every place this exception is raised.
    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        """Store the message, an optional status override and extra payload."""
        # Forward the message so str(exc) and tracebacks are not empty.
        super().__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload (if any) plus the message as a new dict."""
        rv = dict(self.payload or ())
        rv["message"] = self.message
        return rv


@app.errorhandler(ResponseMessage)
Пример #21
0
class WeatherProcessor:
    """
    This class manages the user interaction to generate plots and update the data.

    It ties together the database (`self.db`), the web scraper (`self.ws`)
    and the plotter (`self.pl`).  Its methods log failures through `logging`
    instead of raising them to the caller.
    """
    def __init__(self):
        """ Initialize classes and variables for db operations, web scraping, plot operations and UI configuration """
        # Defined first so both attributes exist even when set-up fails.
        self.last_updated = ""
        self.first_updated = ""
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()
            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
                self.first_updated = self.db.fetch_first()[0]["sample_date"]

        except Exception as e:
            logging.error(f"weatherprocessor:__init__, {e}")

    def download_data(self):
        """ Clears the database, reinitializes it, then downloads all the data to it. """
        try:
            self.db.purge_data()
            self.db.initialize_db()
            self.collect_data()

        except Exception as e:
            logging.error(f"weatherprocessor:download_data, {e}")

    def update_data(self):
        """ Ensures the database exists, collects the missing data and
        refreshes `last_updated` from the newest stored row. """
        try:
            self.db.initialize_db()
            self.collect_data()
            self.last_updated = self.db.fetch_last()[0]["sample_date"]

        except Exception as e:
            logging.error(f"weatherprocessor:update_data, {e}")

    def get_box_plot(self, start_year, end_year):
        """ Fetches data within the users inputted range then
        generates a box plot for the mean temperatures of each month. """
        try:
            # The upper bound handed to fetch_data is end_year + 1.
            # NOTE(review): presumably fetch_data treats it as exclusive -- confirm.
            weather = self.db.fetch_data(start_year, int(end_year) + 1, False)
            self.pl.generate_box_plot(weather, start_year, end_year)

        except Exception as e:
            logging.error(f"weatherprocessor:get_box_plot, {e}")

    def get_line_plot(self, year, month):
        """ User inputs the month and year of the data to be fetched
        then generates a line plot for the daily mean temperatures of that month. """
        try:
            weather = self.db.fetch_data(year, month, True)
            self.pl.generate_line_plot(weather, year, month)

        except Exception as e:
            logging.error(f"weatherprocessor:get_line_plot, {e}")

    def _download_month(self, year, month):
        """Fetch one month's page and return the scraper's parsed dictionary."""
        url = self.ws.get_url(year, month)
        with urllib.request.urlopen(url) as response:
            html = str(response.read())
        self.ws.feed(html)
        return self.ws.return_dict()

    def collect_data(self):
        """ Scrape month by month, newest first, and save each month.

        Starts at the current month and walks backwards through the years.
        Collection stops when either
          * a month's data equals the data of the month scraped just before
            it (within the same year), or
          * a scraped day matches the newest date already in the database;
            only the days scraped before reaching it are saved.
        A 'update_latest_download' message is published after each year.
        Any failure is logged and ends the collection, because retrying the
        same month unconditionally would never terminate. """
        try:
            today = date.today()
            year, month = today.year, today.month
            latest_rows = self.db.fetch_last()
            recent_date = latest_rows[0]["sample_date"] if latest_rows else ""
        except Exception as e:
            logging.error(f"weatherprocessor:collect_data, {e}")
            return

        duplicate_month = duplicate_day = False
        while not duplicate_month and not duplicate_day:
            # One pass per year; month_dict only holds the current year.
            month_dict = {}

            while not duplicate_day and month > 0:
                try:
                    month_dict[month] = self._download_month(year, month)

                    if (month + 1 in month_dict
                            and month_dict[month] == month_dict[month + 1]):
                        # Same data as the previously scraped month: drop it
                        # and stop.  Used for download_data.
                        del month_dict[month]
                        duplicate_month = True
                        break

                    if recent_date != "":
                        # Keep only the days scraped before hitting the date
                        # that is already stored.
                        # assumes keys are day strings shaped like the day
                        # part of sample_date -- TODO confirm
                        new_days = {}
                        for key, value in reversed(month_dict[month].items()):
                            if f"{year}-{month:02d}-{key}" == recent_date:
                                duplicate_day = True
                                break
                            new_days[key] = value
                        month_dict[month] = new_days

                    self.db.save_data(month_dict[month], month, year)
                    month -= 1

                except Exception as e:
                    logging.error(
                        f"weatherprocessor:collect_data:loop:loop2, {e}")
                    # `month` was not advanced, so looping again would repeat
                    # the same failing request forever.
                    return

            try:
                pub.sendMessage('update_latest_download', year=str(year))
            except Exception as e:
                logging.error(f"weatherprocessor:collect_data:loop, {e}")
                return
            month = 12
            year -= 1

    def get_years_for_dropdown(self, min_year):
        """Retrieves the years for the combo boxes based on a given min_year.

        Returns the years as strings, from `min_year` (or the first stored
        year when `min_year` is "") up to the last stored year.  The list is
        empty when the table does not exist or the lookup fails."""
        years = []
        try:
            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
                self.first_updated = self.db.fetch_first()[0]["sample_date"]

                if min_year == "":
                    first_year = int(self.first_updated[:4])
                else:
                    first_year = int(min_year)
                last_year = int(self.last_updated[:4])

                years = [str(y) for y in range(first_year, last_year + 1)]

        except Exception as e:
            logging.error(f"weatherprocessor:get_years_for_dropdown, {e}")
        return years

    def get_months_for_dropdown(self, year):
        """Retrieves the months for the month combo box based on the selected year.

        Returns the last two characters of every value in the rows given by
        `fetch_months`, in reverse order.  When `year` is "" the first stored
        year is used.  The list is empty when the table does not exist; if
        the lookup fails, what was gathered so far is returned."""
        months = []
        try:
            if self.db.is_table_exist():
                self.first_updated = self.db.fetch_first()[0]["sample_date"]

                if year == "":
                    year = int(self.first_updated[:4])

                for item in self.db.fetch_months(year):
                    try:
                        for value in item.values():
                            # A value that cannot be sliced is logged and skipped.
                            try:
                                months.append(str(value[-2:]))

                            except Exception as e:
                                logging.error(
                                    f"weatherprocessor:get_months_for_dropdown:loop:loop2, {e}"
                                )

                    except Exception as e:
                        logging.error(
                            f"weatherprocessor:get_months_for_dropdown:loop, {e}"
                        )

        except Exception as e:
            logging.error(f"weatherprocessor:get_months_for_dropdown, {e}")
        return months[::-1]
 def setUp(self):
     """Store the database file name and open a DBOperations object on it."""
     db_file = "weather.sqlite"
     self.dbname = db_file
     self.mydb = DBOperations(db_file)
Пример #23
0
    def start_scraping(self):
        '''Scrape the weather site month by month, newest first, and save each month.

        Scraping starts at the current month (or the previous month when
        today is the 1st) and walks backwards.  It ends when
          * the month that was just saved is the month of the most recent
            date reported by the database, or
          * a month parses to exactly the same data as the month before it, or
          * a request, parse or save fails (the error is logged).
        Nothing is returned; the scraped data is handed to the database.
        '''
        try:
            today = datetime.today()
            month = today.month
            year = today.year
            myparser = WeatherScraper()
            db = DBOperations()

            if today.day == 1:
                month -= 1
                if month == 0:
                    # 1st of January: step back into December of last year
                    # instead of requesting the non-existent month 0.
                    month = 12
                    year -= 1

            # NOTE(review): update_data() appears to return the most recent
            # stored date as a 'YYYY-MM-DD' string, or None -- confirm.
            most_recent_date = db.update_data()
            if most_recent_date is None:
                most_recent_year = 0
                most_recent_month = 0
            else:
                most_recent_year = int(most_recent_date[0:4])
                most_recent_month = int(most_recent_date[5:7])

            previous_weather = {}
            data = True

            while data:
                try:
                    url = "https://climate.weather.gc.ca/climate_data/daily_data_e.html?StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=1&Year={}&Month={}#".format(
                        year, month)
                    with urllib.request.urlopen(url) as response:
                        html = str(response.read())

                    myparser.feed(html)

                    # Identical to the previous month: this is the last pass.
                    if previous_weather == myparser.weather and len(
                            previous_weather) != 0:
                        data = False

                    db.save_data(myparser.weather)

                    previous_weather = myparser.weather.copy()
                    myparser.weather.clear()

                    if most_recent_year == year and most_recent_month == month:
                        break

                    print(year)
                    print(month)

                    month -= 1

                    if month == 0:
                        year -= 1
                        month = 12

                except Exception as e:
                    logging.error(e)
                    # year/month were not advanced, so looping again would
                    # repeat the same failing request forever.
                    break

        except Exception as e:
            logging.error(e)