def process_html(self, data):
    """Reduce raw HTML to a list of underscore-joined text lines.

    ``<script>`` and ``<style>`` elements are removed before the text is
    extracted. Every remaining non-blank line is split on whitespace and
    its pieces are joined with underscores. The resulting list is stored
    in ``self._data``.

    Args:
        data (bytes): The raw HTML data.

    Returns:
        The instance itself (``self``).
    """
    _check_parameter('data', bytes, data)

    # Parse the markup and pull out elements whose text is not page content.
    document = BeautifulSoup(data, 'lxml')
    for unwanted in document(['script', 'style']):
        unwanted.extract()

    # A blank or whitespace-only line splits into an empty list and is
    # therefore dropped by the filter below.
    tokenized = (row.split() for row in document.get_text().splitlines())
    self._data = ['_'.join(tokens) for tokens in tokenized if tokens]

    return self
def insert_values(self, table_name, table_values):
    """Build an ``INSERT INTO`` query for one row and store it.

    The query text is assigned to ``self._query``; nothing is executed
    here.

    Args:
        table_name (str): The name of the table.
        table_values (dict): The column labels and corresponding values as
            key-value pairs. Must contain at least one entry.

    Returns:
        The instance itself (``self``).

    Raises:
        ValueError: If ``table_values`` is empty.
    """
    self._check_database()
    utils._check_parameter('table_name', str, table_name)
    utils._check_parameter('table_values', dict, table_values)

    # An empty mapping cannot form a valid INSERT; fail with a clear message
    # instead of an opaque tuple-unpacking error.
    if not table_values:
        raise ValueError('table_values must contain at least one column.')

    # NOTE(review): labels and values are interpolated into the SQL text
    # without escaping. A value containing a single quote breaks the query,
    # and untrusted input could inject SQL. Switching to driver-side
    # parameter binding needs a change where the query is executed.
    _columns = ', '.join(str(label) for label in table_values)
    _values = ', '.join(
        "'{}'".format(str(value)) for value in table_values.values())

    self._query = """INSERT INTO {} ({}) VALUES ({}); """.format(
        table_name, _columns, _values)

    return self
def create_table(self, table_name, table_columns):
    """Build a ``CREATE TABLE IF NOT EXISTS`` query and store it.

    The Python data types are translated with ``self.convert_dtypes`` and
    the query text is assigned to ``self._query``; nothing is executed
    here.

    Args:
        table_name (str): Name of the new table.
        table_columns (dict): The column labels and corresponding column
            data types as key-value pairs. The data types are given in
            Python format. Must contain at least one entry.

    Returns:
        The instance itself (``self``).

    Raises:
        ValueError: If ``table_columns`` is empty.
    """
    self._check_database()
    utils._check_parameter('table_name', str, table_name)
    utils._check_parameter('table_columns', dict, table_columns)

    # A table without columns is not a valid definition; fail with a clear
    # message instead of an opaque tuple-unpacking error.
    if not table_columns:
        raise ValueError('table_columns must contain at least one column.')

    # convert_dtypes receives the data types as a tuple, in column order.
    mysql_dtypes = self.convert_dtypes(tuple(table_columns.values()))

    # Each column definition carries a leading space; definitions are
    # separated by a bare comma.
    _columns = ','.join(
        ' {} {}'.format(label, dtype)
        for label, dtype in zip(table_columns, mysql_dtypes))

    self._query = """CREATE TABLE IF NOT EXISTS {} ({}); """.format(
        table_name, _columns)

    return self
def update_slumber_interval(cls, slumber_interval):
    """Set the pause between attempts to collect parameters.

    The value is the amount of time, in seconds, that passes between each
    attempt to collect parameters from the Davis WeatherLink website. It
    is stored on the class as ``_SLUMBER_INTERVAL``.

    Args:
        slumber_interval (int): The slumber interval time in seconds.
    """
    _check_parameter('slumber_interval', int, slumber_interval)

    cls._SLUMBER_INTERVAL = slumber_interval
def update_newline(cls, newline):
    """Set the newline symbol used when writing to file.

    The symbol is stored on the class as ``_NEWLINE``.

    Args:
        newline (str): The symbol used as newline character when writing
            to file.
    """
    _check_parameter('newline', str, newline)

    cls._NEWLINE = newline
def update_separator(cls, separator):
    """Set the symbol that separates items when writing to file.

    The symbol is stored on the class as ``_SEPARATOR``.

    Args:
        separator (str): The symbol used to separate items when writing
            to file.
    """
    _check_parameter('separator', str, separator)

    cls._SEPARATOR = separator
def update_limit_varchars(cls, new_limit):
    """Set the character limit for string attributes in created tables.

    The limit is stored on the class as ``_CHARS_LIMIT``.

    Args:
        new_limit (int): The maximum number of characters assigned to
            string attributes.
    """
    utils._check_parameter('new_limit', int, new_limit)

    cls._CHARS_LIMIT = new_limit
def drop_unit(cls, unit):
    """Stop filtering a unit symbol from the collected raw parameter values.

    Removes the first occurrence of ``unit`` from ``cls._REMOVABLE_UNITS``.

    Args:
        unit (str): The unit symbol that will no longer be removed from
            the collected raw parameters.

    Raises:
        ValueError: If ``unit`` is not currently registered (raised by
            ``list.remove``; assumes ``_REMOVABLE_UNITS`` is a list).
    """
    # The label passed to the checker matches the actual parameter name.
    _check_parameter('unit', str, unit)

    cls._REMOVABLE_UNITS.remove(unit)
def add_unit(cls, unit):
    """Register a unit symbol to be filtered from collected raw values.

    Appends ``unit`` to ``cls._REMOVABLE_UNITS`` unless it is already
    present.

    Args:
        unit (str): The unit symbol that will be removed from the
            collected raw parameters.
    """
    # The label passed to the checker matches the actual parameter name.
    _check_parameter('unit', str, unit)

    # Skip duplicates: a unit stored twice would still be filtered after a
    # single removal of that unit from the list.
    if unit not in cls._REMOVABLE_UNITS:
        cls._REMOVABLE_UNITS.append(unit)
def drop_table(self, table_name):
    """Build a ``DROP TABLE IF EXISTS`` query and store it.

    ``self._check_database()`` is called first; the query text is then
    assigned to ``self._query``. Nothing is executed here.

    Args:
        table_name (str): Name of the table.

    Returns:
        The instance itself (``self``).
    """
    self._check_database()
    utils._check_parameter('table_name', str, table_name)

    statement = 'DROP TABLE IF EXISTS {};'.format(table_name)
    self._query = statement

    return self
def create_database(self, database):
    """Build a ``CREATE DATABASE IF NOT EXISTS`` query and store it.

    The query text is assigned to ``self._query`` and ``database`` is
    recorded in ``self._current_db``. Nothing is executed here.

    Args:
        database (str): Name of the database to create.

    Returns:
        The instance itself (``self``).
    """
    utils._check_parameter('database', str, database)

    statement = 'CREATE DATABASE IF NOT EXISTS {};'.format(database)
    self._query = statement

    # The new database is also recorded as the working database.
    self._current_db = database

    return self
def describe_table(self, table_name):
    """Build a ``DESCRIBE`` query for a table and store it.

    ``self._check_database()`` is called first; the query text is then
    assigned to ``self._query``. Nothing is executed here.

    Args:
        table_name (str): Name of the table.

    Returns:
        The instance itself (``self``).
    """
    self._check_database()
    utils._check_parameter('table_name', str, table_name)

    statement = 'DESCRIBE {};'.format(table_name)
    self._query = statement

    return self
def drop_database(self, database):
    """Build a ``DROP DATABASE IF EXISTS`` query and store it.

    The query text is assigned to ``self._query``; nothing is executed
    here. The working database recorded in ``self._current_db`` is reset
    to ``None`` only when it names the database being dropped.

    Args:
        database (str): Name of the database to delete.

    Returns:
        The instance itself (``self``).
    """
    utils._check_parameter('database', str, database)

    # Dropping some other database leaves the working database usable, so
    # only clear it on a name match. The comparison ignores case because
    # database-name case sensitivity depends on the server platform;
    # clearing too eagerly is the safe direction.
    current = getattr(self, '_current_db', None)
    if isinstance(current, str) and current.lower() == database.lower():
        current = None
    self._current_db = current

    self._query = 'DROP DATABASE IF EXISTS {};'.format(database)

    return self
def use_database(self, database):
    """Build a ``USE`` query for a database and store it.

    The query text is assigned to ``self._query`` and ``database`` is
    recorded in ``self._current_db``. Nothing is executed here.

    Args:
        database (str): Name of the new working database.

    Returns:
        The instance itself (``self``).
    """
    utils._check_parameter('database', str, database)

    statement = 'USE {}'.format(database)
    self._query = statement
    self._current_db = database

    return self
def fetch_html(self):
    """Collect the raw HTML data from ``self.url``.

    If the first request fails with a network or HTTP-protocol error, a
    message is printed, the method sleeps for ``self._SLUMBER_INTERVAL``
    seconds and tries exactly once more. A failure of the second attempt
    propagates to the caller.

    Returns:
        bytes: The body of the HTTP response.
    """
    # Local import keeps this block self-contained; HTTPException covers
    # protocol-level failures (e.g. incomplete reads) that are not OSError.
    from http.client import HTTPException

    _check_parameter('url', str, self.url)

    def _download():
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(self.url) as response:
            return response.read()

    try:
        return _download()
    except (OSError, HTTPException):
        # URLError, HTTPError and socket timeouts are all OSError
        # subclasses. KeyboardInterrupt and SystemExit are deliberately
        # not caught, so the user can still abort.
        print('No connection. Retrying in {} seconds'
              ''.format(float(self._SLUMBER_INTERVAL)))
        time.sleep(float(self._SLUMBER_INTERVAL))
        return _download()
def drop_parameters(cls, parameter_labels):
    """Remove target parameters.

    All labels are validated before ``cls._TARGET_PARAMETERS`` is touched,
    so an invalid or unknown label does not leave a partial removal.
    Repeated labels are treated as a single request.

    Args:
        parameter_labels (list or tuple of str): The labels of each
            parameter that will be excluded from the target parameters.

    Raises:
        KeyError: If a label is not among the current target parameters.

    Example:
        Dropping the default target parameter `Year`:

        >>> DataLogger.drop_parameters(['Year'])
    """
    _check_parameter('parameter_labels', (list, tuple), parameter_labels)
    for parameter_label in parameter_labels:
        _check_parameter('parameter_label', str, parameter_label)

    # Collapse repeats so the same key is never deleted twice.
    unique_labels = list(dict.fromkeys(parameter_labels))

    # Check presence up front; nothing is deleted if any label is unknown.
    for parameter_label in unique_labels:
        if parameter_label not in cls._TARGET_PARAMETERS:
            raise KeyError(parameter_label)

    for parameter_label in unique_labels:
        del cls._TARGET_PARAMETERS[parameter_label]
def add_parameters(cls, target_parameters):
    """Include additional target parameters.

    Every key-value pair is validated before ``cls._TARGET_PARAMETERS`` is
    modified, so a rejected pair does not leave a partial update.

    Args:
        target_parameters (dict): The target parameter labels and
            corresponding column numbers of target parameter values as
            key-value pairs. The parameter labels should be given as they
            appear in the Davis WeatherLink report. The column number
            refers to the report column number beginning at one and
            counting along each row.

    Example:
        Collecting the amount of rain for the last hour. The parameter
        label is given as `Last_Hour_rain`, and the target value is
        located at column number one as `Rate` (mm/hour). Thus,

        >>> DataLogger.add_parameters({'Last_Hour_rain': 1})
    """
    _check_parameter('target_parameters', dict, target_parameters)

    # First pass: validation only (presumably _check_parameter raises on a
    # type mismatch — confirm against its definition).
    for key, value in target_parameters.items():
        _check_parameter('key', str, key)
        _check_parameter('value', int, value)

    # Second pass: apply all pairs at once.
    cls._TARGET_PARAMETERS.update(target_parameters)
def update_target_value(cls, parameter, column_number):
    """Select a different logging value for any given parameter.

    Stores ``column_number`` under ``parameter`` in
    ``cls._TARGET_PARAMETERS``.

    Args:
        parameter (str): The target parameter label.
        column_number (int): The new reference to the target value.

    Example:
        Collecting `Today's Highs` values rather than `Current` values of
        the outside temperature requires changing the target value column
        number from 1 to 2. Thus,

        >>> DataLogger.update_target_value('Outside_Temp', 2)
    """
    checks = (
        ('parameter', str, parameter),
        ('column_number', int, column_number),
    )
    for label, expected_type, candidate in checks:
        _check_parameter(label, expected_type, candidate)

    cls._TARGET_PARAMETERS[parameter] = column_number