def get_from_website():
    """Parse the data at `URL`, upsert it into the db store, and return it."""
    # `parse` will also take a url (http only, no https)
    snapshot = normalize_html(URL)
    # Store handle is obtained after parsing, same order as before.
    get_db_store().upsert(snapshot, ['timestamp'])
    return snapshot
def test_wont_duplicate_data(self):
    # TODO `upsert` is just manually copied, actually test a function
    with open('fixtures/test_download.html', 'r') as fixture:
        parsed = utils.normalize_html(fixture)
    # Upsert the same parsed record twice; the table is expected to hold
    # exactly one row after each upsert.
    for _ in range(2):
        self.table.upsert(parsed, ['timestamp'])
        row_count = len(list(self.table.all()))
        self.assertEqual(row_count, 1)
def batch_process(store, files, batch=False):
    """Lazily parse each file in `files`, yielding one parsed result per file.

    `store` and `batch` are accepted but are not used by this generator.
    A file whose parsing raises `AssertionError` (treated as malformed
    HTML) is logged as an error and skipped.
    """
    # TODO abstract common stuff with `process()`
    for path in files:
        try:
            with open(path, 'r') as handle:
                parsed = normalize_html(handle)
        except AssertionError as err:  # malformed HTML
            logger.error("{} {}".format(path, err))
        else:
            yield parsed
def process(store, files):
    """Parse every file in `files` and upsert each parsed result into `store`.

    For each successfully parsed file, the record's timestamp (as an integer
    epoch value via `time.mktime`) and the record itself are logged at info
    level, then `store.upsert(data, ['timestamp'])` is called. A file whose
    parsing raises `AssertionError` (treated as malformed HTML) is logged as
    an error and skipped.
    """
    for path in files:
        try:
            with open(path, 'r') as handle:
                record = normalize_html(handle)
            epoch = int(time.mktime(record['timestamp'].timetuple()))
            # TODO delete file after parsing
        except AssertionError as err:  # malformed HTML
            logger.error("{} {}".format(path, err))
        else:
            logger.info("{} {}".format(epoch, record))
            store.upsert(record, ['timestamp'])
def test_normalize_works(self):
    # Parse the stored fixture page first, then compare it against the
    # values the fixture is known to contain.
    with open('fixtures/test_download.html', 'r') as fixture:
        parsed = utils.normalize_html(fixture)

    expected = {
        'total_system_capacity': 38322,
        'actual_system_demand': 31579,
        'total_wind_output': 5973,
        'dc_e': 0,
        'dc_l': 100,
        'dc_n': 25,
        'dc_r': 151,
        'dc_s': 0,
        'current_frequency': 59.962,
        'instantaneous_time_error': -2.562,
        'timestamp': datetime.datetime(2012, 3, 29, 23, 9, 50),
    }
    self.assertEqual(parsed, expected)