def sync_stats():
    """Fetch the latest stats from the IEDCR website and store them.

    The run is skipped when ``Meta`` reports that a stats sync is already
    in progress. Otherwise the syncing flag is raised for the duration of
    the run and lowered again when it ends, whether it succeeded or not.
    Exceptions raised while syncing are logged instead of propagated.
    """
    # Created outside the ``try`` so the error handler below can always
    # rely on ``logger`` being bound.
    logger = Logger.create_logger("tasks")

    # True once this run has taken responsibility for the syncing flag.
    owns_flag = False

    try:
        logger.info("Starting sync of stats data")

        if Meta.is_stats_syncing():
            logger.info("A stats sync is already in progress")
            return

        # Marked before the call so that a failure part-way through raising
        # the flag still gets cleaned up in ``finally``.
        owns_flag = True
        Meta.set_stats_syncing(True)

        provider = DataProvider()
        data = provider.get_stats()

        # Copy every fetched field onto the stored Stat record.
        stat = Stat.get()
        for attr, value in data.items():
            setattr(stat, attr, value)
        stat.save()

        logger.info("Stats sync complete")
    except Exception as e:
        logger.error(f"Stats sync failed with error: {e}")
    finally:
        # Only lower the flag if this run raised it; a run that failed
        # before that point must not clear a flag it does not own.
        if owns_flag:
            Meta.set_stats_syncing(False)
class BaseModel(db.Model):
    """Abstract base model with best-effort ``save`` and ``delete`` helpers.

    Both helpers commit immediately and never raise: a failure is logged
    and the session is rolled back so that it stays usable afterwards.
    """

    __abstract__ = True
    logger = Logger.create_logger(__name__)

    def save(self):
        """Add this item to the session and commit it.

        Errors are logged, not raised; the session is rolled back on failure.
        """
        try:
            db.session.add(self)
            db.session.commit()
        except Exception as e:
            self.logger.error(f"Error while saving to database: {e}")
            self._rollback()

    def delete(self):
        """Delete this item from the database and commit.

        Errors are logged, not raised; the session is rolled back on failure.
        """
        try:
            db.session.delete(self)
            db.session.commit()
        except Exception as e:
            self.logger.error(f"Error while deleting from database: {e}")
            self._rollback()

    def _rollback(self):
        """Roll the session back after a failed commit.

        Without a rollback the failed transaction stays attached to the
        session and later operations on it keep failing. A failure of the
        rollback itself is logged so ``save``/``delete`` still never raise.
        """
        try:
            db.session.rollback()
        except Exception as e:
            self.logger.error(f"Error while rolling back session: {e}")
def sync_district_data():
    """Fetch the latest district counts from the IEDCR reports and store them.

    The run is skipped when ``Meta`` reports that a district sync is already
    in progress. Otherwise the syncing flag is raised for the duration of
    the run and lowered again when it ends, whether it succeeded or not.
    Exceptions raised while syncing are logged instead of propagated.

    For every fetched row the matching ``District`` is created or updated.
    ``prev_count`` keeps the count from before the latest change and is
    folded into ``count`` once at least a day has passed since the last
    recorded sync time without the count changing.
    """
    # NOTE: the logger is named "tasks" on purpose.
    # Logger.create_logger(__name__), where __name__ == "application.tasks",
    # does not bind the handler; anything prefixed "application.*" fails
    # here because Logger.create_logger() assumes a handler is already
    # bound although it is not. Other modules (e.g. "application.provider.*"
    # inside DataProvider) are not affected. Unresolved for now.
    #
    # Created outside the ``try`` so the error handler below can always
    # rely on ``logger`` being bound.
    logger = Logger.create_logger("tasks")

    # True once this run has taken responsibility for the syncing flag.
    owns_flag = False

    try:
        logger.info("Starting sync of district data")

        if Meta.is_district_syncing():
            logger.info("A district sync is already in progress")
            return

        # Marked before the call so that a failure part-way through raising
        # the flag still gets cleaned up in ``finally``.
        owns_flag = True
        Meta.set_district_syncing(True)

        # Download the report; each row is read as (district name, count, ...).
        provider = DataProvider()
        new_data = provider.sync_district_data()

        last_updated = Meta.get_last_district_sync()

        # Difference between the current time and the last recorded sync.
        # NOTE(review): naive UTC arithmetic - presumably ``last_updated``
        # is stored as naive UTC too; confirm before switching to aware
        # datetimes.
        update_delta = datetime.utcnow() - last_updated
        day_elapsed = update_delta.days >= 1

        # Flag to monitor whether the fetched data differs from the database.
        has_updated = False

        for pair in new_data:
            # Ignore blank data, including rows too short to carry both a
            # name and a count (these used to abort the whole sync).
            if len(pair) < 2 or pair[0] == "" or pair[1] == "":
                continue

            name, count = pair[0], pair[1]
            district = District.find_by_name(name)

            if not district:
                # First time this district is seen: create it.
                District(name, count).save()
                has_updated = True
                continue

            if district.count != count:
                # Count changed since the last record:
                # keep the previous count, then store the new one.
                district.prev_count = district.count
                district.count = count
                has_updated = True
            elif day_elapsed:
                # Count did not change: make count and prev_count the same,
                # but only if the last recorded sync was at least a day ago.
                district.prev_count = district.count

            district.save()

        logger.debug(f"Has updated = {has_updated}")

        if has_updated:
            # Move the last-sync time forward only once more than 24 hours
            # have passed; the constant 24h window helps to better
            # calculate new_count - prev_count.
            if day_elapsed:
                Meta.set_last_district_sync()
                logger.info("Updated last sync time")
            logger.info("District sync complete (fetched new data)")
        else:
            logger.info("District sync complete (already up-to-date)")
    except Exception as e:
        logger.error(f"District sync failed with error: {e}")
    finally:
        # Only lower the flag if this run raised it; a run that failed
        # before that point must not clear a flag it does not own.
        if owns_flag:
            Meta.set_district_syncing(False)
class DataProvider:
    """Scrape national statistics and per-district counts from the web."""

    logger = Logger.create_logger(__name__)

    # Seconds to wait for a server before giving up. Without a timeout a
    # stalled connection would block the calling sync task indefinitely.
    REQUEST_TIMEOUT = 30

    # Keys of the stats dict, in the order the counters appear on the page.
    STAT_FIELDS = (
        "positive_24",
        "positive_total",
        "death_24",
        "death_total",
        "recovered_24",
        "recovered_total",
        "test_24",
        "test_total",
    )

    def __init__(self):
        self.stats_data_source = "https://corona.gov.bd/lang/en"
        # Read from the environment; None when REPORT_URL is not set.
        self.district_report_url = os.environ.get("REPORT_URL")
        # Maps Bangla digits to their ASCII counterparts.
        self.trans_table = str.maketrans("০১২৩৪৫৬৭৮৯", "0123456789")

    def _fetch_soup(self, url):
        """Download ``url`` and return its content parsed as HTML.

        Raises whatever ``requests`` raises on a timeout, a connection
        problem or a non-success HTTP status.
        """
        page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail here instead of trying to parse an error page further down.
        page.raise_for_status()
        return bs(page.content, "html.parser")

    def get_stats(self):
        """Fetch the latest statistics like total positive cases, deaths etc.

        Returns a dict keyed by ``STAT_FIELDS`` with integer values.

        Raises ValueError when the page holds fewer counters than expected
        or a counter is not a number.
        """
        soup = self._fetch_soup(self.stats_data_source)
        counts = soup.select(".live-update-box-wrap-h1>b")

        expected = len(self.STAT_FIELDS)
        if len(counts) < expected:
            raise ValueError(
                f"Expected at least {expected} counters on the stats page, "
                f"found {len(counts)}"
            )

        # int() accepts any Unicode decimal digit, so Bangla digits are
        # converted without an explicit translation step.
        data_dict = {
            field: int(node.text)
            for field, node in zip(self.STAT_FIELDS, counts)
        }

        self.logger.debug(data_dict)
        return data_dict

    def parse_district_data(self):
        """Parse the Google Sheets report to get district data.

        Returns a list with one entry per data row; each entry is the list
        of sanitized cell values of that row.

        Raises RuntimeError when no report URL is configured and ValueError
        when the report contains no table.
        """
        if not self.district_report_url:
            raise RuntimeError("REPORT_URL environment variable is not set")

        soup = self._fetch_soup(self.district_report_url)
        table = soup.find("table")
        if table is None:
            raise ValueError("No table found in the district report")

        rows = table.find_all("tr")

        result = []
        # Skip the first two rows and the last row: they are headers/totals.
        for row in rows[2:-1]:
            data = [
                self.sanitize(col.text)
                for col in row.find_all("td")
                # Cells carrying a rowspan are the division-name column.
                if not col.has_attr("rowspan")
            ]
            result.append(data)

        return result

    def sanitize(self, s):
        """Sanitize a cell string.

        - replaces invalid chars with correct ones
        - converts to int if the string consists only of decimal digits
        """
        mapping = {"’": "'"}
        for key, val in mapping.items():
            s = s.replace(key, val)

        # isdecimal() rather than isdigit(): isdigit() is also true for
        # characters such as superscript digits, which int() rejects.
        if s.isdecimal():
            s = int(s)

        return s

    def sync_district_data(self):
        """Return the freshly parsed district data."""
        return self.parse_district_data()