def parse_daily_areas(date, country, html):
    """Parse a daily report page into per-area case-count rows.

    Parameters:
        date: ISO date string ("YYYY-MM-DD") the page was published for.
        country: "Scotland", "Wales", "Northern Ireland" or "UK".
        html: raw HTML of the daily report page.

    Returns:
        A list of rows whose first element is the header
        ["Date", "Country", "AreaCode", "Area", "TotalCases"], or None when
        the country publishes no per-area table (NI/UK, or Wales from
        2020-04-08 onwards).
    """
    if country in ("Northern Ireland", "UK"):
        return None
    soup = BeautifulSoup(html, features="html.parser")
    output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    if country == "Scotland":
        table = soup.find_all("table")[0]
        for table_row in table.findAll("tr"):
            columns = [
                normalize_whitespace(col.text) for col in table_row.findAll("td")
            ]
            if len(columns) == 0:
                continue
            if columns[0].lower() in ("", "health board"):
                continue
            # BUG FIX: the second assignment previously re-read columns[0],
            # discarding the "Ayrshire & Arran" normalisation.  Chain the
            # replacements so both renames apply.
            area = (
                columns[0]
                .replace("Ayrshire & Arran", "Ayrshire and Arran")
                .replace("Eileanan Siar (Western Isles)", "Western Isles")
            )
            area_code = lookup_health_board_code(area)
            cases = columns[1]
            if cases == "*":  # means 5 or fewer cases
                cases = "NaN"
            else:
                cases = cases.replace("*", "").replace(",", "")
            output_rows.append([date, country, area_code, area, cases])
        return output_rows
    elif country == "Wales":
        if date >= "2020-04-08":
            # daily areas no longer published on the HTML page (now published on the dashboard)
            return None
        table = soup.find_all("table")[0]
        for table_row in table.findAll("tr"):
            columns = [
                normalize_whitespace(col.text) for col in table_row.findAll("td")
            ]
            if len(columns) == 0:
                continue
            if columns[0].lower() in ("", "health board", "wales", "total", "wales total"):
                continue
            if is_blank(columns[-1]):
                continue
            # Strip council suffixes and normalise names to canonical forms.
            area = (
                columns[0]
                .replace("City and County of Swansea", "Swansea")
                .replace("City of Cardiff", "Cardiff")
                .replace("Newport City", "Newport")
                .replace("County Borough Council", "")
                .replace("County Council", "")
                .replace("Council", "")
                .replace("Cardiff & Vale", "Cardiff and Vale")
                .replace("Cwm Taf Morgannwg", "Cwm Taf")
                .strip()
            )
            # If stripping removed everything, fall back to the raw cell.
            if is_blank(area):
                area = columns[0]
            cases = columns[-1].replace("*", "").replace(",", "")
            output_rows.append([date, country, lookup_health_board_code(area), area, cases])
        return output_rows
    return None
def parse_daily_areas(date, country, html):
    """Extract per-health-board case counts from a daily report page.

    Returns a list of rows headed by
    ["Date", "Country", "AreaCode", "Area", "TotalCases"], or None for
    countries without a per-area table (Northern Ireland, UK).
    """
    if country in ("Northern Ireland", "UK"):
        return None

    soup = BeautifulSoup(html, features="html.parser")
    rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]

    if country == "Scotland":
        # The health-board table is the last table on the Scottish page.
        board_table = soup.find_all("table")[-1]
        for tr in board_table.findAll("tr"):
            cells = [normalize_whitespace(td.text) for td in tr.findAll("td")]
            if not cells or cells[0].lower() in ("", "health board"):
                continue
            board = cells[0].replace("Ayrshire & Arran", "Ayrshire and Arran")
            rows.append([
                date, country, lookup_health_board_code(board), board,
                cells[1].replace("*", ""),
            ])
        return rows

    if country == "Wales":
        welsh_table = soup.find_all("table")[0]
        skip_labels = ("", "health board", "wales", "total", "wales total")
        for tr in welsh_table.findAll("tr"):
            cells = [normalize_whitespace(td.text) for td in tr.findAll("td")]
            if not cells or cells[0].lower() in skip_labels:
                continue
            if is_blank(cells[-1]):
                continue
            # Strip council suffixes and normalise a few known renames.
            board = (
                cells[0]
                .replace("City and County of Swansea", "Swansea")
                .replace("City of Cardiff", "Cardiff")
                .replace("Newport City", "Newport")
                .replace("County Borough Council", "")
                .replace("County Council", "")
                .replace("Council", "")
                .strip()
            )
            # Fall back to the raw cell if stripping removed everything.
            if is_blank(board):
                board = cells[0]
            rows.append([
                date, country, lookup_health_board_code(board), board, cells[-1],
            ])
        return rows

    return None
def upsert_bank(jurisdiction_id, bank_code=None, name=None, fetched=False):
    """Insert or update a Bank row and return its id.

    Results are memoised in the module-level ``swift_banks`` cache keyed by
    ``bank_code``; the database is only touched on a cache miss.  On first
    insert a placeholder CASH account is attached to the bank.

    Args:
        jurisdiction_id: country id; falls back to the "XX" jurisdiction.
        bank_code: bank identifier used as the cache key (may be None).
        name: optional human-readable bank name.
        fetched: flag stored on a newly created Bank row.

    Returns:
        The bank's database id (from the cache).
    """
    if bank_code not in swift_banks:
        if is_blank(name):
            name = None
        if not jurisdiction_id:
            # Fall back to the catch-all "unknown" jurisdiction.
            jurisdiction_id = jurisdiction_by_code("XX")
        s = Session()
        bank = _get_bank(s, jurisdiction_id, bank_code)
        if not bank:
            bank = Bank(code=bank_code, name=name, country_id=jurisdiction_id, fetched=fetched)
            s.add(bank)
            cash_account = Account(
                code=None, acc_type="CASH", bank=bank, fetched=True
            )  # CASH accounts don't really exist, so all pre-fetched
            s.add(cash_account)
        elif name:
            if not bank.name:
                bank.name = name
            elif bank.name != name:
                print("Bank with different name: old: %s; new: %s"
                      % (bank.name, name))
                # On conflict, keep the longer name (assumed more descriptive).
                if len(name) > len(bank.name):
                    bank.name = name
        s.commit()
        # Cache the id before closing so later calls skip the database.
        swift_banks[bank_code] = bank.id
        s.close()
    return swift_banks[bank_code]
def upsert_alias(name, org_id, jurisdiction_id):
    """Insert an Alias for an organisation if it does not already exist.

    Atomic operation: includes the commit.  Does not include normalisation
    beyond ``clean_name``.

    Args:
        name: raw alias text; cleaned via ``clean_name`` before use.
        org_id: id of the owning organisation (must be truthy).
        jurisdiction_id: country id stored on the alias.

    Returns:
        The alias id, or None when the cleaned name is blank or org_id is
        missing.

    Raises:
        Exception: if no organisation with ``org_id`` exists.
    """
    name = clean_name(name)
    # BUG FIX: validate before opening the session; previously a Session was
    # created first and leaked on this early return.
    if is_blank(name) or not org_id:
        return None
    s = Session()
    # BUG FIX: try/finally guarantees the session is closed even when the
    # missing-organisation Exception below is raised.
    try:
        alias = _get_alias(s, name, org_id, jurisdiction_id)
        if not alias:
            alias = Alias(alias=name, org_id=org_id, country_id=jurisdiction_id)
            s.add(alias)
        company = _get_organisation(s, org_id)
        if not company:
            # TODO: What do we do with anonymous entities? E.g. cash sources
            raise Exception("Expected company with id=%d but not found" % org_id)
        # Out because names are processed after import is complete
        # if len(name) < len(company.name):
        #     company.name = name
        s.commit()
        return alias.id
    finally:
        s.close()
def account_type(code):
    """Classify a normalised account code.

    Returns one of "CASH" (missing/blank/short codes), "IBAN" (two letters,
    two digits, known jurisdiction prefix), "SWIFT" (8- or 11-char
    all-letter prefix with no whitespace) or "LOCAL" (anything else).
    Assumes *code* is already normalised.
    """
    # Empty, blank, or too-short codes are treated as cash.
    if not code or is_blank(code) or len(code) < 6:
        return "CASH"
    prefix, middle = code[0:2], code[2:4]
    if prefix.isalpha():
        if middle.isdigit() and prefix in cached_jurisdictions():
            return "IBAN"
        if middle.isalpha() and len(code) in (8, 11) and not re.search(r"\s", code):
            return "SWIFT"
    return "LOCAL"
def merge_organisations(this_id, that_id):
    """Merge organisation ``that_id`` into ``this_id`` and delete the former.

    Aliases are merged via ``_merge_aliases``, accounts are appended, and
    the change is committed.  Returns False when either id is unknown, or
    when both organisations carry conflicting ``core`` flags — note that in
    the conflict case the merge is still committed; only the return value
    signals the problem (behaviour preserved from the original).

    Args:
        this_id: id of the surviving organisation.
        that_id: id of the organisation to be removed.

    Returns:
        True on a clean merge, False otherwise.
    """
    success = True
    s = Session()
    # BUG FIX: the early returns below previously leaked the open session;
    # try/finally now guarantees it is closed on every path.
    try:
        this = _get_organisation(s, this_id)
        if not this:
            return False
        that = _get_organisation(s, that_id)
        if not that:
            return False
        # Prefer the shorter non-blank name (assumed to be the cleaner form).
        if not is_blank(that.name) and len(that.name) < len(this.name):
            this.name = that.name
        if this.core and that.core and this.core != that.core:
            print("Organisation %s with different core: old: '%s'; new: '%s'"
                  % (this.name, this.core, that.core))
            success = False
        else:
            this.core = this.core or that.core
        # Merge relationships: aliases through the helper, accounts appended.
        this.aliases = _merge_aliases(s, this.aliases, that.aliases)
        this.accounts = this.accounts + that.accounts
        s.delete(that)
        s.commit()
    finally:
        s.close()
    return success
def expand_links(parentItem, extern=False):
    """Recursively attach ``url`` (and ``visible``) entries to nav items.

    Lists are walked element by element; dict items are handled according
    to their ``modelType`` ("story" or "page"), and any nested ``nodes``
    subtree is processed recursively.

    Args:
        parentItem: a nav item dict or a list of them (mutated in place).
        extern: forwarded as flask's ``_external`` flag, so generated URLs
            are absolute when True.
    """
    if isinstance(parentItem, list):
        for entry in parentItem:
            expand_links(entry, extern)
        return

    model_type = util.get_if_exists(parentItem, "modelType", None)

    if model_type == "story":
        encoded_key = util.get_if_exists(parentItem, "key", None)
        if not util.is_blank(encoded_key):
            story_db = ndb.Key(urlsafe=encoded_key).get()
            if story_db:
                # NOTE(review): ``visible`` is set to the *deleted* flag,
                # which looks inverted (a deleted story would be visible).
                # Behaviour preserved here — confirm intent upstream.
                parentItem["visible"] = story_db.deleted
                parentItem["url"] = flask.url_for(
                    "story",
                    story_key=util.story_key(story_db),
                    _external=extern,
                )
            else:
                parentItem["visible"] = False

    if model_type == "page":
        endpoint = util.get_if_exists(parentItem, "url_component", "home")
        try:
            parentItem["url"] = flask.url_for(endpoint, _external=extern)
        except routing.BuildError:
            # Unknown endpoint: fall back to the home page URL.
            parentItem["url"] = flask.url_for("home", _external=extern)

    if "nodes" in parentItem:
        expand_links(parentItem["nodes"], extern)
def read_role(name, norm=None, country="XX", code=None, bank_name=None,
              acc_country="XX", core=False):
    """Resolve a transaction role into an account id, upserting as needed.

    Normalises the account ``code``, classifies it (CASH/IBAN/SWIFT/LOCAL),
    derives the bank code and account country, then upserts the
    organisation, its aliases, the bank and finally the account.

    NOTE(review): despite the ``code=None`` default, ``re.sub`` below would
    raise TypeError on None — callers apparently always pass a string;
    confirm before relying on the default.

    Returns:
        The account id from ``banks.upsert_account`` /
        ``banks.get_account_by_code``.
    """
    jurisdiction_id = jurisdictions.jurisdiction_by_code(country)
    # Strip all whitespace and leading zeros from the account code.
    code = re.sub(r"\s", "", code).lstrip("0")
    acc_type = banks.account_type(code)
    try:
        # Only derive a bank code when no explicit bank name was supplied.
        bank_code = banks.account_bank_code(
            code, offline=True
        ) if util.is_blank(bank_name) and not util.is_blank(code) else None
        # Discard unusable bank codes (blank or containing whitespace).
        bank_code = None if util.is_blank(
            bank_code) or util.contains_whitespace(bank_code) else bank_code
    except LookupError as e:
        bank_code = None
    if acc_type == "CASH" and banks.account_type(name) == "SWIFT":
        # The "name" field actually holds a SWIFT code: treat it as the
        # bank code and drop the (meaningless) account/org identifiers.
        bank_code = name
        code = None
        name = None
        norm = None
        acc_bank_id = banks.get_bank(jurisdiction_id, bank_code)\
            or banks.upsert_bank(jurisdiction_id, bank_code=bank_code, name=bank_name)
        acc_id = banks.upsert_account(code, acc_type, acc_bank_id, None)
        return acc_id
    elif acc_type == "IBAN":
        # The IBAN's first two letters name the bank's country.
        bank_country = code[0:2]
        if not bank_country or (
                bank_country not in jurisdictions.cached_jurisdictions().keys()):
            print("Unrecognised account country: %s" % bank_country)
            bank_country = "XX"
        if acc_country != bank_country:
            if not util.is_blank(acc_country):
                print("Account %s with conflicting bank country: jurisdiction: '%s'; code: '%s'"\
                    %(code, acc_country, bank_country))
            # The country embedded in the IBAN wins over the caller's value.
            acc_country = bank_country
    elif acc_type == "SWIFT":
        # Characters 5-6 of a SWIFT/BIC code are the country code.
        acc_country = code[4:6]
    acc_jurisdiction_id = jurisdictions.jurisdiction_by_code(acc_country)
    acc_id = banks.get_account_by_code(code)
    if acc_id:
        # Account already known: attach/refresh the organisation aliases.
        org_id = banks.get_organisation_by_account(acc_id)\
            or organisations.upsert_organisation(norm, core)
        # or organisations.upsert_organisation(norm, org_type, core)
        organisations.upsert_alias(name, org_id, jurisdiction_id)
        if norm != name:
            organisations.upsert_alias(norm, org_id, jurisdiction_id)
    else:
        # New account: create organisation, aliases, bank, then the account.
        org_id = organisations.upsert_organisation(norm, core)
        # org_id = organisations.upsert_organisation(norm, org_type, core)
        # TODO: Problem creating alias if the organisation is not yet persisted
        organisations.upsert_alias(name, org_id, jurisdiction_id)
        if norm != name:
            organisations.upsert_alias(norm, org_id, jurisdiction_id)
        acc_bank_id = banks.get_bank(jurisdiction_id, bank_code)\
            or banks.upsert_bank(jurisdiction_id, bank_code=bank_code, name=bank_name)
        acc_id = banks.upsert_account(code, acc_type, acc_bank_id, org_id)
    return acc_id
r"(?s)Updated: (?P<time>.+?),? \S+ (?P<date>\d+\s\w+\s\d{4})") m = re.search(pattern, text) groups = m.groupdict() date = dateparser.parse(groups["date"]).strftime("%Y-%m-%d") output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]] for table_row in table.findAll("tr"): columns = [ normalize_whitespace(col.text) for col in table_row.findAll("td") ] if len(columns) == 0: continue if columns[0] == "Health Board" or columns[0] == "Wales" or columns[ 0] == "TOTAL": continue if is_blank(columns[2]): continue area = (columns[0].replace( "City and County of Swansea", "Swansea").replace("City of Cardiff", "Cardiff").replace( "Newport City", "Newport").replace("County Borough Council", "").replace("County Council", "").replace("Council", "").strip()) if is_blank(area): area = columns[0] cases = columns[2] output_row = [date, country, lookup_health_board_code(area), area, cases] output_rows.append(output_row) with open(csv_file, "w") as csvfile:
def __init__(self, data, start, prefix=''):
    """Store a data segment plus cached properties derived from it.

    Args:
        data: the segment payload; its length and blankness are cached.
        start: position/offset at which the segment begins.
        prefix: optional label stored as-is (assumed cosmetic — confirm).
    """
    self.prefix = prefix
    self.start = start
    self.data = data
    # Cache derived properties so callers need not recompute them.
    self.size = len(data)
    self.is_blank = is_blank(data)