Пример #1
0
def parse_daily_areas(date, country, html):
    """Extract per-area total case counts from a country's daily HTML page.

    Args:
        date: Date string copied into every output row. For Wales it is
            compared as a string against "2020-04-08", so it is expected to
            be in YYYY-MM-DD form.
        country: Country name; only "Scotland" and "Wales" are parsed.
        html: Raw HTML of the page. The first ``<table>`` is read.

    Returns:
        A list of rows starting with the header
        ``["Date", "Country", "AreaCode", "Area", "TotalCases"]``, or ``None``
        when the country is not handled ("Northern Ireland", "UK" or any
        other value) or, for Wales, when ``date`` is on or after 2020-04-08.
    """
    if country in ("Northern Ireland", "UK"):
        return None
    soup = BeautifulSoup(html, features="html.parser")
    output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    if country == "Scotland":
        table = soup.find_all("table")[0]
        for table_row in table.find_all("tr"):
            columns = [
                normalize_whitespace(col.text) for col in table_row.find_all("td")
            ]
            # Rows without <td> cells and heading rows carry no data.
            if not columns:
                continue
            if columns[0].lower() in ("", "health board"):
                continue
            # The replacements must be chained: restarting from columns[0]
            # for the second one would throw away the result of the first.
            area = (
                columns[0]
                .replace("Ayrshire & Arran", "Ayrshire and Arran")
                .replace("Eileanan Siar (Western Isles)", "Western Isles")
            )
            area_code = lookup_health_board_code(area)
            cases = columns[1]
            if cases == "*":  # means 5 or fewer cases
                cases = "NaN"
            else:
                # Drop footnote markers and thousands separators.
                cases = cases.replace("*", "").replace(",", "")
            output_rows.append([date, country, area_code, area, cases])
        return output_rows
    elif country == "Wales":
        if date >= "2020-04-08":
            # daily areas no longer published on the HTML page (now published on the dashboard)
            return None
        table = soup.find_all("table")[0]
        for table_row in table.find_all("tr"):
            columns = [
                normalize_whitespace(col.text) for col in table_row.find_all("td")
            ]
            if not columns:
                continue
            if columns[0].lower() in ("", "health board", "wales", "total", "wales total"):
                continue
            # The case count is taken from the last column; skip rows without one.
            if is_blank(columns[-1]):
                continue
            area = (
                columns[0]
                .replace("City and County of Swansea", "Swansea")
                .replace("City of Cardiff", "Cardiff")
                .replace("Newport City", "Newport")
                .replace("County Borough Council", "")
                .replace("County Council", "")
                .replace("Council", "")
                .replace("Cardiff & Vale", "Cardiff and Vale")
                .replace("Cwm Taf Morgannwg", "Cwm Taf")
                .strip()
            )
            # If the clean-up removed everything, keep the raw cell text.
            if is_blank(area):
                area = columns[0]
            cases = columns[-1].replace("*", "").replace(",", "")
            output_rows.append(
                [date, country, lookup_health_board_code(area), area, cases]
            )
        return output_rows
    return None
Пример #2
0
def parse_daily_areas(date, country, html):
    """Build the per-area case table for Scotland or Wales from page HTML.

    Scotland is read from the last ``<table>`` on the page (area in the first
    column, cases in the second); Wales is read from the first ``<table>``
    (area in the first column, cases in the last).

    Returns a list of rows headed by
    ``["Date", "Country", "AreaCode", "Area", "TotalCases"]``, or ``None`` for
    "Northern Ireland", "UK" and any other country.
    """
    if country in ("Northern Ireland", "UK"):
        return None

    soup = BeautifulSoup(html, features="html.parser")
    rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]

    if country == "Scotland":
        last_table = soup.find_all("table")[-1]
        for tr in last_table.findAll("tr"):
            cells = [normalize_whitespace(td.text) for td in tr.findAll("td")]
            # Ignore rows with no data cells as well as the heading row.
            if not cells or cells[0].lower() in ("", "health board"):
                continue
            board = cells[0].replace("Ayrshire & Arran", "Ayrshire and Arran")
            board_code = lookup_health_board_code(board)
            count = cells[1].replace("*", "")
            rows.append([date, country, board_code, board, count])
        return rows

    if country == "Wales":
        ignored_labels = ("", "health board", "wales", "total", "wales total")
        # Applied in this order; each step works on the previous result.
        substitutions = (
            ("City and County of Swansea", "Swansea"),
            ("City of Cardiff", "Cardiff"),
            ("Newport City", "Newport"),
            ("County Borough Council", ""),
            ("County Council", ""),
            ("Council", ""),
        )
        first_table = soup.find_all("table")[0]
        for tr in first_table.findAll("tr"):
            cells = [normalize_whitespace(td.text) for td in tr.findAll("td")]
            if not cells:
                continue
            if cells[0].lower() in ignored_labels or is_blank(cells[-1]):
                continue
            board = cells[0]
            for old, new in substitutions:
                board = board.replace(old, new)
            board = board.strip()
            # Fall back to the raw cell when nothing is left after clean-up.
            if is_blank(board):
                board = cells[0]
            count = cells[-1]
            rows.append(
                [date, country, lookup_health_board_code(board), board, count]
            )
        return rows

    return None
Пример #3
0
def upsert_bank(jurisdiction_id, bank_code=None, name=None, fetched=False):
    """Create or update the bank for ``bank_code`` and return its id.

    Ids are memoised in the module-level ``swift_banks`` mapping keyed by
    ``bank_code``; a cached code returns immediately without touching the
    database.

    Args:
        jurisdiction_id: Jurisdiction of the bank. A falsy value is replaced
            by the id of jurisdiction code "XX".
        bank_code: Bank code used for the lookup and as the cache key.
        name: Optional bank name; blank names are treated as missing.
        fetched: Value stored on a newly created bank's ``fetched`` field.

    Returns:
        The id of the existing or newly created bank.
    """
    if bank_code in swift_banks:
        return swift_banks[bank_code]

    if is_blank(name):
        name = None
    if not jurisdiction_id:
        jurisdiction_id = jurisdiction_by_code("XX")

    s = Session()
    try:
        bank = _get_bank(s, jurisdiction_id, bank_code)
        if not bank:
            bank = Bank(code=bank_code,
                        name=name,
                        country_id=jurisdiction_id,
                        fetched=fetched)
            s.add(bank)
            # CASH accounts don't really exist, so all pre-fetched
            cash_account = Account(
                code=None, acc_type="CASH", bank=bank, fetched=True
            )
            s.add(cash_account)
        elif name:
            if not bank.name:
                bank.name = name
            elif bank.name != name:
                print("Bank with different name: old: %s; new: %s"
                      % (bank.name, name))
                # On a conflict the longer name wins.
                if len(name) > len(bank.name):
                    bank.name = name

        s.commit()
        # Read the id while the session is still open.
        swift_banks[bank_code] = bank.id
    finally:
        # Always release the session, including when the lookup or the
        # commit raises.
        s.close()

    return swift_banks[bank_code]
Пример #4
0
def upsert_alias(name, org_id, jurisdiction_id):
    """Ensure an alias ``name`` exists for an organisation and return its id.

    Atomic operation: opens its own session and commits. The name is passed
    through ``clean_name`` but is not otherwise normalised.

    Args:
        name: Alias text.
        org_id: Id of the organisation the alias belongs to.
        jurisdiction_id: Jurisdiction stored as the alias's ``country_id``.

    Returns:
        The alias id, or ``None`` when the cleaned name is blank or
        ``org_id`` is falsy.

    Raises:
        Exception: If no organisation with ``org_id`` exists.
    """
    name = clean_name(name)
    # Validate before opening a session so the early exit cannot leak one.
    if is_blank(name) or not org_id:
        return None

    s = Session()
    try:
        alias = _get_alias(s, name, org_id, jurisdiction_id)
        if not alias:
            alias = Alias(alias=name, org_id=org_id, country_id=jurisdiction_id)
            s.add(alias)

        company = _get_organisation(s, org_id)
        if not company:
            # TODO: What do we do with anonymous entities? E.g. cash sources
            raise Exception("Expected company with id=%d but not found" % org_id)

        # The organisation's name is deliberately not shortened here: names
        # are processed after the import is complete.

        s.commit()
        # Evaluated before the session is closed by the finally clause.
        return alias.id
    finally:
        s.close()
Пример #5
0
def account_type(code):
    """Classify an already-normalised account code.

    Returns "CASH" for empty, blank or shorter-than-6 codes; "IBAN" when the
    code starts with two letters found in ``cached_jurisdictions()`` followed
    by two digits; "SWIFT" when it starts with four letters, is 8 or 11
    characters long and has no whitespace; "LOCAL" otherwise.
    """
    if not code or is_blank(code) or len(code) < 6:
        return "CASH"

    leading, following = code[0:2], code[2:4]
    if not leading.isalpha():
        return "LOCAL"

    if following.isdigit() and leading in cached_jurisdictions():
        return "IBAN"

    has_swift_length = len(code) in (8, 11)
    if has_swift_length and following.isalpha() and re.search(r"\s", code) is None:
        return "SWIFT"

    return "LOCAL"
Пример #6
0
def merge_organisations(this_id, that_id):
    """Merge organisation ``that_id`` into ``this_id`` and delete ``that_id``.

    The surviving organisation keeps the shorter of the two names (when the
    other name is not blank), takes over ``core`` if it has none, and receives
    the other organisation's aliases and accounts.

    Args:
        this_id: Id of the organisation that survives.
        that_id: Id of the organisation that is merged in and removed.

    Returns:
        ``False`` if either organisation does not exist (nothing is changed),
        or if both have different truthy ``core`` values (the merge is still
        carried out and a message is printed); ``True`` otherwise.
    """
    s = Session()
    try:
        this = _get_organisation(s, this_id)
        if not this:
            return False
        that = _get_organisation(s, that_id)
        if not that:
            return False

        success = True

        # name: prefer the shorter one.
        if not is_blank(that.name) and len(that.name) < len(this.name):
            this.name = that.name

        # core: conflicting values are reported but do not abort the merge.
        if this.core and that.core and this.core != that.core:
            print("Organisation %s with different core: old: '%s'; new: '%s'"
                  % (this.name, this.core, that.core))
            success = False
        else:
            this.core = this.core or that.core

        # accounts, aliases
        this.aliases = _merge_aliases(s, this.aliases, that.aliases)
        this.accounts = this.accounts + that.accounts

        s.delete(that)
        s.commit()
        return success
    finally:
        # Release the session on every path, including the early returns
        # above and any exception.
        s.close()
Пример #7
0
def expand_links(parentItem, extern=False):
    """Recursively fill in ``url`` (and ``visible`` for stories) on items.

    Args:
        parentItem: Either a list of items, which is processed element by
            element, or a single dict-like item. An item's ``modelType``
            selects the handling:

            * ``"story"``: the entity referenced by the url-safe ``key`` is
              loaded; ``visible`` and ``url`` are set from it, or ``visible``
              is set to ``False`` when the entity does not exist.
            * ``"page"``: ``url`` is built for the endpoint named by
              ``url_component`` (default ``"home"``), falling back to
              ``"home"`` when that endpoint cannot be built.

            Children under the ``nodes`` key are processed recursively.
        extern: Passed to ``flask.url_for`` as ``_external``.

    Returns:
        None. Items are modified in place.
    """
    if isinstance(parentItem, list):
        for item in parentItem:
            expand_links(item, extern)
        return

    modelType = util.get_if_exists(parentItem, "modelType", None)
    if modelType == "story":
        keyStr = util.get_if_exists(parentItem, "key", None)
        if not util.is_blank(keyStr):
            story_db = ndb.Key(urlsafe=keyStr).get()
            if story_db:
                # A story is visible when it has NOT been deleted; a missing
                # story is treated as not visible below.
                parentItem["visible"] = not story_db.deleted
                parentItem["url"] = flask.url_for(
                    "story",
                    story_key=util.story_key(story_db),
                    _external=extern,
                )
            else:
                parentItem["visible"] = False
    elif modelType == "page":
        keyStr = util.get_if_exists(parentItem, "url_component", "home")
        try:
            parentItem["url"] = flask.url_for(keyStr, _external=extern)
        except routing.BuildError:
            # Unknown endpoint: link to the home page instead.
            parentItem["url"] = flask.url_for("home", _external=extern)

    if "nodes" in parentItem:
        expand_links(parentItem["nodes"], extern)
Пример #8
0
def read_role(name,
              norm=None,
              country="XX",
              code=None,
              bank_name=None,
              acc_country="XX",
              core=False):
    """Resolve a named party and its account code to an account id.

    Depending on the account type derived from ``code``, this looks up or
    creates the bank, the organisation, its aliases and the account.

    Args:
        name: Party name as it appears in the source data. If ``code`` is
            classified as "CASH" and ``name`` itself is classified as
            "SWIFT", ``name`` is treated as a bank code instead.
        norm: Normalised party name, used to upsert the organisation.
        country: Jurisdiction code of the party.
        code: Account code. Whitespace and leading zeros are stripped. May be
            ``None``.
        bank_name: Bank name; when blank, the bank code is derived from
            ``code`` instead.
        acc_country: Jurisdiction code of the account; overridden from the
            code itself for IBAN and SWIFT codes.
        core: Passed through to ``organisations.upsert_organisation``.

    Returns:
        The id of the existing or newly created account.
    """
    jurisdiction_id = jurisdictions.jurisdiction_by_code(country)

    # ``code`` defaults to None, which re.sub() rejects with a TypeError, so
    # only normalise an actual value.
    # NOTE(review): assumes banks.account_type() accepts None - confirm.
    if code is not None:
        code = re.sub(r"\s", "", code).lstrip("0")
    acc_type = banks.account_type(code)

    try:
        bank_code = None
        if util.is_blank(bank_name) and not util.is_blank(code):
            bank_code = banks.account_bank_code(code, offline=True)
        if util.is_blank(bank_code) or util.contains_whitespace(bank_code):
            bank_code = None
    except LookupError:
        bank_code = None

    if acc_type == "CASH" and banks.account_type(name) == "SWIFT":
        # The "name" is really a bank code: register the bank and an account
        # with no code and no owning organisation.
        bank_code = name
        acc_bank_id = banks.get_bank(jurisdiction_id, bank_code)\
            or banks.upsert_bank(jurisdiction_id, bank_code=bank_code, name=bank_name)
        return banks.upsert_account(None, acc_type, acc_bank_id, None)
    elif acc_type == "IBAN":
        # The first two characters of the code are used as the bank country.
        bank_country = code[0:2]
        if not bank_country or (
                bank_country not in jurisdictions.cached_jurisdictions()):
            print("Unrecognised account country: %s" % bank_country)
            bank_country = "XX"
        if acc_country != bank_country:
            if not util.is_blank(acc_country):
                print("Account %s with conflicting bank country: jurisdiction: '%s'; code: '%s'"
                      % (code, acc_country, bank_country))
            acc_country = bank_country
    elif acc_type == "SWIFT":
        # Characters 5-6 of the code are used as the account country.
        acc_country = code[4:6]

    # NOTE(review): the result is currently unused; the call is kept in case
    # jurisdiction_by_code() has side effects - confirm.
    acc_jurisdiction_id = jurisdictions.jurisdiction_by_code(acc_country)

    acc_id = banks.get_account_by_code(code)

    # Reuse the organisation that owns an existing account, otherwise upsert
    # one from the normalised name.
    org_id = banks.get_organisation_by_account(acc_id) if acc_id else None
    if not org_id:
        org_id = organisations.upsert_organisation(norm, core)

    # TODO: Problem creating alias if the organisation is not yet persisted
    organisations.upsert_alias(name, org_id, jurisdiction_id)
    if norm != name:
        organisations.upsert_alias(norm, org_id, jurisdiction_id)

    if not acc_id:
        acc_bank_id = banks.get_bank(jurisdiction_id, bank_code)\
            or banks.upsert_bank(jurisdiction_id, bank_code=bank_code, name=bank_name)
        acc_id = banks.upsert_account(code, acc_type, acc_bank_id, org_id)

    return acc_id
    r"(?s)Updated: (?P<time>.+?),? \S+ (?P<date>\d+\s\w+\s\d{4})")
# Find the text matching `pattern` and reformat its `date` group as
# YYYY-MM-DD.
# NOTE(review): re.search() returns None when nothing matches, in which case
# m.groupdict() raises AttributeError - confirm the pattern always matches.
m = re.search(pattern, text)
groups = m.groupdict()
date = dateparser.parse(groups["date"]).strftime("%Y-%m-%d")

# Build the output table: a header row followed by one row per area.
output_rows = [["Date", "Country", "AreaCode", "Area", "TotalCases"]]
for table_row in table.findAll("tr"):
    columns = [
        normalize_whitespace(col.text) for col in table_row.findAll("td")
    ]
    # Rows without any <td> cells carry no data.
    if len(columns) == 0:
        continue
    # Skip rows whose first cell is one of these labels rather than an area.
    if columns[0] == "Health Board" or columns[0] == "Wales" or columns[
            0] == "TOTAL":
        continue
    # The third column supplies the case count; skip rows where it is blank.
    if is_blank(columns[2]):
        continue
    # Shorten the area name: specific names are rewritten first, then the
    # council suffixes are removed.
    area = (columns[0].replace(
        "City and County of Swansea",
        "Swansea").replace("City of Cardiff", "Cardiff").replace(
            "Newport City",
            "Newport").replace("County Borough Council",
                               "").replace("County Council",
                                           "").replace("Council", "").strip())
    # If the clean-up removed everything, keep the raw cell text.
    if is_blank(area):
        area = columns[0]
    cases = columns[2]
    output_row = [date, country, lookup_health_board_code(area), area, cases]
    output_rows.append(output_row)

with open(csv_file, "w") as csvfile:
Пример #10
0
 def __init__(self, data, start, prefix=''):
     """Store ``data`` with its ``start`` and ``prefix``.

     Also records ``len(data)`` as ``size`` and the result of
     ``is_blank(data)`` as ``is_blank``.
     """
     self.data = data
     self.size = len(data)
     self.is_blank = is_blank(data)
     self.start = start
     self.prefix = prefix