示例#1
0
def _get_geonames_data() -> TGeonamesData:
    """Download the GeoNames archive and parse it into location rows.

    Passes ``GEONAMES_URL`` and the relative path ``{COUNTRY_CODE}.zip`` to
    ``chunked_download``, then reads the tab-separated member
    ``{COUNTRY_CODE}.txt`` from that archive.

    Returns:
        A list of ``(zipcode, city_name, state_code, latitude, longitude)``
        tuples, taken from columns 1, 2, 4, 9 and 10 of each line. Latitude
        and longitude are ``decimal.Decimal`` values. Rows whose city name
        starts with ``ARMY_PREFIXES`` are left out.

    Raises:
        IndexError: if a kept row has fewer than 11 tab-separated columns.
        decimal.InvalidOperation: if a kept row's latitude or longitude is
            not a valid decimal literal.
    """
    zipfile_name = f"{COUNTRY_CODE}.zip"
    chunked_download(GEONAMES_URL, zipfile_name)

    geonames_data = []
    with zipfile.ZipFile(zipfile_name) as zf:
        with zf.open(f"{COUNTRY_CODE}.txt", "r") as fd:  # type: ignore
            # Iterate the member lazily instead of materializing every line
            # with readlines().
            for line in fd:
                fields = line.decode().strip().split("\t")
                city_name = fields[2].strip()
                # Filter first so discarded rows are never parsed further.
                if city_name.startswith(ARMY_PREFIXES):
                    continue
                zipcode = fields[1].strip()
                state_code = fields[4].strip()
                latitude = decimal.Decimal(fields[9].strip())
                longitude = decimal.Decimal(fields[10].strip())
                geonames_data.append(
                    (zipcode, city_name, state_code, latitude, longitude))

    return geonames_data
示例#2
0
def _get_timezones_data() -> typing.Dict[str, str]:
    """Download the zipcode/timezone dump and parse it into a mapping.

    Passes ``ZIP_2_TIMEZONES_URL`` and the relative path
    ``zipcodes_to_timezones.gz`` to ``chunked_download``, then scans the
    gzip-compressed file for lines beginning with ``INSERT INTO``. Each such
    line is expected to look like ``INSERT INTO ... (row),(row),...,(row);``.

    Returns:
        A dict mapping each row's column 1 (zipcode) to its column 6
        (timezone), both with their first and last characters removed and
        surrounding whitespace stripped. Later rows overwrite earlier ones
        that share a zipcode.

    Raises:
        ValueError: if an ``INSERT INTO`` line contains no ``(``.
        IndexError: if a row has fewer than 7 comma-separated columns.
    """
    filename = "zipcodes_to_timezones.gz"
    chunked_download(ZIP_2_TIMEZONES_URL, filename)

    zipcode_to_timezones = {}
    with gzip.open(filename) as f:
        for line_encoded in f:
            line = line_encoded.decode().strip()
            if not line.startswith("INSERT INTO"):
                continue
            # Keep what lies between the first "(" and the trailing ");".
            start = line.index("(") + 1
            row_defs = line[start:-2].split("),(")
            for row_def in row_defs:
                # NOTE(review): a plain comma split assumes no column value
                # contains a comma - confirm against the dump format.
                fields = row_def.split(",")
                # [1:-1] drops the first and last character of the column,
                # presumably the SQL quote characters.
                zipcode = fields[1][1:-1].strip()
                timezone = fields[6][1:-1].strip()
                zipcode_to_timezones[zipcode] = timezone

    return zipcode_to_timezones
示例#3
0
def generate_fixtures():
    """
    Generates fixture data for all zipcodes and sensors within 100km of central Portland.

    We test on this subset of real data to keep test speed down.

    I would caution against running this script unless absolutely necessary because doing so
    will force you to fix a bunch of tests.

    Two files are written under ``tests/fixtures`` (resolved three directory
    levels above this module):

    * ``purpleair/purpleair.json`` - the JSON response from ``PURPLEAIR_URL``
      with ``results`` reduced to the entries accepted by ``_is_in_range``.
    * ``geonames/{COUNTRY_CODE}.zip`` - an archive holding
      ``{COUNTRY_CODE}.txt`` with only the lines accepted by ``_is_in_range``.

    The unfiltered GeoNames archive is downloaded to ``/tmp/geonames.zip``.

    Raises:
        requests.HTTPError: if the PurpleAir request returns an error status.
    """
    path = pathlib.Path(__file__).parent.parent.parent / "tests" / "fixtures"
    timestamp = BaseTestCase.timestamp

    # PurpleAir sensors: keep only entries with coordinates that are in range.
    resp = requests.get(PURPLEAIR_URL)
    resp.raise_for_status()
    response_json = resp.json()
    results = []
    num_skipped = 0
    for res in response_json.get("results", []):
        latitude = res.get("Lat")
        longitude = res.get("Lon")
        if (latitude is not None and longitude is not None
                and _is_in_range(latitude, longitude)):
            # Overwrite LastSeen with the test suite's timestamp; presumably
            # this keeps the fixture stable relative to the tests' clock.
            res["LastSeen"] = timestamp
            results.append(res)
        else:
            num_skipped += 1
    response_json["results"] = results
    file_path = path / "purpleair/purpleair.json"
    with file_path.open("w") as f:
        json.dump(response_json, f)
    print(f"Skipped {num_skipped} sensors (wrote {len(results)})")

    # GeoNames: start from a fresh download of the full archive.
    tmpfile = "/tmp/geonames.zip"
    try:
        os.remove(tmpfile)
    except FileNotFoundError:
        pass
    chunked_download(GEONAMES_URL, tmpfile)

    # Matching lines are kept as raw bytes so the fixture reproduces the
    # upstream data exactly, independent of the locale's default encoding.
    kept_lines = []
    num_skipped = 0
    with zipfile.ZipFile(tmpfile) as zf:
        with zf.open(f"{COUNTRY_CODE}.txt", "r") as fd:
            for raw_line in fd:
                fields = raw_line.decode().strip().split("\t")
                latitude = float(fields[9].strip())
                longitude = float(fields[10].strip())
                if _is_in_range(latitude, longitude):
                    kept_lines.append(raw_line)
                else:
                    num_skipped += 1

    # Write the member straight into the fixture archive; no scratch
    # directory is needed.
    file_path = path / f"geonames/{COUNTRY_CODE}.zip"
    with zipfile.ZipFile(file_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr(f"{COUNTRY_CODE}.txt", b"".join(kept_lines))
    print(f"Skipped {num_skipped} zipcodes (wrote {len(kept_lines)})")