def _get_geonames_data() -> TGeonamesData:
    """Download the geonames zipcode archive and parse it into tuples.

    Returns:
        A list of ``(zipcode, city_name, state_code, latitude, longitude)``
        tuples, one per zipcode row in the ``{COUNTRY_CODE}.txt`` member of
        the downloaded archive.  Rows whose city name starts with one of
        ``ARMY_PREFIXES`` (army post office pseudo-cities) are skipped.
    """
    zipfile_name = f"{COUNTRY_CODE}.zip"
    chunked_download(GEONAMES_URL, zipfile_name)
    geonames_data = []
    with zipfile.ZipFile(zipfile_name) as zf:
        with zf.open(f"{COUNTRY_CODE}.txt", "r") as fd:  # type: ignore
            # Iterate the file object directly; readlines() would
            # needlessly materialize the whole member in memory.
            for line in fd:
                fields = line.decode().strip().split("\t")
                city_name = fields[2].strip()
                # Skip army rows *before* any further parsing: no point
                # converting coordinates for rows we are going to drop.
                if city_name.startswith(ARMY_PREFIXES):
                    continue
                zipcode = fields[1].strip()
                state_code = fields[4].strip()
                latitude = decimal.Decimal(fields[9].strip())
                longitude = decimal.Decimal(fields[10].strip())
                geonames_data.append(
                    (zipcode, city_name, state_code, latitude, longitude))
    return geonames_data
def _get_timezones_data() -> typing.Dict[str, str]:
    """Download the zipcode-to-timezone SQL dump and parse it into a dict.

    The dump is a gzipped MySQL dump.  Only the ``INSERT INTO`` statements
    matter; each one packs many rows in the form ``(...),(...),...);``.

    Returns:
        Mapping of zipcode string to timezone string.
    """
    filename = "zipcodes_to_timezones.gz"
    chunked_download(ZIP_2_TIMEZONES_URL, filename)
    zipcode_to_timezones: typing.Dict[str, str] = {}
    with gzip.open(filename) as f:
        for raw in f:
            statement = raw.decode().strip()
            if not statement.startswith("INSERT INTO"):
                continue
            # The row list sits between the first "(" and the trailing ");".
            # NOTE(review): the [-3] slice drops the final three characters,
            # i.e. one character beyond ");" — presumably the last column is
            # unused so this is harmless; confirm against the dump format.
            start = statement.index("(") + 1
            for row_def in statement[start:-3].split("),("):
                values = row_def.split(",")
                # Fields are single-quoted; [1:-1] strips the quotes.
                zipcode = values[1][1:-1].strip()
                timezone = values[6][1:-1].strip()
                zipcode_to_timezones[zipcode] = timezone
    return zipcode_to_timezones
def _write_purpleair_fixture(path: pathlib.Path, timestamp) -> None:
    """Fetch the live PurpleAir sensor list and write the in-range subset.

    Sensors outside the range accepted by ``_is_in_range`` (or missing
    coordinates) are dropped; ``LastSeen`` is pinned to *timestamp* so
    fixture-based tests are deterministic.
    """
    resp = requests.get(PURPLEAIR_URL)
    resp.raise_for_status()
    response_json = resp.json()
    results = []
    num_skipped = 0
    for res in response_json.get("results", []):
        latitude = res.get("Lat")
        longitude = res.get("Lon")
        if (latitude is not None
                and longitude is not None
                and _is_in_range(latitude, longitude)):
            res["LastSeen"] = timestamp
            results.append(res)
        else:
            num_skipped += 1
    response_json["results"] = results
    file_path = path / "purpleair/purpleair.json"
    with file_path.open("w") as f:
        json.dump(response_json, f)
    print(f"Skipped {num_skipped} sensors (wrote {len(results)})")


def _write_geonames_fixture(path: pathlib.Path) -> None:
    """Download the geonames archive and re-zip only in-range zipcode rows."""
    tmpfile = "/tmp/geonames.zip"
    try:
        os.remove(tmpfile)
    except FileNotFoundError:
        pass
    chunked_download(GEONAMES_URL, tmpfile)
    # Collect kept lines in a list and join once at the end: repeated
    # string += in a loop is quadratic.
    kept_lines = []
    num_skipped = 0
    with zipfile.ZipFile(tmpfile) as zf:
        with zf.open(f"{COUNTRY_CODE}.txt", "r") as fd:
            for raw in fd:
                line = raw.decode()  # decode once, reuse below
                fields = line.strip().split("\t")
                latitude = float(fields[9].strip())
                longitude = float(fields[10].strip())
                if _is_in_range(latitude, longitude):
                    kept_lines.append(line)
                else:
                    num_skipped += 1
    # NOTE(review): despite the ".zip" suffix this is a scratch directory,
    # not an archive; name kept for compatibility with any external cleanup.
    tmpdir = "/tmp/geonames_out.zip"
    try:
        shutil.rmtree(tmpdir)
    except FileNotFoundError:
        pass
    os.mkdir(tmpdir)
    file_name = f"{tmpdir}/{COUNTRY_CODE}.txt"
    with open(file_name, "w") as f:
        f.write("".join(kept_lines))
    file_path = path / f"geonames/{COUNTRY_CODE}.zip"
    with zipfile.ZipFile(file_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(file_name, os.path.basename(file_name))
    print(f"Skipped {num_skipped} zipcodes (wrote {len(kept_lines)})")


def generate_fixtures():
    """
    Generates fixture data for all zipcodes and sensors within 100km of
    central Portland. We test on this subset of real data to keep test speed
    down. I would caution against running this script unless absolutely
    necessary because doing so will force you to fix a bunch of tests.
    """
    path = pathlib.Path(__file__).parent.parent.parent / "tests" / "fixtures"
    _write_purpleair_fixture(path, BaseTestCase.timestamp)
    _write_geonames_fixture(path)