def parse_cities_request(self, response):
    """For every city in the response, schedule one request per
    epidemiological week of 2019 and 2020 (up to the current week).

    We have to do different passes for 2019 and 2020, since the specific
    days of the epidemiological week differ. The api seems to return the
    data from the current year as "2020", and the previous as "2019", so
    we'll exploit that to extract the data only from the "2020" chart.
    """
    cities = json.loads(response.body)
    current_week = Week.fromdate(date_utils.today())
    for city in cities:
        for year in (2020, 2019):
            for weeknum in range(1, current_week.week):
                # Weeks more than 4 behind the current one are stable
                # enough to be cached.
                cacheable = current_week.week - weeknum > 4
                yield self.make_registral_request(
                    city=city,
                    ep_week=Week(year, weeknum),
                    callback=self.parse_registral_request,
                    dont_cache=not cacheable,
                )
def generate_documents(self, applicant: Applicant):
    """Render every template in DOCUMENTS for *applicant*.

    Creates a per-applicant output directory (keyed by corporate email),
    decodes the applicant's base64 photo into it, then renders each docx
    template with the applicant's data and saves it under its Russian name.

    :param applicant: the Applicant whose documents are generated.
    """
    applicant_dir = GENERATED_DIR / applicant.contact_info.corporate_email
    # parents=True also creates GENERATED_DIR itself if it is missing;
    # a plain mkdir(exist_ok=True) would raise FileNotFoundError then.
    applicant_dir.mkdir(parents=True, exist_ok=True)
    parents = [
        r
        for r in applicant.family
        if r.type in [RelativeType.FATHER, RelativeType.MOTHER]
    ]
    siblings = [
        r
        for r in applicant.family
        if r.type in [RelativeType.BROTHER, RelativeType.SISTER]
    ]
    data = applicant.dict()
    # The photo is stored base64-encoded; keep it out of the template
    # context and write the raw bytes to disk instead.
    photo = data.pop("photo")
    photo_path = applicant_dir / "photo"
    photo_path.write_bytes(base64.b64decode(photo.encode()))
    context = {
        "date": today(),
        "full_name": applicant.full_name,
        "parents": parents,
        "siblings": siblings,
        **data,
    }
    # DOCUMENTS maps English template filename -> Russian output filename.
    for (en, rus) in DOCUMENTS:
        doc = DocxTemplate(TEMPLATES_DIR / en)
        doc.render(context, self.jinja_env)
        # Swap the placeholder image baked into the template for the
        # applicant's actual photo.
        doc.replace_media(DUMMY_IMAGE, photo_path)
        doc.save(applicant_dir / rus)
def start_requests(self):
    """Yield one covid request per day/state/(search, cause) combination,
    from 2020-01-01 through today."""
    today = date_utils.today()
    # `date_utils.date_range` excludes the last, so to get today's data we
    # need to pass tomorrow.
    end = today + datetime.timedelta(days=1)
    # "death-covid" takes no cause; "death-respiratory" is queried once
    # per specific cause. Order matches the original request order.
    search_causes = (
        ("death-respiratory", "pneumonia"),
        ("death-respiratory", "insuficiencia_respiratoria"),
        ("death-covid", None),
    )
    for date in date_utils.date_range(datetime.date(2020, 1, 1), end):
        # Won't cache dates from 30 days ago until today (only historical
        # ones which are unlikely to change).
        recent = today - date <= datetime.timedelta(days=30)
        for state in STATES:
            for search, cause in search_causes:
                yield self.make_covid_request(
                    start_date=date,
                    end_date=date,
                    date_type="data_ocorrido",
                    search=search,
                    cause=cause,
                    state=state,
                    callback=self.parse_covid_request,
                    dont_cache=recent,
                )
def start_requests(self):
    """Yield one confirmed-cases request per day from start_date until
    (but excluding) today."""
    for current_date in date_utils.date_range(self.start_date, date_utils.today()):
        meta = {"row": {"date": current_date}}
        yield self.make_state_confirmed_request(
            current_date,
            callback=self.parse_state_confirmed,
            meta=meta,
        )
class TotalDeathsSpider(BaseRegistroCivilSpider):
    """Collect monthly per-state death totals from the Registro Civil
    transparency API, from 2015-01-01 through today."""

    name = "obitos_totais"
    base_url = "https://transparencia.registrocivil.org.br/api/record/death"
    start_date = datetime.date(2015, 1, 1)
    # NOTE: evaluated once, when the class is defined (import time).
    end_date = date_utils.today()

    def make_state_request(self, start_date, end_date, state, callback, dont_cache=False):
        """Build a request for one state's totals over [start_date, end_date]."""
        params = [
            ("start_date", str(start_date)),
            ("end_date", str(end_date)),
            ("state", state),
        ]
        meta = {"row": qs_to_dict(params), "dont_cache": dont_cache}
        return self.make_request(
            url=urljoin(self.base_url, "?" + urlencode(params)),
            callback=callback,
            meta=meta,
        )

    def start_requests_after_login(self):
        """Yield one request per month per state across the whole period."""
        one_day = datetime.timedelta(days=1)
        today = date_utils.today()
        recent_window = datetime.timedelta(days=30)
        # `date_utils.date_range` excludes the last, so we need to add one
        # day to `end_date`.
        month_starts = date_utils.date_range(
            self.start_date, self.end_date + one_day, interval="monthly"
        )
        for month_start in month_starts:
            month_end = date_utils.next_date(month_start, "monthly") - one_day
            # Won't cache dates from 30 days ago until today (only
            # historical ones which are unlikely to change).
            recent = today - month_start <= recent_window
            for state in STATES:
                yield self.make_state_request(
                    start_date=month_start,
                    end_date=month_end,
                    state=state,
                    callback=self.parse,
                    dont_cache=recent,
                )

    def parse(self, response):
        """Yield one row per city, merged with the request's query metadata."""
        base = response.meta["row"]
        for record in json.loads(response.body)["data"]:
            record.update(base)
            record["city"] = record.pop("name")
            record["deaths_total"] = record.pop("total")
            yield record
def start_requests(self):
    """Yield one registral request per day/state from 2020-01-01 to today."""
    today = date_utils.today()
    # `date_utils.date_range` excludes the last, so to get today's data we
    # need to pass tomorrow.
    end = today + datetime.timedelta(days=1)
    for current_date in date_utils.date_range(datetime.date(2020, 1, 1), end):
        # Won't cache dates from 30 days ago until today (only historical
        # ones which are unlikely to change).
        recent = today - current_date <= datetime.timedelta(days=30)
        for state in STATES:
            yield self.make_registral_request(
                start_date=current_date,
                end_date=current_date,
                state=state,
                callback=self.parse_registral_request,
                dont_cache=recent,
            )
def start_requests_after_login(self):
    """Yield one monthly request per state covering the whole period."""
    one_day = datetime.timedelta(days=1)
    today = date_utils.today()
    recent_window = datetime.timedelta(days=30)
    # `date_utils.date_range` excludes the last, so we need to add one day
    # to `end_date`.
    month_starts = date_utils.date_range(
        self.start_date, self.end_date + one_day, interval="monthly"
    )
    for month_start in month_starts:
        month_end = date_utils.next_date(month_start, "monthly") - one_day
        # Won't cache dates from 30 days ago until today (only historical
        # ones which are unlikely to change).
        recent = today - month_start <= recent_window
        for state in STATES:
            yield self.make_state_request(
                start_date=month_start,
                end_date=month_end,
                state=state,
                callback=self.parse,
                dont_cache=recent,
            )
def convert_file(filename):
    """Convert a raw registral CSV into wide per-state/per-day rows.

    Reads (date, state, cause, total) records, groups them by state and by
    day-of-year (normalized onto 2020), and yields one dict per group with
    new and accumulated death counts per cause per year, plus year totals.

    NOTE(review): `one_day`, `today`, PREFIX_CHOICES, YEAR_CHOICES,
    RESPIRATORY_DEATH_CAUSES and the helper functions are presumably
    module-level — not visible here, confirm against the rest of the file.
    """
    # There are some missing data on the registral, so default all to None
    # Multiple passes to keep the same column ordering
    all_keys = []
    for prefix in PREFIX_CHOICES:
        all_keys.extend(year_causes_keys(prefix, YEAR_CHOICES))
        all_keys.extend([f"{prefix}_total_{year}" for year in YEAR_CHOICES])
    # Accumulated ("deaths_*") columns default to 0; the rest to None.
    base_row = {}
    for key in all_keys:
        base_row[key] = 0 if key.startswith("deaths_") else None
    table_types = {
        "date": rows.fields.DateField,
        "state": rows.fields.TextField,
        "cause": rows.fields.TextField,
        "total": rows.fields.IntegerField,
    }
    table = rows.import_from_csv(filename, force_types=table_types)
    # Normalize every date onto year 2020 so the same day across years
    # lands in the same group (2020 is a leap year, so 29 Feb fits).
    row_key = lambda row: (row.state, datetime.date(2020, row.date.month, row.date.day))
    # groupby requires input sorted by the same key.
    table = sorted(table, key=row_key)
    accumulated = Counter()
    last_day = today()
    for key, state_data in groupby(table, key=row_key):
        state, date = key  # `date` is the 2020-normalized day
        row = {
            "date": date,
            "state": state,
        }
        try:
            this_day_in_2019 = datetime.date(2019, date.month, date.day)
        except ValueError:
            # This day does not exist in 2019 (29 February)
            yesterday = date - one_day
            this_day_in_2019 = datetime.date(2019, yesterday.month, yesterday.day)
        row["epidemiological_week_2019"] = brazilian_epidemiological_week(this_day_in_2019)[1]
        row["epidemiological_week_2020"] = brazilian_epidemiological_week(date)[1]
        row.update(base_row)
        # Zero sum of new deaths for this state in all years (will accumulate)
        for year in YEAR_CHOICES:
            accumulated[(year, state, "new-total")] = 0
        # For each death cause in this date/state, fill `row` and accumulate
        filled_causes = set()
        for item in state_data:
            cause = item.cause
            year = item.date.year  # original (non-normalized) year
            key_new = get_death_cause_key("new_deaths", cause, year)
            new_deaths = item.total
            if key_new is None:
                # Unknown (cause, year) combination: log only if it would
                # have contributed deaths, then skip either way.
                if new_deaths > 0:
                    #raise RuntimeError(f"Cannot have new_deaths > 0 when key for (new_deaths, {cause}, {year}) is None")
                    print(f"ERROR converting {item}: new_deaths > 0 but key is None")
                    continue
                else:
                    continue
            accumulated_key = (year, state, cause)
            accumulated_key_total = (year, state, "total")
            accumulated_key_new_total = (year, state, "new-total")
            accumulated[accumulated_key] += new_deaths
            accumulated[accumulated_key_total] += new_deaths
            accumulated[accumulated_key_new_total] += new_deaths
            row[key_new] = new_deaths
            row[get_death_cause_key("deaths", cause, year)] = accumulated[accumulated_key]
            filled_causes.add((year, cause))
        # Fill other deaths_* (accumulated) values with the last available data
        # if not filled by the state_data for this date.
        for cause in RESPIRATORY_DEATH_CAUSES:
            for year in YEAR_CHOICES:
                if (year, cause) in filled_causes:
                    continue
                accumulated_key = (year, state, cause)
                key_name = get_death_cause_key("deaths", cause, year)
                if key_name is None:
                    continue
                row[key_name] = accumulated[accumulated_key]
        # Fill year totals (new and accumulated) for state
        for year in YEAR_CHOICES:
            # Dates past today in the current year have no data yet.
            if year == last_day.year and date > last_day:
                new_total = None
            else:
                new_total = accumulated[(year, state, "new-total")]
            total = accumulated[(year, state, "total")]
            row[get_death_cause_key("new_deaths", "total", year)] = new_total
            row[get_death_cause_key("deaths", "total", year)] = total
        yield row