Exemplo n.º 1
0
 def start_requests(self):
     """Yield one confirmed-cases request per day, from start_date until today.

     Each request carries its date in meta["row"] so the parse callback can
     attach it to the scraped records.
     """
     end = today()
     for current_date in date_range(self.start_date, end):
         row_meta = {"row": {"date": current_date}}
         yield self.make_state_confirmed_request(
             current_date,
             callback=self.parse_state_confirmed,
             meta=row_meta,
         )
Exemplo n.º 2
0
class TotalDeathsSpider(BaseRegistroCivilSpider):
    """Scrape monthly per-state death totals from the Registro Civil API."""

    name = "obitos_totais"
    base_url = "https://transparencia.registrocivil.org.br/api/record/death"
    start_date = datetime.date(2015, 1, 1)
    end_date = today()

    def make_state_request(self,
                           start_date,
                           end_date,
                           state,
                           callback,
                           dont_cache=False):
        """Build a request for death totals of `state` between the two dates.

        The query-string fields are echoed back into meta["row"] so the parse
        callback can merge them into every output record.
        """
        data = [
            ("start_date", str(start_date)),
            ("end_date", str(end_date)),
            ("state", state),
        ]
        return self.make_request(
            url=urljoin(self.base_url, "?" + urlencode(data)),
            callback=callback,
            meta={
                "row": qs_to_dict(data),
                "dont_cache": dont_cache
            },
        )

    def start_requests_after_login(self):
        """Yield one request per (month, state) pair across the crawl period."""
        one_day = datetime.timedelta(days=1)
        # BUG FIX: the previous `today = today()` made `today` a local
        # variable of this function, so calling it raised UnboundLocalError.
        # Bind the result to a differently-named local instead.
        current_day = today()
        non_cache_period = datetime.timedelta(days=30)
        # `date_range` excludes the last, so we need to add one day to
        # `end_date`.
        for date in date_range(self.start_date,
                               self.end_date + one_day,
                               interval="monthly"):
            # Won't cache dates from 30 days ago until today (only historical
            # ones which are unlikely to change).
            should_cache = current_day - date > non_cache_period
            for state in STATES:
                yield self.make_state_request(
                    start_date=date,
                    end_date=next_month(date) - one_day,
                    state=state,
                    callback=self.parse,
                    dont_cache=not should_cache,
                )

    def parse(self, response):
        """Emit one record per city, renaming API keys to our schema."""
        meta = response.meta["row"]
        data = json.loads(response.body)["data"]
        for row in data:
            row.update(meta)
            # API uses "name"/"total"; output uses "city"/"deaths_total".
            row["city"] = row.pop("name")
            row["deaths_total"] = row.pop("total")
            yield row
Exemplo n.º 3
0
 def start_requests_after_login(self):
     """Yield one request per (month, state) pair across the crawl period."""
     day = datetime.timedelta(days=1)
     stale_window = datetime.timedelta(days=30)
     # `date_range` excludes its upper bound, hence the extra day on end_date.
     upper_bound = self.end_date + day
     for month_start in date_range(self.start_date, upper_bound,
                                   interval="monthly"):
         # Months within the last 30 days may still change, so bypass the
         # cache for them; older (historical) months are safe to cache.
         is_recent = today() - month_start <= stale_window
         for uf in STATES:
             yield self.make_state_request(
                 start_date=month_start,
                 end_date=next_month(month_start) - day,
                 state=uf,
                 callback=self.parse,
                 dont_cache=is_recent,
             )
Exemplo n.º 4
0
def main():
    """CLI entry point: merge several input CSVs into one output CSV.

    Streams rows from each input file via `get_data_greedy` and writes them
    all to `output_filename`, showing a progress counter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_filenames", nargs="+")
    parser.add_argument("output_filename")
    args = parser.parse_args()

    writer = rows.utils.CsvLazyDictWriter(args.output_filename)
    progress = tqdm()
    # Bind the hot-path methods to locals: called once per row.
    write_row = writer.writerow
    progress_update = progress.update
    start_date, end_date = None, today()
    try:
        for filename in args.input_filenames:
            for row in get_data_greedy(filename, start_date, end_date):
                write_row(row)
                progress_update()
    finally:
        # Close even when a read/write fails, so the output file is flushed
        # and the progress bar is torn down cleanly.
        writer.close()
        progress.close()
Exemplo n.º 5
0
def parse_application_date(value):
    """Parse `value` as a date string, rejecting out-of-range values.

    Returns None when the parsed date falls on/before 2020-01-01 or on/after
    today, which the data source treats as invalid.
    """
    parsed = parse_date(value)
    # Chained comparison is equivalent to the original
    # `parsed <= "2020-01-01" or parsed >= str(today())` check, negated.
    if "2020-01-01" < parsed < str(today()):
        return parsed
    return None
Exemplo n.º 6
0
 async def tasks(self):
     """Yield one data-extraction Task per configured input file."""
     # No lower bound on the date window; upper bound is today.
     window = (None, today())
     for path in self.input_filenames:
         yield Task(function=get_data_greedy, args=(path, *window))
Exemplo n.º 7
0
    def parse_csv(self, response):
        """Parse the state's semicolon-separated CSV bulletin.

        Yields one record per city, one aggregated "Importados/Indefinidos"
        record for the special non-city rows, and one state-level total.
        """
        reader = csv.DictReader(io.StringIO(response.body.decode("iso-8859-1")), delimiter=";")
        city_name_key = "Município"
        city_code_key = "Cód IBGE"
        confirmed_key = "Mun_Total de casos"
        deaths_key = "Mun_Total de óbitos"
        capture_date = today()
        total_confirmed = total_deaths = 0
        # BUG FIX: default the special (non-city) buckets to zero so the
        # summary rows after the loop don't raise NameError when the CSV
        # omits any of "Outros países" / "Ignorado" / "Outros estados".
        confirmed_imported = deaths_imported = 0
        confirmed_undefined = deaths_undefined = 0
        confirmed_other_states = deaths_other_states = 0
        for row in reader:
            city = row[city_name_key]
            city_ibge_code = int(row[city_code_key]) if row[city_code_key] else None
            confirmed = int(row[confirmed_key])
            deaths = int(row[deaths_key])
            if city == "Outros países":
                confirmed_imported = confirmed
                deaths_imported = deaths
                continue
            elif city == "Ignorado":
                confirmed_undefined = confirmed
                deaths_undefined = deaths
                continue
            elif city == "Outros estados":
                confirmed_other_states = confirmed
                deaths_other_states = deaths
                continue
            else:
                city = self.cities[city_ibge_code]
                total_confirmed += confirmed
                total_deaths += deaths
                yield {
                    "city": city.city,
                    "city_ibge_code": city_ibge_code,
                    "confirmed": confirmed,
                    "date": capture_date,
                    "deaths": deaths,
                    "place_type": "city",
                    "state": self.name,
                }

        # Aggregate the three special buckets into a single pseudo-city row.
        confirmed = confirmed_imported + confirmed_undefined + confirmed_other_states
        deaths = deaths_imported + deaths_undefined + deaths_other_states
        total_confirmed += confirmed
        total_deaths += deaths
        yield {
            "city": "Importados/Indefinidos",
            "city_ibge_code": None,
            "confirmed": confirmed,
            "date": capture_date,
            "deaths": deaths,
            "place_type": "city",
            "state": self.name,
        }
        # Final state-level total, including the pseudo-city aggregate.
        yield {
            "city": None,
            "city_ibge_code": self.state_ibge_code,
            "confirmed": total_confirmed,
            "date": capture_date,
            "deaths": total_deaths,
            "place_type": "state",
            "state": self.name,
        }
Exemplo n.º 8
0
def read_files(input_filenames):
    """Yield one `get_data_greedy` iterable per input filename.

    Note: this yields the per-file iterables themselves, not individual rows.
    """
    # No lower bound on the date window; upper bound is today.
    bounds = (None, today())
    for name in input_filenames:
        yield get_data_greedy(name, *bounds)
Exemplo n.º 9
0
def convert_file(filename):
    """Read a per-cause deaths CSV and yield one wide row per (state, date).

    The input has one record per (date, state, cause, total). Records are
    grouped by state and by month/day (normalized to year 2020 so the same
    calendar day across years lands in one output row), and death counts are
    accumulated per (year, state, cause) to produce both new and cumulative
    columns for every cause and year.
    """
    # There are some missing data on the registral, so default all to None
    # Multiple passes to keep the same column ordering
    all_keys = []
    for prefix in PREFIX_CHOICES:
        all_keys.extend(year_causes_keys(prefix, YEAR_CHOICES))
        all_keys.extend([f"{prefix}_total_{year}" for year in YEAR_CHOICES])
    base_row = {}
    for key in all_keys:
        # Cumulative "deaths_*" columns start at 0; other columns stay None
        # until filled so missing data is distinguishable from zero.
        base_row[key] = 0 if key.startswith("deaths_") else None

    table_types = {
        "date": rows.fields.DateField,
        "state": rows.fields.TextField,
        "cause": rows.fields.TextField,
        "total": rows.fields.IntegerField,
    }
    table = rows.import_from_csv(filename, force_types=table_types)
    # Key by (state, month/day-in-2020) so all years' records for the same
    # calendar day group together. NOTE: sorting by this exact key first is
    # required for groupby below to see each group contiguously.
    row_key = lambda row: (row.state,
                           datetime.date(2020, row.date.month, row.date.day))
    table = sorted(table, key=row_key)
    accumulated = Counter()
    last_day = today()
    for key, state_data in groupby(table, key=row_key):
        state, date = key
        row = {
            "date": date,
            "state": state,
        }
        try:
            this_day_in_2019 = datetime.date(2019, date.month, date.day)
        except ValueError:  # This day does not exist in 2019 (29 February)
            # NOTE(review): `one_day` is not defined in this function —
            # presumably a module-level timedelta(days=1); verify.
            yesterday = date - one_day
            this_day_in_2019 = datetime.date(2019, yesterday.month,
                                             yesterday.day)
        row["epidemiological_week_2019"] = brazilian_epidemiological_week(
            this_day_in_2019)[1]
        row["epidemiological_week_2020"] = brazilian_epidemiological_week(
            date)[1]
        row.update(base_row)

        # Zero sum of new deaths for this state in all years (will accumulate)
        for year in YEAR_CHOICES:
            accumulated[(year, state, "new-total")] = 0

        # For each death cause in this date/state, fill `row` and accumulate
        filled_causes = set()
        for item in state_data:
            cause = item.cause
            year = item.date.year
            key_new = get_death_cause_key("new_deaths", cause, year)
            new_deaths = item.total
            if key_new is None:
                if new_deaths > 0:
                    # Data inconsistency: deaths reported for a cause/year
                    # combination with no output column. Log and skip rather
                    # than abort the whole conversion.
                    # raise RuntimeError(f"Cannot have new_deaths > 0 when key for (new_deaths, {cause}, {year}) is None")
                    print(
                        f"ERROR converting {item}: new_deaths > 0 but key is None"
                    )
                    continue
                else:
                    continue
            accumulated_key = (year, state, cause)
            accumulated_key_total = (year, state, "total")
            accumulated_key_new_total = (year, state, "new-total")
            accumulated[accumulated_key] += new_deaths
            accumulated[accumulated_key_total] += new_deaths
            accumulated[accumulated_key_new_total] += new_deaths
            row[key_new] = new_deaths
            row[get_death_cause_key("deaths", cause,
                                    year)] = accumulated[accumulated_key]
            filled_causes.add((year, cause))

        # Fill other deaths_* (accumulated) values with the last available data
        # if not filled by the state_data for this date.
        for cause in RESPIRATORY_DEATH_CAUSES:
            for year in YEAR_CHOICES:
                if (year, cause) in filled_causes:
                    continue
                accumulated_key = (year, state, cause)
                key_name = get_death_cause_key("deaths", cause, year)
                if key_name is None:
                    continue
                row[key_name] = accumulated[accumulated_key]

        # Fill year totals (new and accumulated) for state
        for year in YEAR_CHOICES:
            # Future dates in the current year have no "new" data yet, so
            # emit None instead of a misleading zero.
            if year == last_day.year and date > last_day:
                new_total = None
            else:
                new_total = accumulated[(year, state, "new-total")]
            total = accumulated[(year, state, "total")]
            row[get_death_cause_key("new_deaths", "total", year)] = new_total
            row[get_death_cause_key("deaths", "total", year)] = total

        yield row