def test_not_valid_if_sum_of_deaths_does_not_matches_with_total(self):
    """Reject a spreadsheet whose total line reports 50 deaths instead of 32.

    The total line of the fixture content is rewritten and the formatter is
    expected to raise ``SpreadsheetValidationErrors``.
    """
    # assumes the fixture content holds the 'TOTAL NO ESTADO,102,32' line
    tampered = self.content.replace('TOTAL NO ESTADO,102,32', 'TOTAL NO ESTADO,102,50')
    self.content = tampered
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors):
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
def test_confirmed_cases_must_be_equal_or_greater_than_deaths(self):
    """Reject a city row that reports more deaths (20) than confirmed cases (9)."""
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,9,20")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors):
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
def test_validate_if_all_cities_exists_are_in_the_state(self):
    """Formatting the fixture rows against state "SP" must report foreign cities.

    Both "Abatiá" and "Adrianópolis" are expected to be flagged as not
    belonging to SP in the collected error messages.
    """
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, "SP")

    messages = execinfo.value.error_messages
    assert "Abatiá não pertence à UF SP" in messages
    assert "Adrianópolis não pertence à UF SP" in messages
def test_invalidate_spreadsheet_against_float_numbers(self):
    """A dotted number ("10.000") in a numeric column must fail validation.

    Only a prefix of the first error message is checked: it must start
    listing the offending rows with the total line, the undefined entry and
    "Abatiá".
    """
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,10.000,1")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    # The original author noted that all entries end up listed in the message;
    # this prefix covers the first three of them.
    expected_prefix = "Erro no formato de algumas entradas dados: cheque para ver se a planilha não possui fórmulas ou números com ponto ou vírgula nas linhas: TOTAL NO ESTADO, Importados/Indefinidos, Abatiá, "
    first_message = execinfo.value.error_messages[0]
    assert expected_prefix in first_message
def test_validation_error_if_city_formula(self):
    """A formula in the confirmed-cases cell of "Abatiá" must be reported."""
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,'=SUM(A1:A3)',1")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    messages = execinfo.value.error_messages
    assert "Provavelmente há uma fórmula na linha Abatiá da planilha" in messages
def test_not_valid_if_sum_of_deaths_does_not_matches_with_total(self):
    """A total of 50 deaths must be reported as differing from the sum of 32."""
    tampered = self.content.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,102,50")
    self.content = tampered
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    messages = execinfo.value.error_messages
    assert "A soma de mortes (32) difere da entrada total (50)." in messages
def test_validation_error_if_city_is_repeated(self):
    """Two rows for the same city ("Abatiá") must produce a duplicate-entry error."""
    duplicated_rows = "Abatiá,9,1\nAbatiá,0,0"
    self.content = self.content.replace("Abatiá,9,1", duplicated_rows)
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    messages = execinfo.value.error_messages
    assert "Mais de uma entrada para Abatiá" in messages
def test_validation_error_if_city_formula(self):
    """A formula in the "Abatiá" row must be reported as a format error."""
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,'=SUM(A1:A3)',1")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    # NOTE(review): the trailing double quote is part of the expected text;
    # presumably it mirrors the validator's message verbatim -- confirm.
    expected_message = 'Erro no formato de algumas entradas dados: cheque para ver se a planilha não possui fórmulas ou números com ponto ou vírgula nas linhas: Abatiá"'
    messages = execinfo.value.error_messages
    assert expected_message in messages
def test_invalidate_spreadsheet_against_VALUE_error(self):
    """Rows holding "#VALUE!" cells must all be listed in one format error.

    Both the "Abatiá" and the "Adrianópolis" rows are corrupted, and the
    expected message names the two of them.
    """
    corrupted = self.content.replace("Abatiá,9,1", "Abatiá,#VALUE!,3 0")
    corrupted = corrupted.replace("Adrianópolis,11,2", "Adrianópolis,#VALUE!,3 0")
    self.content = corrupted
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    # NOTE(review): the trailing double quote is part of the expected text;
    # presumably it mirrors the validator's message verbatim -- confirm.
    expected_message = 'Erro no formato de algumas entradas dados: cheque para ver se a planilha não possui fórmulas ou números com ponto ou vírgula nas linhas: Abatiá, Adrianópolis"'
    messages = execinfo.value.error_messages
    assert expected_message in messages
def test_both_confirmed_cases_and_deaths_columns_must_be_integers(self):
    """A float in either numeric column of a city row must fail validation."""
    pristine = self.content
    float_variants = (
        "Abatiá,9.10,1",  # confirmed cases as float
        "Abatiá,9,1.10",  # deaths as float
    )

    for broken_row in float_variants:
        # Always start from the untouched content so the cases stay independent.
        self.content = pristine.replace("Abatiá,9,1", broken_row)
        parsed_rows = rows.import_from_csv(self.file_from_content)
        with pytest.raises(SpreadsheetValidationErrors):
            format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
def test_raise_error_if_empty_line_but_with_data(self):
    """Rows with numbers but an empty city column must yield a single error.

    The empty-lines sample has every ",," replaced with ",10,20"; the
    resulting error message must be present exactly once.
    """
    sample_path = settings.SAMPLE_SPREADSHEETS_DATA_DIR / "sample-PR-empty-lines.csv"
    assert sample_path.exists()

    self.content = sample_path.read_text().replace(",,", ",10,20")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    with pytest.raises(SpreadsheetValidationErrors) as execinfo:
        format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    messages = execinfo.value.error_messages
    msg = "Uma ou mais linhas com a coluna de cidade vazia possuem números de confirmados ou óbitos"
    assert msg in messages
    # The error must not be repeated once per offending line.
    assert messages.count(msg) == 1
def test_both_confirmed_cases_and_deaths_columns_must_be_filled(self):
    """A city row with only one of the two numeric columns filled is invalid."""
    pristine = self.content
    partial_variants = (
        "Abatiá,,1",  # missing confirmed cases
        "Abatiá,9,",  # missing deaths
    )

    for broken_row in partial_variants:
        # Always start from the untouched content so the cases stay independent.
        self.content = pristine.replace("Abatiá,9,1", broken_row)
        parsed_rows = rows.import_from_csv(self.file_from_content)
        with pytest.raises(SpreadsheetValidationErrors):
            format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
def clean(self):
    """Import the uploaded spreadsheet and validate its rows.

    Runs only when ``file``, ``date`` and ``state`` are all present in the
    cleaned data. The import function is chosen from the file name suffix
    (case-insensitive). On success the formatted rows and the warnings are
    stored in ``self.file_data_as_json`` and ``self.data_warnings``; each
    message carried by a ``SpreadsheetValidationErrors`` is attached to the
    form as a non-field error.

    Raises:
        forms.ValidationError: if the suffix is not supported, or if the
            import function fails (the original exception is chained as
            the cause).
    """
    cleaned_data = super().clean()
    fobj = cleaned_data.get('file')
    spreadsheet_date = cleaned_data.get("date")
    state = cleaned_data.get("state")

    if all([fobj, spreadsheet_date, state]):
        path = Path(fobj.name)
        import_func_per_suffix = {
            '.csv': rows.import_from_csv,
            '.xls': import_xls,
            '.xlsx': rows.import_from_xlsx,
            '.ods': rows.import_from_ods,
        }

        import_func = import_func_per_suffix.get(path.suffix.lower())
        if not import_func:
            # NOTE(review): `valid` is a dict keys view, so the message shows
            # its repr (dict_keys([...])); kept as-is to preserve the text.
            valid = import_func_per_suffix.keys()
            msg = f"Formato de planilha inválida. O arquivo precisa estar formatado como {valid}."  # noqa
            raise forms.ValidationError(msg)

        try:
            file_rows = import_func(fobj)
        except Exception as e:
            # Chain the cause so the underlying import failure stays visible
            # in tracebacks and logs.
            raise forms.ValidationError(e) from e

        try:
            self.file_data_as_json, self.data_warnings = format_spreadsheet_rows_as_dict(
                file_rows, spreadsheet_date, state
            )
        except SpreadsheetValidationErrors as exception:
            for error in exception.error_messages:
                self.add_error(None, error)
def test_ignore_sum_of_deaths_if_flagged(self):
    """With ``skip_sum_deaths=True`` a mismatching deaths total is accepted.

    The total is changed to 90 deaths; the call must succeed, emit the
    "check disabled" warning and keep 90 as the deaths of the first result.
    """
    self.content = self.content.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,102,90")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(
        parsed_rows,
        self.date,
        self.uf,
        skip_sum_deaths=True,
    )

    expected_warning = "A checagem da soma de óbitos por cidade com o valor total foi desativada."
    assert expected_warning in warnings
    first_entry = results[0]
    assert first_entry["deaths"] == 90
def test_do_not_check_for_totals_if_only_total_lines_data(self):
    """The no-cities-data sample must be accepted, with 50 deaths in the first entry."""
    sample_path = settings.SAMPLE_SPREADSHEETS_DATA_DIR / "sample-PR-no-cities-data.csv"
    assert sample_path.exists()

    self.content = sample_path.read_text()
    parsed_rows = rows.import_from_csv(self.file_from_content)
    data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    first_entry = data[0]
    assert first_entry["deaths"] == 50
def test_spreadsheet_only_with_total_line_should_be_valid(self):
    """A spreadsheet made of the header and the total line yields one result."""
    self.content = "municipio,confirmados,mortes\nTOTAL NO ESTADO,102,32"
    parsed_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert len(results) == 1
    only_entry = results[0]
    assert only_entry["confirmed"] == 102
    assert only_entry["deaths"] == 32
def test_alternative_columns_names_for_city(self):
    """The city column may also be named "município" or "cidade"."""
    pristine = self.content

    for header in ("município", "cidade"):
        # Swap the header starting from the untouched content each time.
        self.content = pristine.replace("municipio", header)
        parsed_rows = rows.import_from_csv(self.file_from_content)
        data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

        assert data[0]["confirmed"] == 102
def test_line_can_have_none_for_all_values_if_city_has_no_cases_yet(self):
    """A city row with both numeric cells empty is accepted and left out.

    The totals are lowered to 93/31 to match the removed "Abatiá" figures;
    the result must have one entry fewer than the imported rows and must
    not mention "Abatiá".
    """
    adjusted = self.content.replace("Abatiá,9,1", "Abatiá,,")
    adjusted = adjusted.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,93,31")
    self.content = adjusted
    parsed_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert len(results) == len(parsed_rows) - 1
    reported_cities = [entry["city"] for entry in results]
    assert "Abatiá" not in reported_cities
def test_ignore_empty_lines_when_importing(self):
    """The empty-lines sample has more than 8 rows but yields exactly 8 entries."""
    sample_path = settings.SAMPLE_SPREADSHEETS_DATA_DIR / "sample-PR-empty-lines.csv"
    assert sample_path.exists()

    self.content = sample_path.read_text()
    parsed_rows = rows.import_from_csv(self.file_from_content)
    data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert len(parsed_rows) > 8
    assert 8 == len(data)
def test_alternative_columns_names_for_deaths(self):
    """The deaths column may be named 'óbitos', 'óbito', 'obito' or 'morte'."""
    pristine = self.content

    for header in ('óbitos', 'óbito', 'obito', 'morte'):
        # Swap the header starting from the untouched content each time.
        self.content = pristine.replace('mortes', header)
        parsed_rows = rows.import_from_csv(self.file_from_content)
        data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

        assert data[0]['deaths'] == 32
def test_alternative_columns_names_for_deaths(self):
    """Every accepted alias of the deaths header must parse to the same total."""
    untouched_content = self.content
    header_aliases = ["óbitos", "óbito", "obito", "morte"]

    for alias in header_aliases:
        # Each alias is applied to the untouched content, not to the previous one.
        self.content = untouched_content.replace("mortes", alias)
        imported = rows.import_from_csv(self.file_from_content)
        data, warnings = format_spreadsheet_rows_as_dict(imported, self.date, self.uf)

        total_entry = data[0]
        assert total_entry["deaths"] == 32
def test_alternative_columns_names_for_confirmed_cases(self):
    """The confirmed column accepts 'casos confirmados', 'confirmado' and 'confirmados'."""
    pristine = self.content

    for header in ('casos confirmados', 'confirmado', 'confirmados'):
        # Swap the header starting from the untouched content each time.
        self.content = pristine.replace('confirmados', header)
        parsed_rows = rows.import_from_csv(self.file_from_content)
        data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

        assert data[0]['confirmed'] == 102
def test_undefined_can_hold_none_values_for_confirmed_and_deaths(self):
    """An "Importados/Indefinidos" row with empty numbers is accepted and left out.

    The totals are lowered to 100/30 to match; the result must have one
    entry fewer than the imported rows and no "Importados/Indefinidos" city.
    """
    adjusted = self.content.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,100,30")
    adjusted = adjusted.replace("Importados/Indefinidos,2,2", "Importados/Indefinidos,,")
    self.content = adjusted
    parsed_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert len(results) == len(parsed_rows) - 1
    reported_cities = [entry["city"] for entry in results]
    assert "Importados/Indefinidos" not in reported_cities
def test_undefined_entry_can_have_more_deaths_than_cases(self):
    """The undefined entry may report more deaths than cases, with a warning.

    "Importados/Indefinidos" is changed to 2 confirmed / 3 deaths and the
    total deaths to 33. The call must succeed, keep every row, and return
    exactly two warnings: "db warning" and the one about the undefined entry.
    """
    self.content = self.content.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,102,33")
    self.content = self.content.replace("Importados/Indefinidos,2,2", "Importados/Indefinidos,2,3")
    file_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(file_rows, self.date, self.uf)

    assert len(results) == len(file_rows)
    assert 2 == len(warnings)
    # presumably "db warning" comes from a patched historical-data validation
    # set up outside this method -- confirm against the decorator/fixture.
    assert "db warning" in warnings
    # Membership check: asserting the bare string literal is always truthy
    # and would verify nothing.
    assert "Importados/Indefinidos com número óbitos maior que de casos confirmados." in warnings
def test_allow_zero_as_a_valid_value(self):
    """Zero is a valid number for confirmed cases and for deaths.

    Two scenarios are run from the same untouched content: "Abatiá" with
    0 cases / 0 deaths, and "Abatiá" with 9 cases / 0 deaths. In both the
    total line is adjusted and the expected city entry must be in the results.
    """
    pristine = self.content
    abatia_entry = {
        "city": "Abatiá",
        "city_ibge_code": get_city_info("Abatiá", "PR").city_ibge_code,
        "confirmed": 9,
        "date": self.date.isoformat(),
        "deaths": 1,
        "place_type": "city",
        "state": "PR",
    }

    # zero confirmed cases
    content = pristine.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,93,31")
    self.content = content.replace("Abatiá,9,1", "Abatiá,0,0")
    expected = {**abatia_entry, "confirmed": 0, "deaths": 0}
    parsed_rows = rows.import_from_csv(self.file_from_content)
    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
    assert expected in results

    # zero deaths
    content = pristine.replace("TOTAL NO ESTADO,102,32", "TOTAL NO ESTADO,102,31")
    self.content = content.replace("Abatiá,9,1", "Abatiá,9,0")
    expected = {**abatia_entry, "deaths": 0}
    parsed_rows = rows.import_from_csv(self.file_from_content)
    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)
    assert expected in results
def test_validate_historical_data_as_the_final_validation(
    self, mock_validate_historical_data
):
    """Warnings returned by the historical-data validation are passed through.

    The mock (presumably injected by a patch decorator outside this view)
    must be called exactly once with a ``StateSpreadsheet`` carrying the
    formatted results, the state and the date.
    """
    stub_warnings = ["warning 1", "warning 2"]
    mock_validate_historical_data.return_value = stub_warnings
    parsed_rows = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert results
    assert ["warning 1", "warning 2"] == warnings
    assert 1 == mock_validate_historical_data.call_count

    # First positional argument of the single recorded call.
    positional_args = mock_validate_historical_data.call_args[0]
    spreadsheet = positional_args[0]
    assert isinstance(spreadsheet, StateSpreadsheet)
    assert spreadsheet.table_data == results
    assert spreadsheet.state == self.uf
    assert spreadsheet.date == self.date
def test_always_use_ibge_data_to_format_the_city_name(self):
    """A lower-cased city name ("abatiá") must come back as "Abatiá"."""
    self.content = self.content.replace("Abatiá,9,1", "abatiá,9,1")
    parsed_rows = rows.import_from_csv(self.file_from_content)

    ibge_code = get_city_info("Abatiá", "PR").city_ibge_code
    expected_entry = dict(
        city="Abatiá",
        city_ibge_code=ibge_code,
        confirmed=9,
        date=self.date.isoformat(),
        deaths=1,
        place_type="city",
        state="PR",
    )

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert expected_entry in results
def test_line_can_have_none_for_all_values_if_city_has_no_cases_yet(self):
    """A city row with both numeric cells empty is reported with zeros.

    The totals are lowered to 93/31 to match; the "Abatiá" entry must be in
    the results with 0 confirmed cases and 0 deaths.
    """
    adjusted = self.content.replace('Abatiá,9,1', 'Abatiá,,')
    adjusted = adjusted.replace('TOTAL NO ESTADO,102,32', 'TOTAL NO ESTADO,93,31')
    self.content = adjusted
    parsed_rows = rows.import_from_csv(self.file_from_content)

    ibge_code = get_city_info('Abatiá', 'PR').city_ibge_code
    expected_entry = dict(
        city='Abatiá',
        city_ibge_code=ibge_code,
        confirmed=0,
        date=self.date.isoformat(),
        deaths=0,
        place_type="city",
        state='PR',
    )

    results, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    assert expected_entry in results
def clean(self):
    """Import the uploaded spreadsheet and validate its rows.

    Runs only when ``file``, ``date`` and ``state`` are all present in the
    cleaned data. The import function is chosen from the lower-cased file
    name suffix and is fed an in-memory copy of the upload, after which the
    uploaded file is rewound. On success the formatted rows and the warnings
    are stored in ``self.file_data_as_json`` and ``self.data_warnings``; each
    message carried by a ``SpreadsheetValidationErrors`` is attached to the
    form as a non-field error.

    Raises:
        forms.ValidationError: if the suffix is not supported, or if the
            import function fails (the original exception is chained as
            the cause).
    """
    cleaned_data = super().clean()
    fobj = cleaned_data.get("file")
    spreadsheet_date = cleaned_data.get("date")
    state = cleaned_data.get("state")

    if all([fobj, spreadsheet_date, state]):
        path = Path(fobj.name)
        suffix = path.suffix.lower()
        import_func_per_suffix = {
            ".csv": rows.import_from_csv,
            ".xls": import_xls,
            ".xlsx": rows.import_from_xlsx,
            ".ods": rows.import_from_ods,
        }

        import_func = import_func_per_suffix.get(suffix)
        if not import_func:
            # NOTE(review): `valid` is a dict keys view, so the message shows
            # its repr (dict_keys([...])); kept as-is to preserve the text.
            valid = import_func_per_suffix.keys()
            msg = f"Formato de planilha inválida. O arquivo precisa estar formatado como {valid}."  # noqa
            raise forms.ValidationError(msg)

        # Parse from a copy and rewind the upload so it can be read again.
        file_data = io.BytesIO(fobj.read())
        fobj.seek(0)

        try:
            file_rows = import_func(file_data)
        except Exception as e:
            msg = f"Incoerência no formato do arquivo e sua extensão. Confirme se o conteúdo arquivo de fato corresponde a um {suffix} válido."
            # Chain the cause so the underlying import failure stays visible
            # in tracebacks and logs.
            raise forms.ValidationError(f"{msg} ERRO: {e}") from e

        try:
            self.file_data_as_json, self.data_warnings = format_spreadsheet_rows_as_dict(
                file_rows,
                spreadsheet_date,
                state,
                skip_sum_cases=cleaned_data.get("skip_sum_cases", False),
                skip_sum_deaths=cleaned_data.get("skip_sum_deaths", False),
            )
        except SpreadsheetValidationErrors as exception:
            for error in exception.error_messages:
                self.add_error(None, error)
def test_format_valid_list_of_rows(self):
    """The fixture spreadsheet is formatted into the expected list of entries.

    The expected output is the state total, then the undefined entry, then
    one entry per city in the order listed below; the only warning returned
    must be "warning".
    """
    parsed_rows = rows.import_from_csv(self.file_from_content)
    iso_date = self.date.isoformat()

    data, warnings = format_spreadsheet_rows_as_dict(parsed_rows, self.date, self.uf)

    def build_entry(city, ibge_code, confirmed, deaths, place_type="city"):
        # Shape shared by every expected entry.
        return {
            "city": city,
            "city_ibge_code": ibge_code,
            "confirmed": confirmed,
            "date": iso_date,
            "deaths": deaths,
            "place_type": place_type,
            "state": "PR",
        }

    # (city name, confirmed cases, deaths)
    city_figures = [
        ("Abatiá", 9, 1),
        ("Adrianópolis", 11, 2),
        ("Agudos do Sul", 12, 3),
        ("Almirante Tamandaré", 8, 4),
        ("Altamira do Paraná", 13, 5),
        ("Alto Paraíso", 47, 15),
    ]

    expected = [
        build_entry(None, 41, 102, 32, place_type="state"),
        build_entry("Importados/Indefinidos", None, 2, 2),
    ]
    for name, confirmed, deaths in city_figures:
        ibge_code = get_city_info(name, self.uf).city_ibge_code
        expected.append(build_entry(name, ibge_code, confirmed, deaths))

    assert data == expected
    # presumably "warning" comes from a patched validation set up outside
    # this method -- confirm against the decorator/fixture.
    assert ["warning"] == warnings