def test_validate_order_fields_issue_313():
    """Validation with sync_schema succeeds when fields are picked by position (issue #313)."""
    source = "data/issue-313.xlsx"
    query = Query(pick_fields=[1, 2, 3, 4, 5])
    # Column_2 is the only field carrying a constraint; the rest are plain strings.
    schema = {
        "fields": [
            {"name": "Column_1", "type": "string"},
            {"name": "Column_2", "type": "string", "constraints": {"required": True}},
            {"name": "Column_3", "type": "string"},
            {"name": "Column_4", "type": "string"},
            {"name": "Column_5", "type": "string"},
        ]
    }
    report = validate(source, query=query, schema=schema, sync_schema=True)
    assert report.valid
def test_xlsx_parser_adjust_floating_point_error_default():
    """By default the binary floating-point artifact from Excel is preserved as-is."""
    dialect = ExcelDialect(preserve_formatting=True)
    query = Query(skip_fields=["<blank>"])
    with pytest.warns(UserWarning):
        with Table(
            "data/adjust-floating-point-error.xlsx", dialect=dialect, query=query
        ) as table:
            assert table.read_data()[1][2] == 274.65999999999997
def test_table_limit_offset_fields():
    """limit_fields and offset_fields combine to select a single middle column."""
    source = "text://header1,header2,header3\nvalue1,value2,value3"
    query = Query(limit_fields=1, offset_fields=1)
    with Table(source, format="csv", query=query) as table:
        assert table.header == ["header2"]
        assert table.header.field_positions == [2]
        assert table.read_rows() == [{"header2": "value2"}]
def test_table_limit_fields_error_zero_issue_521():
    """limit_fields=0 violates the schema minimum of 1 and raises on open (issue #521)."""
    table = Table("data/long.csv", query=Query(limit_fields=0))
    with pytest.raises(exceptions.FrictionlessException) as excinfo:
        table.open()
    error = excinfo.value.error
    assert error.code == "query-error"
    assert error.note.count('minimum of 1" at "limitFields')
def test_table_reset_on_close_issue_190():
    """Reopening a table restarts reading from the first row (issue #190).

    The original test compared ``table.read_data() == [...]`` without
    ``assert``, so both comparisons were evaluated and discarded — the test
    could never fail. The comparisons are now asserted.
    """
    query = Query(limit_rows=1)
    source = [["1", "english"], ["2", "中国人"]]
    table = Table(source, headers=False, query=query)
    table.open()
    assert table.read_data() == [["1", "english"]]
    # Reopening must reset the underlying stream so the same row comes back.
    table.open()
    assert table.read_data() == [["1", "english"]]
    table.close()
def test_table_skip_fields_position_and_prefix():
    """skip_fields accepts a numeric position and a header name in one query."""
    source = "text://header1,header2,header3\nvalue1,value2,value3"
    query = Query(skip_fields=[2, "header3"])
    with Table(source, format="csv", query=query) as table:
        assert table.header == ["header1"]
        assert table.header.field_positions == [1]
        assert table.read_rows() == [{"header1": "value1"}]
def test_table_skip_fields_blank_header_notation():
    """The "<blank>" token drops fields whose header cell is empty."""
    source = "text://header1,,header3\nvalue1,value2,value3"
    query = Query(skip_fields=["<blank>"])
    with Table(source, format="csv", query=query) as table:
        assert table.header == ["header1", "header3"]
        assert table.header.field_positions == [1, 3]
        assert table.read_rows() == [{"header1": "value1", "header3": "value3"}]
def test_table_offset_rows_error_zero_issue_521():
    """offset_rows=0 violates the schema minimum of 1 and raises on open (issue #521)."""
    table = Table("data/long.csv", query=Query(offset_rows=0))
    with pytest.raises(FrictionlessException) as excinfo:
        table.open()
    error = excinfo.value.error
    assert error.code == "query-error"
    assert error.note.count('minimum of 1" at "offsetRows')
def test_table_skip_fields_regex():
    """The "<regex>" token skips every field whose header matches the pattern."""
    source = "text://header1,header2,header3\nvalue1,value2,value3"
    query = Query(skip_fields=["<regex>header(1|3)"])
    with Table(source, format="csv", query=query) as table:
        assert table.header == ["header2"]
        assert table.header.field_positions == [2]
        assert table.read_rows() == [{"header2": "value2"}]
def test_table_write_ods(tmpdir):
    """A CSV table round-trips through the ODS writer with typed cells intact."""
    target = str(tmpdir.join("table.ods"))
    with Table("data/table.csv") as table:
        table.write(target)
    # NOTE: ezodf writer creates more cells than we ask
    query = Query(limit_fields=2, limit_rows=2)
    with Table(target, query=query) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [[1, "english"], [2, "中国人"]]
def read_data_and_validate(self):
    """Read the file at ``self.filepath`` and validate it with frictionless.

    Picks a JSON or CSV dialect from the table metadata, applies any
    row limit from the table params, then stores the outcome on
    ``self.valid`` and ``self.response``.
    """
    log.info(f"Reading and validating: {self.filepath}")

    errors_to_skip = []
    # Header matching is case-sensitive unless the params opt out of it.
    case_sensitive_headers = not self.table_params.get("headers-ignore-case", False)

    if "json" in self.metadata["file_format"]:
        expected_keys = [
            col["name"]
            for col in self.metadata["columns"]
            if col not in self.metadata.get("partitions", [])
        ]
        dialect = dialects.JsonDialect(keys=expected_keys)
        header_opts_given = (
            "headers-ignore-case" in self.table_params
            or "expect-header" in self.table_params
        )
        if header_opts_given:
            # Header options are CSV-only; warn that they are ignored here.
            log.warning(
                "jsonl files do not support header options. If keys "
                "in json lines do not match up exactly (i.e. case sensitive) "
                "with meta columns then keys will be nulled"
            )
    else:
        # Anything that is not json is assumed to be CSV.
        dialect = dialects.Dialect(header_case=case_sensitive_headers)
        if not self.table_params.get("expect-header"):
            errors_to_skip.append("#head")

    row_limit = self.table_params.get("row-limit", False)
    query = Query(limit_rows=row_limit) if row_limit else None

    if " " in self.filepath:
        raise ValueError("The filepath must not contain a space")

    with Table(self.filepath, dialect=dialect, query=query) as table:
        report = validate(
            table.row_stream,
            schema=self.schema,
            dialect=dialect,
            skip_errors=errors_to_skip,
        )

    self.valid = report.valid
    # The table report is a class; convert it to a plain dict lazily here.
    self.response = dict(report.tables[0])
def test_validate_invalid_limit_rows():
    """limit_rows caps the reported row errors; header errors are still reported."""
    report = validate("data/invalid.csv", query=Query(limit_rows=2))
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-header"],
        [None, 4, "duplicate-header"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
    ]
def test_table_xlsx_adjust_floating_point_error():
    """adjust_floating_point_error rounds away Excel's binary-float artifact."""
    dialect = dialects.ExcelDialect(
        fill_merged_cells=False,
        preserve_formatting=True,
        adjust_floating_point_error=True,
    )
    query = Query(skip_fields=["<blank>"])
    with pytest.warns(UserWarning):
        with Table(
            "data/adjust-floating-point-error.xlsx", dialect=dialect, query=query
        ) as table:
            assert table.read_data()[1][2] == 274.66
def test_table_skip_rows_regex():
    """skip_rows mixes a literal prefix with a "<regex>" pattern."""
    rows = [
        ["# comment"],
        ["name", "order"],
        ["# cat"],
        ["# dog"],
        ["John", 1],
        ["Alex", 2],
    ]
    query = Query(skip_rows=["# comment", r"<regex># (cat|dog)"])
    with Table(rows, query=query) as table:
        assert table.header == ["name", "order"]
        assert table.read_data() == [["John", 1], ["Alex", 2]]
def test_table_pick_rows_regex():
    """pick_rows with a "<regex>" pattern keeps only the matching rows."""
    rows = [
        ["# comment"],
        ["name", "order"],
        ["# cat"],
        ["# dog"],
        ["John", 1],
        ["Alex", 2],
    ]
    query = Query(pick_rows=[r"<regex>(name|John|Alex)"])
    with Table(rows, query=query) as table:
        assert table.header == ["name", "order"]
        assert table.read_data() == [["John", 1], ["Alex", 2]]
def test_table_skip_rows_preset():
    """The "<blank>" preset drops only rows where every cell is empty."""
    rows = [
        ["name", "order"],
        ["", ""],
        [],
        ["Ray", 0],
        ["John", 1],
        ["Alex", 2],
        ["", 3],
        [None, 4],
        ["", None],
    ]
    with Table(rows, query=Query(skip_rows=["<blank>"])) as table:
        assert table.header == ["name", "order"]
        # Partially-empty rows survive; the fully blank ones are removed.
        assert table.read_data() == [
            ["Ray", 0],
            ["John", 1],
            ["Alex", 2],
            ["", 3],
            [None, 4],
        ]
def test_table_skip_rows_non_string_cell_issue_322():
    """A string skip pattern also matches rows whose first cell is not a string (issue #322)."""
    data = [["id", "name"], [1, "english"], [2, "spanish"]]
    with Table(data, query=Query(skip_rows=["1"])) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [[2, "spanish"]]
def test_resource_to_table_respect_query_issue_503():
    """Resource.to_table carries the resource's query over to the table (issue #503)."""
    query = Query(limit_rows=1)
    resource = Resource(path="data/table.csv", query=query)
    with resource.to_table() as table:
        assert table.header == ["id", "name"]
        assert table.read_rows() == [{"id": 1, "name": "english"}]
def test_resource_respect_query_set_after_creation_issue_503():
    """A query assigned after construction is honored by subsequent reads (issue #503)."""
    resource = Resource(path="data/table.csv")
    resource.query = Query(limit_rows=1)
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [{"id": 1, "name": "english"}]
def test_xlsx_parser_preserve_formatting_number_multicode():
    """Multi-section Excel number formats are applied when preserving formatting."""
    dialect = ExcelDialect(preserve_formatting=True)
    query = Query(skip_fields=["<blank>"])
    with Table(
        "data/number-format-multicode.xlsx", dialect=dialect, query=query
    ) as table:
        assert table.read_data() == [["4.5"], ["-9.032"], ["15.8"]]
def test_table_limit_offset_rows():
    """limit_rows and offset_rows together select a window of rows."""
    query = Query(limit_rows=2, offset_rows=2)
    with Table("data/long.csv", query=query) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [["3", "c"], ["4", "d"]]
def test_table_offset_rows():
    """offset_rows skips the given number of data rows before yielding."""
    with Table("data/long.csv", query=Query(offset_rows=5)) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [["6", "f"]]
def test_table_limit_rows():
    """limit_rows stops reading after the given number of data rows."""
    with Table("data/long.csv", query=Query(limit_rows=1)) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [["1", "a"]]
def test_table_skip_rows_with_headers_example_from_readme():
    """README example: rows whose first cell starts with "#" are skipped."""
    data = [["#comment"], ["name", "order"], ["John", 1], ["Alex", 2]]
    with Table(data, query=Query(skip_rows=["#"])) as table:
        assert table.header == ["name", "order"]
        assert table.read_data() == [["John", 1], ["Alex", 2]]
def test_table_skip_blank_at_the_end_issue_bco_dmo_33():
    """A trailing blank line survives as an empty row (bco-dmo issue #33)."""
    source = "data/skip-blank-at-the-end.csv"
    with Table(source, query=Query(skip_rows=["#"])) as table:
        assert table.header == ["test1", "test2"]
        assert table.read_data() == [["1", "2"], []]
def test_table_skip_rows_with_headers():
    """Comment rows are skipped before the header row is detected."""
    with Table("data/skip-rows.csv", query=Query(skip_rows=["#"])) as table:
        assert table.header == ["id", "name"]
        assert table.read_data() == [["1", "english"], ["2", "中国人"]]
def test_table_skip_rows_excel_empty_column():
    """An empty-string pattern skips Excel rows whose first cell is empty."""
    with Table("data/skip-rows.xlsx", query=Query(skip_rows=[""])) as table:
        assert table.read_data() == [["A", "B"], [8, 9]]
def test_validate_structure_errors_with_limit_rows():
    """Structure errors within the row limit are still reported."""
    report = validate("data/structure-errors.csv", query=Query(limit_rows=3))
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, None, "blank-row"],
    ]
def test_table_pick_rows_number():
    """pick_rows accepts 1-based row positions when headers are disabled."""
    query = Query(pick_rows=[3, 5])
    with Table("data/skip-rows.csv", headers=False, query=query) as table:
        assert table.read_data() == [["1", "english"], ["2", "中国人"]]
def test_package_resources_respect_query_set_after_creation_issue_503():
    """A query set on a packaged resource after creation is honored (issue #503)."""
    package = Package(resources=[Resource(path="data/table.csv")])
    resource = package.get_resource("table")
    resource.query = Query(limit_rows=1)
    assert resource.read_header() == ["id", "name"]
    assert resource.read_rows() == [{"id": 1, "name": "english"}]