def test_datetime_read_cell(format, source, target):
    """Read ``source`` through a datetime field and compare with ``target``.

    Warnings raised while reading are recorded; for every format that does
    not start with ``"fmt:"`` the test also requires that none were raised.

    ``pytest.warns(None)`` is deprecated in pytest 7 and rejected by pytest 8,
    so the standard-library recorder is used instead.
    """
    import warnings

    with warnings.catch_warnings(record=True) as recorded:
        # Record every warning, including repeats that default filters hide
        warnings.simplefilter("always")
        field = Field({"name": "name", "type": "datetime", "format": format})
        cell, _notes = field.read_cell(source)
        assert cell == target
    # NOTE(review): "fmt:" formats are exempt, presumably because they are
    # expected to emit a warning -- confirm against the datetime type
    if not format.startswith("fmt:"):
        assert recorded == []
def test_type_custom():
    """A type created by a registered plugin is used to read "custom" fields."""

    class CustomType(Type):
        """Type whose ``read_cell`` wraps the cell in a one-element list."""

        def read_cell(self, cell):
            return [cell]

    class CustomPlugin(Plugin):
        """Plugin that returns ``CustomType`` for fields typed "custom"."""

        def create_type(self, field):
            if field.type != "custom":
                return None
            return CustomType(field)

    # Register the plugin, then read the table with a schema that uses it
    system.register("custom", CustomPlugin())
    fields = [Field(type="integer"), Field(type="custom")]
    resource = Resource(path="data/table.csv", schema=Schema(fields=fields))
    expected_rows = [
        {"integer": 1, "custom": ["english"]},
        {"integer": 2, "custom": ["中国人"]},
    ]
    assert resource.read_rows() == expected_rows
def test_table_dialect_header_case_is_false():
    """With ``header_case=False`` the header stays valid despite a case mismatch."""
    options = {
        "dialect": Dialect(header_case=False),
        "schema": Schema(fields=[Field(name="ID"), Field(name="NAME")]),
    }
    with Table("data/table.csv", **options) as table:
        # Schema names are upper-case while the header is lower-case
        assert table.schema.field_names == ["ID", "NAME"]
        assert table.header == ["id", "name"]
        assert table.header.valid is True
def test_missing_label():
    """A schema with one more field than the file has labels gives an invalid header."""
    field_names = ("id", "name", "extra")
    schema = Schema(fields=[Field(name=field_name) for field_name in field_names])
    with Resource(path="data/table.csv", schema=schema) as resource:
        header = resource.header
        # The header lists all schema fields, the labels only those in the file
        assert header == ["id", "name", "extra"]
        assert header.labels == ["id", "name"]
        assert header.valid is False
def test_resource_layout_header_case_is_false():
    """With ``header_case=False`` lower-case labels satisfy an upper-case schema."""
    options = {
        "layout": Layout(header_case=False),
        "schema": Schema(fields=[Field(name="ID"), Field(name="NAME")]),
    }
    with Resource("data/table.csv", **options) as resource:
        assert resource.schema.field_names == ["ID", "NAME"]
        # Labels keep the file's casing; the header reports the schema names
        assert resource.labels == ["id", "name"]
        assert resource.header == ["ID", "NAME"]
        assert resource.header.valid is True
def test_array_read_cell_array_item_with_constraint():
    """An array whose items all satisfy the item ``enum`` reads without notes."""
    item_descriptor = {"constraints": {"enum": ["val1", "val2"]}}
    field = Field(type="array", array_item=item_descriptor)
    result = field.read_cell('["val1", "val2"]')
    cell, notes = result
    assert cell == ["val1", "val2"]
    assert notes is None
def test_field_read_cell():
    """``read_cell`` on the ``DESCRIPTOR`` field returns ``(cell, notes)`` pairs."""
    field = Field(DESCRIPTOR)
    cases = [
        ("1", (1, None)),
        ("string", (None, {"type": 'type is "integer/default"'})),
        ("-", (None, {"required": 'constraint "required" is "True"'})),
    ]
    for source, expected in cases:
        assert field.read_cell(source) == expected
def test_table_dialect_header_case_default():
    """Without a dialect, a case mismatch yields two non-matching-header errors."""
    upper_case_fields = [Field(name="ID"), Field(name="NAME")]
    with Table("data/table.csv", schema=Schema(fields=upper_case_fields)) as table:
        assert table.schema.field_names == ["ID", "NAME"]
        assert table.header == ["id", "name"]
        assert table.header.valid is False
        # One error is expected for each of the two mismatching cells
        for index in (0, 1):
            assert table.header.errors[index].code == "non-matching-header"
def test_field_read_cell_number_missingValues():
    """Each configured missing value of a number field reads as ``(None, None)``."""
    descriptor = {
        "name": "name",
        "type": "number",
        "missingValues": ["", "NA", "N/A"],
    }
    field = Field(descriptor)
    for token in ("", "NA", "N/A"):
        assert field.read_cell(token) == (None, None)
def test_resource_layout_header_case_default():
    """Without a layout, a case mismatch yields two incorrect-label errors."""
    upper_case_fields = [Field(name="ID"), Field(name="NAME")]
    with Resource("data/table.csv", schema=Schema(fields=upper_case_fields)) as resource:
        assert resource.schema.field_names == ["ID", "NAME"]
        assert resource.labels == ["id", "name"]
        assert resource.header == ["ID", "NAME"]
        assert resource.header.valid is False
        # One error is expected for each of the two mismatching labels
        for index in (0, 1):
            assert resource.header.errors[index].code == "incorrect-label"
def test_missing_header():
    """``read_header`` gives an invalid two-cell header for a three-field schema."""
    field_names = ("id", "name", "extra")
    schema = Schema(fields=[Field(name=field_name) for field_name in field_names])
    resource = Resource(path="data/table.csv", schema=schema)
    header = resource.read_header()
    assert header == ["id", "name"]
    assert header.valid is False
def test_validate_package_using_detector_schema_sync_issue_847():
    """A package validates when its resources use a ``schema_sync`` detector.

    The inline data has the single column ``f1`` while the schema declares
    both ``f1`` and ``f2``.
    """
    two_field_schema = Schema(fields=[Field(name="f1"), Field(name="f2")])
    inline_resource = Resource(
        data=[["f1"], ["v1"], ["v2"], ["v3"]],
        schema=two_field_schema,
    )
    package = Package(resources=[inline_resource])
    for item in package.resources:
        item.detector = Detector(schema_sync=True)
    report = validate(package)
    assert report.valid
def test_type_custom(custom_plugin):
    """Rows read through a "custom" typed field carry list-wrapped cells."""
    # Both fields are named after their type
    fields = [Field(name=kind, type=kind) for kind in ("integer", "custom")]
    expected_rows = [
        {"integer": 1, "custom": ["english"]},
        {"integer": 2, "custom": ["中国人"]},
    ]
    with Resource(path="data/table.csv", schema=Schema(fields=fields)) as resource:
        assert resource.read_rows() == expected_rows
def test_field_read_cell_required():
    """A required string field flags configured missing values and ``None``."""
    descriptor = {
        "name": "name",
        "type": "string",
        "constraints": {"required": True},
        "missingValues": ["", "NA", "N/A"],
    }
    field = Field(descriptor)

    # Values that are not configured as missing pass through unchanged
    for text in ("test", "null", "none", "nil", "nan", "-"):
        assert field.read_cell(text) == (text, None)

    # Configured missing values and None violate the "required" constraint
    for missing in ("NA", "N/A", "", None):
        assert field.read_cell(missing) == (
            None,
            {"required": 'constraint "required" is "True"'},
        )
def test_field_read_cell_pattern():
    """The ``pattern`` constraint accepts "a" and "b", flags "c", skips nulls."""
    descriptor = {
        "name": "name",
        "type": "string",
        "constraints": {"pattern": "a|b"},
    }
    field = Field(descriptor)
    cases = [
        ("a", ("a", None)),
        ("b", ("b", None)),
        ("c", ("c", {"pattern": 'constraint "pattern" is "a|b"'})),
        # Null value passes
        ("", (None, None)),
    ]
    for source, expected in cases:
        assert field.read_cell(source) == expected
def test_field_read_cell_maxLength():
    """The ``maxLength`` constraint flags strings longer than two characters."""
    descriptor = {
        "name": "name",
        "type": "string",
        "constraints": {"maxLength": 2},
    }
    field = Field(descriptor)
    cases = [
        ("abc", ("abc", {"maxLength": 'constraint "maxLength" is "2"'})),
        ("ab", ("ab", None)),
        ("a", ("a", None)),
        # Null value passes
        ("", (None, None)),
    ]
    for source, expected in cases:
        assert field.read_cell(source) == expected
def test_field_defaults():
    """A field described only by its name exposes the expected defaults."""
    field = Field({"name": "id"})
    expected = {
        "name": "id",
        "type": "any",
        "format": "default",
        "missing_values": [""],
        "constraints": {},
    }
    for attribute, value in expected.items():
        assert getattr(field, attribute) == value
    assert field.required is False
def test_field():
    """A field built from ``DESCRIPTOR`` exposes the descriptor's properties."""
    field = Field(DESCRIPTOR)
    expected = {
        "name": "id",
        "type": "integer",
        "format": "default",
        "missing_values": ["-"],
        "constraints": {"required": True},
    }
    for attribute, value in expected.items():
        assert getattr(field, attribute) == value
    assert field.required is True
def test_validate_less_actual_fields_with_required_constraint_issue_950():
    """A required schema field absent from the file is reported per label and row."""
    schema = describe("data/table.csv", type="schema")
    extra_field = Field(name="bad", constraints={"required": True})
    schema.add_field(extra_field)
    report = validate("data/table.csv", schema=schema)
    flat_errors = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flat_errors == [
        [None, 3, "missing-label"],
        [2, 3, "missing-cell"],
        [3, 3, "missing-cell"],
    ]
def test_field_pprint_1029():
    """``repr`` of a field matches the expected one-line dict rendering."""
    descriptor = {
        "name": "name",
        "type": "string",
        "constraints": {"maxLength": 2},
    }
    field = Field(descriptor)
    expected = "{'constraints': {'maxLength': 2}, 'name': 'name', 'type': 'string'}"
    assert repr(field) == expected
def test_field_read_cell_maximum():
    """The ``maximum`` constraint flags values above 2, given as text or int."""
    descriptor = {
        "name": "name",
        "type": "integer",
        "constraints": {"maximum": 2},
    }
    field = Field(descriptor)
    cases = [
        ("3", (3, {"maximum": 'constraint "maximum" is "2"'})),
        (3, (3, {"maximum": 'constraint "maximum" is "2"'})),
        ("2", (2, None)),
        (2, (2, None)),
        ("1", (1, None)),
        (1, (1, None)),
        # Null value passes
        ("", (None, None)),
    ]
    for source, expected in cases:
        assert field.read_cell(source) == expected
def test_field_set_schema():
    """``Field.schema`` returns what was given at init or assigned later."""
    # Schema passed through the constructor
    boolean_field = Field(
        name="name",
        type="boolean",
        format={"trueValues": "Yes", "falseValues": "No"},
    )
    initial_schema = Schema(fields=[boolean_field])
    field = Field(schema=initial_schema)
    assert field.schema == initial_schema

    # Schema assigned through the property
    replacement_schema = Schema({"fields": [{"name": "name", "type": "other"}]})
    field.schema = replacement_schema
    assert field.schema == replacement_schema
def test_field_read_cell_enum():
    """The ``enum`` constraint flags integers outside the listed values."""
    descriptor = {
        "name": "name",
        "type": "integer",
        "constraints": {"enum": ["1", "2", "3"]},
    }
    field = Field(descriptor)
    cases = [
        ("1", (1, None)),
        (1, (1, None)),
        ("4", (4, {"enum": "constraint \"enum\" is \"['1', '2', '3']\""})),
        (4, (4, {"enum": "constraint \"enum\" is \"['1', '2', '3']\""})),
        # Null value passes
        ("", (None, None)),
    ]
    for source, expected in cases:
        assert field.read_cell(source) == expected
def test_schema_metadata_bad_schema_format():
    """A field whose ``format`` is a mapping makes the schema metadata invalid."""
    bad_field = Field(
        name="name",
        type="boolean",
        format={"trueValues": "Yes", "falseValues": "No"},
    )
    schema = Schema(fields=[bad_field])
    assert schema.metadata_valid is False
    first_error = schema.metadata_errors[0]
    assert first_error.code == "field-error"
def create_header(cells, *, schema=None, field_positions=None):
    """Build a ``Header`` for ``cells``, filling in anything not supplied.

    Parameters:
        cells: header cells, passed to ``Header`` unchanged
        schema: schema to attach; when falsy, a schema is generated with one
            ``any`` field per position, named "field" plus the position
        field_positions: positions of the cells; when falsy, the positions
            ``1`` to ``len(cells)`` are used

    Returns:
        Header: the constructed header
    """
    # ``None`` replaces the former mutable ``[]`` default; both are falsy, so
    # callers that omit the argument or pass ``[]`` get the same result
    field_positions = field_positions or list(range(1, len(cells) + 1))
    if not schema:
        fields = [
            Field({"name": "field%s" % field_position, "type": "any"})
            for field_position in field_positions
        ]
        schema = Schema({"fields": fields})
    return Header(cells, schema=schema, field_positions=field_positions)
def test_field_standard_specs_properties(create_descriptor):
    """Standard properties read back the same from kwargs or from a descriptor."""
    options = {
        "name": "name",
        "title": "title",
        "description": "description",
        "type": "string",
        "format": "format",
        "missing_values": "missing",
        "constraints": {},
        "rdf_type": "rdf",
    }
    # The flag selects which of the two construction paths is exercised
    if create_descriptor:
        field = Field(helpers.create_descriptor(**options))
    else:
        field = Field(**options)
    expected = {
        "name": "name",
        "title": "title",
        "description": "description",
        "type": "string",
        "format": "format",
        "missing_values": "missing",
        "constraints": {},
        "rdf_type": "rdf",
    }
    for attribute, value in expected.items():
        assert getattr(field, attribute) == value
def transform_resource(self, resource):
    """Fill the target field with "FUSS" or "VELO" derived from the source field.

    A row gets "FUSS" when the first three characters of its ``sourceName``
    value are "FZS", otherwise "VELO". If the ``targetName`` field is not in
    the schema yet it is added as a string field; otherwise the existing
    column is overwritten.

    Parameters:
        resource: resource whose ``schema`` and ``data`` are updated in place
    """
    table = resource.to_petl()
    source = self.get("sourceName")
    target = self.get("targetName")

    def classify(row):
        return "FUSS" if row[source][:3] == "FZS" else "VELO"

    # ``schema.fields`` holds Field objects, so a name has to be looked up in
    # ``field_names``; testing against ``fields`` never matched a string
    if target not in resource.schema.field_names:
        resource.schema.add_field(Field(name=target, type="string"))
        resource.data = table.addfield(target, value=classify)
    else:
        # petl ``update`` writes one fixed value, which here would be the
        # callable itself; ``convert`` with ``pass_row`` computes it per row
        resource.data = table.convert(
            target, lambda _cell, row: classify(row), pass_row=True
        )
def create_row(cells, *, schema=None, field_positions=None, row_position=1, row_number=1):
    """Build a ``Row`` for ``cells``, filling in anything not supplied.

    Parameters:
        cells: row cells, passed to ``Row`` unchanged
        schema: schema to attach; when falsy, a schema is generated with one
            ``any`` field per position, named "field" plus the position
        field_positions: positions of the cells; when falsy, the positions
            ``1`` to ``len(cells)`` are used
        row_position: forwarded to ``Row`` as ``row_position``
        row_number: forwarded to ``Row`` as ``row_number``

    Returns:
        Row: the constructed row
    """
    # ``None`` replaces the former mutable ``[]`` default; both are falsy, so
    # callers that omit the argument or pass ``[]`` get the same result
    field_positions = field_positions or list(range(1, len(cells) + 1))
    if not schema:
        fields = [
            Field({"name": "field%s" % field_position, "type": "any"})
            for field_position in field_positions
        ]
        schema = Schema({"fields": fields})
    return Row(
        cells,
        schema=schema,
        field_positions=field_positions,
        row_position=row_position,
        row_number=row_number,
    )
def test_resource_metadata_bad_schema_format():
    """A field whose ``format`` is a mapping makes the resource metadata invalid."""
    bad_field = Field(
        name="name",
        type="boolean",
        format={"trueValues": "Yes", "falseValues": "No"},
    )
    schema = Schema(fields=[bad_field])
    resource = Resource(name="name", path="data/table.csv", schema=schema)
    assert resource.metadata_valid is False
    first_error = resource.metadata_errors[0]
    assert first_error.code == "field-error"
def transform_resource(self, resource):
    """Fill the target field with the source datetime text in a new format.

    Each ``sourceName`` value is parsed with ``sourceFormat`` and rendered
    with ``targetFormat``. If the ``targetName`` field is not in the schema
    yet it is added with type ``targetType``; otherwise the existing column
    is overwritten.

    Parameters:
        resource: resource whose ``schema`` and ``data`` are updated in place
    """
    table = resource.to_petl()
    source = self.get("sourceName")
    source_format = self.get("sourceFormat")
    target = self.get("targetName")
    target_format = self.get("targetFormat")
    target_type = self.get("targetType")

    def reformat(row):
        parsed = datetime.strptime(row[source], source_format)
        return parsed.strftime(target_format)

    # ``schema.fields`` holds Field objects, so a name has to be looked up in
    # ``field_names``; testing against ``fields`` never matched a string
    if target not in resource.schema.field_names:
        resource.schema.add_field(Field(name=target, type=target_type))
        resource.data = table.addfield(target, value=reformat)
    else:
        # petl ``update`` writes one fixed value, which here would be the
        # callable itself; ``convert`` with ``pass_row`` computes it per row
        resource.data = table.convert(
            target, lambda _cell, row: reformat(row), pass_row=True
        )