示例#1
0
    def load(cls, path_locator: "PathLocator", path: str, source_schema: "Schema"=None):
        """
        Build a schema from the ``temporal.json`` and ``immutable.json`` files of a schema directory.

        :param path_locator: Object whose ``schemas_dir`` attribute is the base directory for all schemas.
        :param path: Schema directory relative to ``path_locator.schemas_dir``. If None, None is returned.
        :param source_schema: Optional already-loaded schema; its tracks are handed to the new tracks as sources.
        :return: ``cls(temporal=..., immutable=...)``, or None when ``path`` is None.
        """
        # TODO Figure out why this guard is necessary. It definitely is, for now.
        if path is None:
            return None

        # Without a source schema, both tracks are built with no source.
        if source_schema:
            source_immutable: Optional[Track] = source_schema.immutable
            source_temporal: Optional[Track] = source_schema.temporal
        else:
            source_immutable = None
            source_temporal = None

        temporal_path: str = os.path.join(path_locator.schemas_dir, path, 'temporal.json')
        immutable_path: str = os.path.join(path_locator.schemas_dir, path, 'immutable.json')

        with open(temporal_path, 'r') as temporal_fh, open(immutable_path, 'r') as immutable_fh:
            temporal_track = Track.build(
                specs=json.load(temporal_fh), source=source_temporal, name='temporal'
            )
            immutable_track = Track.build(
                specs=json.load(immutable_fh), source=source_immutable, name='immutable'
            )
            return cls(temporal=temporal_track, immutable=immutable_track)
def test_underscore_folders_ignored():
    """Casting converts the schema variable but leaves the underscore-prefixed folder exactly as it was."""

    def ignored_folder():
        # A fresh dict per call, so the input and the expectation never share state.
        return {
            "foo": "shouldn't matter",
            "bar": "also shouldn't matter"
        }

    immutable_track: Track = Track.build(
        {"binary_in_root": {"name": "the_binary", "data_type": "Binary", "sort_order": 0}},
        None,
        "immutable",
    )
    temporal_track: Track = Track.build({}, None, "temporal")
    the_schema: Schema = Schema(temporal_track, immutable_track)

    raw: Dict = {"immutable": {"the_binary": "true", "_folder": ignored_folder()}}
    wanted: Dict = {"immutable": {"the_binary": True, "_folder": ignored_folder()}}

    composite: Composite = Composite(the_schema, raw)
    do_cast: Cast = Cast(the_schema, {})
    do_cast(composite)
    assert composite.content == wanted
def test_use_same_source_twice(source_spec: Dict, source_doc: Dict,
                               create_document_value_provider, expected):
    """Two target variables may both list the same source variable."""
    targets = [("target_var_1", "first_target"), ("target_var_2", "second_target")]
    # sort_order follows the position in the list above (0, then 1).
    target_spec: Dict = {
        var_id: {
            "name": var_name,
            "data_type": "Integer",
            "sources": ["source_var_1"],
            "sort_order": position
        }
        for position, (var_id, var_name) in enumerate(targets)
    }

    src_track: Track = Track.build(source_spec, None, "Source")
    tgt_track: Track = Track.build(target_spec, src_track, "Target")
    translator: Translator = Translator(tgt_track, create_document_value_provider)

    result = translator("composite_id", "period", source_doc)
    assert result == expected
示例#4
0
def test_variable_spec_two_errors():
    """A spec with two invalid variables raises a single ValidationError that reports both problems."""
    # "name/1" is expected to be rejected as a bad name.
    bad_name_var: Dict = {
        "name": "name/1",
        "data_type": "Text",
        "sort_order": 1
    }
    # Refers to a parent id that is not defined anywhere in the spec.
    orphan_var: Dict = {
        "name": "name2",
        "data_type": "Integer",
        "sort_order": 0,
        "parent": "unknown",
        "metadata": {
            "notes": "notes2"
        }
    }
    with pytest.raises(ValidationError) as raised:
        Track.build({"var1": bad_name_var, "var2": orphan_var}, None, "")

    error = raised.value
    assert len(error.errors) == 2
    assert str(error).split("\n") == [
        "var1: bad name",
        'var2: Variable "var2" lists "unknown" as its parent, but variable doesn\'t exist.',
    ]
示例#5
0
def target_schema(source_schema) -> Schema:
    """Build a schema whose temporal track comes from ``target_spec.json`` and whose immutable track is empty.

    Both tracks use the corresponding track of ``source_schema`` as their source.
    """
    with open(os.path.join(basepath, "target_spec.json")) as spec_file:
        temporal_spec: Dict = json.load(spec_file)
    temporal_track: Track = Track.build(temporal_spec, source_schema.temporal, "temporal")
    immutable_track: Track = Track.build({}, source_schema.immutable, "immutable")
    return Schema(temporal_track, immutable_track)
def nested_list_schema() -> Schema:
    """Return a schema whose immutable track holds a Text variable inside a List inside another List."""
    outer_list: Dict = {"name": "outer_list_1", "data_type": "List", "sort_order": 0}
    inner_list: Dict = {
        "name": "inner_list",
        "data_type": "List",
        "parent": "outer_list_1_id",
        "sort_order": 0
    }
    name_var: Dict = {
        "name": "name",
        "data_type": "Text",
        "parent": "inner_list_1_id",
        "sort_order": 0
    }
    spec: Dict = {
        "outer_list_1_id": outer_list,
        "inner_list_1_id": inner_list,
        "name_1_id": name_var,
    }

    immutable: Track = Track.build(spec, None, "immutable")
    temporal: Track = Track.build({}, None, "Temporal")
    return Schema(temporal, immutable)
def do_test(s_doc, s_spec, t_doc, t_spec, create_document_value_provider):
    """Translate ``s_doc`` from the source spec to the target spec and check the result equals ``t_doc``."""
    src: Track = Track.build(s_spec, None, "Source")
    tgt: Track = Track.build(t_spec, src, "Target")
    translator: Translator = Translator(tgt, create_document_value_provider)
    result = translator("composite_id", "period", s_doc)
    assert result == t_doc
def test_nested_does_not_short_circuit_crawl():
    """Regression test: the coverage file written for a schema with a list next to a folder matches expected.csv.

    Bug history:
         - Detected around 9/20/2019
         - Isolated minimum reproducible case on 9/24/2019
         - Caused by commit e23b825 (8/27/2019)
         - Regression test based on minimum reproducible case
    """

    def variable(name, data_type, sort_order, parent=None):
        # Key order mirrors the hand-written spec: name, data_type, [parent], sort_order.
        entry = {"name": name, "data_type": data_type}
        if parent is not None:
            entry["parent"] = parent
        entry["sort_order"] = sort_order
        return entry

    spec: Dict = {
        "root": variable("return", "Folder", 0),
        "application_submissions": variable("application_submissions", "List", 0, parent="root"),
        "award_restrict": variable("award_restrict", "Text", 0, parent="application_submissions"),
        "filer": variable("filer", "Folder", 1, parent="root"),
        "name_org": variable("name_org", "Text", 0, parent="filer"),
    }

    temporal_track: Track = Track.build(spec, None, "temporal")
    immutable_track: Track = Track.build({}, None, "immutable")
    semantic_schema: Schema = Schema(temporal_track, immutable_track, name="semantic")

    basepath: str = os.path.dirname(os.path.abspath(__file__))
    composite_path: str = os.path.join(basepath, "data")

    # Start from an empty output directory so stale results cannot satisfy the comparison.
    shutil.rmtree(output_path, ignore_errors=True)
    os.makedirs(output_path)
    with Context.build(conf_dir="dummy", data_dir="dummy") as context:
        run_coverage: CoverageFile = CoverageFile(context, semantic_schema, output_path + "/semantic", None, None)
        run_coverage(composite_path, "dummy")

    expected_path: str = os.path.join(basepath, "expected.csv")
    actual_path: str = os.path.join(output_path, "semantic_temporal.csv")
    with open(expected_path) as expected_fh, open(actual_path) as actual_fh:
        e_rows = list(csv.DictReader(expected_fh))
        a_rows = list(csv.DictReader(actual_fh))
        assert a_rows == e_rows
示例#9
0
    def _do_cast_error_test(data_type: str, raw: Optional[Any]):
        """Cast ``raw`` as ``data_type`` and check that it ends up under qc/_exceptions/cast_errors, uncast."""
        immutable_track: Track = Track.build(
            {"var": {"name": "the_var", "data_type": data_type, "sort_order": 0}},
            None,
            "immutable",
        )
        temporal_track: Track = Track.build({}, None, "temporal")
        the_schema: Schema = Schema(temporal_track, immutable_track)

        composite: Composite = Composite(the_schema, {"immutable": {"the_var": raw}})
        do_cast: Cast = Cast(the_schema, {})
        do_cast(composite)

        # The variable is expected only in the cast-error record, keyed by its name.
        cast_errors: Dict = {"cast_errors": {"the_var": raw}}
        wanted: Dict = {"immutable": {"qc": {"_exceptions": cast_errors}}}
        assert composite.content == wanted
示例#10
0
 def _do_nearest_list_test(innermost: str, middle: str, outermost: str,
                           expected: str) -> None:
     """Nest three variables of the given data types and check ``nearest_list`` of the innermost one."""
     # (variable id, data type, parent id); the outermost variable has no parent.
     chain = [
         ("innermost", innermost, "middle"),
         ("middle", middle, "outermost"),
         ("outermost", outermost, None),
     ]
     spec: Dict = {}
     for var_id, data_type, parent_id in chain:
         entry = {"data_type": data_type, "name": var_id, "sort_order": 0}
         if parent_id is not None:
             entry["parent"] = parent_id
         spec[var_id] = entry

     immutable_track: Track = Track.build(spec, None, "i")
     temporal_track: Track = Track.build({}, None, "t")
     the_schema: Schema = Schema(temporal_track, immutable_track)
     variable: Variable = the_schema.get(cast(VariableId, "innermost"))
     assert variable.nearest_list == expected
示例#11
0
def schema() -> Schema:
    """Return a schema with three numeric temporal source variables and one immutable Decimal target."""
    sources = [
        ("integer_source", "Integer"),
        ("decimal_source", "Decimal"),
        ("currency_source", "Currency"),
    ]
    # Each source uses its own name as variable id; sort_order follows list position.
    temporal_spec: Dict = {
        var_name: {"name": var_name, "data_type": data_type, "sort_order": position}
        for position, (var_name, data_type) in enumerate(sources)
    }
    immutable_spec: Dict = {
        "target": {"name": "target", "data_type": "Decimal", "sort_order": 0}
    }
    temporal_track: Track = Track(temporal_spec, None, "temporal")
    immutable_track: Track = Track(immutable_spec, None, "immutable")
    return Schema(temporal_track, immutable_track)
示例#12
0
def schema() -> Schema:
    """Return a schema with one temporal Integer ("source") and two immutable variables ("limit", "limit_period")."""
    temporal_track: Track = Track.build(
        {"the_subject": {"name": "source", "data_type": "Integer", "sort_order": 0}},
        None,
        "temporal",
    )

    limit_var: Dict = {"name": "limit", "data_type": "Integer", "sort_order": 0}
    period_var: Dict = {"name": "limit_period", "data_type": "Text", "sort_order": 1}
    immutable_track: Track = Track.build(
        {"the_target": limit_var, "the_period_id": period_var},
        None,
        "immutable",
    )
    return Schema(temporal_track, immutable_track)
示例#13
0
 def _target_schema(source: Schema, data_type: str = "Text") -> Schema:
     """Build the "target" schema; each track comes from ``target_spec`` and uses the matching track of ``source``."""
     temporal_track: Track = Track.build(
         target_spec("t", data_type), source.temporal, "temporal")
     immutable_track: Track = Track.build(
         target_spec("i", data_type), source.immutable, "immutable")
     return Schema(temporal_track, immutable_track, name="target", source=source)
def test_list_in_folder(source, target, index, create_document_value_provider):
    """Translate the source document and compare it with the expected document selected by ``index``."""
    source_spec, source_doc = source
    target_spec, expected_docs = target()
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == expected_docs[index]
def test_list_in_folder(source, target):
    """Translate the source document and compare it with the expected document."""
    source_spec, source_doc = source
    target_spec, wanted = target()
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt)
    result: Dict = translator(source_doc)
    assert result == wanted
def test_duplicate_name_raises(source_doc, source_spec, target_spec):
    """Adding a "Stacy" entry to list_source_1 makes building the tracks or translating raise ValueError."""
    source_doc["list_source_1"]["Stacy"] = {"Name": "Another Stacy"}
    # Track construction stays inside the block, so a ValueError from any step satisfies the test.
    with pytest.raises(ValueError):
        src: Track = Track.build(source_spec, None, "Source")
        tgt: Track = Track.build(target_spec, src, "Target")
        translator: Translator = Translator(tgt)
        translator(source_doc)
示例#17
0
    def load(cls,
             path: str,
             source_schema: "Schema" = None,
             path_locator: "PathLocator" = None,
             base_path: str = None) -> Optional["Schema"]:
        """
        Constructs a schema from the ``temporal.json`` and ``immutable.json`` files of a schema directory.

        :param path: The path to the schema that is to be loaded, relative to schemas base path. If None, nothing is
            loaded and None is returned.
        :param source_schema: An already-loaded schema from which this schema can be translated, if applicable.
        :param path_locator: Utility class that resolves file paths based on Polytropos' configuration base path.
        :param base_path: Directly supply the base path. Cannot be used with `path_locator`.
        :return: The loaded schema, named after `path` with "/" replaced by "_", or None if `path` is None.
        :raises AssertionError: If neither or both of `path_locator` and `base_path` are supplied.
        """
        # Exactly one of the two ways of locating the schema must be supplied.
        assert (path_locator or base_path) and not (path_locator and base_path)
        schema_name: str = "UNSPECIFIED"
        if path is not None:
            schema_name = path.replace("/", "_")

        logging.info('Loading schema "%s".', schema_name)
        # We return None if path is None to adapt to the case of a task not
        # having a target schema.
        if path is None:
            return None

        if source_schema:
            logging.debug('Schema "%s" has source schema "%s".',
                          schema_name, source_schema.name)
        else:
            logging.debug('Schema "%s" has no source schema.', schema_name)

        source_immutable: Optional[
            Track] = source_schema.immutable if source_schema else None
        source_temporal: Optional[
            Track] = source_schema.temporal if source_schema else None

        temporal_path = _resolve_path(path_locator, base_path, path,
                                      'temporal.json')
        immutable_path = _resolve_path(path_locator, base_path, path,
                                       'immutable.json')

        logging.debug('Temporal path for schema "%s": %s',
                      schema_name, temporal_path)
        # Report the immutable file's own path (this message used to repeat the temporal path).
        logging.debug('Immutable path for schema "%s": %s',
                      schema_name, immutable_path)

        with open(temporal_path, 'r') as temporal, open(immutable_path,
                                                        'r') as immutable:
            return cls(temporal=Track.build(specs=json.load(temporal),
                                            source=source_temporal,
                                            name='%s_temporal' % schema_name),
                       immutable=Track.build(specs=json.load(immutable),
                                             source=source_immutable,
                                             name='%s_immutable' %
                                             schema_name),
                       name=schema_name)
示例#18
0
def test_rearrange(source_doc: Dict, source_spec: Dict, target_doc: Dict, target_spec: Dict):
    """Translation must follow each variable's sort order, not the order in which the spec defines the variables.

    Both specs are shuffled before the tracks are built.
    """
    source_shuffled = shuffle(source_spec)
    target_shuffled = shuffle(target_spec)
    src: Track = Track.build(source_shuffled, None, "Source")
    tgt: Track = Track.build(target_shuffled, src, "Target")
    translator: Translator = Translator(tgt)
    result: Dict = translator(source_doc)
    assert result == target_doc
def test_keyed_list_in_list(source, target, index, create_document_value_provider):
    """Reversing the order of the sources in the target list spec changes the order of the resulting list to match.

    The translated document is compared with the expected document selected by ``index``.
    """
    source_spec, source_doc = source
    target_spec, target_docs = target
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == target_docs[index]
示例#20
0
def test_translate_all_children_missing(source: Callable, target: Callable):
    """Translating an empty source document yields an empty document."""
    __, source_spec = source()
    __, target_spec = target()
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt)
    empty_doc: Dict = {}
    result: Dict = translator(empty_doc)
    assert result == {}
示例#21
0
def test_named_list_in_list(source, target):
    """Reversing the order of the sources in the target list spec changes the order of the resulting list to match."""
    source_spec, source_doc = source
    target_spec, wanted = target
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt)
    result: Dict = translator(source_doc)
    assert result == wanted
示例#22
0
def test_rearrange(source_doc: Dict, source_spec: Dict, target_docs: List[Dict], target_spec: Dict, index, create_document_value_provider):
    """Translation must follow each variable's sort order, not the order in which the spec defines the variables.

    Both specs are shuffled before the tracks are built; the result is compared with ``target_docs[index]``.
    """
    source_shuffled = shuffle(source_spec)
    target_shuffled = shuffle(target_spec)
    src: Track = Track.build(source_shuffled, None, "Source")
    tgt: Track = Track.build(target_shuffled, src, "Target")
    translator: Translator = Translator(tgt, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == target_docs[index]
def test_remove_sources(source_doc: Dict, source_spec: Dict, target_spec: Dict):
    """Remove root sources at runtime, resulting in a cascade; no list is created."""
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    # Clear the root target's sources after the track is built, i.e. at runtime.
    tgt["target_root"].sources = []
    translator: Translator = Translator(tgt)
    result: Dict = translator(source_doc)
    assert result == {}
def schema() -> Schema:
    """Return a schema named "Schema" whose only variable is an immutable MultipleText called "the_var"."""
    temporal_track: Track = Track.build({}, None, "temporal")
    immutable_track: Track = Track.build(
        {"the_var": {"name": "the_var", "data_type": "MultipleText", "sort_order": 0}},
        None,
        "immutable",
    )
    return Schema(temporal_track, immutable_track, "Schema")
    def _make_schema(temporal: bool) -> Schema:
        """Build a schema in which ``make_spec()`` fills the temporal track if ``temporal``, else the immutable one.

        The other track is built from an empty spec.
        """
        if temporal:
            populated_name, empty_name = "temporal", "immutable"
        else:
            populated_name, empty_name = "immutable", "temporal"

        # The populated track is always built first, as in both original branches.
        tracks = {populated_name: Track.build(make_spec(), None, populated_name)}
        tracks[empty_name] = Track.build({}, None, empty_name)
        return Schema(tracks["temporal"], tracks["immutable"])
def ah_schema() -> Schema:
    """Return a schema with four temporal Decimal variables, ad_hoc_0 to ad_hoc_3, and an empty immutable track."""
    t_spec: Dict = {}
    for idx in range(4):
        var_name = "ad_hoc_{}".format(idx)
        # The variable id and its name are the same string.
        t_spec[var_name] = {
            "name": var_name,
            "data_type": "Decimal",
            "sort_order": idx
        }
    temporal_track: Track = Track(t_spec, None, "temporal")
    immutable_track: Track = Track({}, None, "immutable")
    return Schema(temporal_track, immutable_track)
示例#27
0
def schema() -> Schema:
    """Return a schema with a single temporal Text variable named "key" and an empty immutable track."""
    temporal: Track = Track.build(
        {"some_field": {"data_type": "Text", "name": "key", "sort_order": 0}},
        None,
        "temporal",
    )
    immutable: Track = Track.build({}, None, "immutable")
    return Schema(temporal, immutable)
示例#28
0
def simple_schema() -> Schema:
    """Return a schema with one temporal Decimal variable and seven immutable variables.

    The immutable track holds five root variables (one of them a folder) and two Text variables in that folder.
    """

    def variable(name, data_type, sort_order, parent=None):
        # Key order mirrors a hand-written spec: name, data_type, [parent], sort_order.
        entry = {"name": name, "data_type": data_type}
        if parent is not None:
            entry["parent"] = parent
        entry["sort_order"] = sort_order
        return entry

    temporal: Track = Track.build(
        {"the_weight_var": variable("weight_in_pounds", "Decimal", 0)},
        None,
        "temporal",
    )

    immutable_spec: Dict = {
        "the_person_name_var": variable("first_name", "Text", 0),
        "the_gender_var": variable("gender", "Text", 1),
        "the_weight_gain_var": variable("total_weight_gain", "Decimal", 2),
        "the_sentence_var": variable("personal_summary", "Text", 3),
        "color_folder": variable("color_info", "Folder", 4),
        "the_color_var": variable("favorite_color", "Text", 0, parent="color_folder"),
        "the_rgb_var": variable("rgb_value", "Text", 1, parent="color_folder"),
    }
    immutable: Track = Track.build(immutable_spec, None, "immutable")
    return Schema(temporal, immutable)
def test_translate_all_children_missing(source: Callable, target: Callable,
                                        create_document_value_provider,
                                        expected):
    """Translate an empty source document and compare the result with the ``expected`` fixture."""
    __, source_spec = source()
    __, target_spec = target()
    empty_doc: Dict = {}
    src: Track = Track.build(source_spec, None, "Source")
    tgt: Track = Track.build(target_spec, src, "Target")
    translator: Translator = Translator(tgt, create_document_value_provider)
    result = translator("composite_id", "period", empty_doc)
    assert result == expected
示例#30
0
    def load(cls,
             path: str,
             schemas_dir: str,
             source_schema: "Schema" = None) -> Optional["Schema"]:
        """
        Constructs a schema from the ``temporal.json`` and ``immutable.json`` files of a schema directory.

        :param path: The path to the schema that is to be loaded, relative to schemas base path. If None, nothing is
            loaded and None is returned.
        :param schemas_dir: Directly supply the base schemas path.
        :param source_schema: An already-loaded schema from which this schema can be translated, if applicable.
        :return: The loaded schema, named after `path` with "/" replaced by "_", or None if `path` is None.
        """
        schema_name: str = "UNSPECIFIED"
        if path is not None:
            schema_name = path.replace("/", "_")

        logging.info('Loading schema "%s".', schema_name)
        # We return None if path is None to adapt to the case of a task not
        # having a target schema.
        if path is None:
            return None

        if source_schema:
            logging.debug('Schema "%s" has source schema "%s".',
                          schema_name, source_schema.name)
        else:
            logging.debug('Schema "%s" has no source schema.', schema_name)

        source_immutable: Optional[
            Track] = source_schema.immutable if source_schema else None
        source_temporal: Optional[
            Track] = source_schema.temporal if source_schema else None

        temporal_path = os.path.join(schemas_dir, path, 'temporal.json')
        immutable_path = os.path.join(schemas_dir, path, 'immutable.json')

        logging.debug('Temporal path for schema "%s": %s',
                      schema_name, temporal_path)
        # Report the immutable file's own path (this message used to repeat the temporal path).
        logging.debug('Immutable path for schema "%s": %s',
                      schema_name, immutable_path)

        with open(temporal_path, 'r') as temporal, open(immutable_path,
                                                        'r') as immutable:
            return cls(temporal=Track.build(specs=json.load(temporal),
                                            source=source_temporal,
                                            name='%s_temporal' % schema_name),
                       immutable=Track.build(specs=json.load(immutable),
                                             source=source_immutable,
                                             name='%s_immutable' %
                                             schema_name),
                       name=schema_name)