Пример #1
0
    def load(cls, path_locator: "PathLocator", path: str, source_schema: "Schema"=None):
        """
        Builds a schema from the ``temporal.json`` and ``immutable.json`` spec files of a schema directory.

        :param path_locator: Object whose ``schemas_dir`` attribute is the base directory for schemas.
        :param path: The schema's directory, relative to ``path_locator.schemas_dir``.
        :param source_schema: An already-loaded schema whose tracks become the sources of the new tracks, if any.
        :return: The constructed schema, or None when `path` is None.
        """
        # TODO Figure out why this guard is necessary. It definitely is, for now.
        if path is None:
            return None

        # Without a source schema, neither track has a source.
        if source_schema:
            immutable_source: Optional[Track] = source_schema.immutable
            temporal_source: Optional[Track] = source_schema.temporal
        else:
            immutable_source = None
            temporal_source = None

        schema_dir: str = os.path.join(path_locator.schemas_dir, path)
        temporal_file_path: str = os.path.join(schema_dir, 'temporal.json')
        immutable_file_path: str = os.path.join(schema_dir, 'immutable.json')

        with open(temporal_file_path, 'r') as temporal_fh, open(immutable_file_path, 'r') as immutable_fh:
            # The temporal track is fully built before the immutable spec is read.
            temporal_track = Track.build(
                specs=json.load(temporal_fh), source=temporal_source, name='temporal'
            )
            immutable_track = Track.build(
                specs=json.load(immutable_fh), source=immutable_source, name='immutable'
            )
            return cls(temporal=temporal_track, immutable=immutable_track)
Пример #2
0
def test_underscore_folders_ignored():
    """Casting converts the declared Binary value and leaves the ``_folder`` subtree exactly as supplied."""
    binary_spec: Dict = {
        "binary_in_root": {"name": "the_binary", "data_type": "Binary", "sort_order": 0}
    }
    immutable_track: Track = Track.build(binary_spec, None, "immutable")
    temporal_track: Track = Track.build({}, None, "temporal")
    schema: Schema = Schema(temporal_track, immutable_track)

    def underscore_folder() -> Dict:
        # A fresh dict per call, so the input and the expectation never share state.
        return {"foo": "shouldn't matter", "bar": "also shouldn't matter"}

    composite: Composite = Composite(
        schema,
        {"immutable": {"the_binary": "true", "_folder": underscore_folder()}},
    )
    caster: Cast = Cast(schema, {})
    caster(composite)

    assert composite.content == {
        "immutable": {"the_binary": True, "_folder": underscore_folder()}
    }
Пример #3
0
def test_variable_spec_two_errors():
    """A spec containing two invalid variables yields a ValidationError carrying both messages, one per line."""
    first_var: Dict = {"name": "name/1", "data_type": "Text", "sort_order": 1}
    second_var: Dict = {
        "name": "name2",
        "data_type": "Integer",
        "sort_order": 0,
        "parent": "unknown",
        "metadata": {"notes": "notes2"},
    }
    with pytest.raises(ValidationError) as raised:
        Track.build({"var1": first_var, "var2": second_var}, None, "")

    error = raised.value
    assert len(error.errors) == 2
    assert str(error).split("\n") == [
        "var1: bad name",
        'var2: Variable "var2" lists "unknown" as its parent, but variable doesn\'t exist.',
    ]
def nested_list_schema() -> Schema:
    """Return a schema whose immutable track nests a Text inside a List inside another List.

    The temporal track is built from an empty spec.
    """
    def variable(name: str, data_type: str, parent: str = None) -> Dict:
        # Key order mirrors the hand-written spec: name, data_type, (parent), sort_order.
        definition: Dict = {"name": name, "data_type": data_type}
        if parent is not None:
            definition["parent"] = parent
        definition["sort_order"] = 0
        return definition

    spec: Dict = {
        "outer_list_1_id": variable("outer_list_1", "List"),
        "inner_list_1_id": variable("inner_list", "List", "outer_list_1_id"),
        "name_1_id": variable("name", "Text", "inner_list_1_id"),
    }

    immutable: Track = Track.build(spec, None, "immutable")
    temporal: Track = Track.build({}, None, "Temporal")
    return Schema(temporal, immutable)
def do_test(s_doc, s_spec, t_doc, t_spec, create_document_value_provider):
    """Translate ``s_doc`` from the source spec to the target spec and assert the result equals ``t_doc``."""
    source: Track = Track.build(s_spec, None, "Source")
    target: Track = Track.build(t_spec, source, "Target")
    translator: Translator = Translator(target, create_document_value_provider)
    assert translator("composite_id", "period", s_doc) == t_doc
def test_nested_does_not_short_circuit_crawl():
    """Regression test: run a coverage file over the ``data`` directory and compare against ``expected.csv``.

    Bug history:
         - Detected around 9/20/2019
         - Isolated minimum reproducible case on 9/24/2019
         - Caused by commit e23b825 (8/27/2019)
         - Regression test based on minimum reproducible case
    """
    def variable(name: str, data_type: str, sort_order: int, parent: str = None) -> Dict:
        # Key order mirrors the hand-written spec: name, data_type, (parent), sort_order.
        definition: Dict = {"name": name, "data_type": data_type}
        if parent is not None:
            definition["parent"] = parent
        definition["sort_order"] = sort_order
        return definition

    temporal_spec: Dict = {
        "root": variable("return", "Folder", 0),
        "application_submissions": variable("application_submissions", "List", 0, "root"),
        "award_restrict": variable("award_restrict", "Text", 0, "application_submissions"),
        "filer": variable("filer", "Folder", 1, "root"),
        "name_org": variable("name_org", "Text", 0, "filer"),
    }

    temporal_track: Track = Track.build(temporal_spec, None, "temporal")
    immutable_track: Track = Track.build({}, None, "immutable")
    schema: Schema = Schema(temporal_track, immutable_track, name="semantic")

    here: str = os.path.dirname(os.path.abspath(__file__))
    composites_dir: str = os.path.join(here, "data")

    # Start from an empty output directory so stale results cannot satisfy the comparison.
    shutil.rmtree(output_path, ignore_errors=True)
    os.makedirs(output_path)
    with Context.build(conf_dir="dummy", data_dir="dummy") as context:
        run_coverage: CoverageFile = CoverageFile(context, schema, output_path + "/semantic", None, None)
        run_coverage(composites_dir, "dummy")

    expected_csv: str = os.path.join(here, "expected.csv")
    actual_csv: str = os.path.join(output_path, "semantic_temporal.csv")
    with open(expected_csv) as expected_fh, open(actual_csv) as actual_fh:
        expected_rows = list(csv.DictReader(expected_fh))
        actual_rows = list(csv.DictReader(actual_fh))
        assert actual_rows == expected_rows
Пример #7
0
    def _do_cast_error_test(data_type: str, raw: Optional[Any]):
        """Cast ``raw`` as an immutable ``data_type`` variable and assert it ends up under qc/_exceptions/cast_errors."""
        var_spec: Dict = {"var": {"name": "the_var", "data_type": data_type, "sort_order": 0}}
        immutable_track: Track = Track.build(var_spec, None, "immutable")
        temporal_track: Track = Track.build({}, None, "temporal")
        schema: Schema = Schema(temporal_track, immutable_track)

        composite: Composite = Composite(schema, {"immutable": {"the_var": raw}})
        caster: Cast = Cast(schema, {})
        caster(composite)

        # After casting, the only remaining content is the recorded cast error.
        cast_errors: Dict = {"the_var": raw}
        assert composite.content == {
            "immutable": {"qc": {"_exceptions": {"cast_errors": cast_errors}}}
        }
Пример #8
0
 def _do_nearest_list_test(innermost: str, middle: str, outermost: str,
                           expected: str) -> None:
     """Build a three-level variable chain with the given data types and check the innermost ``nearest_list``."""
     # (variable id, data type, parent id), listed from the innermost variable outwards.
     chain = (
         ("innermost", innermost, "middle"),
         ("middle", middle, "outermost"),
         ("outermost", outermost, None),
     )
     spec: Dict = {}
     for var_id, data_type, parent_id in chain:
         definition: Dict = {"data_type": data_type, "name": var_id, "sort_order": 0}
         if parent_id is not None:
             definition["parent"] = parent_id
         spec[var_id] = definition

     immutable_track: Track = Track.build(spec, None, "i")
     temporal_track: Track = Track.build({}, None, "t")
     schema: Schema = Schema(temporal_track, immutable_track)
     variable: Variable = schema.get(cast(VariableId, "innermost"))
     assert variable.nearest_list == expected
Пример #9
0
def target_schema(source_schema) -> Schema:
    """Return a schema whose temporal track comes from ``target_spec.json`` and whose immutable track is empty.

    Each track uses the matching track of ``source_schema`` as its source.
    """
    with open(os.path.join(basepath, "target_spec.json")) as spec_file:
        temporal_spec: Dict = json.load(spec_file)
    temporal_track: Track = Track.build(temporal_spec, source_schema.temporal, "temporal")
    immutable_track: Track = Track.build({}, source_schema.immutable, "immutable")
    return Schema(temporal_track, immutable_track)
def test_use_same_source_twice(source_spec: Dict, source_doc: Dict,
                               create_document_value_provider, expected):
    """Two targets can use the same source."""
    targets = (("target_var_1", "first_target"), ("target_var_2", "second_target"))
    # Both target variables list the single source variable "source_var_1".
    target_spec: Dict = {
        var_id: {
            "name": var_name,
            "data_type": "Integer",
            "sources": ["source_var_1"],
            "sort_order": position,
        }
        for position, (var_id, var_name) in enumerate(targets)
    }
    source: Track = Track.build(source_spec, None, "Source")
    target: Track = Track.build(target_spec, source, "Target")
    translator: Translator = Translator(target, create_document_value_provider)

    assert translator("composite_id", "period", source_doc) == expected
Пример #11
0
def schema() -> Schema:
    """Return a schema with one temporal Integer and an immutable Integer/Text pair.

    Temporal: ``source``. Immutable: ``limit`` (Integer) and ``limit_period`` (Text).
    """
    temporal_track: Track = Track.build(
        {"the_subject": {"name": "source", "data_type": "Integer", "sort_order": 0}},
        None,
        "temporal",
    )
    immutable_track: Track = Track.build(
        {
            "the_target": {"name": "limit", "data_type": "Integer", "sort_order": 0},
            "the_period_id": {"name": "limit_period", "data_type": "Text", "sort_order": 1},
        },
        None,
        "immutable",
    )
    return Schema(temporal_track, immutable_track)
Пример #12
0
 def _target_schema(source: Schema, data_type: str = "Text") -> Schema:
     """Return the schema named "target" whose tracks are built from ``target_spec`` and sourced from ``source``."""
     temporal_track: Track = Track.build(target_spec("t", data_type), source.temporal, "temporal")
     immutable_track: Track = Track.build(target_spec("i", data_type), source.immutable, "immutable")
     return Schema(temporal_track, immutable_track, name="target", source=source)
def test_list_in_folder(source, target):
    """Translating the source document with the target spec produces the expected document."""
    source_spec, source_doc = source
    target_spec, expected = target()
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side)
    assert translator(source_doc) == expected
def test_list_in_folder(source, target, index, create_document_value_provider):
    """Translating the source document produces the ``index``-th expected document."""
    source_spec, source_doc = source
    target_spec, expected = target()
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == expected[index]
def test_duplicate_name_raises(source_doc, source_spec, target_spec):
    """Expect a ValueError once an extra "Stacy" entry is placed in ``list_source_1`` of the source document."""
    source_doc["list_source_1"]["Stacy"] = {"Name": "Another Stacy"}
    with pytest.raises(ValueError):
        # Track construction deliberately stays inside the context: a ValueError
        # raised by any of these steps satisfies the test.
        source_side: Track = Track.build(source_spec, None, "Source")
        target_side: Track = Track.build(target_spec, source_side, "Target")
        translator: Translator = Translator(target_side)
        translator(source_doc)
Пример #16
0
    def load(cls,
             path: str,
             source_schema: "Schema" = None,
             path_locator: "PathLocator" = None,
             base_path: str = None) -> Optional["Schema"]:
        """
        Constructs a schema from the `temporal.json` and `immutable.json` spec files found under `path`.

        :param path: The path to the schema that is to be loaded, relative to schemas base path.
        :param source_schema: An already-loaded schema from which this schema can be translated, if applicable.
        :param path_locator: Utility class that resolves file paths based on Polytropos' configuration base path.
        :param base_path: Directly supply the base path. Cannot be used with `path_locator`.
        :return: The loaded schema, or None if `path` is None.
        :raises AssertionError: Unless exactly one of `path_locator` and `base_path` is supplied.
        """
        # Exactly one of the two ways of locating the schema must be given.
        assert (path_locator or base_path) and not (path_locator and base_path)
        schema_name: str = "UNSPECIFIED"
        if path is not None:
            schema_name = path.replace("/", "_")

        logging.info('Loading schema "%s".', schema_name)
        # We return None if path is None to adapt to the case of a task not
        # having a target schema.
        if path is None:
            return None

        if source_schema:
            logging.debug('Schema "%s" has source schema "%s".',
                          schema_name, source_schema.name)
        else:
            logging.debug('Schema "%s" has no source schema.', schema_name)

        source_immutable: Optional[
            Track] = source_schema.immutable if source_schema else None
        source_temporal: Optional[
            Track] = source_schema.temporal if source_schema else None

        temporal_path = _resolve_path(path_locator, base_path, path,
                                      'temporal.json')
        immutable_path = _resolve_path(path_locator, base_path, path,
                                       'immutable.json')

        logging.debug('Temporal path for schema "%s": %s',
                      schema_name, temporal_path)
        # Fixed: this message previously reported temporal_path a second time.
        logging.debug('Immutable path for schema "%s": %s',
                      schema_name, immutable_path)

        with open(temporal_path, 'r') as temporal, open(immutable_path,
                                                        'r') as immutable:
            return cls(temporal=Track.build(specs=json.load(temporal),
                                            source=source_temporal,
                                            name='%s_temporal' % schema_name),
                       immutable=Track.build(specs=json.load(immutable),
                                             source=source_immutable,
                                             name='%s_immutable' %
                                             schema_name),
                       name=schema_name)
Пример #17
0
def test_rearrange(source_doc: Dict, source_spec: Dict, target_doc: Dict, target_spec: Dict):
    """Verify that translate respects the sort order property of the variables in the target spec, and ignores the
    order in which the variables happen to be defined in the spec. """
    # Both specs are shuffled (source first) before the tracks are built.
    source_shuffled = shuffle(source_spec)
    target_shuffled = shuffle(target_spec)
    source_side: Track = Track.build(source_shuffled, None, "Source")
    target_side: Track = Track.build(target_shuffled, source_side, "Target")
    translator: Translator = Translator(target_side)
    assert translator(source_doc) == target_doc
def test_keyed_list_in_list(source, target, index, create_document_value_provider):
    """Reversing the order of the sources in the target list spec results in an equivalent change in the order of the
    resulting list."""
    source_spec, source_doc = source
    target_spec, target_doc = target
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == target_doc[index]
Пример #19
0
def test_translate_all_children_missing(source: Callable, target: Callable):
    """Translating an empty source document yields an empty document."""
    _, source_spec = source()
    _, target_spec = target()
    empty_doc: Dict = {}
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side)
    assert translator(empty_doc) == {}
Пример #20
0
def test_named_list_in_list(source, target):
    """Reversing the order of the sources in the target list spec results in an equivalent change in the order of the
    resulting list."""
    source_spec, source_doc = source
    target_spec, target_doc = target
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side)
    assert translator(source_doc) == target_doc
Пример #21
0
def test_rearrange(source_doc: Dict, source_spec: Dict, target_docs: List[Dict], target_spec: Dict, index, create_document_value_provider):
    """Verify that translate respects the sort order property of the variables in the target spec, and ignores the
    order in which the variables happen to be defined in the spec. """
    # Both specs are shuffled (source first) before the tracks are built.
    source_shuffled = shuffle(source_spec)
    target_shuffled = shuffle(target_spec)
    source_side: Track = Track.build(source_shuffled, None, "Source")
    target_side: Track = Track.build(target_shuffled, source_side, "Target")
    translator: Translator = Translator(target_side, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == target_docs[index]
def test_remove_sources(source_doc: Dict, source_spec: Dict, target_spec: Dict):
    """Remove root sources at runtime, resulting in a cascade; no list is created."""
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    # Strip the sources from the already-built target root before translating.
    target_side["target_root"].sources = []
    translator: Translator = Translator(target_side)
    assert translator(source_doc) == {}
    def _make_schema(temporal: bool) -> Schema:
        """Return a schema with ``make_spec()`` in the temporal track if ``temporal`` is truthy, else in the immutable one.

        The other track is built from an empty spec.
        """
        if temporal:
            populated_name, empty_name = "temporal", "immutable"
        else:
            populated_name, empty_name = "immutable", "temporal"

        # In both cases the populated track is built first, then the empty one.
        populated: Track = Track.build(make_spec(), None, populated_name)
        empty: Track = Track.build({}, None, empty_name)

        # Schema takes the temporal track first and the immutable track second.
        if temporal:
            return Schema(populated, empty)
        return Schema(empty, populated)
Пример #24
0
def schema() -> Schema:
    """Return a schema named "Schema" with a single immutable MultipleText variable and an empty temporal track."""
    temporal_track: Track = Track.build({}, None, "temporal")
    immutable_track: Track = Track.build(
        {"the_var": {"name": "the_var", "data_type": "MultipleText", "sort_order": 0}},
        None,
        "immutable",
    )
    return Schema(temporal_track, immutable_track, "Schema")
Пример #25
0
def schema() -> Schema:
    """Return a schema with a single temporal Text variable named ``key`` and an empty immutable track."""
    text_field: Dict = {"data_type": "Text", "name": "key", "sort_order": 0}
    temporal: Track = Track.build({"some_field": text_field}, None, "temporal")
    immutable: Track = Track.build({}, None, "immutable")
    return Schema(temporal, immutable)
Пример #26
0
def simple_schema() -> Schema:
    """Return a schema with one temporal Decimal and several immutable variables.

    The immutable track holds four root-level variables plus a ``color_info`` folder with two Text children.
    """
    def variable(name: str, data_type: str, sort_order: int, parent: str = None) -> Dict:
        # Key order mirrors the hand-written spec: name, data_type, (parent), sort_order.
        definition: Dict = {"name": name, "data_type": data_type}
        if parent is not None:
            definition["parent"] = parent
        definition["sort_order"] = sort_order
        return definition

    temporal: Track = Track.build(
        {"the_weight_var": variable("weight_in_pounds", "Decimal", 0)},
        None,
        "temporal",
    )

    immutable_vars: Dict = {
        "the_person_name_var": variable("first_name", "Text", 0),
        "the_gender_var": variable("gender", "Text", 1),
        "the_weight_gain_var": variable("total_weight_gain", "Decimal", 2),
        "the_sentence_var": variable("personal_summary", "Text", 3),
        "color_folder": variable("color_info", "Folder", 4),
        "the_color_var": variable("favorite_color", "Text", 0, "color_folder"),
        "the_rgb_var": variable("rgb_value", "Text", 1, "color_folder"),
    }
    immutable: Track = Track.build(immutable_vars, None, "immutable")
    return Schema(temporal, immutable)
def test_translate_all_children_missing(source: Callable, target: Callable,
                                        create_document_value_provider,
                                        expected):
    """Translating an empty source document produces the ``expected`` document."""
    _, source_spec = source()
    _, target_spec = target()
    empty_doc: Dict = {}
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side, create_document_value_provider)
    result = translator("composite_id", "period", empty_doc)
    assert result == expected
Пример #28
0
    def load(cls,
             path: str,
             schemas_dir: str,
             source_schema: "Schema" = None) -> Optional["Schema"]:
        """
        Constructs a schema from the `temporal.json` and `immutable.json` spec files found under `path`.

        :param path: The path to the schema that is to be loaded, relative to schemas base path.
        :param schemas_dir: Directly supply the base schemas path.
        :param source_schema: An already-loaded schema from which this schema can be translated, if applicable.
        :return: The loaded schema, or None if `path` is None.
        """
        schema_name: str = "UNSPECIFIED"
        if path is not None:
            schema_name = path.replace("/", "_")

        logging.info('Loading schema "%s".', schema_name)
        # We return None if path is None to adapt to the case of a task not
        # having a target schema.
        if path is None:
            return None

        if source_schema:
            logging.debug('Schema "%s" has source schema "%s".',
                          schema_name, source_schema.name)
        else:
            logging.debug('Schema "%s" has no source schema.', schema_name)

        source_immutable: Optional[
            Track] = source_schema.immutable if source_schema else None
        source_temporal: Optional[
            Track] = source_schema.temporal if source_schema else None

        temporal_path = os.path.join(schemas_dir, path, 'temporal.json')
        immutable_path = os.path.join(schemas_dir, path, 'immutable.json')

        logging.debug('Temporal path for schema "%s": %s',
                      schema_name, temporal_path)
        # Fixed: this message previously reported temporal_path a second time.
        logging.debug('Immutable path for schema "%s": %s',
                      schema_name, immutable_path)

        with open(temporal_path, 'r') as temporal, open(immutable_path,
                                                        'r') as immutable:
            return cls(temporal=Track.build(specs=json.load(temporal),
                                            source=source_temporal,
                                            name='%s_temporal' % schema_name),
                       immutable=Track.build(specs=json.load(immutable),
                                             source=source_immutable,
                                             name='%s_immutable' %
                                             schema_name),
                       name=schema_name)
def test_folder_null_skipped(source, target, create_document_value_provider,
                             expected):
    """On occasion, e-files contain <EmptyElements/> that would normally contain list items. These are converted to
    JSON as {"EmptyElement": null} and are not included as list items during translation."""
    source_spec, source_doc = source
    # Null out one source folder before translating.
    source_doc["second_source_folder"] = None
    target_spec, _unused_doc = target
    source_side: Track = Track.build(source_spec, None, "Source")
    target_side: Track = Track.build(target_spec, source_side, "Target")
    translator: Translator = Translator(target_side, create_document_value_provider)
    result = translator("composite_id", "period", source_doc)
    assert result == expected
Пример #30
0
def test_get_conflict_raises(track_type):
    """``Schema.get`` raises ValueError for an ID defined in both tracks, given the supplied ``track_type``."""
    shared_spec: Dict = {
        "A": {"name": "temporal variable", "data_type": "Text", "sort_order": 0}
    }
    temporal_track = Track.build(shared_spec, None, "temporal")
    # The immutable track gets its own deep copy of the same spec.
    immutable_track = Track.build(copy.deepcopy(shared_spec), None, "immutable")
    conflicted = Schema(temporal_track, immutable_track)
    with pytest.raises(ValueError):
        conflicted.get("A", track_type=track_type)