Exemplo n.º 1
0
    async def test_model_json_data_partition_location_consistency(self):
        '''
            Check that a data partition location comes out as the same relative
            path, 'EpisodeOfCare/partition-data.csv', for a manifest that is:
             1. Read directly from default.manifest.cdm.json.
             2. Converted to model.json data and then converted back.
             3. Built from the model.json input file, after its hard-coded
                partition path is swapped for the local adapter path.
        '''
        test_name = 'test_model_json_data_partition_location_consistency'
        pascal_test_name = StringUtils.snake_case_to_pascal_case(test_name)
        expected_location = 'EpisodeOfCare/partition-data.csv'

        # 1. Manifest read directly.
        corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        direct_manifest = await corpus.fetch_object_async(
            'default.manifest.cdm.json',
            corpus.storage.fetch_root_folder('local'))
        direct_partition = direct_manifest.entities[0].data_partitions[0]
        self.assertEqual(expected_location, direct_partition.location)

        # 2. Manifest converted to model.json data ...
        model_json_data = await ManifestPersistence.to_data(
            direct_manifest, None, None)
        first_partition = model_json_data.entities[0]['partitions'][0]
        location = first_partition['location']  # type: str
        expected_fragment = os.path.join(
            'TestData', 'Persistence', 'ModelJson', 'DataPartition',
            pascal_test_name, 'Input', 'EpisodeOfCare', 'partition-data.csv')
        # Model Json uses absolute adapter path.
        self.assertTrue(expected_fragment in location)

        # ... and converted back into a manifest, using a fresh corpus.
        corpus2 = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        round_tripped_manifest = await ManifestPersistence.from_object(
            corpus2.ctx, model_json_data,
            corpus2.storage.fetch_root_folder('local'))
        round_tripped_partition = \
            round_tripped_manifest.entities[0].data_partitions[0]
        self.assertEqual(expected_location, round_tripped_partition.location)

        # 3. Manifest built from the model.json input file.
        # TODO: Need to change path in C#
        corpus3 = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        raw_model_json = TestHelper.get_input_file_content(
            self.tests_subpath, test_name, 'model.json')
        local_root = corpus3.storage.fetch_root_folder('local')
        hard_coded_path = 'C:\\\\cdm\\\\testData\\\\Persistence\\\\ModelJson\\\\DataPartition\\\\TestModelJsonDataPartitionLocationConsistency\\\\Input\\\\EpisodeOfCare\\\\partition-data.csv'
        # Backslashes are doubled so the substituted path matches the escaping
        # used by the hard-coded path inside the file text.
        escaped_location = location.replace('\\', '\\\\')
        # Drop any U+FEFF (byte order mark) characters before decoding.
        patched_model_json = raw_model_json.replace('\uFEFF', '').replace(
            hard_coded_path, escaped_location)

        manifest_from_file = await ManifestPersistence.from_object(
            corpus3.ctx, Model().decode(patched_model_json), local_root)
        file_partition = manifest_from_file.entities[0].data_partitions[0]
        self.assertEqual(expected_location, file_partition.location)
Exemplo n.º 2
0
    async def test_model_json_data_partition_location_consistency(self):
        '''
            Check that a data partition location comes out as the same relative
            path, 'EpisodeOfCare/partition-data.csv', for a manifest that is:
             1. Read directly from default.manifest.cdm.json.
             2. Converted to model.json data and then converted back.
             3. Built from the model.json input file, after its hard-coded
                partition path is swapped for the local adapter path.
        '''
        # Imported locally so this method does not depend on a module-level import.
        import os

        test_name = 'TestModelJsonDataPartitionLocationConsistency'
        expected_location = 'EpisodeOfCare/partition-data.csv'

        cdm_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        manifest_read = await cdm_corpus.fetch_object_async(
            'default.manifest.cdm.json',
            cdm_corpus.storage.fetch_root_folder('local'))
        self.assertEqual(expected_location,
                         manifest_read.entities[0].data_partitions[0].location)

        converted_to_model_json = await ManifestPersistence.to_data(
            manifest_read, None, None)
        location = converted_to_model_json.entities[0]['partitions'][0][
            'location']  # type: str

        # Model Json uses absolute adapter path.
        # Build the expected fragment with the platform separator: on Windows
        # this is the same backslash-separated string as before, and elsewhere
        # it no longer assumes backslashes.
        expected_fragment = os.path.join(
            'testdata', 'persistence', 'modeljson', 'data_partition',
            'TestModelJsonDataPartitionLocationConsistency', 'input',
            'EpisodeOfCare', 'partition-data.csv')
        self.assertIn(expected_fragment, location)

        cdm_corpus2 = TestHelper.get_local_corpus(self.tests_subpath,
                                                  test_name)
        manifest_after_conversion = await ManifestPersistence.from_data(
            cdm_corpus2.ctx, converted_to_model_json,
            cdm_corpus2.storage.fetch_root_folder('local'))
        self.assertEqual(
            expected_location,
            manifest_after_conversion.entities[0].data_partitions[0].location)

        cdm_corpus3 = TestHelper.get_local_corpus(self.tests_subpath,
                                                  test_name)
        read_file = TestHelper.get_input_file_content(self.tests_subpath,
                                                      test_name, 'model.json')
        namespace_folder = cdm_corpus3.storage.fetch_root_folder('local')
        # Swap the path hard-coded in the input file for this machine's adapter
        # path; backslashes are doubled to match the escaping in the file text.
        model_json_as_string = read_file.replace(
            'C:\\\\cdm\\\\CDM.ObjectModel.CSharp\\\\Microsoft.CommonDataModel\\\\Microsoft.CommonDataModel.ObjectModel.Tests\\\\TestData\\\\Persistence\\\\ModelJson\\\\DataPartition\\\\TestModelJsonDataPartitionLocationConsistency\\\\Input\\\\EpisodeOfCare\\\\partition-data.csv',
            location.replace('\\', '\\\\'))

        manifest_read_from_model_json = await ManifestPersistence.from_data(
            cdm_corpus3.ctx,
            Model().decode(model_json_as_string), namespace_folder)
        self.assertEqual(
            expected_location, manifest_read_from_model_json.
            entities[0].data_partitions[0].location)
Exemplo n.º 3
0
    async def _load_document_from_path_async(self, folder: 'CdmFolderDefinition', doc_name: str,
                                             doc_container: 'CdmDocumentDefinition',
                                             res_opt: Optional[ResolveOptions] = None) \
            -> 'CdmDocumentDefinition':
        """Read a document from storage and convert it into a document object.

        The file at ``folder.folder_path + doc_name`` is read through the storage
        adapter registered for ``folder.namespace``. The document name's extension
        selects the persistence class used to convert the content (manifest/folio,
        model.json, or plain CDM document).

        arguments:
        folder: The folder the document lives in; the loaded document is appended
            to ``folder.documents``.
        doc_name: The file name of the document inside the folder.
        doc_container: When truthy, the loaded content is copied into this
            existing document and that object is returned instead of a new one.
        res_opt: When given with ``shallow_validation`` set, a failed read is
            logged as a warning instead of an error.

        Returns the loaded document, or None when reading, name validation or
        conversion fails (each failure is logged).
        """
        #  go get the doc
        doc_content = None  # type: Optional[CdmDocumentDefinition]
        json_data = None
        fs_modified_time = None
        doc_path = folder.folder_path + doc_name
        adapter = self._ctx.corpus.storage.fetch_adapter(
            folder.namespace)  # type: StorageAdapter

        try:
            if adapter.can_read():
                # log message used by navigator, do not change or remove
                logger.debug(self._ctx, self._TAG,
                             self._load_document_from_path_async.__name__,
                             doc_path, 'request file: {}'.format(doc_path))
                json_data = await adapter.read_async(doc_path)
                # log message used by navigator, do not change or remove
                logger.debug(self._ctx, self._TAG,
                             self._load_document_from_path_async.__name__,
                             doc_path, 'received file: {}'.format(doc_path))
            else:
                # Raised inside the try so it is reported by the handler below.
                raise Exception('Storage Adapter is not enabled to read.')
        except Exception as e:
            # log message used by navigator, do not change or remove
            logger.debug(self._ctx, self._TAG,
                         self._load_document_from_path_async.__name__,
                         doc_path, 'fail file: {}'.format(doc_path))

            # when shallow validation is enabled, log messages about being unable to find referenced documents as warnings instead of errors.
            if res_opt and res_opt.shallow_validation:
                # Uses the same `namespace` attribute as the error branch below;
                # the previous `folder.Namespace` raised AttributeError here.
                logger.warning(
                    self._ctx, self._TAG,
                    PersistenceLayer._load_document_from_path_async.__name__,
                    doc_path, CdmLogCode.WARN_PERSIST_FILE_READ_FAILURE,
                    doc_path, str(folder.namespace), e)
            else:
                logger.error(self._ctx, self._TAG,
                             self._load_document_from_path_async.__name__,
                             doc_path,
                             CdmLogCode.ERR_PERSIST_FILE_READ_FAILURE,
                             doc_path, str(folder.namespace), e)
            return None

        # A failure to compute the modified time is only a warning; loading continues
        # with fs_modified_time left as None.
        try:
            fs_modified_time = await adapter.compute_last_modified_time_async(
                doc_path)
        except Exception as e:
            # Python exceptions have no `Message` attribute; str(e) gives the text.
            logger.warning(
                self._ctx, self._TAG,
                PersistenceLayer._load_document_from_path_async.__name__,
                doc_path, CdmLogCode.WARN_PERSIST_FILE_MOD_COMPUTE_FAILED,
                str(e))

        if not doc_name:
            logger.error(self._ctx, self._TAG,
                         self._load_document_from_path_async.__name__,
                         doc_path, CdmLogCode.ERR_PERSIST_NULL_DOC_NAME)
            return None

        doc_name_lower = doc_name.lower()

        # If loading an model.json file, check that it is named correctly.
        if doc_name_lower.endswith(
                self.MODEL_JSON_EXTENSION
        ) and not doc_name_lower == self.MODEL_JSON_EXTENSION:
            logger.error(self._ctx, self._TAG,
                         self._load_document_from_path_async.__name__,
                         doc_path,
                         CdmLogCode.ERR_PERSIST_DOC_NAME_LOAD_FAILURE,
                         doc_name, self.MODEL_JSON_EXTENSION)
            return None

        try:
            if doc_name_lower.endswith(PersistenceLayer.MANIFEST_EXTENSION
                                       ) or doc_name_lower.endswith(
                                           PersistenceLayer.FOLIO_EXTENSION):
                from cdm.persistence.cdmfolder import ManifestPersistence
                from cdm.persistence.cdmfolder.types import ManifestContent
                manifest = ManifestContent()
                manifest.decode(json_data)
                doc_content = ManifestPersistence.from_object(
                    self._ctx, doc_name, folder.namespace, folder.folder_path,
                    manifest)
            elif doc_name_lower.endswith(
                    PersistenceLayer.MODEL_JSON_EXTENSION):
                # Same naming check as the guard above, kept as a defensive re-check.
                if doc_name_lower != PersistenceLayer.MODEL_JSON_EXTENSION:
                    logger.error(self._ctx, self._TAG,
                                 self._load_document_from_path_async.__name__,
                                 doc_path,
                                 CdmLogCode.ERR_PERSIST_DOC_NAME_LOAD_FAILURE,
                                 doc_name, self.MODEL_JSON_EXTENSION)
                    return None
                from cdm.persistence.modeljson import ManifestPersistence
                from cdm.persistence.modeljson.types import Model
                model = Model()
                model.decode(json_data)
                doc_content = await ManifestPersistence.from_object(
                    self._ctx, model, folder)
            elif doc_name_lower.endswith(PersistenceLayer.CDM_EXTENSION):
                from cdm.persistence.cdmfolder import DocumentPersistence
                from cdm.persistence.cdmfolder.types import DocumentContent
                document = DocumentContent()
                document.decode(json_data)
                doc_content = DocumentPersistence.from_object(
                    self._ctx, doc_name, folder.namespace, folder.folder_path,
                    document)
            else:
                # Could not find a registered persistence class to handle this document type.
                logger.error(self._ctx, self._TAG,
                             self._load_document_from_path_async.__name__,
                             doc_path, CdmLogCode.ERR_PERSIST_CLASS_MISSING,
                             doc_name)
                return None
        except Exception as e:
            logger.error(self._ctx, self._TAG,
                         self._load_document_from_path_async.__name__,
                         doc_path,
                         CdmLogCode.ERR_PERSIST_DOC_CONVERSION_FAILURE,
                         doc_path, e)
            return None

        # add document to the folder, this sets all the folder/path things, caches name to content association and may trigger indexing on content
        if doc_content is not None:
            if doc_container:
                # there are situations where a previously loaded document must be re-loaded.
                # the end of that chain of work is here where the old version of the document has been removed from
                # the corpus and we have created a new document and loaded it from storage and after this call we will probably
                # add it to the corpus and index it, etc.
                # it would be really rude to just kill that old object and replace it with this replicant, especially because
                # the caller has no idea this happened. so... sigh ... instead of returning the new object return the one that
                # was just killed off but make it contain everything the new document loaded.
                doc_content = doc_content.copy(
                    ResolveOptions(wrt_doc=doc_container,
                                   directives=self._ctx.corpus.
                                   default_resolution_directives),
                    doc_container)

            folder.documents.append(doc_content, doc_name)
            doc_content._file_system_modified_time = fs_modified_time
            doc_content._is_dirty = False

        return doc_content
Exemplo n.º 4
0
# Round-trip check for Model encode/decode: build a small model, encode it,
# decode the result into a fresh Model, re-encode, and compare both encodings.

# An attribute that also carries an 'extension_fields' value.
attrib = Attribute()
attrib.dataType = 'Attribute'
attrib.name = 'accountId'
attrib.traits = [{'is.primaryId': 'true'}]
attrib.extension_fields = 'extfieldsvalue'

# An entity holding that attribute, also with an 'extension_fields' value.
entity = LocalEntity()
entity.type = 'LocalEntity'
entity.name = 'Account'
entity.traits = [{'is.some.trait': 'foo'}]
entity.annotations = [{'some:annotation': 'bar'}]
entity.attributes = [attrib]
entity.extension_fields = 'extfieldsvalue'

# The model under test, containing the single entity.
model = Model()
model.name = 'SomeModel'
model.description = 'SomeDescription'
model.version = '1.0'
model.entities = [entity]

modelJson = model.encode()

# Decode into a fresh instance and encode again.
modelCheck = Model()
modelCheck.decode(modelJson)
modelCheckJson = modelCheck.encode()

# Encoding must be stable across the decode/encode round trip.
assert modelJson == modelCheckJson
assert not modelCheck.entities[0].get('extension_fields')  # there should be no extension_fields in deserialized object
assert not modelCheck.entities[0].attributes[0].get('extension_fields')  # there should be no extension_fields in deserialized object
Exemplo n.º 5
0
    async def _fetch_document_from_folder_path_async(
            self, document_path: str, adapter: 'StorageAdapterBase',
            force_reload: bool) -> 'CdmDocumentDefinition':
        """Load the document named by the first segment of a path in this folder.

        arguments:
        document_path: Path whose portion before the first '/' is the document name.
        adapter: The storage adapter used to read the document.
        force_reload: When false, a document already in the lookup table is
            returned without reading from storage.

        Returns the loaded document, or None when reading through the adapter raises."""
        from .cdm_corpus_def import FOLIO_EXTENSION, MANIFEST_EXTENSION, MODEL_JSON_EXTENSION

        # Only the text before the first '/' names the document.
        doc_name = document_path.split('/', 1)[0]

        # Serve from the lookup table unless the caller asked for a reload.
        if not force_reload and doc_name in self._document_lookup:
            return self._document_lookup[doc_name]

        # The extension decides which persistence class converts the content.
        is_cdm_folder = doc_name.endswith((FOLIO_EXTENSION, MANIFEST_EXTENSION))
        is_model_json = doc_name.endswith(MODEL_JSON_EXTENSION)

        doc = None
        json_data = None
        fs_modified_time = None
        doc_path = '{}{}'.format(self.folder_path, doc_name)

        # Read the raw content and its last-modified time through the adapter.
        try:
            if adapter.can_read():
                json_data = await adapter.read_async(doc_path)
                adapter_path = adapter.create_adapter_path(doc_path)
                fs_modified_time = await adapter.compute_last_modified_time_async(
                    adapter_path)
        except Exception:
            self.corpus.ctx.logger.exception(
                'Could not read %s from the \'%s\' namespace.', doc_path,
                self.namespace)
            return None

        if is_cdm_folder:
            from cdm.persistence.cdmfolder import ManifestPersistence
            from cdm.persistence.cdmfolder.types import ManifestContent
            manifest_content = ManifestContent()
            manifest_content.decode(json_data)
            doc = await ManifestPersistence.from_data(
                self.ctx, doc_name, self.namespace, self.folder_path,
                manifest_content)
            # Register the manifest with this folder, the corpus and the lookup table.
            doc.folder = self
            self.documents.append(doc)
            self.corpus._add_document_objects(self, doc)
            self._document_lookup[doc_name] = doc
        elif is_model_json:
            from cdm.persistence.modeljson import ManifestPersistence
            from cdm.persistence.modeljson.types import Model
            model_content = Model()
            model_content.decode(json_data)
            doc = await ManifestPersistence.from_data(self.ctx, model_content,
                                                      self)
            doc.folder = self
            doc.folder_path = self.folder_path
        else:
            from cdm.persistence.cdmfolder import DocumentPersistence
            from cdm.persistence.cdmfolder.types import DocumentContent
            document_content = DocumentContent()
            document_content.decode(json_data)
            loaded = await DocumentPersistence.from_data(
                self.ctx, doc_name, self.namespace, self.folder_path,
                document_content)
            # The value returned by the collection's append is used as the document.
            doc = self.documents.append(loaded)

        doc._file_system_modified_time = fs_modified_time
        doc._is_dirty = False

        return doc
Exemplo n.º 6
0
async def load_document_from_path_async(
        folder: 'CdmFolderDefinition', doc_name: str,
        doc_container: 'CdmDocumentDefinition') -> 'CdmDocumentDefinition':
    """Read a document from storage and convert it into a document object.

    The file at ``folder.folder_path + doc_name`` is read through the storage
    adapter registered for ``folder.namespace``. The document name's extension
    selects the persistence class used to convert the content.

    arguments:
    folder: The folder the document lives in; the loaded document is appended
        to ``folder.documents``.
    doc_name: The file name of the document inside the folder.
    doc_container: When truthy, the loaded content is copied into this existing
        document and that object is returned instead of a new one.

    Returns the loaded document, or None when the read or the conversion fails
    (each failure is logged as an error).
    """
    doc_name_lower = doc_name.lower()
    is_cdm_folder = doc_name_lower.endswith(
        (CdmCorpusDefinition._FOLIO_EXTENSION,
         CdmCorpusDefinition._MANIFEST_EXTENSION))
    is_model_json = doc_name_lower.endswith(
        CdmCorpusDefinition._MODEL_JSON_EXTENSION)

    #  go get the doc
    doc_content = None  # type: Optional[CdmDocumentDefinition]
    json_data = None
    fs_modified_time = None
    ctx = folder.ctx
    doc_path = folder.folder_path + doc_name
    adapter = ctx.corpus.storage.fetch_adapter(
        folder.namespace)  # type: StorageAdapter

    try:
        if not adapter.can_read():
            # Report an unreadable adapter as a read failure instead of falling
            # through with no data and surfacing it as a conversion error.
            raise Exception('Storage Adapter is not enabled to read.')
        json_data = await adapter.read_async(doc_path)
        fs_modified_time = await adapter.compute_last_modified_time_async(
            adapter.create_adapter_path(doc_path))
        ctx.logger.info('read file: {}'.format(doc_path))
    except Exception as e:
        ctx.logger.error(
            'could not read %s from the \'%s\' namespace.\n Reason: \n%s',
            doc_path, folder.namespace, e)
        return None

    try:
        if is_cdm_folder:
            from cdm.persistence.cdmfolder import ManifestPersistence
            from cdm.persistence.cdmfolder.types import ManifestContent
            manifest = ManifestContent()
            manifest.decode(json_data)
            doc_content = await ManifestPersistence.from_data(
                ctx, doc_name, folder.namespace, folder.folder_path, manifest)
        elif is_model_json:
            # The file must be named exactly as the model.json extension constant.
            if doc_name_lower != CdmCorpusDefinition._MODEL_JSON_EXTENSION:
                ctx.logger.error(
                    'Failed to load \'{}\', as it\'s not an acceptable filename. It must be model.json'
                    .format(doc_name))
                return None
            from cdm.persistence.modeljson import ManifestPersistence
            from cdm.persistence.modeljson.types import Model
            model = Model()
            model.decode(json_data)
            doc_content = await ManifestPersistence.from_data(
                ctx, model, folder)
        else:
            from cdm.persistence.cdmfolder import DocumentPersistence
            from cdm.persistence.cdmfolder.types import DocumentContent
            document = DocumentContent()
            document.decode(json_data)
            doc_content = await DocumentPersistence.from_data(
                ctx, doc_name, folder.namespace, folder.folder_path, document)
    except Exception as e:
        ctx.logger.error('Could not convert \'{}\'. Reason \'{}\''.format(
            doc_path, e))
        return None

    # add document to the folder, this sets all the folder/path things, caches name to content association and may trigger indexing on content
    if doc_content is not None:
        if doc_container:
            # there are situations where a previously loaded document must be re-loaded.
            # the end of that chain of work is here where the old version of the document has been removed from
            # the corpus and we have created a new document and loaded it from storage and after this call we will probably
            # add it to the corpus and index it, etc.
            # it would be really rude to just kill that old object and replace it with this replicant, especially because
            # the caller has no idea this happened. so... sigh ... instead of returning the new object return the one that
            # was just killed off but make it contain everything the new document loaded.
            doc_content = doc_content.copy(
                ResolveOptions(wrt_doc=doc_container), doc_container)

        folder.documents.append(doc_content, doc_name)
        doc_content._file_system_modified_time = fs_modified_time
        doc_content._is_dirty = False

    return doc_content