def _relation_indexes_for_collection(catalog_name, collection_name, collection, idx_prefix):
    """Collect the index definitions needed for the relations of a collection.

    Every field of type GOB.Reference or GOB.ManyReference is inspected. For
    each relation specification found for such a field, two index entries are
    produced: one on the source column in the collection's own table and one
    on the destination attribute in the referenced collection's table.

    :param catalog_name: catalog name, passed to the model and sources lookups
    :param collection_name: collection name, passed to the model and sources lookups
    :param collection: collection definition; its 'all_fields' mapping is read
    :param idx_prefix: prefix used to build the names of the source-side indexes
    :return: dict mapping index name to a dict with table_name, columns and type
    """
    model = GOBModel()
    sources = GOBSources()
    indexes = {}

    table_name = model.get_table_name(catalog_name, collection_name)

    fields = collection['all_fields']
    reference_types = ('GOB.Reference', 'GOB.ManyReference')
    refs_by_column = {}
    for column, desc in fields.items():
        if desc['type'] in reference_types:
            refs_by_column[column] = desc['ref']

    for column, ref in refs_by_column.items():
        dst_table_name = model.get_table_name_from_ref(ref)
        dst_collection = model.get_collection_from_ref(ref)
        dst_catalog_name, _ = model.get_catalog_collection_names_from_ref(ref)
        dst_catalog = model.get_catalog(dst_catalog_name)

        for relation in sources.get_field_relations(catalog_name, collection_name, column):
            # Computed per relation on purpose: nothing is read from the
            # destination catalog/collection when a field has no relations.
            dst_prefix = f"{dst_catalog['abbreviation']}_{dst_collection['abbreviation']}".lower()

            # Source side: an explicit source_attribute overrides the field name
            if 'source_attribute' in relation:
                src_column = f"{relation['source_attribute']}"
            else:
                src_column = f"{column}"

            src_name = _hashed_index_name(idx_prefix, _remove_leading_underscore(src_column))
            indexes[src_name] = {
                "table_name": table_name,
                "columns": [src_column],
                "type": _get_special_column_type(fields[src_column]['type']),
            }

            # Destination side
            dst_column = relation['destination_attribute']
            dst_name = _hashed_index_name(dst_prefix, _remove_leading_underscore(dst_column))
            indexes[dst_name] = {
                "table_name": dst_table_name,
                "columns": [dst_column],
                "type": _get_special_column_type(dst_collection['all_fields'][dst_column]['type']),
            }

    return indexes
class TestSources(unittest.TestCase):
    """Tests for the relation lookups offered by GOBSources."""

    def setUp(self):
        # Every test works on its own GOBSources instance
        self.sources = GOBSources()

    def test_get_relations(self):
        # The relations of a collection are returned as a list
        relations = self.sources.get_relations('nap', 'peilmerken')
        self.assertIsInstance(relations, list)

    def test_get_field_relations_keyerror(self):
        # A KeyError raised by get_relations results in an empty list
        self.sources.get_relations = MagicMock(side_effect=KeyError)

        result = self.sources.get_field_relations('catalog', 'collection', 'fieldname')

        self.assertEqual([], result)
class TestSources(unittest.TestCase):
    """Tests for the relation lookups offered by GOBSources."""

    def setUp(self):
        # Every test works on its own GOBSources instance
        self.sources = GOBSources()

    def test_get_relations(self):
        # The relations of a collection are returned as a list
        relations = self.sources.get_relations('nap', 'peilmerken')
        self.assertIsInstance(relations, list)
def _check_message(msg: dict):
    """Validate the header of a message before it is processed.

    The header must carry non-empty values for the catalog, collection and
    attribute keys, the catalog and collection must be known to the model and
    a relation specification must exist for the attribute.

    :param msg: message dict; its 'header' entry is inspected
    :raises GOBException: on the first check that fails
    :return: None
    """
    header = msg.get('header', {})

    # Report the first required key that is absent or empty
    missing = [key for key in (CATALOG_KEY, COLLECTION_KEY, ATTRIBUTE_KEY) if not header.get(key)]
    if missing:
        raise GOBException(f"Missing {missing[0]} attribute in header")

    model = GOBModel()
    sources = GOBSources()

    catalog_name = header[CATALOG_KEY]
    collection_name = header[COLLECTION_KEY]
    attribute_name = header[ATTRIBUTE_KEY]

    if not model.get_catalog(catalog_name):
        raise GOBException(f"Invalid catalog name {catalog_name}")

    if not model.get_collection(catalog_name, collection_name):
        raise GOBException(f"Invalid catalog/collection combination: {catalog_name}/{collection_name}")

    if not sources.get_field_relations(catalog_name, collection_name, attribute_name):
        raise GOBException(f"Missing relation specification for {catalog_name} {collection_name} "
                           f"{attribute_name}")
def check_relations(src_catalog_name, src_collection_name, src_field_name):
    """Run the quality checks for missing and dangling relations of a field.

    A relation is reported when it has no bronwaarde ("missing") or when its
    bronwaarde cannot be matched with any referenced entity ("dangling").
    Sources that declare none_allowed are left out of the checks; when every
    source allows empty relations the checks are skipped altogether.

    :param src_catalog_name:
    :param src_collection_name:
    :param src_field_name:
    :return: None
    """
    name = f"{src_collection_name} {src_field_name}"

    relation_sources = GOBSources().get_field_relations(src_catalog_name, src_collection_name, src_field_name)

    # Keep only the sources for which an empty relation is not acceptable
    strict_sources = []
    for spec in relation_sources:
        if spec.get('none_allowed', False):
            continue
        strict_sources.append(spec['source'])

    if not strict_sources:
        logger.info(
            f"All sources for {src_catalog_name} {src_collection_name} {src_field_name} allow empty "
            f"relations. Skipping check.")
        return

    # A source filter is only needed when some, but not all, sources are checked
    if len(strict_sources) == len(relation_sources):
        source_filter = None
    else:
        source_filter = strict_sources

    def _run_check(query_type, qa_check):
        query = _get_relation_check_query(query_type, src_catalog_name, src_collection_name,
                                          src_field_name, source_filter)
        _query_missing(query, qa_check, name)

    _run_check("missing", QA_CHECK.Sourcevalue_exists)
    _run_check("dangling", QA_CHECK.Reference_exists)
import json import os import sys # To have access to the gobapi module, while still being able to run python amschema.py sys.path.append(os.path.join('..')) from gobapi.auth.auth_query import Authority # noqa: E402, module level import not at top of file # Suppress any output from GOBModel class (otherwise GOB Model messages can appear in the schema output) sys.stdout = open(os.devnull, 'w') from gobcore.model import GOBModel # noqa: E402, module level import not at top of file from gobcore.model.metadata import FIELD # noqa: E402, module level import not at top of file from gobcore.sources import GOBSources # noqa: E402, module level import not at top of file model = GOBModel() sources = GOBSources() sys.stdout = sys.__stdout__ def get_schema(catalog_name, collection_name=None): """ Get a Amsterdam Schema for the given catalog If a collection is specified only the schema for the given collection is returned :param catalog_name: :param collection_name: :return: """ if collection_name: schema = _get_collection_schema(catalog_name, collection_name)
def setUp(self):
    # Every test works on its own GOBSources instance
    self.sources = GOBSources()
def _split_job(msg: dict):
    """Split a job into one 'relate' workflow request per reference attribute.

    The catalog is taken from the message header. When the header names no
    collection, all collections of the catalog are processed; when it names no
    attribute, all reference attributes of each collection are processed.
    Attributes without a relation specification and VeryManyReference
    attributes are skipped. Every remaining attribute is published as a
    separate workflow request.

    :param msg: the incoming message; its 'header' supplies 'catalogue' and
        optionally 'collection', 'attribute', 'jobid' and 'stepid'
    :raises AssertionError: when the catalog name is missing or unknown, no
        collections are found, or a collection name is invalid
    :return: None
    """
    header = msg.get('header', {})
    catalog_name = header.get('catalogue')
    collection_name = header.get('collection')
    attribute_name = header.get('attribute')

    assert catalog_name is not None, "A catalog name is required"

    model = GOBModel()
    catalog = model.get_catalog(catalog_name)

    assert catalog is not None, f"Invalid catalog name '{catalog_name}'"

    if collection_name is None:
        collection_names = model.get_collection_names(catalog_name)
    else:
        collection_names = [collection_name]

    assert collection_names, f"No collections specified or found for catalog {catalog_name}"

    # One instance serves every lookup below; there is no need to rebuild it per attribute
    sources = GOBSources()

    with MessageBrokerConnection(CONNECTION_PARAMS) as connection:
        # A separate loop name keeps the collection requested in the header intact
        for current_collection in collection_names:
            collection = model.get_collection(catalog_name, current_collection)
            assert collection is not None, f"Invalid collection name '{current_collection}'"

            logger.info(f"** Split {current_collection}")

            if attribute_name is None:
                attributes = model._extract_references(collection['attributes'])
            else:
                attributes = [attribute_name]

            for attr_name in attributes:
                relation_specs = sources.get_field_relations(catalog_name, current_collection, attr_name)

                if not relation_specs:
                    logger.info(f"Missing relation specification for {catalog_name} {current_collection} "
                                f"{attr_name}. Skipping")
                    continue

                if relation_specs[0]['type'] == fully_qualified_type_name(VeryManyReference):
                    logger.info(f"Skipping VeryManyReference {catalog_name} {current_collection} {attr_name}")
                    continue

                logger.info(f"Splitting job for {catalog_name} {current_collection} {attr_name}")

                split_header = {
                    **header,
                    "catalogue": catalog_name,
                    "collection": current_collection,
                    "attribute": attr_name,
                    "split_from": header.get('jobid'),
                }
                # The split job must not carry the job and step ids of its parent.
                # pop() with a default is used because either id may be absent
                # from the incoming header; 'del' would raise KeyError then.
                split_header.pop('jobid', None)
                split_header.pop('stepid', None)

                split_msg = {
                    **msg,
                    "header": split_header,
                    "workflow": {
                        "workflow_name": "relate",
                    },
                }

                connection.publish(WORKFLOW_EXCHANGE, WORKFLOW_REQUEST_KEY, split_msg)