def test_datastore_config(self):
    self.assertEqual({
        'some': 'config',
        'name': 'DatastoreA',
    }, get_datastore_config('DatastoreA'))

    with self.assertRaises(GOBConfigException):
        get_datastore_config('NonExistent')

def _init_config(self):
    """Initialize dumper configuration.

    If localhost is used then use the public GOB url; in all other cases use
    the GOB secure url. Read the destination database properties from the
    environment.

    :return:
    """
    api_host = get_host()
    api_url = PUBLIC_URL if any(host in api_host for host in ["localhost", "gobapi"]) else SECURE_URL
    self.dump_api = f"{api_host}{api_url}"
    self.db_config = get_datastore_config(ANALYSE_DB_DATASTORE_ID)

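# A minimal, self-contained sketch of the URL selection in _init_config above.
# The host values and URL defaults are illustrative assumptions, not the real
# PUBLIC_URL and SECURE_URL settings.
def _select_api_url_sketch(api_host, public_url="/gob/public", secure_url="/gob"):
    """Mirror the dumper's host-based URL choice, for illustration only."""
    return public_url if any(host in api_host for host in ["localhost", "gobapi"]) else secure_url

# e.g. _select_api_url_sketch("http://localhost:8141") == "/gob/public"
# and  _select_api_url_sketch("https://acc.api.example") == "/gob"
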
def _get_datastore(destination_name: str):
    """Returns Datastore and base_directory for Datastore.

    The returned Datastore has an initialised connection for destination_name.

    :param destination_name:
    :return:
    """
    datastore_config = get_datastore_config(destination_name)
    datastore = DatastoreFactory.get_datastore(datastore_config)
    datastore.connect()

    # Prepend main directory to file, except for ObjectDatastore, as this will use a container by default
    base_directory = f"{CONTAINER_BASE}/" if not isinstance(datastore, ObjectDatastore) else ""

    return datastore, base_directory

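# A hedged usage sketch of _get_datastore. "SomeDestination" is a hypothetical
# destination name; a real call requires the matching datastore config to be
# present in the environment.
def _example_target_path(destination_name="SomeDestination"):
    """Show how base_directory prefixes a file path (empty for an ObjectDatastore)."""
    datastore, base_directory = _get_datastore(destination_name)
    try:
        return f"{base_directory}dump/result.csv"
    finally:
        datastore.disconnect()
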
def connect(self):  # noqa: C901
    """The first step of every import is a technical step: a connection
    needs to be set up to a database, filesystem, API, ...

    :return:
    """
    # Get manually added config, or config based on application name
    datastore_config = self.source.get('application_config') or get_datastore_config(self.source['application'])

    read_config = {**self.source.get('read_config', {}), 'mode': self.mode}
    self.datastore = DatastoreFactory.get_datastore(datastore_config, read_config)
    self.datastore.connect()

    logger.info(f"Connection to {self.app} {self.datastore.user} has been made.")

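# A hedged sketch of the `source` mapping that connect() above consumes. The
# key names come from the code; the values are invented placeholders.
_example_source = {
    'application': 'SomeApplication',  # resolved via get_datastore_config when no manual config is given
    # 'application_config': {...},     # optional: overrides the name-based lookup
    'read_config': {},                 # merged with {'mode': self.mode} before the datastore is built
}
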
def __init__(self, config, row_formatter=None):
    """Constructor

    Lazy loading: just register the objectstore connection and reader, and
    wait for the iterator to be called to load the data.

    :param config:
    """
    self.config = config
    self.objectstore_config = get_datastore_config(self.config['objectstore'])
    self.datastore = DatastoreFactory.get_datastore(self.objectstore_config, self.config)
    assert isinstance(self.datastore, ObjectDatastore)

    self.datastore.connect()
    self.row_formatter = row_formatter

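# A hedged sketch of the `config` mapping the constructor above expects. Only
# the 'objectstore' key is evidenced by the code; the read options shown are
# assumptions modelled on the ObjectDatastore read_config used elsewhere in
# this section.
_example_reader_config = {
    'objectstore': 'Basisinformatie',  # name passed to get_datastore_config
    'file_filter': 'some/path.xlsx',   # assumed read option, passed along as read_config
    'file_type': 'XLS',                # assumed read option
}
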
def _get_cbs_features(path: str) -> dict[str, dict[str, str]]:
    """Gets the CBS codes from the Objectstore.

    Returns a dict with the CBS code and naam (wijk or buurt), keyed on the
    local code.

    :param path: the path to the source file
    :return: a dict with CBS code and CBS naam, mapped on the local code
    """
    datastore = ObjectDatastore(
        connection_config=get_datastore_config("Basisinformatie"),
        read_config={
            "file_filter": path,
            "file_type": "XLS"
        })
    datastore.connect()
    result = list(datastore.query(''))
    datastore.disconnect()

    if not result:
        raise GOBException(f"No CBS features found for path '{path}'")

    return {row[0]: {"code": row[1], "naam": row[2]} for row in result}

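# A hedged illustration of the mapping _get_cbs_features builds: rows are keyed
# on the local code (row[0]). The sample row is invented; real rows come from
# the XLS file on the Objectstore.
_sample_rows = [("A01", "BU03630001", "Burgwallen-Oude Zijde")]
_sample_features = {row[0]: {"code": row[1], "naam": row[2]} for row in _sample_rows}
# _sample_features == {"A01": {"code": "BU03630001", "naam": "Burgwallen-Oude Zijde"}}
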
def test(catalogue):
    """Test export files for a given catalogue.

    :param catalogue: catalogue to test
    :return: None
    """
    logger.info(f"Test export for catalogue {catalogue}")

    logger.info("Connect to Objectstore")
    config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(config)
    datastore.connect()

    container_name = CONTAINER_BASE
    logger.info(f"Load files from {container_name}")

    conn_info = {
        "connection": datastore.connection,
        "container": container_name
    }

    # Get test definitions for the given catalogue
    checks = _get_checks(conn_info, catalogue)

    # Make proposals for any missing test definitions
    proposals = {}
    for config in _export_config[catalogue]:
        resolve_config_filenames(config)

        for name, product in config.products.items():
            filenames = [product['filename']] + \
                [extra_file['filename'] for extra_file in product.get('extra_files', [])]

            for filename in filenames:
                # Check the previously exported file at its temporary location
                obj_info, obj = _get_file(conn_info, f"{EXPORT_DIR}/{catalogue}/{filename}")

                # Clone check so that changes to the check file don't affect other runs
                check = copy.deepcopy(_get_check(checks, filename))

                # Report results with the name of the matched file
                matched_filename = obj_info['name'] if obj_info else filename

                if obj_info is None:
                    logger.error(f"File {filename} MISSING")
                elif check:
                    stats = _get_analysis(obj_info, obj, check)
                    if _check_file(check, matched_filename, stats):
                        logger.info(f"Check {matched_filename} OK")
                        # Copy the file to its final location
                        distribute_file(conn_info, matched_filename)
                    else:
                        logger.info(f"Check {matched_filename} FAILED")
                        _propose_check_file(proposals, filename, obj_info, obj)
                else:
                    logger.warning(f"File {filename} UNCHECKED")
                    # Do not copy unchecked files
                    _propose_check_file(proposals, filename, obj_info, obj)

    # Write out any missing test definitions
    _write_proposals(conn_info, catalogue, checks, proposals)

def __init__(self):
    config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(config)
    datastore.connect()

    self.connection = datastore.connection

def _export_collection(host, catalogue, collection, product_name, destination):  # noqa: C901
    """Export a collection from a catalogue.

    :param host: The API host to retrieve the catalogue and collection from
    :param catalogue: The name of the catalogue
    :param collection: The name of the collection
    :param product_name: The name of the product to export
    :param destination: The destination of the resulting output file(s)
    :return:
    """
    logger.info(f"Export {catalogue}:{collection} to {destination} started.")

    # Get the configuration for this collection
    config = CONFIG_MAPPING[catalogue][collection]
    resolve_config_filenames(config)

    files = []

    # If a product has been supplied, export only that product
    try:
        products = {product_name: config.products[product_name]} if product_name else config.products
    except KeyError:
        logger.error(f"Product '{product_name}' not found")
        return

    # Start exporting each product
    for name, product in products.items():
        logger.info(f"Export to file '{name}' started, API type: {product.get('api_type', 'REST')}")

        # Get name of local file to write results to
        results_file = _get_filename(product['filename']) if destination == "Objectstore" else product['filename']

        if product.get('append', False):
            # Add .to_append to avoid writing to the previously created file
            results_file = _get_filename(f"{product['filename']}.to_append")
            product['append_to_filename'] = _get_filename(product['filename']) \
                if destination == "Objectstore" \
                else product['filename']

        # Buffer items if they are used multiple times. This prevents calling the API multiple times for the same data
        source = product_source(product)
        buffer_items = len(list(filter(lambda p: product_source(p) == source, config.products.values()))) > 1
        logger.info(f"Buffering API output {'enabled' if buffer_items else 'disabled'}")

        try:
            row_count = _with_retries(lambda: export_to_file(
                host, product, results_file, catalogue,
                product.get('collection', collection),
                buffer_items=buffer_items))
        except Exception as e:
            logger.error(f"Export to local file {name} failed: {str(e)}.")
        else:
            logger.info(f"{row_count} records exported to local file {name}.")

            if product.get('append', False):
                # Append temporary file to existing file and clean up temp file
                _append_to_file(results_file, product['append_to_filename'])
                os.remove(results_file)
            else:
                # Do not add file to files again when appending
                files.append({
                    'temp_location': results_file,
                    'distribution': product['filename'],
                    'mime_type': product['mime_type']
                })

                # Add extra result files (e.g. .prj file)
                extra_files = product.get('extra_files', [])
                files.extend([{
                    'temp_location': _get_filename(file['filename']),
                    'distribution': file['filename'],
                    'mime_type': file['mime_type']
                } for file in extra_files])

    if destination == "Objectstore":
        # Get objectstore connection
        config = get_datastore_config(GOB_OBJECTSTORE)
        datastore = DatastoreFactory.get_datastore(config)
        datastore.connect()
        assert isinstance(datastore, ObjectDatastore)

        connection = datastore.connection
        logger.info(f"Connection to {destination} {datastore.user} has been made.")

    # Start distribution of all resulting files
    for file in files:
        logger.info(f"Write file '{file['distribution']}'.")

        if destination == "Objectstore":
            # Distribute to pre-final location
            container = f'{CONTAINER_BASE}/{EXPORT_DIR}/{catalogue}/'
            with open(file['temp_location'], 'rb') as fp:
                try:
                    distribute_to_objectstore(connection, container, file['distribution'], fp, file['mime_type'])
                except GOBException as e:
                    logger.error(
                        f"Failed to copy to {destination} on location: "
                        f"{container}{file['distribution']}. Error: {e}")
                    return False

            logger.info(f"File copied to {destination} on location: {container}{file['distribution']}.")

            cleanup_datefiles(connection, CONTAINER_BASE, f"{EXPORT_DIR}/{catalogue}/{file['distribution']}")

            # Delete temp file
            os.remove(file['temp_location'])
        elif destination == "File":
            logger.info(f"Export is written to {file['distribution']}.")

    logger.info("Export completed")

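# A hypothetical invocation of _export_collection above. The host, catalogue,
# collection and destination values are placeholders; a real run requires
# CONFIG_MAPPING to contain the catalogue/collection pair.
#
#   _export_collection("https://api.example", "gebieden", "buurten",
#                      product_name=None, destination="Objectstore")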