def export_from_cloud_sql_to_gcs_csv(
    schema_type: SchemaType,
    table_name: str,
    gcs_uri: GcsfsFilePath,
    columns: List[str],
) -> None:
    cloud_sql_client = CloudSQLClientImpl()
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type
    )
    if instance_name is None:
        raise ValueError("Could not find instance name.")
    operation_id = cloud_sql_client.export_to_gcs_csv(
        instance_name=instance_name,
        table_name=table_name,
        gcs_uri=gcs_uri,
        columns=columns,
    )
    if operation_id is None:
        raise RuntimeError("Cloud SQL export operation was not started successfully.")
    operation_succeeded = cloud_sql_client.wait_until_operation_completed(operation_id)
    if not operation_succeeded:
        raise RuntimeError("Cloud SQL export failed.")
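# Hypothetical usage sketch (not from the source): export an OPERATIONS table
# to a CSV file in GCS. The bucket, table, and column names below are
# illustrative assumptions only.
def _example_export_to_gcs_csv() -> None:
    export_from_cloud_sql_to_gcs_csv(
        schema_type=SchemaType.OPERATIONS,
        table_name="person",  # hypothetical table
        gcs_uri=GcsfsFilePath.from_absolute_path(
            "gs://my-export-bucket/person.csv"  # hypothetical bucket
        ),
        columns=["person_id", "full_name"],  # hypothetical columns
    )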
def confirm_correct_db(database: SchemaType) -> None:
    dbname = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(database)
    if dbname is None:
        logging.error("Could not find database instance.")
        logging.error("Exiting...")
        sys.exit(1)
    prompt_for_confirmation(f"Running migrations on {dbname}.", dbname)
def testGetStrippedCloudSqlInstanceId(self, mock_secrets: mock.MagicMock) -> None:
    # Arrange
    mock_secrets.side_effect = [
        "project:region:111",
    ]

    # Act
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type=SchemaType.OPERATIONS
    )

    # Assert
    self.assertEqual(instance_id, "111")
    mock_secrets.assert_called_with("operations_cloudsql_instance_id")
def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name and export_query, retrieve the export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)

    return operation_success
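# For reference, a Cloud SQL Admin API CSV exportContext body (per the public
# sqladmin v1beta4 documentation) looks roughly like the sketch below;
# create_export_context above presumably assembles something of this shape.
# The bucket, database, and query values are hypothetical, not from the source.
_EXAMPLE_EXPORT_CONTEXT = {
    "exportContext": {
        "kind": "sql#exportContext",
        "fileType": "CSV",
        "uri": "gs://my-export-bucket/person.csv",  # hypothetical GCS path
        "databases": ["my_database"],  # CSV exports name exactly one database
        "csvExportOptions": {"selectQuery": "SELECT * FROM person"},
    }
}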
def _import_csv_to_temp_table(
    database_key: SQLAlchemyDatabaseKey,
    schema_type: SchemaType,
    destination_table: str,
    tmp_table_name: str,
    gcs_uri: GcsfsFilePath,
    columns: List[str],
    seconds_to_wait: int = 60,
) -> None:
    """Imports a GCS CSV file to a temp table that is created with the
    destination table as a template."""
    with SessionFactory.using_database(database_key=database_key) as session:
        # Drop old temp table if it exists, create new temp table
        session.execute(f"DROP TABLE IF EXISTS {tmp_table_name}")
        session.execute(
            f"CREATE TABLE {tmp_table_name} AS TABLE {destination_table} WITH NO DATA"
        )

    # Import CSV to temp table
    logging.info("Starting import from GCS URI: %s", gcs_uri)
    logging.info("Starting import to tmp destination table: %s", tmp_table_name)
    logging.info("Starting import using columns: %s", columns)

    cloud_sql_client = CloudSQLClientImpl()
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type=schema_type
    )
    if instance_name is None:
        raise ValueError("Could not find instance name.")

    # Populate the temp table with the CSV file
    operation_id = cloud_sql_client.import_gcs_csv(
        instance_name=instance_name,
        table_name=tmp_table_name,
        gcs_uri=gcs_uri,
        columns=columns,
    )
    if operation_id is None:
        raise RuntimeError("Cloud SQL import operation was not started successfully.")

    operation_succeeded = cloud_sql_client.wait_until_operation_completed(
        operation_id=operation_id,
        seconds_to_wait=seconds_to_wait,
    )
    if not operation_succeeded:
        raise RuntimeError(f"Cloud SQL import to {tmp_table_name} failed.")
def import_gcs_sql(
    self,
    database_key: SQLAlchemyDatabaseKey,
    gcs_uri: GcsfsFilePath,
) -> Optional[str]:
    """Kicks off a Cloud SQL import of a SQL dump file from GCS and returns the
    id ("name") of the started operation, or None if the response has none."""
    logging.debug("Starting Cloud SQL import operation.")
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        database_key.schema_type
    )
    req = self.service.instances().import_(
        project=self.project_id,
        instance=instance_name,
        body={
            "importContext": {
                "database": database_key.db_name,
                "fileType": "SQL",
                "uri": f"gs://{gcs_uri.abs_path()}",
            },
        },
    )
    resp = req.execute()
    return resp.get("name")
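# Hypothetical caller sketch for import_gcs_sql (not from the source): kick off
# a SQL-dump import and block on the returned operation id. The database-key
# constructor and bucket path are assumptions for illustration.
def _example_import_sql_dump(client: CloudSQLClientImpl) -> None:
    operation_id = client.import_gcs_sql(
        database_key=SQLAlchemyDatabaseKey.for_schema(  # assumed constructor
            SchemaType.CASE_TRIAGE
        ),
        gcs_uri=GcsfsFilePath.from_absolute_path("gs://my-bucket/dump.sql"),
    )
    if operation_id is None:
        raise RuntimeError("Cloud SQL SQL import was not started successfully.")
    if not client.wait_until_operation_completed(operation_id):
        raise RuntimeError("Cloud SQL SQL import failed.")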
def import_gcs_csv_to_cloud_sql(
    destination_table: str, gcs_uri: GcsfsFilePath, columns: List[str]
) -> None:
    """Implements the import of GCS CSV to Cloud SQL by creating a temporary table,
    uploading the results to the temporary table, and then swapping the contents
    of the table."""
    engine = SQLAlchemyEngineManager.get_engine_for_schema_base(
        SQLAlchemyEngineManager.declarative_method_for_schema(SchemaType.CASE_TRIAGE)
    )
    if engine is None:
        raise RuntimeError("Could not create postgres sqlalchemy engine")

    # Drop old temporary table if it exists
    tmp_table_name = f"tmp__{destination_table}"
    with engine.connect() as conn:
        conn.execute(f"DROP TABLE IF EXISTS {tmp_table_name}")

    # Create temporary table
    with engine.connect() as conn:
        conn.execute(
            f"CREATE TABLE {tmp_table_name} AS TABLE {destination_table} WITH NO DATA"
        )

    try:
        # Start actual Cloud SQL import
        logging.info("Starting import from GCS URI: %s", gcs_uri)
        logging.info("Starting import to destination table: %s", destination_table)
        logging.info("Starting import using columns: %s", columns)

        cloud_sql_client = CloudSQLClientImpl()
        instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
            SchemaType.CASE_TRIAGE
        )
        if instance_name is None:
            raise ValueError("Could not find instance name.")
        operation_id = cloud_sql_client.import_gcs_csv(
            instance_name=instance_name,
            table_name=tmp_table_name,
            gcs_uri=gcs_uri,
            columns=columns,
        )
        if operation_id is None:
            raise RuntimeError(
                "Cloud SQL import operation was not started successfully."
            )
        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id
        )
        if not operation_succeeded:
            raise RuntimeError("Cloud SQL import failed.")
    except Exception as e:
        logging.warning("Dropping newly created table due to raised exception.")
        # Open a fresh connection here: the `conn` from the blocks above is
        # already closed once its `with` block exits, so executing on it
        # would fail.
        with engine.connect() as conn:
            conn.execute(f"DROP TABLE {tmp_table_name}")
        raise e

    # Swap in new table
    old_table_name = f"old__{destination_table}"
    with engine.begin() as conn:
        conn.execute(f"ALTER TABLE {destination_table} RENAME TO {old_table_name}")
        conn.execute(f"ALTER TABLE {tmp_table_name} RENAME TO {destination_table}")
        conn.execute(f"DROP TABLE {old_table_name}")
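# Hypothetical invocation sketch for import_gcs_csv_to_cloud_sql; the table,
# bucket, and column names are illustrative assumptions. On success the
# destination table is swapped to the freshly imported contents inside a
# single transaction (the engine.begin() block above).
def _example_refresh_table_from_csv() -> None:
    import_gcs_csv_to_cloud_sql(
        destination_table="etl_clients",  # hypothetical table
        gcs_uri=GcsfsFilePath.from_absolute_path(
            "gs://my-bucket/etl_clients.csv"  # hypothetical bucket
        ),
        columns=["state_code", "person_external_id"],  # hypothetical columns
    )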
def main(database: SchemaType, repo_root: str, ssl_cert_path: str, dry_run: bool):
    """Invokes the main code path for running migrations.

    This checks for user validations that the database and branches are correct
    and then runs existing pending migrations.
    """
    requires_ssl = SQLAlchemyEngineManager.database_requires_ssl(metadata.project_id())
    if requires_ssl and not ssl_cert_path:
        logging.error(
            'Specifying an argument to --ssl-cert-path is required for the specified database.'
        )
        logging.error('Exiting...')
        sys.exit(1)

    if dry_run:
        if not local_postgres_helpers.can_start_on_disk_postgresql_database():
            logging.error('pg_ctl is not installed. Cannot perform a dry-run.')
            logging.error('Exiting...')
            sys.exit(1)
        logging.info('Creating a dry-run...\n')

    is_prod = metadata.project_id() == GCP_PROJECT_PRODUCTION
    dbname = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(database)
    if is_prod:
        logging.info('RUNNING AGAINST PRODUCTION\n')

    db_check = input(
        f'Running new migrations on {dbname}. Please type "{dbname}" to confirm. (Anything else exits):\n'
    )
    if db_check != dbname:
        logging.warning('\nConfirmation aborted.')
        sys.exit(1)

    # Get user to validate git branch
    try:
        repo = Repository(repo_root)
    except Exception as e:
        logging.error('Improper project root provided: %s', e)
        sys.exit(1)

    current_branch = repo.head.shorthand
    if is_prod and not current_branch.startswith('releases/'):
        logging.error(
            'Migrations run against production must be from a release branch. '
            'The current branch is %s.',
            current_branch,
        )
        sys.exit(1)

    branch_check = input(
        f'\nThis script will execute all migrations on this branch ({current_branch}) that have not yet been run '
        f'against {dbname}. Please type "{current_branch}" to confirm your branch. (Anything else exits):\n'
    )
    if branch_check != current_branch:
        logging.warning('\nConfirmation aborted.')
        sys.exit(1)

    # Fetch secrets
    if dry_run:
        overriden_env_vars = (
            local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars()
        )
    else:
        overriden_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
            database, ssl_cert_path=ssl_cert_path
        )

    # Run migrations
    try:
        if dry_run:
            logging.info('Starting local postgres database for migrations dry run')
            db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database)
        )
        alembic.command.upgrade(config, 'head')
    except Exception as e:
        logging.error('Migrations failed to run: %s', e)
        sys.exit(1)
    finally:
        local_postgres_helpers.restore_local_env_vars(overriden_env_vars)
        if dry_run:
            try:
                logging.info('Stopping local postgres database')
                local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(
                    db_dir
                )
            except Exception as e2:
                logging.error('Error cleaning up postgres: %s', e2)
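# A hedged sketch of how main might be wired into a CLI entrypoint; the flag
# names mirror the --ssl-cert-path flag referenced above, but this parser is an
# assumption, not the source's actual argument handling.
import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run pending alembic migrations.')
    parser.add_argument(
        '--database',
        required=True,
        choices=[s.name for s in SchemaType],
        help='Schema to run migrations against, e.g. OPERATIONS.',
    )
    parser.add_argument('--repo-root', default=os.getcwd())
    parser.add_argument('--ssl-cert-path', default='')
    parser.add_argument('--dry-run', action='store_true')
    args = parser.parse_args()
    main(
        database=SchemaType[args.database],
        repo_root=args.repo_root,
        ssl_cert_path=args.ssl_cert_path,
        dry_run=args.dry_run,
    )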