class TestCloudSqlDatabaseHook(unittest.TestCase):
    """Tests for ``CloudSqlDatabaseHook`` built from two in-memory connections."""

    @mock.patch(
        'airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_connection'
    )
    def setUp(self, m):
        """Create the Cloud SQL and GCP connections and the hook under test.

        ``m`` is the mock that the ``mock.patch`` decorator injects in place of
        ``CloudSqlDatabaseHook.get_connection``.
        """
        super().setUp()

        # Cloud SQL connection: its extra declares a postgres database with
        # ``use_proxy`` enabled.
        sql_extra = (
            '{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", "use_proxy": true, '
            '"project_id":"my_project"}'
        )
        self.sql_connection = Connection(
            conn_id='my_gcp_sql_connection',
            conn_type='gcpcloudsql',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra=sql_extra,
        )

        # GCP connection: scopes, project and key path are stored as JSON extra.
        self.connection = Connection(
            conn_id='my_gcp_connection',
            conn_type='google_cloud_platform',
        )
        scope_urls = [
            "https://www.googleapis.com/auth/pubsub",
            "https://www.googleapis.com/auth/datastore",
            "https://www.googleapis.com/auth/bigquery",
            "https://www.googleapis.com/auth/devstorage.read_write",
            "https://www.googleapis.com/auth/logging.write",
            "https://www.googleapis.com/auth/cloud-platform",
        ]
        gcp_extra = {
            "extra__google_cloud_platform__scope": ",".join(scope_urls),
            "extra__google_cloud_platform__project": "your-gcp-project",
            "extra__google_cloud_platform__key_path":
                '/var/local/google_cloud_default.json',
        }
        self.connection.set_extra(json.dumps(gcp_extra))

        # Successive calls to the patched get_connection yield the Cloud SQL
        # connection first and the GCP connection second.
        m.side_effect = [self.sql_connection, self.connection]
        self.db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id='my_gcp_sql_connection',
            gcp_conn_id='my_gcp_connection',
        )

    def test_get_sqlproxy_runner(self):
        """The proxy runner carries the GCP conn id and the instance specification."""
        self.db_hook._generate_connection_uri()
        runner = self.db_hook.get_sqlproxy_runner()
        self.assertEqual(runner.gcp_conn_id, self.connection.conn_id)

        # Expected specification is "<project>:<location>:<instance>" taken
        # from the Cloud SQL connection's extra.
        sql_extras = self.sql_connection.extra_dejson
        expected_spec = "{project}:{location}:{instance}".format(
            project=sql_extras['project_id'],
            location=sql_extras['location'],
            instance=sql_extras['instance'],
        )
        self.assertEqual(runner.instance_specification, expected_spec)
def execute(self, context):
    """
    Run the operator's SQL through a ``CloudSqlDatabaseHook`` created for this call.

    The hook is built from the operator's connection ids and the project stored
    in the GCP connection's extra. ``cleanup_database_hook()`` is always called
    once the database hook has been obtained, even if the query fails.

    :param context: execution context passed by the caller (not read here)
    """
    default_project_id = self.gcp_connection.extra_dejson.get(
        'extra__google_cloud_platform__project')
    cloudsql_hook = CloudSqlDatabaseHook(
        gcp_cloudsql_conn_id=self.gcp_cloudsql_conn_id,
        gcp_conn_id=self.gcp_conn_id,
        default_gcp_project_id=default_project_id,
    )

    # Validation and connection creation happen outside the try block: the
    # cleanup below only runs once a database hook has been requested.
    cloudsql_hook.validate_ssl_certs()
    db_connection = cloudsql_hook.create_connection()
    cloudsql_hook.validate_socket_path_length()
    db_hook = cloudsql_hook.get_database_hook(connection=db_connection)

    try:
        self._execute_query(cloudsql_hook, db_hook)
    finally:
        cloudsql_hook.cleanup_database_hook()
def _execute_query(self, hook: CloudSqlDatabaseHook, database_hook: Union[PostgresHook, MySqlHook]):
    """
    Run ``self.sql`` on ``database_hook``, starting the Cloud SQL proxy first if needed.

    :param hook: Cloud SQL hook; its ``use_proxy`` flag decides whether a proxy
        runner is obtained and started
    :param database_hook: hook whose ``run`` method executes the SQL
    """
    proxy_runner = None
    try:
        if hook.use_proxy:
            proxy_runner = hook.get_sqlproxy_runner()
            # The reserved port is released right before the proxy starts.
            # There is a very, very slim chance that another bind(0) takes
            # the socket over in between - quite unlikely to happen though!
            hook.free_reserved_port()
            proxy_runner.start_proxy()

        self.log.info('Executing: "%s"', self.sql)
        database_hook.run(
            self.sql,
            self.autocommit,
            parameters=self.parameters,
        )
    finally:
        # Stop the proxy only if a runner was actually obtained.
        if proxy_runner:
            proxy_runner.stop_proxy()
def __init__(self,
             sql: Union[List[str], str],
             autocommit: bool = False,
             parameters: Optional[Union[Dict, Iterable]] = None,
             gcp_conn_id: str = 'google_cloud_default',
             gcp_cloudsql_conn_id: str = 'google_cloud_sql_default',
             *args, **kwargs) -> None:
    """
    Store the query settings and build the Cloud SQL hook.

    :param sql: SQL query or list of queries
    :param autocommit: stored on ``self.autocommit``
    :param parameters: stored on ``self.parameters``
    :param gcp_conn_id: id of the GCP connection; it is looked up here via
        ``BaseHook.get_connection``
    :param gcp_cloudsql_conn_id: id of the Cloud SQL connection passed to the hook
    """
    super().__init__(*args, **kwargs)

    # Plain settings.
    self.sql = sql
    self.autocommit = autocommit
    self.parameters = parameters
    self.gcp_conn_id = gcp_conn_id
    self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id

    # The GCP connection's extra supplies the default project for the hook.
    self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
    default_project_id = self.gcp_connection.extra_dejson.get(
        'extra__google_cloud_platform__project')
    self.cloudsql_db_hook = CloudSqlDatabaseHook(
        gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
        gcp_conn_id=gcp_conn_id,
        default_gcp_project_id=default_project_id,
    )

    # Both start out unset.
    self.cloud_sql_proxy_runner = None
    self.database_hook = None
def setUp(self, m):
    """Create the Cloud SQL and GCP connections and the hook under test.

    ``m`` is presumably the mock standing in for the hook's ``get_connection``
    (the patch decorator is not visible in this snippet); it receives the two
    connections as its ``side_effect``.
    """
    super().setUp()

    # GCP connection, with scopes, project and key path stored as JSON extra.
    gcp_scopes = ",".join([
        "https://www.googleapis.com/auth/pubsub",
        "https://www.googleapis.com/auth/datastore",
        "https://www.googleapis.com/auth/bigquery",
        "https://www.googleapis.com/auth/devstorage.read_write",
        "https://www.googleapis.com/auth/logging.write",
        "https://www.googleapis.com/auth/cloud-platform",
    ])
    gcp_extra_json = json.dumps({
        "extra__google_cloud_platform__scope": gcp_scopes,
        "extra__google_cloud_platform__project": "your-gcp-project",
        "extra__google_cloud_platform__key_path":
            '/var/local/google_cloud_default.json',
    })

    # Cloud SQL connection: its extra declares a postgres database with
    # ``use_proxy`` enabled.
    self.sql_connection = Connection(
        conn_id='my_gcp_sql_connection',
        conn_type='gcpcloudsql',
        login='******',
        password='******',
        host='host',
        schema='schema',
        extra=(
            '{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", "use_proxy": true, '
            '"project_id":"my_project"}'
        ),
    )
    self.connection = Connection(
        conn_id='my_gcp_connection',
        conn_type='google_cloud_platform',
    )
    self.connection.set_extra(gcp_extra_json)

    # First lookup yields the Cloud SQL connection, the second the GCP one.
    m.side_effect = [self.sql_connection, self.connection]
    self.db_hook = CloudSqlDatabaseHook(
        gcp_cloudsql_conn_id='my_gcp_sql_connection',
        gcp_conn_id='my_gcp_connection',
    )
def __init__(self,
             sql,
             autocommit=False,
             parameters=None,
             gcp_conn_id='google_cloud_default',
             gcp_cloudsql_conn_id='google_cloud_sql_default',
             *args, **kwargs):
    """
    Store the query settings and build the Cloud SQL hook.

    :param sql: SQL to run; stored on ``self.sql``
    :param autocommit: stored on ``self.autocommit``
    :param parameters: stored on ``self.parameters``
    :param gcp_conn_id: id of the GCP connection; it is looked up here via
        ``BaseHook.get_connection``
    :param gcp_cloudsql_conn_id: id of the Cloud SQL connection passed to the hook
    """
    super().__init__(*args, **kwargs)

    self.sql = sql
    self.parameters = parameters
    self.autocommit = autocommit

    self.gcp_conn_id = gcp_conn_id
    self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id

    # The hook's default project comes from the GCP connection's extra.
    self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
    project_from_extra = self.gcp_connection.extra_dejson.get(
        'extra__google_cloud_platform__project')
    self.cloudsql_db_hook = CloudSqlDatabaseHook(
        gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
        gcp_conn_id=gcp_conn_id,
        default_gcp_project_id=project_from_extra,
    )

    # Both start out unset.
    self.database_hook = None
    self.cloud_sql_proxy_runner = None
def get_hook(self):
    """
    Return a hook instance matching this connection's ``conn_type``.

    The hook module is imported only when its connection type is requested,
    so hooks with optional dependencies are never imported unless used.

    :return: a new hook constructed with ``self.conn_id``
    :raises AirflowException: if ``conn_type`` has no entry in the table below
    :raises ImportError: if the hook module or class cannot be imported
    """
    import importlib

    # conn_type -> (module path, hook class name, constructor keyword that
    # receives the connection id). Note that MongoHook takes plain ``conn_id``.
    hooks_by_conn_type = {
        'mysql': (
            'airflow.hooks.mysql_hook', 'MySqlHook', 'mysql_conn_id'),
        'google_cloud_platform': (
            'airflow.gcp.hooks.bigquery', 'BigQueryHook', 'bigquery_conn_id'),
        'postgres': (
            'airflow.hooks.postgres_hook', 'PostgresHook', 'postgres_conn_id'),
        'pig_cli': (
            'airflow.hooks.pig_hook', 'PigCliHook', 'pig_cli_conn_id'),
        'hive_cli': (
            'airflow.hooks.hive_hooks', 'HiveCliHook', 'hive_cli_conn_id'),
        'presto': (
            'airflow.hooks.presto_hook', 'PrestoHook', 'presto_conn_id'),
        'hiveserver2': (
            'airflow.hooks.hive_hooks', 'HiveServer2Hook', 'hiveserver2_conn_id'),
        'sqlite': (
            'airflow.hooks.sqlite_hook', 'SqliteHook', 'sqlite_conn_id'),
        'jdbc': (
            'airflow.hooks.jdbc_hook', 'JdbcHook', 'jdbc_conn_id'),
        'mssql': (
            'airflow.hooks.mssql_hook', 'MsSqlHook', 'mssql_conn_id'),
        'oracle': (
            'airflow.hooks.oracle_hook', 'OracleHook', 'oracle_conn_id'),
        'vertica': (
            'airflow.contrib.hooks.vertica_hook', 'VerticaHook', 'vertica_conn_id'),
        'cloudant': (
            'airflow.contrib.hooks.cloudant_hook', 'CloudantHook', 'cloudant_conn_id'),
        'jira': (
            'airflow.contrib.hooks.jira_hook', 'JiraHook', 'jira_conn_id'),
        'redis': (
            'airflow.contrib.hooks.redis_hook', 'RedisHook', 'redis_conn_id'),
        'wasb': (
            'airflow.contrib.hooks.wasb_hook', 'WasbHook', 'wasb_conn_id'),
        'docker': (
            'airflow.hooks.docker_hook', 'DockerHook', 'docker_conn_id'),
        'azure_data_lake': (
            'airflow.contrib.hooks.azure_data_lake_hook', 'AzureDataLakeHook',
            'azure_data_lake_conn_id'),
        'azure_cosmos': (
            'airflow.contrib.hooks.azure_cosmos_hook', 'AzureCosmosDBHook',
            'azure_cosmos_conn_id'),
        'cassandra': (
            'airflow.contrib.hooks.cassandra_hook', 'CassandraHook',
            'cassandra_conn_id'),
        'mongo': (
            'airflow.contrib.hooks.mongo_hook', 'MongoHook', 'conn_id'),
        'gcpcloudsql': (
            'airflow.gcp.hooks.cloud_sql', 'CloudSqlDatabaseHook',
            'gcp_cloudsql_conn_id'),
        'grpc': (
            'airflow.contrib.hooks.grpc_hook', 'GrpcHook', 'grpc_conn_id'),
    }

    hook_spec = hooks_by_conn_type.get(self.conn_type)
    if hook_spec is None:
        raise AirflowException("Unknown hook type {}".format(self.conn_type))

    module_path, class_name, conn_id_param = hook_spec
    module = importlib.import_module(module_path)
    try:
        hook_class = getattr(module, class_name)
    except AttributeError as err:
        # Keep the exception type a ``from module import name`` statement
        # raises when the module exists but the class does not.
        raise ImportError(
            "cannot import name {!r} from {!r}".format(class_name, module_path)
        ) from err
    return hook_class(**{conn_id_param: self.conn_id})
class CloudSqlQueryOperator(BaseOperator):
    """
    Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
    cloud-sql-proxy to establish secure connection with the database.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:CloudSqlQueryOperator`

    :param sql: SQL query or list of queries to run (should be DML or DDL query -
        this operator does not return any data from the database,
        so it is useless to pass it DQL queries). Note that it is the responsibility
        of the author of the queries to make sure that the queries are idempotent.
        For example you can use CREATE TABLE IF NOT EXISTS to create a table.
    :type sql: str or list[str]
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: dict or iterable
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform for
        cloud-sql-proxy authentication.
    :type gcp_conn_id: str
    :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL;
        its schema should be gcpcloudsql://.
        See :class:`~airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook` for
        details on how to define gcpcloudsql:// connection.
    :type gcp_cloudsql_conn_id: str
    """
    # [START gcp_sql_query_template_fields]
    template_fields = ('sql', 'gcp_cloudsql_conn_id', 'gcp_conn_id')
    template_ext = ('.sql', )
    # [END gcp_sql_query_template_fields]

    @apply_defaults
    def __init__(self,
                 sql: Union[List[str], str],
                 autocommit: bool = False,
                 parameters: Optional[Union[Dict, Iterable]] = None,
                 gcp_conn_id: str = 'google_cloud_default',
                 gcp_cloudsql_conn_id: str = 'google_cloud_sql_default',
                 *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.sql = sql
        self.gcp_conn_id = gcp_conn_id
        self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
        self.cloudsql_db_hook = self._create_cloudsql_db_hook()
        self.cloud_sql_proxy_runner = None
        self.database_hook = None

    def _create_cloudsql_db_hook(self):
        """Build a ``CloudSqlDatabaseHook`` from this operator's connection settings."""
        return CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id=self.gcp_cloudsql_conn_id,
            gcp_conn_id=self.gcp_conn_id,
            default_gcp_project_id=self.gcp_connection.extra_dejson.get(
                'extra__google_cloud_platform__project'))

    def _execute_query(self):
        """Run ``self.sql`` on the database hook, via cloud-sql-proxy when enabled."""
        try:
            if self.cloudsql_db_hook.use_proxy:
                self.cloud_sql_proxy_runner = \
                    self.cloudsql_db_hook.get_sqlproxy_runner()
                self.cloudsql_db_hook.free_reserved_port()
                # There is very, very slim chance that the socket will
                # be taken over here by another bind(0).
                # It's quite unlikely to happen though!
                self.cloud_sql_proxy_runner.start_proxy()
            self.log.info('Executing: "%s"', self.sql)
            self.database_hook.run(self.sql, self.autocommit,
                                   parameters=self.parameters)
        finally:
            # Stop the proxy (if one was obtained) and forget the runner.
            if self.cloud_sql_proxy_runner:
                self.cloud_sql_proxy_runner.stop_proxy()
            self.cloud_sql_proxy_runner = None

    def execute(self, context):
        """
        Create the connection, run the query and clean up afterwards.

        :param context: execution context passed by the caller (not read here)
        """
        # The hook is dropped at the end of every run (see the ``finally``
        # below). Rebuild it here so that executing the same operator instance
        # again does not fail with AttributeError on ``None``.
        if self.cloudsql_db_hook is None:
            self.cloudsql_db_hook = self._create_cloudsql_db_hook()
        self.cloudsql_db_hook.validate_ssl_certs()
        self.cloudsql_db_hook.create_connection()
        try:
            self.cloudsql_db_hook.validate_socket_path_length()
            self.database_hook = self.cloudsql_db_hook.get_database_hook()
            try:
                self._execute_query()
            finally:
                self.cloudsql_db_hook.cleanup_database_hook()
        finally:
            self.cloudsql_db_hook.delete_connection()
            self.cloudsql_db_hook = None