def test_create_operator_with_wrong_parameters(self, project_id, location,
                                               instance_name, database_type,
                                               use_proxy, use_ssl, sql,
                                               message, get_connections):
    """Executing the operator with a bad connection URI raises AirflowException.

    Parametrized: each combination of URI fields is expected to fail with an
    exception whose text contains ``message``.
    """
    query_params = dict(
        database_type=database_type,
        project_id=project_id,
        location=location,
        instance_name=instance_name,
        use_proxy=use_proxy,
        use_ssl=use_ssl,
    )
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?"
        "database_type={database_type}&"
        "project_id={project_id}&location={location}&instance={instance_name}&"
        "use_proxy={use_proxy}&use_ssl={use_ssl}"
    ).format(**query_params)
    self._setup_connections(get_connections, uri)
    # Both construction and execution happen inside the context manager:
    # depending on the parameters, either step may raise.
    with self.assertRaises(AirflowException) as raised:
        task = CloudSqlQueryOperator(sql=sql, task_id='task_id')
        task.execute(None)
    self.assertIn(message, str(raised.exception))
def test_cloudsql_hook_delete_connection_on_exception(
        self, get_connections, run, get_connection, delete_connection):
    """The dynamically created DB connection is deleted even if the query fails."""
    cloudsql_conn = Connection()
    cloudsql_conn.parse_from_uri(
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False")
    get_connection.return_value = cloudsql_conn

    db_conn = Connection()
    db_conn.host = "127.0.0.1"
    db_conn.set_extra(json.dumps({
        "project_id": "example-project",
        "location": "europe-west1",
        "instance": "testdb",
        "database_type": "mysql",
    }))
    get_connections.return_value = [db_conn]

    # Simulate the underlying database hook failing mid-query.
    run.side_effect = Exception("Exception when running a query")

    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    with self.assertRaises(Exception) as raised:
        task.execute(None)
    self.assertEqual("Exception when running a query", str(raised.exception))
    # Cleanup must still have run despite the exception.
    delete_connection.assert_called_once_with()
def test_create_operator_with_too_long_unix_socket_path(self, get_connections):
    """A proxy UNIX-socket path over the OS limit makes execute() fail."""
    # Instance name chosen so the generated socket path exceeds the limit.
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&"
        "instance=test_db_with_long_name_a_bit_above"
        "_the_limit_of_UNIX_socket_asdadadasadasd&"
        "use_proxy=True&sql_proxy_use_tcp=False"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    with self.assertRaises(AirflowException) as raised:
        task.execute(None)
    self.assertIn("The UNIX socket path length cannot exceed",
                  str(raised.exception))
def test_create_operator_with_not_too_long_unix_socket_path(self, get_connections):
    """A socket path just within the limit yields a usable postgres connection."""
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&"
        "instance=test_db_with_longname_but_with_limit_of_UNIX_socket&"
        "use_proxy=True&sql_proxy_use_tcp=False"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(hook.postgres_conn_id)[0]
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('postgres', conn.conn_type)
    self.assertEqual('testdb', conn.schema)
def test_create_operator_with_correct_parameters_mysql_tcp(self, get_connections):
    """Proxy-over-TCP MySQL URI produces a localhost connection on a fresh port."""
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=True"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(hook.mysql_conn_id)[0]
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('mysql', conn.conn_type)
    self.assertEqual('127.0.0.1', conn.host)
    # The TCP proxy allocates its own port, so the URI's port must not leak through.
    self.assertNotEqual(3200, conn.port)
    self.assertEqual('testdb', conn.schema)
def test_create_operator_with_correct_parameters_mysql(
        self, get_connections):
    """Direct (no proxy, no SSL) MySQL URI passes host/port/schema straight through."""
    cloudsql_conn = Connection()
    cloudsql_conn.parse_from_uri(
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False&use_ssl=False")
    get_connections.return_value = [cloudsql_conn]
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(hook.mysql_conn_id)[0]
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('mysql', conn.conn_type)
    self.assertEqual('8.8.8.8', conn.host)
    self.assertEqual(3200, conn.port)
    self.assertEqual('testdb', conn.schema)
def test_create_operator_with_correct_parameters_postgres_proxy_socket(
        self, get_connections):
    """Proxy-over-socket postgres URI puts the socket path in host and clears port."""
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=False"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(
            hook.postgres_conn_id)[0]  # pylint: disable=no-member
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('postgres', conn.conn_type)
    # Host carries the proxy's UNIX socket path instead of an address.
    self.assertIn('/tmp', conn.host)
    self.assertIn('example-project:europe-west1:testdb', conn.host)
    self.assertIsNone(conn.port)
    self.assertEqual('testdb', conn.schema)
def test_create_operator_with_wrong_parameters(self, project_id, location,
                                               instance_name, database_type,
                                               use_proxy, use_ssl, sql,
                                               message, get_connections):
    """Constructing the operator from a bad URI raises AirflowException.

    Parametrized: each invalid field combination must fail at construction
    time with an exception whose text contains ``message``.
    """
    query_params = dict(
        database_type=database_type,
        project_id=project_id,
        location=location,
        instance_name=instance_name,
        use_proxy=use_proxy,
        use_ssl=use_ssl,
    )
    cloudsql_conn = Connection()
    cloudsql_conn.parse_from_uri(
        ("gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type={database_type}&"
         "project_id={project_id}&location={location}&instance={instance_name}&"
         "use_proxy={use_proxy}&use_ssl={use_ssl}").format(**query_params))
    get_connections.return_value = [cloudsql_conn]
    # Validation happens in the constructor itself — no execute() needed.
    with self.assertRaises(AirflowException) as raised:
        CloudSqlQueryOperator(sql=sql, task_id='task_id')
    self.assertIn(message, str(raised.exception))
def test_create_operator_with_correct_parameters_postgres_proxy_socket(
        self, get_connections):
    """Proxy-over-socket postgres URI puts the socket path in host and clears port."""
    cloudsql_conn = Connection()
    cloudsql_conn.parse_from_uri(
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=False")
    get_connections.return_value = [cloudsql_conn]
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(hook.postgres_conn_id)[0]
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('postgres', conn.conn_type)
    # Host carries the proxy's UNIX socket path instead of an address.
    self.assertIn('/tmp', conn.host)
    self.assertIn('example-project:europe-west1:testdb', conn.host)
    self.assertIsNone(conn.port)
    self.assertEqual('testdb', conn.schema)
def test_create_operator_with_correct_parameters_postgres_ssl(
        self, get_connections):
    """SSL postgres URI keeps host/port and propagates cert paths into extras."""
    uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False&use_ssl=True&sslcert=/bin/bash&"
        "sslkey=/bin/bash&sslrootcert=/bin/bash"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(
            hook.postgres_conn_id)[0]  # pylint: disable=no-member
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('postgres', conn.conn_type)
    self.assertEqual('127.0.0.1', conn.host)
    self.assertEqual(3200, conn.port)
    self.assertEqual('testdb', conn.schema)
    # /bin/bash is just a file guaranteed to exist — only the path is checked.
    self.assertEqual('/bin/bash', conn.extra_dejson['sslkey'])
    self.assertEqual('/bin/bash', conn.extra_dejson['sslcert'])
    self.assertEqual('/bin/bash', conn.extra_dejson['sslrootcert'])
def test_create_operator_with_correct_parameters_mysql_proxy_socket(self, get_connections):
    """Proxy-over-socket MySQL URI routes through localhost with a unix_socket extra."""
    uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=False"
    )
    self._setup_connections(get_connections, uri)
    task = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    task.cloudsql_db_hook.create_connection()
    try:
        hook = task.cloudsql_db_hook.get_database_hook()
        conn = hook._get_connections_from_db(hook.mysql_conn_id)[0]
    finally:
        # Always drop the dynamically created connection.
        task.cloudsql_db_hook.delete_connection()
    self.assertEqual('mysql', conn.conn_type)
    # MySQL keeps host=localhost and passes the socket via the extras instead.
    self.assertEqual('localhost', conn.host)
    self.assertIn('/tmp', conn.extra_dejson['unix_socket'])
    self.assertIn('example-project:europe-west1:testdb',
                  conn.extra_dejson['unix_socket'])
    self.assertIsNone(conn.port)
    self.assertEqual('testdb', conn.schema)
# [START howto_operator_cloudsql_query_operators]
# One query task per pre-configured Cloud SQL connection variant.
connection_names = [
    "proxy_postgres_tcp",
    "proxy_postgres_socket",
    "public_postgres_tcp",
    "public_postgres_tcp_ssl",
    "proxy_mysql_tcp",
    "proxy_mysql_socket",
    "public_mysql_tcp",
    "public_mysql_tcp_ssl",
]

tasks = []

with models.DAG(
    dag_id='example_gcp_sql_query',
    default_args=default_args,
    schedule_interval=None
) as dag:
    tasks.extend(
        CloudSqlQueryOperator(
            gcp_cloudsql_conn_id=connection_name,
            task_id="example_gcp_sql_task_" + connection_name,
            sql=SQL
        )
        for connection_name in connection_names
    )
# [END howto_operator_cloudsql_query_operators]
'dmp_proc_report', default_args=default_args # , on_success_callback=cleanup_xcom , description='Prebuilt Report Generation pipeline', schedule_interval=None #ga data 수집완료시점으로 매일 아침 7시(한국시간) '0 7 * * *' 일배치 수행 , start_date=airflow.utils.dates.days_ago( 2) #기준일을 지난 기간까지 포함할 경우 이전 날짜로 지정 start_date=datetime(2019,11,13) , catchup=False) # Cloud SQL 의 DP_TB_PR_CRT_RPRT_INF Table 의 모든 건을 삭제 delete_mart_mysql = CloudSqlQueryOperator( task_id="delete_mart_in_mysql", gcp_cloudsql_conn_id=connection_name, sql="DELETE FROM DKDMPDB.DP_TB_PR_CRT_RPRT_INF", retries=1, dag=dag) # BigQuery 의 Report Mart Table 의 모든 건을 CloudSQL 의 Table 로 Load load_bq_mart_to_mysql = BigQueryToMySqlOperator( task_id="load_bqmart_to_mysql", dataset_table=reportMartDSTable, mysql_table="DP_TB_PR_CRT_RPRT_INF", gcp_conn_id="google_cloud_default", mysql_conn_id=cloudsql_connection, selected_fields= "SGM_NO,ITM_ID,PRP_NM,SUB_PRP_NM,POPART_V,OJ_CONS_RT,AM_CONS_RT", replace=True, batch_size=50000, location=loc_ga,
value="CREATE TABLE {} ({})".format( TABLE, sqlcolumnas)) with DAG(ENV + "_" + OBJECT.upper(), schedule_interval=None, default_args=default_args, template_searchpath=''.format( cfg.ENV, OBJECT)) as dag: # TODO: Template_searchpath get_df_and_types = PythonOperator(task_id='get_df_and_types', python_callable=get_df_and_types, provide_context=True) creartabla = CloudSqlQueryOperator( gcp_cloudsql_conn_id='cloudsql_pipeline', task_id="creartabla", sql="{{ti.xcom_pull(task_ids='get_df_and_types',key='SQL')}}") import_to_csql = CloudSqlInstanceImportOperator( task_id='import_to_csql', project_id=SQL_PROJECT, instance='servicedat-cal-mysql', body={ "importContext": { "kind": "sql#importContext", "fileType": 'CSV', "uri": '{}/cloudSQLexport_temp.csv'.format(DIR_TMP), "database": DATABASE, "csvImportOptions": { "table": TABLE }