def test_init_with_template_connection(self):
    """A templated qubole_conn_id is resolved by render_template_fields."""
    with DAG(DAG_ID, start_date=DEFAULT_DATE):
        op = QuboleOperator(task_id=TASK_ID, qubole_conn_id="{{ qubole_conn_id }}")
        op.render_template_fields({'qubole_conn_id': TEMPLATE_CONN})
        assert op.task_id == TASK_ID
        assert op.qubole_conn_id == TEMPLATE_CONN
def test_notify(self):
    """notify=True must surface as the leading '--notify' CLI argument."""
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        op = QuboleOperator(task_id=TASK_ID, command_type='sparkcmd', notify=True, dag=dag)
        cmd_args = op.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert cmd_args[0] == "--notify"
def test_get_hook(self):
    """get_hook() on a hivecmd operator returns a QuboleHook instance (exact class)."""
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        op = QuboleOperator(task_id=TASK_ID, command_type='hivecmd', dag=dag)
        # Exact-class comparison, as in the original (not isinstance): subclasses
        # would intentionally fail this check.
        assert type(op.get_hook()) == QuboleHook
def test_hyphen_args_note_id(self):
    """note_id kwarg is rendered as the hyphenated '--note-id=<value>' CLI flag.

    Converted from self.assertEqual to a plain assert for consistency with the
    rest of this file's pytest-style tests.
    """
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        task = QuboleOperator(task_id=TASK_ID, command_type='sparkcmd', note_id="123", dag=dag)
        assert task.get_hook().create_cmd_args({'run_id': 'dummy'})[0] == "--note-id=123"
def test_extra_serialized_field(self):
    """qubole_conn_id survives DAG serialization, and operator extra links
    (QDSLink) work on the deserialized operator.

    Converted from unittest-style assertions (assertIn/assertEqual/
    assertIsInstance) to plain asserts for consistency with the rest of
    this file's pytest-style tests.
    """
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        QuboleOperator(
            task_id=TASK_ID,
            command_type='shellcmd',
            qubole_conn_id=TEST_CONN,
        )

    serialized_dag = SerializedDAG.to_dict(dag)
    # The extra serialized field must be present in the serialized task.
    assert "qubole_conn_id" in serialized_dag["dag"]["tasks"][0]

    dag = SerializedDAG.from_dict(serialized_dag)
    simple_task = dag.task_dict[TASK_ID]
    assert getattr(simple_task, "qubole_conn_id") == TEST_CONN

    #########################################################
    # Verify Operator Links work with Serialized Operator
    #########################################################
    assert isinstance(list(simple_task.operator_extra_links)[0], QDSLink)

    ti = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
    ti.xcom_push('qbol_cmd_id', 12345)

    # check for positive case: XCom pushed for this execution_date
    url = simple_task.get_extra_links(DEFAULT_DATE, 'Go to QDS')
    assert url == 'http://localhost/v2/analyze?command_id=12345'

    # check for negative case: no XCom for a different execution_date
    url2 = simple_task.get_extra_links(datetime(2017, 1, 2), 'Go to QDS')
    assert url2 == ''
def test_init_with_template_cluster_label(self):
    """A templated cluster_label is resolved from params during template rendering.

    Converted from self.assertEqual to a plain assert for consistency with the
    rest of this file's pytest-style tests.
    """
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    task = QuboleOperator(
        task_id=TASK_ID,
        dag=dag,
        cluster_label='{{ params.cluster_label }}',
        params={'cluster_label': 'default'},
    )
    ti = TaskInstance(task, DEFAULT_DATE)
    ti.render_templates()
    assert task.cluster_label == 'default'
def test_get_redirect_url(self):
    """'Go to QDS' extra link resolves from the 'qbol_cmd_id' XCom and is empty
    when no XCom exists for the given execution_date.

    Converted from self.assertEqual to plain asserts for consistency with the
    rest of this file's pytest-style tests.
    """
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        task = QuboleOperator(
            task_id=TASK_ID,
            qubole_conn_id=TEST_CONN,
            command_type='shellcmd',
            parameters="param1 param2",
            dag=dag,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.xcom_push('qbol_cmd_id', 12345)

        # check for positive case: XCom pushed for this execution_date
        url = task.get_extra_links(DEFAULT_DATE, 'Go to QDS')
        assert url == 'http://localhost/v2/analyze?command_id=12345'

        # check for negative case: no XCom for a different execution_date
        url2 = task.get_extra_links(datetime(2017, 1, 2), 'Go to QDS')
        assert url2 == ''
def test_position_args_parameters(self):
    """Positional CLI args: pigcmd 'parameters' split on whitespace, and
    hadoopcmd 'sub_command' tokens appear in order after the command name."""
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    with dag:
        pig_task = QuboleOperator(
            task_id=TASK_ID,
            command_type='pigcmd',
            parameters="key1=value1 key2=value2",
            dag=dag,
        )
        # create_cmd_args is deterministic for a fixed context, so call once
        # and index into the result instead of re-calling per assertion.
        pig_args = pig_task.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert pig_args[1] == "key1=value1"
        assert pig_args[2] == "key2=value2"

        cmd = "s3distcp --src s3n://airflow/source_hadoopcmd --dest s3n://airflow/destination_hadoopcmd"
        hadoop_task = QuboleOperator(
            task_id=TASK_ID + "_1", command_type='hadoopcmd', dag=dag, sub_command=cmd
        )
        hadoop_args = hadoop_task.get_hook().create_cmd_args({'run_id': 'dummy'})
        assert hadoop_args[1] == "s3distcp"
        assert hadoop_args[2] == "--src"
        assert hadoop_args[3] == "s3n://airflow/source_hadoopcmd"
        assert hadoop_args[4] == "--dest"
        assert hadoop_args[5] == "s3n://airflow/destination_hadoopcmd"
:param ti: The TaskInstance object. :type ti: airflow.models.TaskInstance :return: True if the files are the same, False otherwise. :rtype: bool """ qubole_result_1 = hive_show_table.get_results(ti) qubole_result_2 = hive_s3_location.get_results(ti) return filecmp.cmp(qubole_result_1, qubole_result_2) hive_show_table = QuboleOperator( task_id='hive_show_table', command_type='hivecmd', query='show tables', cluster_label='{{ params.cluster_label }}', fetch_logs=True, # If `fetch_logs`=true, will fetch qubole command logs and concatenate # them into corresponding airflow task logs tags='airflow_example_run', # To attach tags to qubole command, auto attach 3 tags - dag_id, task_id, run_id params={ 'cluster_label': 'default', }, ) hive_s3_location = QuboleOperator( task_id='hive_s3_location', command_type="hivecmd", script_location= "s3n://public-qubole/qbol-library/scripts/show_table.hql", notify=True, tags=['tag1', 'tag2'], # If the script at s3 location has any qubole specific macros to be replaced
def test_init_with_default_connection(self):
    """Omitting qubole_conn_id makes the operator fall back to DEFAULT_CONN.

    Converted from self.assertEqual to plain asserts for consistency with the
    rest of this file's pytest-style tests.
    """
    op = QuboleOperator(task_id=TASK_ID)
    assert op.task_id == TASK_ID
    assert op.qubole_conn_id == DEFAULT_CONN
def test_parameter_pool_passed(self):
    """The standard BaseOperator 'pool' kwarg is forwarded to the operator.

    Converted from self.assertEqual to a plain assert for consistency with the
    rest of this file's pytest-style tests.
    """
    test_pool = 'test_pool'
    op = QuboleOperator(task_id=TASK_ID, pool=test_pool)
    assert op.pool == test_pool
def test_init_with_default_connection(self):
    """Omitting qubole_conn_id makes the operator fall back to DEFAULT_CONN."""
    operator = QuboleOperator(task_id=TASK_ID)
    assert operator.task_id == TASK_ID
    assert operator.qubole_conn_id == DEFAULT_CONN
def test_parameter_include_header_missing(self, mock_get_results):
    """get_results() without include_headers should use the default (False).

    Bug fix: the original called ``mock_get_results.asset_called_with(...)`` —
    a typo for ``assert_called_with``. On a Mock, the misspelled name is just
    an auto-created child mock, so calling it is a silent no-op and the test
    asserted nothing.
    """
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    qubole_operator = QuboleOperator(task_id=TASK_ID, dag=dag, command_type='prestocmd')
    qubole_operator.get_results()
    # NOTE(review): argument form kept from the original author's intent —
    # confirm it matches the mocked callable's real signature.
    mock_get_results.assert_called_with('include_headers', False)
def test_parameter_pool_passed(self):
    """The standard BaseOperator 'pool' kwarg is forwarded to the operator."""
    expected_pool = 'test_pool'
    operator = QuboleOperator(task_id=TASK_ID, pool=expected_pool)
    assert operator.pool == expected_pool