def test_poke(self, mock_hook):
    mock_instance = mock_hook.return_value
    sensor = WasbPrefixSensor(
        task_id='wasb_sensor',
        dag=self.dag,
        check_options={'timeout': 2},
        **self._config
    )
    sensor.poke(None)
    mock_instance.check_for_prefix.assert_called_once_with(
        'container', 'prefix', timeout=2
    )
def test_init(self):
    sensor = WasbPrefixSensor(task_id='wasb_sensor', dag=self.dag,
                              **self._config)
    self.assertEqual(sensor.container_name, self._config['container_name'])
    self.assertEqual(sensor.prefix, self._config['prefix'])
    self.assertEqual(sensor.wasb_conn_id, self._config['wasb_conn_id'])
    self.assertEqual(sensor.check_options, {})
    self.assertEqual(sensor.timeout, self._config['timeout'])

    sensor = WasbPrefixSensor(
        task_id='wasb_sensor',
        dag=self.dag,
        check_options={'timeout': 2},
        **self._config
    )
    self.assertEqual(sensor.check_options, {'timeout': 2})
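The two test methods above rely on class-level scaffolding that is not shown here: a `_config` dict whose values line up with the `'container'`/`'prefix'` assertions, a `setUp` that builds `self.dag`, and a `mock.patch` that injects `mock_hook`. A minimal sketch of what that scaffolding might look like, assuming the hook is patched at its import path inside the sensor module (the dates, connection id, and timeout value are made up):

```python
import datetime
import unittest
from unittest import mock

from airflow import DAG
from airflow.contrib.sensors.wasb_sensor import WasbPrefixSensor


class TestWasbPrefixSensor(unittest.TestCase):
    # Values chosen to line up with the assertions in the tests above.
    _config = {
        'container_name': 'container',
        'prefix': 'prefix',
        'wasb_conn_id': 'conn_id',
        'timeout': 100,
    }

    def setUp(self):
        args = {'owner': 'airflow', 'start_date': datetime.datetime(2018, 1, 1)}
        self.dag = DAG('test_dag_id', default_args=args)

    # Patch WasbHook where the sensor imports it so poke() never talks to
    # Azure; mock_hook is the patched class and mock_hook.return_value the
    # instance whose check_for_prefix() call the tests assert on.
    @mock.patch('airflow.contrib.sensors.wasb_sensor.WasbHook')
    def test_poke(self, mock_hook):
        ...  # body as shown above
```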
def transform(self, src_operator: BaseOperator, parent_fragment: DAGFragment,
              upstream_fragments: List[DAGFragment]) -> DAGFragment:
    """
    You need to add the ``wasb_conn_id`` to the source operator (or preferably
    the DAG) for this to work. The ``container_name`` and ``prefix`` for the
    blob-based sensors are copied from the ``bucket_name`` and ``bucket_key``
    of the S3 sensor, so make sure they are templatized for switching between
    ``s3://`` and ``wasb://`` paths, etc., using config.
    """
    s3_key_sensor: S3KeySensor = src_operator
    wasb_conn_id = s3_key_sensor.params.get('wasb_conn_id', None)
    if not wasb_conn_id:
        wasb_conn_id = self.dag.params.get('wasb_conn_id', None)
        if not wasb_conn_id:
            raise TransformerException(
                "Could not find wasb_conn_id in operator or DAG params")

    if s3_key_sensor.wildcard_match:
        wasb_sensor_op = WasbWildcardPrefixSensor(
            task_id=src_operator.task_id,
            wasb_conn_id=wasb_conn_id,
            container_name=s3_key_sensor.bucket_name,
            wildcard_prefix=s3_key_sensor.bucket_key,
            dag=self.dag)
    else:
        wasb_sensor_op = WasbPrefixSensor(
            task_id=src_operator.task_id,
            wasb_conn_id=wasb_conn_id,
            container_name=s3_key_sensor.bucket_name,
            prefix=s3_key_sensor.bucket_key,
            dag=self.dag)

    self.copy_op_attrs(wasb_sensor_op, src_operator)
    self.sign_op(wasb_sensor_op)

    return DAGFragment([wasb_sensor_op])
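To make the mapping performed by ``transform`` concrete, here is a hypothetical before/after pair: the DAG id, task id, bucket, key, and connection id are invented for illustration, and the import paths assume Airflow 1.10.x.

```python
import airflow.utils.dates
from airflow.models import DAG
from airflow.sensors.s3_key_sensor import S3KeySensor
from airflow.contrib.sensors.wasb_sensor import WasbPrefixSensor

dag = DAG(dag_id='s3_to_wasb_example',
          start_date=airflow.utils.dates.days_ago(1),
          schedule_interval='@once')

# Source task in the S3-based DAG (the input to transform()):
wait_for_raw_data = S3KeySensor(
    task_id='wait_for_raw_data',
    bucket_name='landing',                    # -> container_name
    bucket_key='raw_data/{{ ds }}/',          # -> prefix (keep it templatized)
    wildcard_match=False,
    params={'wasb_conn_id': 'wasb_default'},  # read by the transformer
    dag=dag,
)

# Equivalent task the transformer would emit for the Azure DAG:
wait_for_raw_data_wasb = WasbPrefixSensor(
    task_id='wait_for_raw_data',
    wasb_conn_id='wasb_default',
    container_name='landing',
    prefix='raw_data/{{ ds }}/',
    dag=dag,
)
```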
output_container = '222'
processing_file_prefix = ''

blob_service = WasbHook(wasb_conn_id=wasb_connection_id)

dag = DAG(
    dag_id='azure_blob_reader',
    default_args=default_args,
    description='A dag to pull new images from blob and process them',
    schedule_interval=timedelta(days=1),
)

new_files = WasbPrefixSensor(
    task_id='new_files_sensor',
    container_name=input_container,
    prefix=processing_file_prefix,
    wasb_conn_id=wasb_connection_id,
    dag=dag,
)


def move_blobs_to_processing(**context):
    results = blob_service.connection.list_blobs(
        input_container, processing_file_prefix)
    blobs_moved = 0
    blob_urls = []
    for blob in results:
        print("\t Blob name: " + blob.name)
        # Generate a SAS token for blob access
        blob_input_url = blob_service.connection.make_blob_url(
            input_container,
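The snippet above is cut off inside ``move_blobs_to_processing``. A minimal sketch of how that callable could be wired back into the DAG once the prefix sensor fires; the ``PythonOperator`` task and its wiring are an assumption, not part of the original:

```python
from airflow.operators.python_operator import PythonOperator

# Hypothetical wiring: process the blobs after new_files_sensor succeeds.
move_blobs = PythonOperator(
    task_id='move_blobs_to_processing',
    python_callable=move_blobs_to_processing,
    provide_context=True,  # Airflow 1.x: pass ti, ds, etc. in as **context
    dag=dag,
)

new_files >> move_blobs
```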
import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.contrib.sensors.wasb_sensor import WasbBlobSensor, WasbPrefixSensor

dag = DAG(dag_id="azure_blob_sensor",
          start_date=airflow.utils.dates.days_ago(3),
          schedule_interval="@once")

data_arrival_sensor = WasbBlobSensor(task_id="data_arrival_sensor",
                                     container_name="landing",
                                     blob_name="raw_data.csv",
                                     wasb_conn_id="blob_default",
                                     poke_interval=60,
                                     timeout=60 * 60 * 24)

data_file_prefix_sensor = WasbPrefixSensor(task_id="data_file_prefix_sensor",
                                           container_name="landing",
                                           prefix="raw_",
                                           wasb_conn_id="blob_default",
                                           poke_interval=60,
                                           timeout=60 * 60 * 24)

data_has_arrived = BashOperator(task_id="data_has_arrived",
                                bash_command="echo 'The data has arrived!'",
                                dag=dag)

[data_arrival_sensor, data_file_prefix_sensor] >> data_has_arrived
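As the tests earlier show, extra keyword arguments can be forwarded to ``WasbHook.check_for_prefix`` through ``check_options``. A small variant of the prefix sensor above using that; the task id and the 30-second value are arbitrary choices for the sketch:

```python
# check_options kwargs are passed through to WasbHook.check_for_prefix(),
# i.e. to the underlying Azure list_blobs() call.
quick_prefix_sensor = WasbPrefixSensor(task_id="quick_prefix_sensor",
                                       container_name="landing",
                                       prefix="raw_",
                                       wasb_conn_id="blob_default",
                                       check_options={"timeout": 30},
                                       poke_interval=60,
                                       dag=dag)
```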