def test_poke(self):
    self._create_cluster()  # the sensor needs an existing mocked cluster to find
    op = AwsRedshiftClusterSensor(task_id='test_cluster_sensor',
                                  poke_interval=1,
                                  timeout=5,
                                  aws_conn_id='aws_default',
                                  cluster_identifier='test_cluster',
                                  target_status='available')
    self.assertTrue(op.poke(None))
def test_poke_cluster_not_found(self):
    self._create_cluster()
    op = AwsRedshiftClusterSensor(task_id='test_cluster_sensor',
                                  poke_interval=1,
                                  timeout=5,
                                  aws_conn_id='aws_default',
                                  cluster_identifier='test_cluster_not_found',
                                  target_status='cluster_not_found')
    self.assertTrue(op.poke(None))
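Both tests depend on a _create_cluster helper that the excerpt does not show. A minimal sketch of what it plausibly looks like, assuming the test class is wrapped in moto's @mock_redshift decorator so that boto3 talks to a mocked account instead of real AWS (the class name, region, and cluster parameters below are illustrative assumptions):

import unittest

import boto3
from moto import mock_redshift


@mock_redshift
class TestAwsRedshiftClusterSensor(unittest.TestCase):

    @staticmethod
    def _create_cluster():
        # Register a cluster named 'test_cluster' in the mocked account;
        # test_poke then sees it as 'available', while
        # test_poke_cluster_not_found looks up a different identifier
        # and gets 'cluster_not_found'.
        client = boto3.client('redshift', region_name='us-east-1')
        client.create_cluster(
            ClusterIdentifier='test_cluster',
            NodeType='dc1.large',
            MasterUsername='admin',
            MasterUserPassword='mock_password',
        )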
    schedule_interval=None,
    description="Create Redshift cluster and tables.",
    start_date=datetime.utcnow(),
)

start_dag_task = DummyOperator(task_id="start_dag", dag=dag)

create_redshift_task = CreateRedshiftClusterOperator(
    task_id="create_redshift_cluster",
    dag=dag,
    config=redshift_config,
)

wait_for_redshift_task = AwsRedshiftClusterSensor(
    task_id="wait_for_redshift_cluster",
    cluster_identifier=redshift_cluster_id,
    dag=dag,
)

save_redshift_endpoint_task = SaveRedshiftHostOperator(
    task_id="save_redshift_endpoint",
    cluster_identifier=redshift_cluster_id,
    dag=dag,
    config=redshift_config,
)

create_schemas_task = PostgresOperator(
    task_id="create_schemas",
    sql=[
        "create schema if not exists stage;",
        "create schema if not exists analytics;",
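The snippet breaks off inside create_schemas_task, so the dependency wiring is not shown. Given the task names, a plausible linear ordering (an assumption, not confirmed by the excerpt) would be:

start_dag_task >> create_redshift_task >> wait_for_redshift_task
wait_for_redshift_task >> save_redshift_endpoint_task >> create_schemas_task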
# Create a Redshift cluster
create_redshift_cluster = AWSRedshiftOperator(
    task_id="create_redshift_cluster",
    dag=dag,
    conn_id=AWS_CONN_ID,
    redshift_conn_id=AWS_REDSHIFT_CONN_ID,
    time_zone=local_tz,
    cluster_identifier=(
        f"news-nlp-redshift-{datetime.now(local_tz).strftime('%Y-%m-%d-%H-%M')}"
    ),
)

# Wait for the Redshift cluster to be ready
redshift_ready_sensor = AwsRedshiftClusterSensor(
    task_id="sense_redshift_cluster",
    dag=dag,
    cluster_identifier=(
        "{{ task_instance.xcom_pull('create_redshift_cluster', key='return_value')[0] }}"
    ),
    target_status='available',
    aws_conn_id=AWS_CONN_ID,
)

# Load the data in star-schema format from S3 into Redshift
tables = ['dim_date', 'dim_title', 'dim_ner', 'fact_news']
table_load_ops = [
    S3ToRedshiftTransfer(
        task_id=f"upload_{table}_to_redshift",
        dag=dag,
        redshift_conn_id=AWS_REDSHIFT_CONN_ID,
        aws_conn_id=AWS_CONN_ID,
        schema=os.environ.get('AWS_REDSHIFT_SCHEMA'),
        table=table,
        s3_bucket=os.environ.get('AWS_S3_BUCKET'),
        s3_key=f'{table}.csv',
    )
    for table in tables
]
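The excerpt ends inside the list of load tasks; the closing "for table in tables" above is reconstructed from context. For the pipeline to work, the loads would typically be gated on the sensor, e.g. (again an assumption based on the task names):

create_redshift_cluster >> redshift_ready_sensor >> table_load_ops

Airflow accepts a list on the right-hand side of >>, so all four S3ToRedshiftTransfer tasks become downstream of the sensor and only start once the cluster reports 'available'.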