# Example 1
	# NOTE(review): this is the body of an enclosing loop/function over `cc`
	# whose header lies above this excerpt — presumably one iteration per
	# country/category code; confirm against the full file.
	# Build a timestamped archive destination: gs://<bucket>/archive/<cc>/<YYYYMMDD_HH_MM>
	current_time = datetime.today().strftime("%Y%m%d_%H_%M") 
	GCS_dir_archive = "gs://{}/archive/{}/{}".format(bucket_name,cc,current_time)
	# Working-copy destination under the temp GCS directory.
	tempGCS_filepath = os.path.join(tempGCS_dir_temp,cc)
	# Upload the source file uncompressed to the temp GCS location.
	t4 = FileToGoogleCloudStorageOperator(task_id='uploadToGCS_{}'.format(cc),
		src=input_t4,
		dst=tempGCS_filepath,
		google_cloud_storage_conn_id = storage_connection_id, 
		gzip = False,
		dag=dag)
	# Second upload of the same source file: gzip-compressed copy kept in the
	# timestamped archive path built above.
	t4_archive = FileToGoogleCloudStorageOperator(task_id='uploadToGCS_archive_{}'.format(cc),
		src=input_t4,
		dst=GCS_dir_archive,
		google_cloud_storage_conn_id = storage_connection_id, 
		gzip = True,
		dag=dag)
	# Both uploads run only after t3 (defined outside this excerpt) completes.
	t4.set_upstream(t3)
	t4_archive.set_upstream(t3)
	# Collect the upload task and its temp path so later stages (outside this
	# excerpt) can fan in / clean up.
	writeToGCS_task.append(t4)
	tempGCS_dir_paths.append(tempGCS_filepath)



# TODO: the load schema is still undefined — fill this in before use.
schema = None

# No-op merge point: the parallel upload branches created above all feed
# into this task so the next stage has a single upstream to hang off.
dummy_task = DummyOperator(task_id="forkMerge", dag=dag)
# Example 2
    # (tail of the default_args dict — its opening brace lies above this excerpt)
    'email': ['*****@*****.**'],  # alert recipient (address redacted in this listing)
    'email_on_failure': True,     # email when a task instance fails
    'email_on_retry': False,      # stay quiet on automatic retries
    'retries': 2,                 # retry each failed task up to twice...
    'retry_delay': timedelta(minutes=5),  # ...waiting 5 minutes between attempts
}

# Daily pipeline: placeholder start -> JSON-to-CSV conversion -> GCS upload.
# Retry and alerting behaviour comes from default_args above.
dag = DAG('site-visits-dag',
          default_args=default_args,
          schedule_interval='@daily')

# A local-file sensor originally gated the pipeline here; it is disabled,
# so the chain begins with a no-op placeholder instead.
t1 = DummyOperator(task_id='op1', dag=dag)

# Convert the incoming JSON feed to CSV via the project helper module.
t2 = PythonOperator(task_id='python_task',
                    python_callable=convert_json_to_csv.main,
                    dag=dag)

# Push the generated CSV into the demo bucket.
t3 = FileToGoogleCloudStorageOperator(
    task_id='fileToGCS',
    src='/usr/local/demo/outgoing/site-visits.csv',
    dst='site-visits',
    bucket='springml-demo',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)

# Wire the dependencies with Airflow's bitshift composition — equivalent to
# t3.set_upstream(t2); t2.set_upstream(t1).
t1 >> t2 >> t3