import sys
import time

import check_rc
import create_rc
import create_tables
import etl


def main():
    """Launches all the steps required to create the data tables in Redshift."""
    # Launches the creation of an IAM role and a Redshift cluster if they don't already exist:
    client, cluster_name = create_rc.main()

    # Check the Redshift cluster's availability. If available, create the tables:
    cluster_status = check_rc.check_cluster_status(client, cluster_name)
    if cluster_status == 'creating':
        print(
            f"Cluster '{cluster_name}' is being created.\n"
            "This can take several minutes. Do you want to Wait (W) or Exit (Q)?\n"
            "Exiting won't interrupt cluster creation. You can resume later by re-launching main.py"
        )
        valid_choices = ['q', 'Q', 'w', 'W']
        # advanced_input() is a prompt helper; a sketch follows this function.
        waiting = advanced_input(valid_choices)
        if waiting.lower() == 'q':
            sys.exit(0)
        elif waiting.lower() == 'w':
            print("Waiting...")
            while cluster_status == 'creating':
                time.sleep(20)
                cluster_status = check_rc.check_cluster_status(client, cluster_name)
                print(f"Waiting... cluster status: {cluster_status}.")

    if cluster_status == 'available':
        cluster = client.describe_clusters(ClusterIdentifier=cluster_name)['Clusters'][0]
        # TODO: remove local reference to 'dwh.cfg'.
        create_rc.update_section_key('dwh.cfg', 'CLUSTER', 'cl_endpoint',
                                     cluster['Endpoint']['Address'])
        # When the cluster is available, ask the user whether to launch the ETL process:
        print(
            f"Cluster '{cluster_name}' available.\n"
            "Do you want to create tables and launch the ETL process? Yes (Y), No (N)?\n"
            "This will drop existing tables, re-create them and load data."
        )
        valid_choices = ['y', 'Y', 'n', 'N']
        launch_etl = advanced_input(valid_choices)
        if launch_etl.lower() == 'y':
            create_tables.main()
            etl.main()
        else:
            sys.exit(0)
    else:
        print(
            f"Cluster '{cluster_name}' current status: '{cluster_status}'.\n"
            "Please activate or repair the cluster and relaunch the program.\n"
            "Exiting."
        )
        sys.exit(1)

    print("The End.")
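# Hedged sketch: main() above calls advanced_input(), which is not shown in this
# excerpt. Below is a minimal, hypothetical version that simply re-prompts until
# the user enters one of the accepted choices; the project's real helper may differ.
def advanced_input(valid_choices):
    """Prompt until the user types one of the accepted choices (sketch)."""
    choice = input("> ").strip()
    while choice not in valid_choices:
        print(f"Please enter one of: {', '.join(valid_choices)}")
        choice = input("> ").strip()
    return choice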
import sys

import etl


def test_rps__global_package__fs():
    # Simulate a command-line invocation of the ETL driver, then run it.
    sys.argv = [
        "./etl.py",
        "--debug",
        "--config=test",
        "--task=rps",
        "--step=e",
        "--source=global_package",
        "--dest=fs",
    ]
    etl.main()
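# Hedged sketch: the test above drives etl.main() entirely through sys.argv, so
# it presumes the argument parser inside etl accepts flags along these lines.
# This is an illustrative reconstruction, not the actual parser defined in etl.py.
import argparse

PARSER = argparse.ArgumentParser(description="ETL driver (sketch)")
PARSER.add_argument("--debug", action="store_true")
PARSER.add_argument("--config")   # e.g. "test"
PARSER.add_argument("--task")     # e.g. "rps"
PARSER.add_argument("--step")     # e.g. "e"
PARSER.add_argument("--source")   # e.g. "global_package"
PARSER.add_argument("--dest")     # e.g. "fs"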
import create_tables
import etl


def execute():
    """
    Execute the complete ETL process.

    Create the database tables, then insert the JSON files into them.
    """
    create_tables.main()
    print('All tables are created!')
    print('-' * 70)
    etl.main()
    print('-' * 70)
    print('OMG is done!')
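# Hedged sketch: in pipelines shaped like execute() above, create_tables.main()
# conventionally opens a database connection, drops any existing tables, and
# re-creates them. Everything below (the sql_queries module, the DSN) is a
# placeholder for illustration; the project's real implementation is not shown.
import psycopg2

from sql_queries import create_table_queries, drop_table_queries  # hypothetical module


def main():
    # Placeholder connection string; replace with the project's real settings.
    conn = psycopg2.connect("host=localhost dbname=example user=example password=example")
    cur = conn.cursor()

    # Drop existing tables, then re-create them from the DDL statements.
    for query in drop_table_queries + create_table_queries:
        cur.execute(query)
        conn.commit()

    conn.close()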
import sys

# Make the bundled Google App Engine SDK and its vendored libraries importable.
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine'
)
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/fancy_urllib'
)
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/jinja2-2.6'
)
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2'
)
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webob-1.2.3'
)
sys.path.insert(
    0, '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/yaml/lib'
)

import argparse

import etl

if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(add_help=False, parents=[etl.PARSER])
    etl.main(PARSER.parse_args())
import sys

# Make the locally installed Google App Engine SDK and its vendored libraries importable.
sys.path.insert(0, '/home/smaini/BigDataMOOC')
sys.path.insert(0, '/home/smaini/google_appengine')
sys.path.insert(0, '/home/smaini/google_appengine/lib/fancy_urllib')
sys.path.insert(0, '/home/smaini/google_appengine/lib/jinja2-2.6')
sys.path.insert(0, '/home/smaini/google_appengine/lib/webapp2-2.5.2')
sys.path.insert(0, '/home/smaini/google_appengine/lib/webob-1.2.3')
sys.path.insert(0, '/home/smaini/google_appengine/lib/yaml/lib')

import argparse

import etl

if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(add_help=False, parents=[etl.PARSER])
    etl.main(PARSER.parse_args())
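# Hedged sketch: the two launcher scripts above repeat the same sys.path
# bootstrapping with different SDK locations. One way to factor that out is to
# derive the vendored-library paths from a single SDK root; the root below is
# environment-specific and the library list simply mirrors the launchers above.
import os
import sys

GAE_SDK_ROOT = os.environ.get('GAE_SDK_ROOT', '/home/smaini/google_appengine')
GAE_LIBS = ['fancy_urllib', 'jinja2-2.6', 'webapp2-2.5.2', 'webob-1.2.3', 'yaml/lib']

sys.path.insert(0, GAE_SDK_ROOT)
for lib in GAE_LIBS:
    sys.path.insert(0, os.path.join(GAE_SDK_ROOT, 'lib', lib))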
import etl

etl.main()