def main():
    """Command-line entry point for gfw-sync: parse arguments, build the requested
    layer from the config table, update it, and stamp the config with the run time."""

    # Parse commandline arguments
    parser = argparse.ArgumentParser(description='Get layer name, environment and verbosity for gfw-sync.')
    parser.add_argument('--environment', '-e', default='DEV', choices=('DEV', 'PROD'),
                        help='the environment/config files to use for this run')
    parser.add_argument('--layer', '-l', required=True,
                        help='the data layer to process; must match a value for tech_title in the config')
    parser.add_argument('--verbose', '-v', default='debug', choices=('debug', 'info', 'warning', 'error'),
                        help='set verbosity level to print and write to file')
    args = parser.parse_args()

    # Instantiate logger; write to {dir}\logs
    logging = logger.build_logger(args.verbose)

    # Fetch the environment settings once; the original called get_settings twice
    # for the same banner line
    tool_info = settings.get_settings(args.environment)['tool_info']
    logging.info("\n{0}\n{1} v{2}\n{0}\n".format('*' * 50, tool_info['name'], tool_info['version']))
    logging.critical('Starting | {0}'.format(args.layer))

    # Open the correct sheet of the config table (PROD | DEV) and get the layerdef
    # Config table: https://docs.google.com/spreadsheets/d/1pkJCLNe9HWAHqxQh__s-tYQr9wJzGCb6rmRBPj8yRWI/edit#gid=0
    layerdef = gs.get_layerdef(args.layer, args.environment)

    # Pass the layerdef to the build_layer function
    layer = layer_decision_tree.build_layer(layerdef, args.environment)

    # Update the layer in the output data sources
    layer.update()

    # Update the last-updated timestamp in the config table
    gs.update_gs_timestamp(args.layer, args.environment)

    # Delete scratch workspace
    layer.cleanup()

    logging.critical('Finished | {0}'.format(args.layer))
def __init__(self, layerdef):
    """Populate layer attributes from the layerdef dict.

    Every public attribute is backed by a ``_private`` slot that is primed to
    None before the public assignment (presumably so property setters can run
    validation — the setters live elsewhere in the file).
    """
    logging.debug('Starting layer class')

    self._name = None
    self.name = layerdef['tech_title']

    self._gfw_env = None
    self.gfw_env = layerdef['gfw_env']

    # Scratch workspace lives under the env-specific scratch dir, in a folder
    # named after the layer; must come after gfw_env is set.
    self._scratch_workspace = None
    self.scratch_workspace = os.path.join(
        settings.get_settings(self.gfw_env)['paths']['scratch_workspace'], self.name)

    self._layer_type = None
    self.layer_type = layerdef['type']

    # The remaining attributes map 1:1 to layerdef keys. setattr(self, a, v)
    # is exactly equivalent to ``self.a = v``, so any property setters still
    # fire, in the same order as the original explicit assignments.
    for attr in ('field_map', 'source', 'esri_service_output',
                 'cartodb_service_output', 'merge_where_field',
                 'delete_features_input_where_clause', 'archive_output',
                 'download_output', 'esri_mosaics', 'transformation',
                 'global_layer', 'add_country_value',
                 'vector_to_raster_output', 'tile_cache_service',
                 'post_process_script'):
        setattr(self, '_' + attr, None)
        setattr(self, attr, layerdef[attr])
def main():
    """Command-line entry point for gfw-sync: parse arguments, build the requested
    layer from the config table, update it, and stamp the config with the run time."""

    # Parse commandline arguments
    parser = argparse.ArgumentParser(
        description='Get layer name, environment and verbosity for gfw-sync.')
    parser.add_argument(
        '--environment', '-e', default='staging', choices=('staging', 'prod'),
        help='the environment/config files to use for this run')
    parser.add_argument(
        '--layer', '-l', required=True,
        help='the data layer to process; must match a value for tech_title in the config')
    parser.add_argument('--verbose', '-v', default='debug',
                        choices=('debug', 'info', 'warning', 'error'),
                        help='set verbosity level to print and write to file')
    args = parser.parse_args()

    # Instantiate logger; write to {dir}\logs
    logging = logger.build_logger(args.verbose)

    # Fetch the environment settings once; the original called get_settings twice
    # for the same banner line
    tool_info = settings.get_settings(args.environment)['tool_info']
    logging.info("\n{0}\n{1} v{2}\n{0}\n".format('*' * 50, tool_info['name'], tool_info['version']))
    logging.critical('Starting | {0}'.format(args.layer))

    # Open the correct sheet of the config table (staging | prod) and get the layerdef
    # Config table: https://docs.google.com/spreadsheets/d/1pkJCLNe9HWAHqxQh__s-tYQr9wJzGCb6rmRBPj8yRWI/edit#gid=0
    layerdef = gs.get_layerdef(args.layer, args.environment)

    # Pass the layerdef to the build_layer function
    layer = layer_decision_tree.build_layer(layerdef, args.environment)

    # Update the layer in the output data sources
    layer.update()

    # Update the last-updated timestamp in the config table
    gs.update_gs_timestamp(args.layer, args.environment)

    logging.critical('Finished | {0}'.format(args.layer))
def __init__(self, layerdef):
    """Populate layer attributes from the layerdef dict.

    Each public attribute is preceded by priming its ``_private`` backing slot
    to None (presumably so property setters elsewhere in the file can validate
    on assignment).
    """
    logging.debug('Starting layer class')

    self._name = None
    self.name = layerdef['tech_title']

    self._gfw_env = None
    self.gfw_env = layerdef['gfw_env']

    # Scratch workspace is the env-specific scratch dir plus a per-layer
    # folder; depends on gfw_env being set first.
    self._scratch_workspace = None
    self.scratch_workspace = os.path.join(
        settings.get_settings(self.gfw_env)['paths']['scratch_workspace'],
        self.name)

    self._layer_type = None
    self.layer_type = layerdef['type']

    # Remaining attributes map 1:1 to layerdef keys; setattr(self, a, v) is
    # exactly ``self.a = v`` so property setters still run, in original order.
    for attr in ('field_map', 'source', 'esri_service_output',
                 'cartodb_service_output', 'merge_where_field',
                 'delete_features_input_where_clause', 'archive_output',
                 'download_output', 'transformation', 'global_layer',
                 'add_country_value', 'vector_to_raster_output',
                 'tile_cache_service', 'post_process_script'):
        setattr(self, '_' + attr, None)
        setattr(self, attr, layerdef[attr])
def post_process_script(self, p):
    """Setter: resolve a post-process script name to its absolute path.

    A falsy value is normalized to None. A named script must exist under
    {root_dir}/postprocess; if it does not, the run is aborted.
    """
    # Empty/None means "no post-processing for this layer"
    if not p:
        self._post_process_script = None
        return

    root_dir = settings.get_settings(self.gfw_env)['paths']['root_dir']
    script_path = os.path.join(root_dir, 'postprocess', p)

    if not os.path.exists(script_path):
        logging.error('Post processing script {0} specified, but not '
                      'in expected location {1}. Exiting'.format(p, script_path))
        sys.exit(1)

    self._post_process_script = script_path
def post_process_script(self, p):
    """Setter: store the full path of the layer's post-process script.

    Falsy input stores None. Otherwise the script must already exist at
    {root_dir}/postprocess/<p>; a missing file is fatal.
    """
    resolved = None
    if p:
        base = settings.get_settings(self.gfw_env)['paths']['root_dir']
        candidate = os.path.join(base, 'postprocess', p)

        if not os.path.exists(candidate):
            logging.error('Post processing script {0} specified, but not '
                          'in expected location {1}. Exiting'.format(
                              p, candidate))
            sys.exit(1)

        resolved = candidate

    self._post_process_script = resolved
def __init__(self, layerdef):
    """Initialize the datasource wrapper from a layerdef dict."""
    logging.debug('Starting datasource class')
    self.layerdef = layerdef

    # Backing slot primed to None before each public assignment (presumably
    # so property setters elsewhere can validate on assignment)
    self._name = None
    self.name = layerdef['tech_title']

    self._data_source = None
    self.data_source = layerdef['source']

    self._gfw_env = None
    self.gfw_env = layerdef['gfw_env']

    # Downloads land in {scratch_workspace}/downloads/{layer name}
    self._download_workspace = None
    scratch_dir = settings.get_settings(self.gfw_env)['paths']['scratch_workspace']
    self.download_workspace = os.path.join(scratch_dir, 'downloads', self.name)
def post_process(layerdef):
    """
    Run the forma_layer script to trigger the post process
    Postprocess will download the update output from country page analysis,
    add headers, and reupload to S3.
    :param layerdef: the layerdef
    :return:
    """
    logging.debug('Starting PostProcess')

    # Download GEE output to temp workspace
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    gee_path = 'gs://forma-2017/tmp/csv/forma_alerts_2012-01-01_{}.csvee_export.csv'.format(
        today)
    download_workspace = os.path.join(
        settings.get_settings('prod')['paths']['scratch_workspace'],
        'downloads', 'forma')

    # BUG FIX: the original used subprocess.Popen without waiting, so the
    # 'aws s3 cp' below could run before gsutil finished (or after it silently
    # failed). check_call blocks until the copy completes and raises
    # CalledProcessError on a non-zero exit code.
    # NOTE(review): shell=True on a fixed command string — inputs are not
    # user-controlled here, but a list argv would be safer if gsutil is on PATH.
    gsutil_cmd = 'gsutil cp {0} {1}'.format(gee_path, download_workspace)
    subprocess.check_call(gsutil_cmd, shell=True)
    logging.debug('Csv copied from Google to File')

    # Copy GEE output to S3; os.path.join instead of hand-built '\\' separator
    temp_file = os.path.join(download_workspace, os.path.basename(gee_path))
    cmd = ['aws', 's3', 'cp', temp_file, 's3://gfw2-data/alerts-tsv/forma.csv']
    subprocess.check_call(cmd)
    logging.debug('File copied to S3')

    # Charlie to trigger country page analyses
    # >>>>>>>>>>>>>>country analysis<<<<<<<<<<<<<<<

    # copy output from Country pages down
    # will be replaced with: current_s3_path = update_elastic.get_current_hadoop_output('forma', 's3')
    # today_folder = datetime.datetime.today().strftime('%Y%m%d')
    # current_s3_path = 's3://gfw2-data/alerts-tsv/temp/output-forma-summary-{}/part-'.format(today_folder)
    current_s3_path = 's3://gfw2-data/alerts-tsv/temp/output-forma-summary-20170630/part-'

    # add headers to country analysis output
    header_text = 'alert_delta,lat,long,country_iso,day,value'
    update_elastic.add_headers_to_s3(layerdef, current_s3_path, header_text)
    logging.debug('headers added to analysis output')
def __init__(self, layerdef):
    """Initialize the datasource wrapper from a layerdef dict."""
    logging.debug('Starting datasource class')
    self.layerdef = layerdef

    # Backing-slot-then-assign pattern: setattr(self, a, v) is exactly
    # ``self.a = v``, so any property setters still run, in original order.
    for attr, key in (('name', 'tech_title'),
                      ('data_source', 'source'),
                      ('download_output', 'download_output'),
                      ('carto_table', 'cartodb_service_output'),
                      ('gfw_env', 'gfw_env')):
        setattr(self, '_' + attr, None)
        setattr(self, attr, layerdef[key])

    # Downloads land in {scratch_workspace}/downloads/{layer name}
    self._download_workspace = None
    scratch_dir = settings.get_settings(self.gfw_env)['paths']['scratch_workspace']
    self.download_workspace = os.path.join(scratch_dir, 'downloads', self.name)
def get_layer(self):
    """
    Download the source rasters from S3
    :return: an updated layerdef with the local source for the layer.update() process
    """
    raster_url_list = self.data_source.split(',')

    # always update if it's GLAD, not using s3 bucket system currently
    if self.name == 'umd_landsat_alerts':
        paths_dict = settings.get_settings(self.gfw_env)['paths']
        scratch_workspace = os.path.join(paths_dict['scratch_workspace'], self.name)
        download_glad_gee.download(scratch_workspace)

    updated_raster_url_list = self.find_updated_data(raster_url_list)

    if not updated_raster_url_list:
        # Important for the script that reads the log file and sends an email
        # Including this 'Checked' message will show that we checked the layer but it didn't need updating
        logging.debug(
            'Checked S3 bucket, no new data as compared to last timestamp in gfw-sync2 config'
        )
        logging.critical('Checked | {0}'.format(self.name))
        sys.exit(0)

    # Pull each updated raster down and point the layerdef at the local copies
    self.layerdef['source'] = [
        self.download_file(ras, self.download_workspace)
        for ras in updated_raster_url_list
    ]

    return self.layerdef