def processing_std_spi_monthly(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                               pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                               version='', starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_spi_monthly')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates, proc_lists=proc_lists)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_spi_monthly')
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_spi_monthly.sqlite'),
                     checksum_level=0)
        spec_logger.info("After running the pipeline %s" % 'processing_std_spi_monthly')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id,
                          history_file=os.path.join(es_constants.log_dir, '.ruffus_history_spi_monthly.sqlite'),
                          checksum_level=0)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True
def processing_std_seas_cum(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                            pipeline_printout_graph_level=0, prod='', starting_sprod='', native_mapset='',
                            version='', mapset=None, starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_seas_cum')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, native_mapset=native_mapset,
                                 version=version, mapset=mapset, starting_dates=starting_dates,
                                 proc_lists=proc_lists, logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_seas_cum')
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history.sqlite'))
        tasks = pipeline_get_task_names()
        spec_logger.info("Run the pipeline %s" % tasks[0])
        spec_logger.info("After running the pipeline %s" % 'processing_std_seas_cum')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    return True
def processing_std_gradient(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                            pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                            version='', starting_dates=None, write2file=None, logfile=None,
                            touch_files_only=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_gradient')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_std_chla_gradient.sqlite'),
                     touch_files_only=touch_files_only, checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    # Close the optional printout file (it was left open in the original code)
    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return None
def clean_corrupted_files(check_directory, logger=None, dry_run=False):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    logger.info("Entering routine %s " % 'clean_corrupted_files')

    # Get list of .tif files under the directory tree
    list_files = []
    for root, dirnames, filenames in os.walk(check_directory):
        for filename in fnmatch.filter(filenames, '*.tif'):
            list_files.append(os.path.join(root, filename))

    if len(list_files) > 0:
        for my_file in list_files:
            logger.debug('Checking file: {0}'.format(my_file))
            # Check the file by using gdalinfo
            command = ['gdalinfo', my_file]
            status = subprocess.call(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if status:
                logger.info('Error in file: {0}'.format(my_file))
                if not dry_run:
                    os.remove(my_file)
                    logger.info('File removed: {0}'.format(my_file))
                else:
                    logger.info('Not removing file {0} - Dry Run'.format(my_file))
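# Usage sketch only (not part of the original module): run a dry check first, then delete.
# The directory and the wrapper name example_clean_corrupted_files are illustrative assumptions.
def example_clean_corrupted_files(check_directory='/data/processing', logger=None):
    # Report corrupted .tif files without touching them
    clean_corrupted_files(check_directory, logger=logger, dry_run=True)
    # Uncomment to actually remove the corrupted files
    # clean_corrupted_files(check_directory, logger=logger, dry_run=False)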
def processing_std_monavg(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                          pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                          version='', starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_monavg')

    history_file = os.path.join(es_constants.log_dir, '.ruffus_history_{0}_{1}.sqlite').format(prod, starting_sprod)

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates, logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    spec_logger.info("Entering routine %s" % 'processing_std_monavg')

    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        # Option to be added to pipeline_run to force files to appear up-to-date: touch_files_only = True
        pipeline_run(verbose=pipeline_run_level, history_file=history_file, checksum_level=0,
                     touch_files_only=False)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    # Close the optional printout file (it was left open in the original code)
    if write2file is not None:
        fwrite_id.close()
def processing_std_lsasaf_et(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                             pipeline_printout_graph_level=0, prod='', starting_sprod='', native_mapset='',
                             mapset='', version='', starting_dates=None, write2file=None, logfile=None):

    # Note: the native mapset is hard-coded here and overrides the passed argument
    native_mapset = 'MSG-satellite-3km'
    target_mapset = mapset

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_lsasaf_et')

    proc_lists = None
    proc_lists = create_pipeline(prod, starting_sprod, native_mapset, target_mapset, version,
                                 starting_dates=starting_dates, proc_lists=proc_lists)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_lsasaf_et')
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_lsasaf_et.sqlite'),
                     checksum_level=0)
        tasks = pipeline_get_task_names()
        spec_logger.info("After running the pipeline %s" % 'processing_std_lsasaf_et')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True
def delpid(self):
    # Change to deal with forking in processing (otherwise the pidfile is deleted by child process)
    logger = log.my_logger("lib.python.daemon")
    # my_pid = os.getpgid
    # logger.info("My Pid: %i" % my_pid)
    # pid_file = open(self.pidfile)
    # pid = pid_file.read()
    # logger.info("Pid: %i" % pid)
    # if pid == my_pid:
    logger.info("Removing the Pid")
    os.remove(self.pidfile)
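# Sketch only: one way to implement the check hinted at by the commented-out lines above, i.e. remove
# the pidfile only when the current process is the one recorded in it, so a forked child does not
# delete the parent's pidfile. The method name delpid_safe is hypothetical; it assumes the same
# self.pidfile attribute as delpid above.
def delpid_safe(self):
    logger = log.my_logger("lib.python.daemon")
    try:
        with open(self.pidfile) as pid_file:
            recorded_pid = int(pid_file.read().strip())
    except (IOError, OSError, ValueError):
        recorded_pid = None
    if recorded_pid is not None and recorded_pid == os.getpid():
        logger.info("Removing the Pid")
        os.remove(self.pidfile)
    else:
        logger.info("Not removing the pidfile: it belongs to pid %s" % str(recorded_pid))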
def processing_std_olci_wrr(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                            pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                            version='', starting_dates=None, nrt_products=True, write2file=None,
                            logfile=None, touch_files_only=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_olci_wrr')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates, nrt_products=nrt_products, logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, touch_files_only=touch_files_only,
                     history_file=os.path.join(es_constants.log_dir,
                                               '.ruffus_history_' + prod + '_' + version + '.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)  # , output_stream=fout)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True
class TestFunctionsPickle(TestCase):

    logger = log.my_logger(__name__)
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + 'Test_info'
    processed_info = {'length_proc_list': 0,
                      'time_latest_exec': datetime.datetime.now(),
                      'time_latest_copy': datetime.datetime.now()}

    def test_write_pickle(self):
        self.logger.info('Pickle filename is: %s', self.processed_info_filename)
        f.dump_obj_to_pickle(self.processed_info, self.processed_info_filename)
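    # Sketch of a companion round-trip test, not in the original class. It assumes the module aliased
    # as 'f' also exposes restore_obj_from_pickle (used elsewhere in this codebase as
    # functions.restore_obj_from_pickle).
    def test_write_and_read_pickle(self):
        f.dump_obj_to_pickle(self.processed_info, self.processed_info_filename)
        restored = f.restore_obj_from_pickle(None, self.processed_info_filename)
        self.assertIsNotNone(restored)
        self.assertEqual(restored['length_proc_list'], 0)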
def processing_std_modis_pp(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                            pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                            version='', starting_dates=None, write2file=None, logfile=None,
                            nrt_products=True, update_stats=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_modis_pp')

    # Forward the starting_dates argument (the original call passed None here)
    create_pipeline(prod, starting_sprod, mapset, version, starting_dates=starting_dates, proc_lists=None,
                    nrt_products=nrt_products, update_stats=update_stats)

    spec_logger.info("Entering routine %s" % 'processing modis - Primary Production')

    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    return True
def processing_std_rain_onset(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                              pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                              version='', starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_rain_onset')

    create_pipeline(prod, starting_sprod, mapset, version, starting_dates=starting_dates, proc_lists=None)

    spec_logger.info("Entering routine %s" % 'processing rain onset')

    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        # Note: the history file name below is inherited from the modis_pp routine
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0, one_second_per_job=True, multiprocess=1, multithread=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')
def remove_old_files(productcode, subproductcode, version, mapsetcode, product_type, nmonths, logger=None):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    logger.info("Entering routine %s " % 'remove_old_files')

    # Check the installation type
    sysSettings = functions.getSystemSettings()
    if sysSettings['type_installation'] == 'Server':
        logger.info("File housekeeping not done on Server ")
        return

    prod_subdir = functions.set_path_sub_directory(productcode, subproductcode, product_type, version, mapsetcode)
    prod_dir = es_constants.es2globals['processing_dir'] + os.path.sep + prod_subdir
    list_files = sorted(glob.glob(prod_dir + os.path.sep + '*.tif'))

    # Define the earliest date to be kept
    month_now = datetime.date.today().month
    year_now = datetime.date.today().year

    for my_file in list_files:
        # Extract the date
        date = functions.get_date_from_path_filename(os.path.basename(my_file))
        date_yyyy = int(date[0:4])
        date_month = int(date[4:6])
        if date_yyyy < year_now or (date_month + nmonths) <= month_now:
            logger.debug("Deleting file %s " % my_file)
            os.remove(my_file)
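# Usage sketch only (hypothetical product/mapset codes, not taken from the configuration):
# keep only the most recent 6 months of an ingested dataset.
def example_remove_old_files():
    remove_old_files(productcode='fewsnet-rfe', subproductcode='10d', version='2.0',
                     mapsetcode='FEWSNET-Africa-8km', product_type='Ingest', nmonths=6)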
def processing_modis_chla(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                          pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='',
                          version='', starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_chla')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    spec_logger.info("Entering routine %s" % 'processing_modis')

    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')
def loop_get_internet(dry_run=False, test_one_source=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:
            # Check internet connection (or continue)
            if not functions.internet_on():
                logger.error("The computer is not currently connected to the internet. Wait 1 minute.")
                time.sleep(60)
            else:
                try:
                    time_sleep = user_def_sleep
                    logger.debug("Sleep time set to : %s.", time_sleep)
                except:
                    logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                    time_sleep = 60

                logger.info("Reading active INTERNET data sources from database")
                internet_sources_list = querydb.get_active_internet_sources()

                # Loop over active triggers
                for internet_source in internet_sources_list:
                    try:
                        if test_one_source and (internet_source.internet_id != test_one_source):
                            logger.info("Running in test mode, and source is not %s. Continue.", test_one_source)
                            continue

                        execute_trigger = True
                        # Get this from the pads database table (move from internet_source 'pull_frequency' to the
                        # pads table, so that it can be exploited by eumetcast triggers as well). It is in minutes.
                        pull_frequency = internet_source.pull_frequency

                        # Manage the case of files to be continuously downloaded (delay < 0)
                        if pull_frequency < 0:
                            do_not_consider_processed_list = True
                            delay_time_source_minutes = -pull_frequency
                        else:
                            do_not_consider_processed_list = False
                            delay_time_source_minutes = pull_frequency

                        if sys.platform == 'win32':
                            internet_id = str(internet_source.internet_id).replace(':', '_')
                        else:
                            internet_id = str(internet_source.internet_id)

                        logger_spec = log.my_logger('apps.get_internet.' + internet_id)
                        logger.info("Processing internet source %s.", internet_source.descriptive_name)

                        # Create objects for list and info
                        processed_info_filename = es_constants.get_internet_processed_list_prefix + str(internet_id) + '.info'

                        # Restore/Create Info
                        processed_info = None
                        processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)

                        if processed_info is not None:
                            # Check the delay
                            current_delta = datetime.datetime.now() - processed_info['time_latest_exec']
                            current_delta_minutes = int(current_delta.seconds / 60)
                            if current_delta_minutes < delay_time_source_minutes:
                                logger.debug("Still waiting up to %i minute - since latest execution.",
                                             delay_time_source_minutes)
                                execute_trigger = False
                        else:
                            # Create processed_info object
                            processed_info = {'length_proc_list': 0,
                                              'time_latest_exec': datetime.datetime.now(),
                                              'time_latest_copy': datetime.datetime.now()}
                            execute_trigger = True

                        if execute_trigger:
                            # Restore/Create List
                            processed_list = []
                            if not do_not_consider_processed_list:
                                processed_list_filename = es_constants.get_internet_processed_list_prefix + internet_id + '.list'
                                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)

                            processed_info['time_latest_exec'] = datetime.datetime.now()

                            logger.debug("Create current list of file to process for source %s.",
                                         internet_source.internet_id)
                            if internet_source.user_name is None:
                                user_name = "anonymous"
                            else:
                                user_name = internet_source.user_name

                            if internet_source.password is None:
                                password = "******"
                            else:
                                password = internet_source.password

                            usr_pwd = str(user_name) + ':' + str(password)

                            logger_spec.debug(" Url is %s.", internet_source.url)
                            logger_spec.debug(" usr/pwd is %s.", usr_pwd)
                            logger_spec.debug(" regex is %s.", internet_source.include_files_expression)

                            internet_type = internet_source.type

                            if internet_type == 'ftp' or internet_type == 'http':
                                # Manage the end_date (added for MODIS_FIRMS)
                                if internet_source.end_date != '':
                                    end_date = internet_source.end_date
                                else:
                                    end_date = None
                                # Note that the following list might contain sub-dirs (it reflects full_regex)
                                try:
                                    current_list = get_list_matching_files(str(internet_source.url),
                                                                           str(usr_pwd),
                                                                           str(internet_source.include_files_expression),
                                                                           internet_type,
                                                                           end_date=end_date)
                                except:
                                    logger.error("Error in creating file lists. Continue")
                                    continue

                            elif internet_type == 'http_tmpl':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_tmpl(str(internet_source.url),
                                                                                  str(internet_source.include_files_expression),
                                                                                  internet_source.start_date,
                                                                                  internet_source.end_date,
                                                                                  str(internet_source.frequency_id))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'motu_client':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_motu(str(internet_source.url),
                                                                                  str(internet_source.include_files_expression),
                                                                                  internet_source.start_date,
                                                                                  internet_source.end_date,
                                                                                  str(internet_source.frequency_id),
                                                                                  str(internet_source.user_name),
                                                                                  str(internet_source.password),
                                                                                  str(internet_source.files_filter_expression))
                                except:
                                    logger.error("Error in creating motu_client lists. Continue")
                                    continue

                            # elif internet_type == 'sentinel_sat':
                            #     # Create the full filename from a 'template' which contains
                            #     try:
                            #         current_list = build_list_matching_files_sentinel_sat(str(internet_source.url),
                            #                                                               str(internet_source.include_files_expression),
                            #                                                               internet_source.start_date,
                            #                                                               internet_source.end_date,
                            #                                                               str(internet_source.frequency_id),
                            #                                                               str(internet_source.user_name),
                            #                                                               str(internet_source.password),
                            #                                                               # str(internet_source.files_filter_expression),
                            #                                                               )
                            #     except:
                            #         logger.error("Error in creating sentinel_sat lists. Continue")
                            #         continue

                            elif internet_type == 'local':
                                logger.info("This internet source is meant to copy data on local filesystem")
                                try:
                                    current_list = get_list_matching_files_dir_local(str(internet_source.url),
                                                                                     str(internet_source.include_files_expression))
                                except:
                                    logger.error("Error in creating date lists. Continue")
                                    continue

                            elif internet_type == 'offline':
                                logger.info("This internet source is meant to work offline (GoogleDrive)")
                                current_list = []

                            else:
                                logger.error("No correct type for this internet source type: %s" % internet_type)
                                current_list = []

                            logger_spec.debug("Number of files currently available for source %s is %i",
                                              internet_id, len(current_list))

                            if len(current_list) > 0:
                                logger_spec.debug("Number of files already copied for trigger %s is %i",
                                                  internet_id, len(processed_list))
                                listtoprocess = []
                                for current_file in current_list:
                                    if len(processed_list) == 0:
                                        listtoprocess.append(current_file)
                                    else:
                                        # if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                                        if current_file not in processed_list:
                                            listtoprocess.append(current_file)

                                logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                                  internet_id, len(listtoprocess))
                                if listtoprocess != set([]):
                                    # # Debug
                                    # toprint = ''
                                    # for elem in listtoprocess:
                                    #     toprint += elem + ','
                                    # logger_spec.info('List in get_list_matching_files: %s' % toprint)
                                    logger_spec.debug("Loop on the found files.")
                                    if not dry_run:
                                        for filename in list(listtoprocess):
                                            logger_spec.debug("Processing file: " + str(internet_source.url) +
                                                              os.path.sep + filename)
                                            try:
                                                if internet_type == 'local':
                                                    shutil.copyfile(str(internet_source['url']) + os.path.sep + filename,
                                                                    es_constants.ingest_dir + os.path.basename(filename))
                                                    result = 0
                                                elif internet_type == 'motu_client':
                                                    result = get_file_from_motu_command(str(filename),
                                                                                        # target_file=internet_source.files_filter_expression,
                                                                                        target_dir=es_constants.ingest_dir,
                                                                                        userpwd=str(usr_pwd))
                                                # elif internet_type == 'sentinel_sat':
                                                #     result = get_file_from_sentinelsat_url(str(filename),
                                                #                                            target_dir=es_constants.ingest_dir)
                                                else:
                                                    result = get_file_from_url(str(internet_source.url) + os.path.sep + filename,
                                                                               target_file=os.path.basename(filename),
                                                                               target_dir=es_constants.ingest_dir,
                                                                               userpwd=str(usr_pwd))
                                                if not result:
                                                    logger_spec.info("File %s copied.", filename)
                                                    processed_list.append(filename)
                                                else:
                                                    logger_spec.warning("File %s not copied: ", filename)
                                            except:
                                                logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')

                            if not dry_run:
                                functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                                functions.dump_obj_to_pickle(processed_info, processed_info_filename)

                        sleep(float(user_def_sleep))

                    # Loop over sources
                    except Exception as inst:
                        logger.error("Error while processing source %s. Continue" % internet_source.descriptive_name)

                sleep(float(user_def_sleep))

    exit(0)
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0, input_products='', output_product='',
                     mapset='', logfile=None):

    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code, 'Ingest',
                                                  out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                         out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Fill the processing list -> some fields to be taken by input products
    output_sprod_group = proc_lists.proc_add_subprod_group("merged")
    output_sprod = proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                               descriptive_name='undefined',
                                               description='undefined',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='10d',
                                               active_default=True)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type,
                                                     version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code,
                                                            out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)

        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
from __future__ import division

from future import standard_library
standard_library.install_aliases()

__author__ = 'analyst'
#
# purpose: Run the script to copy data from an external disk to /data/processing
# author:  M.Clerici
# date:    13.02.2019
# descr:   To be used for feeding an offline computer (e.g. for Training) with a subset from a disk
#
# history: 1.0
#

import sys, os
import glob

from lib.python import es_logging as log

logger = log.my_logger('apps.es2system.ingest_archive')


def copy_data_disk(input_dir=None, dry_run=False):

    target_dir = '/data/processing/exchange/test_data/'

    # Define the list products/version/mapsets
    prod_list = []
    prod_list.append({'prod': 'arc2-rain',
                      'version': '2.0',
                      'mapset': 'ARC2-Africa-11km',
                      'regex': '201*',
_author__ = "Marco Clerici" import sys # import os, time from config import es_constants # from apps.acquisition import ingestion from apps.acquisition import acquisition from lib.python import es_logging as log logger = log.my_logger('apps.acquisition.ingestion') try: command = str(sys.argv[1]) except: logger.fatal("An argument should be provided: status/start/stop") exit(1) # Define pid file and create daemon pid_file = es_constants.ingestion_pid_filename daemon = acquisition.IngestionDaemon(pid_file, dry_run=0) if command == "status": status = daemon.status() print("Current status of the Service: %s" % status) if command == "start": logger.info("Starting ingestion service") daemon.start() if command == "stop": logger.info("Stopping ingestion service") daemon.stop()
_author__ = "Marco Clerici" import sys from config import es_constants from apps.es2system import es2system from lib.python import es_logging as log logger = log.my_logger("apps.es2system.es2system") try: command = str(sys.argv[1]) except: logger.fatal("An argument should be provided: status/start/stop") exit(1) # Define pid file and create daemon pid_file = es_constants.system_pid_filename daemon = es2system.SystemDaemon(pid_file, dry_run=0) if command == "status": status = daemon.status() print("Current status of the Service: %s" % status) if command == "start": logger.info("Starting System service") daemon.start() if command == "stop": logger.info("Stopping System service") daemon.stop()
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir)
        if not os.path.exists(input_dir):
            logger.error("The EUMETCast input directory : %s is not yet mounted.", input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exist.", output_dir)
            # TODO Jurvtk: Create the Ingest Server output directory if it doesn't exist!
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources(echo=echo_query)
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' + eumetcast_source.eumetcast_id)
                logger.debug("Processing eumetcast source %s.", eumetcast_source.eumetcast_id)

                processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(eumetcast_source.eumetcast_id) + '.list'
                processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ", eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug("Create current list of file to process for trigger %s.", eumetcast_source.eumetcast_id)
                current_list = find_files(input_dir, eumetcast_source.filter_expression_jrc)

                # logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                logger_spec.debug("Number of files currently on PC1 for trigger %s is %i",
                                  eumetcast_source.eumetcast_id, len(current_list))

                if len(current_list) > 0:
                    # logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug("Number of files already copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    # logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug("Number of files to be copied for trigger %s is %i",
                                      eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess != set([]):
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(input_dir, filename)):
                                if os.stat(os.path.join(input_dir, filename)).st_mtime < int(time.time()):
                                    logger_spec.debug("Processing file: " + os.path.basename(filename))
                                    if not dry_run:
                                        # Note: 'commands' is the Python 2 module (subprocess.getstatusoutput in Python 3)
                                        if commands.getstatusoutput("cp " + filename + " " + output_dir + os.sep + os.path.basename(filename))[0] == 0:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info['time_latest_copy'] = datetime.datetime.now()
                                            processed_info['length_proc_list'] = len(processed_list)
                                        else:
                                            logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')
                            else:
                                logger_spec.error("File %s removed by the system before being processed.", filename)
                else:
                    logger.debug("Nothing to process - go to next trigger.")
                    pass

                # Iterate over a copy: removing items from the list being iterated would skip elements
                for infile in list(processed_list):
                    if not os.path.exists(infile):
                        processed_list.remove(infile)

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            time.sleep(float(10))

            # except Exception, e:
            #     logger.fatal(str(e))
            #     exit(1)

    exit(0)
def processing_modis_pp(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                        pipeline_printout_graph_level=0, input_products='', output_product='',
                        write2file=None, logfile=None, nrt_products=True, update_stats=True):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    create_pipeline(input_products, output_product, logfile=logfile, nrt_products=nrt_products,
                    update_stats=update_stats)

    spec_logger.info("Entering routine %s" % 'processing modis - Primary Production')

    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')


# def processing_modis_pp_stats_only(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
#                                    pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
#                                    starting_dates=None, write2file=None, logfile=None, input_products='',
#                                    output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=False,
#                                  update_stats=True,
#                                  input_products=input_products,
#                                  output_product=output_product)
#     return result
#
#
# def processing_modis_pp_only(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
#                              pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
#                              starting_dates=None, write2file=None, logfile=None, input_products='', output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=True,
#                                  update_stats=False,
#                                  input_products=input_products,
#                                  output_product=output_product)
#     return result
#
#
# def processing_modis_pp_all(res_queue, pipeline_run_level=0, pipeline_printout_level=0, pipeline_printout_graph_level=0,
#                             prod='', starting_sprod='', mapset='', version='', starting_dates=None, write2file=None,
#                             logfile=None, input_products='', output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=True,
#                                  update_stats=True,
#                                  input_products=input_products,
#                                  output_product=output_product)
#     return result
def create_pipeline(input_products, output_product, logfile=None, nrt_products=True, update_stats=False):

    proc_lists = None
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0         # PP from Chla, SST, Kd490 and PAR
    activate_stats_comput = 0      # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    # switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1     # PP from Chla, SST, Kd490 and PAR

    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    # Defined unconditionally so the @active_if checks below always find them
    # (activation is still gated by activate_stats_comput)
    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    # ---------------------------------------------------------------------
    # Create lists
    # my_date = '20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # ---------------------------------------------------------------------
    # Parse the arguments and extract the 4 input variables
    #
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:

        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(chla_prod, chla_sprod, chla_mapset,
                                                                  chla_version, ext)
            chla_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version, chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(sst_prod, sst_sprod, sst_mapset,
                                                                 sst_version, ext)
            sst_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(kd490_prod, kd490_sprod, kd490_mapset,
                                                                   kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version, kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(par_prod, par_sprod, par_mapset,
                                                                 par_version, ext)
            par_input_dir = es2_data_dir + \
                functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not found_chla or not found_kd490 or not found_par or not found_sst:
        spec_logger.error('At least one of 4 expected inputs missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapsets must be equal. Exit')
        return 1

    # Read input product nodata
    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod, subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod, subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod, subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod, subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define input files
    # if starting_dates is not None:
    #     starting_files = []
    #     for my_date in starting_dates:
    #         starting_files.append(input_dir + my_date + in_prod_ident)
    # else:
    #     starting_files = input_dir + "*" + in_prod_ident

    # Define outputs
    output_nodata = -32767

    old = False
    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed
    #       subprod defined according to the frequency
    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if old:
        # Get the first output -> PP subproduct generated (8daysavg or monavg)
        output_sprod = output_product[0].subproductcode
    else:
        # Define the outputs according to the frequency (method in 'functions' to be created !!)
        if chla_frequency == 'e1month':
            frequency_string = 'monthly'
            output_sprod = 'monavg'
            output_sprod_clim = '1monclim'
            output_sprod_min = '1monmin'
            output_sprod_max = '1monmax'
            sub_product_group = '1monstat'
        elif chla_frequency == 'e1modis8day':
            frequency_string = '8 days'
            output_sprod = '8daysavg'
            activate_pp_stats_clim_comput = 1
            activate_pp_stats_min_comput = 1
            activate_pp_stats_max_comput = 1
            sub_product_group = '8daysstat'
            output_sprod_clim = '8daysclim'
            output_sprod_min = '8daysmin'
            output_sprod_max = '8daysmax'
        else:
            spec_logger.error('Frequency not recognized: %s. Exit!', chla_frequency)
            return

    out_prod_ident = functions.set_path_filename_no_date(output_prod, output_sprod, output_mapset,
                                                         output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod, 'Derived',
                                                     output_version, output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():
        # Take kd490 as starting point
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False

            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par, ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],
                "par_file": input_file[2],
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata, "chla_nodata": chla_nodata,
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)

    # ---------------------------------------------------------------------
    # Climatology (inter-annual average)
    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version

    in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)

    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident

    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(output_sprod_clim, sub_product_group, final=False,
                                               descriptive_name='Inter-annual Climatology at ' + frequency_string + ' frequency',
                                               description='Inter-annual Climatology at ' + frequency_string + ' frequency',
                                               frequency_id=chla_frequency,
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    out_prod_ident_clim = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_clim + "{MMDD[0]}" + out_prod_ident_clim]

    # Fixes ES2-304
    def generate_input_files_pp_stats():
        # Map each non-leap-year MMDD to the corresponding leap-year MMDD
        # (supersedes the old commented-out MMDD_nonleap_list)
        MMDD_nonleap_dict = {
            '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125', '0202': '0202',
            '0210': '0210', '0218': '0218', '0226': '0226', '0306': '0305', '0314': '0313',
            '0322': '0321', '0330': '0329', '0407': '0406', '0415': '0414', '0423': '0422',
            '0501': '0430', '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
            '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703', '0712': '0711',
            '0720': '0719', '0728': '0727', '0805': '0804', '0813': '0812', '0821': '0820',
            '0829': '0828', '0906': '0905', '0914': '0913', '0922': '0921', '0930': '0929',
            '1008': '1007', '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
            '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210', '1219': '1218',
            '1227': '1226'}

        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_clim + os.path.sep + MMDD_nonleap + out_prod_ident_clim
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)

    # ---------------------------------------------------------------------
    # Minimum
    output_sprod = proc_lists.proc_add_subprod(output_sprod_min, sub_product_group, final=False,
                                               descriptive_name='Inter-annual Minimum at ' + frequency_string + ' frequency',
                                               description='Inter-annual Minimum at ' + frequency_string + ' frequency',
                                               frequency_id=chla_frequency,
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    out_prod_ident_min = functions.set_path_filename_no_date(prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(prod, output_sprod_min, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_min + "{MMDD[0]}" + out_prod_ident_min]

    def generate_input_files_pp_stats_min():
        # Same non-leap -> leap MMDD mapping as above
        MMDD_nonleap_dict = {
            '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125', '0202': '0202',
            '0210': '0210', '0218': '0218', '0226': '0226', '0306': '0305', '0314': '0313',
            '0322': '0321', '0330': '0329', '0407': '0406', '0415': '0414', '0423': '0422',
            '0501': '0430', '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
            '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703', '0712': '0711',
            '0720': '0719', '0728': '0727', '0805': '0804', '0813': '0812', '0821': '0820',
            '0829': '0828', '0906': '0905', '0914': '0913', '0922': '0921', '0930': '0929',
            '1008': '1007', '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
            '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210', '1219': '1218',
            '1227': '1226'}

        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_min + os.path.sep + MMDD_nonleap + out_prod_ident_min
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_min_image(**args)

    # ---------------------------------------------------------------------
    # Monthly Maximum
    output_sprod = proc_lists.proc_add_subprod(output_sprod_max, sub_product_group, final=False,
                                               descriptive_name='Inter-annual Maximum at ' + frequency_string + ' frequency',
                                               description='Inter-annual Maximum at ' + frequency_string + ' frequency',
                                               frequency_id=chla_frequency,
                                               date_format='MMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    out_prod_ident_max = functions.set_path_filename_no_date(prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(prod, output_sprod_max, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_max + "{MMDD[0]}" + out_prod_ident_max]

    def generate_input_files_pp_stats_max():
        # Same non-leap -> leap MMDD mapping as above
        MMDD_nonleap_dict = {
            '0101': '0101', '0109': '0109', '0117': '0117', '0125': '0125', '0202': '0202',
            '0210': '0210', '0218': '0218', '0226': '0226', '0306': '0305', '0314': '0313',
            '0322': '0321', '0330': '0329', '0407': '0406', '0415': '0414', '0423': '0422',
            '0501': '0430', '0509': '0508', '0517': '0516', '0525': '0524', '0602': '0601',
            '0610': '0609', '0618': '0617', '0626': '0625', '0704': '0703', '0712': '0711',
            '0720': '0719', '0728': '0727', '0805': '0804', '0813': '0812', '0821': '0820',
            '0829': '0828', '0906': '0905', '0914': '0913', '0922': '0921', '0930': '0929',
            '1008': '1007', '1016': '1015', '1024': '1023', '1101': '1031', '1109': '1108',
            '1117': '1116', '1125': '1124', '1203': '1202', '1211': '1210', '1219': '1218',
            '1227': '1226'}

        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))
            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_max + os.path.sep + MMDD_nonleap + out_prod_ident_max
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
    else:
        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):
            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {"input_file": reduced_list, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw"}
            raster_image_math.do_max_image(**args)
def get_archives_eumetcast_ftp():

    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Get Access credentials
    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    if sys.platform == 'win32':
        source_id = source_id.replace(':', '_')  # Pierluigi

    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {'length_proc_list': 0,
                      'time_latest_exec': datetime.datetime.now(),
                      'time_latest_copy': datetime.datetime.now()}

    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of file to process for trigger %s.", source_id)
    try:
        current_list = get_list_matching_files(ftp_eumetcast_url, ftp_eumetcast_userpwd,
                                               filter_expression_mesa_jrc, 'ftp', my_logger=logger_spec)
    except:
        logger.error("Cannot connect to the PC1 via ftp. Wait 1 minute")
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i", source_id, len(current_list))

    if len(current_list) > 0:
        # logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
        logger_spec.debug("Number of files already copied for trigger %s is %i", source_id, len(processed_list))
        listtoprocess = []
        listtoprocess = set(current_list) - set(processed_list)
        # logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
        logger_spec.info("Number of files to be copied for trigger %s is %i", source_id, len(listtoprocess))
        if listtoprocess != set([]):
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    result = get_file_from_url(str(ftp_eumetcast_url) + os.path.sep + filename,
                                               target_file=os.path.basename(filename),
                                               target_dir=es_constants.ingest_dir,
                                               userpwd=str(ftp_eumetcast_userpwd))
                    if not result:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                    else:
                        logger_spec.warning("File %s not copied: ", filename)
                except:
                    logger_spec.warning("Problem while copying file: %s.", filename)
    else:
        logger.debug("Nothing to process - go to next trigger.")
        pass

    # Iterate over a copy: removing items from the list being iterated would skip elements
    for infile in list(processed_list):
        if infile not in current_list:
            processed_list.remove(infile)

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
_author__ = "Marco Clerici" import sys from config import es_constants from apps.acquisition import acquisition from lib.python import es_logging as log logger = log.my_logger("apps.acquisition.get_eumetcast") try: command = str(sys.argv[1]) except: logger.fatal("An argument should be provided: status/start/stop") exit(1) # Define pid file and create daemon pid_file = es_constants.get_eumetcast_pid_filename daemon = acquisition.GetEumetcastDaemon(pid_file, dry_run=False) if command == "status": status = daemon.status() print("Current status of the Service: %s" % status) if command == "start": logger.info("Starting Get EUMETCast service") daemon.start() if command == "stop": logger.info("Stopping Get EUMETCast service") daemon.stop()
def push_data_ftp(dry_run=False, user=None, psw=None, url=None, trg_dir=None, masked=True): # Synchronized data towards an ftp server (only for JRC) # It replaces, since the new srv-ies-ftp.jrc.it ftp is set, the bash script: mirror_to_ftp.sh # Configuration: it looks at all 'non-masked' products and pushes them # For the mapsets, find what is in the filesystem, and pushes only the 'largest' # It uses a command like: # lftp -e "mirror -RLe /data/processing/vgt-ndvi/sv2-pv2.1/SPOTV-Africa-1km/derived/10dmax-linearx2/ # /narma/eStation_2.0/processing/vgt-ndvi/sv2-pv2.1/SPOTV-Africa-1km/derived/10dmax-linearx2/;exit" # -u narma:JRCVRw2960 sftp://srv-ies-ftp.jrc.it"" >> /eStation2/log/push_data_ftp.log # spec_logger = log.my_logger('apps.es2system.push_data_ftp') try: from config import server_ftp except: logger.warning('Configuration file for ftp sync not found. Exit') return 1 if user is None: user = server_ftp.server['user'] if psw is None: psw = server_ftp.server['psw'] if url is None: url = server_ftp.server['url'] if trg_dir is None: trg_dir = server_ftp.server['data_dir'] # Create an ad-hoc file for the lftp command output (beside the standard logger) logfile = es_constants.es2globals['log_dir'] + 'push_data_ftp.log' message = time.strftime( "%Y-%m-%d %H:%M") + ' INFO: Running the ftp sync now ... \n' logger.debug("Entering routine %s" % 'push_data_ftp') # Loop over 'not-masked' products products = querydb.get_products(masked=False) # produts=products[21:23] # test a subset for row in products: prod_dict = functions.row2dict(row) productcode = prod_dict['productcode'] version = prod_dict['version'] spec_logger.info('Working on product {}/{}'.format( productcode, version)) # TEMP - For testing only # if productcode!='vgt-ndvi' or version !='sv2-pv2.2': # continue # Check it if is in the list of 'exclusions' defined in ./config/server_ftp.py key = '{}/{}'.format(productcode, version) skip = False if key in server_ftp.exclusions: skip = True logger.debug('Do not sync for {}/{}'.format(productcode, version)) p = Product(product_code=productcode, version=version) all_prod_mapsets = p.mapsets all_prod_subproducts = p.subproducts # Check there is at least one mapset and one subproduct if all_prod_mapsets.__len__() > 0 and all_prod_subproducts.__len__( ) > 0 and not skip: # In case of several mapsets, check if there is a 'larger' one if len(all_prod_mapsets) > 1: mapset_to_use = [] for my_mapset in all_prod_mapsets: mapset_info = querydb.get_mapset(mapsetcode=my_mapset, allrecs=False) if hasattr(mapset_info, "mapsetcode"): my_mapobj = MapSet() my_mapobj.assigndb(my_mapset) larger_mapset = my_mapobj.get_larger_mapset() if larger_mapset is not None: if larger_mapset not in mapset_to_use: mapset_to_use.append(larger_mapset) else: if my_mapset not in mapset_to_use: mapset_to_use.append(my_mapset) else: mapset_to_use = all_prod_mapsets # Loop over existing mapset for mapset in mapset_to_use: all_mapset_datasets = p.get_subproducts(mapset=mapset) # Loop over existing subproducts for subproductcode in all_mapset_datasets: # Get info - and ONLY for NOT masked products dataset_info = querydb.get_subproduct( productcode=productcode, version=version, subproductcode=subproductcode, masked=masked) # -> TRUE means only NOT masked sprods if dataset_info is not None: dataset_dict = functions.row2dict(dataset_info) dataset_dict['mapsetcode'] = mapset logger.debug('Working on {}/{}/{}/{}'.format( productcode, version, mapset, subproductcode)) subdir = functions.set_path_sub_directory( productcode, 
                            subproductcode,
                            dataset_dict['product_type'],
                            version,
                            mapset)

                        source = data_dir + subdir
                        target = trg_dir + subdir

                        # command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> {}'.format(source, target, user, psw, url, logfile)
                        command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> /dev/null'.format(
                            source, target, user, psw, url)
                        logger.debug("Executing %s" % command)
                        spec_logger.info('Working on mapset/subproduct {}/{} \n'.format(mapset, subproductcode))
                        # return
                        try:
                            status = os.system(command)
                            if status:
                                logger.error("Error in executing %s" % command)
                                spec_logger.error("Error in executing %s" % command)
                        except:
                            logger.error('Error in executing command: {}'.format(command))
                            spec_logger.error('Error in executing command: {}'.format(command))
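# Hedged usage sketch: how push_data_ftp() above could be driven from a wrapper
# script. By default the credentials and target directory come from
# config/server_ftp.py; the explicit values shown here are placeholders only, and the
# dry_run/masked flags are simply passed through as defined in the signature above.
if __name__ == '__main__':
    push_data_ftp(dry_run=True,
                  user='my_user',                    # placeholder
                  psw='my_password',                 # placeholder
                  url='sftp://ftp.example.org',      # placeholder
                  trg_dir='/remote/eStation2/processing/',
                  masked=True)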
from builtins import str
from builtins import object

import sys
import os
import time
import atexit
import io
# import signal
import psutil

if sys.platform != 'win32':
    from signal import SIGKILL, SIGTERM

from lib.python import es_logging as log
logger = log.my_logger("lib.python.daemon")

from config import es_constants

if not os.path.isdir(es_constants.pid_file_dir):
    os.makedirs(es_constants.pid_file_dir)
if not os.path.isdir(es_constants.processed_list_base_dir):
    os.makedirs(es_constants.processed_list_base_dir)
if not os.path.isdir(es_constants.processed_list_eum_dir):
    os.makedirs(es_constants.processed_list_eum_dir)
if not os.path.isdir(es_constants.processed_list_int_dir):
    os.makedirs(es_constants.processed_list_int_dir)


class Daemon(object):
    """
    A generic daemon class.
def create_pipeline(prod, starting_sprod, native_mapset, target_mapset, version, starting_dates=None, proc_lists=None): # Create Logger logger = log.my_logger('log.lst') # --------------------------------------------------------------------- # Create lists if proc_lists is None: proc_lists = functions.ProcLists() # Set DEFAULTS: all ON activate_1dmax_comput = 1 activate_10dmax_comput = 1 activate_10d15min_comput = 1 activate_10dmin_comput = 1 es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep # --------------------------------------------------------------------- # Define input files ('lst' subproduct) in_prod_ident = functions.set_path_filename_no_date( prod, starting_sprod, native_mapset, version, ext) input_dir = es2_data_dir+ \ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset) if starting_dates is not None: starting_files = [] for my_date in starting_dates: starting_files.append(input_dir + my_date + in_prod_ident) else: starting_files = input_dir + "*" + in_prod_ident logger.info("starting_files %s" % starting_files) # ---------------------------------------------------------------------------------------------------------------- # 1dmax # Daily maximum from 15min lst, re-projected on target mapset output_sprod = proc_lists.proc_add_subprod("1dmax", "lsasaf-lst", final=False, descriptive_name='1d Maximum', description='Daily Maximum', frequency_id='e1day', date_format='YYYYMMDD', masked=False, timeseries_role='', active_default=True) output_sprod = '1dmax' out_prod_ident_1dmax = functions.set_path_filename_no_date( prod, output_sprod, target_mapset, version, ext) output_subdir_1dmax = functions.set_path_sub_directory( prod, output_sprod, 'Derived', version, target_mapset) formatter_in_1dmax = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident formatter_out_1dmax = "{subpath[0][5]}" + os.path.sep + output_subdir_1dmax + "{YYYYMMDD[0]}" + out_prod_ident_1dmax # @active_if(activate_1dmax_comput) @collate(starting_files, formatter(formatter_in_1dmax), formatter_out_1dmax) def lsasaf_lst_1dmax(input_file, output_file): # output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='', dir=es_constants.base_tmp_dir) tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file) args = { "input_file": input_file, "output_file": tmp_output_file, "output_format": 'GTIFF', "options": "compress=lzw", "input_nodata": -32768 } raster_image_math.do_max_image(**args) reproject_output(tmp_output_file, native_mapset, target_mapset) shutil.rmtree(tmpdir) # ---------------------------------------------------------------------------------------------------------------- # 10dmax # 10 Day maximum from daily max, on target mapset output_sprod = proc_lists.proc_add_subprod("10dmax", "lsasaf-lst", final=False, descriptive_name='10d Maximum', description='10d Maximum', frequency_id='e1dekad', date_format='YYYYMMDD', masked=False, timeseries_role='', active_default=True) output_sprod_10dmax = '10dmax' out_prod_ident_10dmax = functions.set_path_filename_no_date( prod, output_sprod_10dmax, target_mapset, version, ext) output_subdir_10dmax = functions.set_path_sub_directory( prod, output_sprod_10dmax, 'Derived', version, target_mapset) # # Define input files in_prod_10dmax = '1dmax' in_prod_ident_10dmax = functions.set_path_filename_no_date( prod, in_prod_10dmax, target_mapset, version, ext) # input_dir_10dmax = es_constants.processing_dir+ \ 
functions.set_path_sub_directory(prod, in_prod_10dmax, 'Derived', version, target_mapset) # starting_files_10dmax = input_dir_10dmax + "*" + in_prod_ident_10dmax # def generate_parameters_10dmax(): # Look for all input files in input_dir, and sort them input_files = glob.glob(starting_files_10dmax) dekad_list = [] # Create unique list of all dekads (as 'Julian' number) for input_file in input_files: basename = os.path.basename(input_file) mydate = functions.get_date_from_path_filename(basename) mydate_yyyymmdd = str(mydate)[0:8] mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd) if mydekad_nbr not in dekad_list: dekad_list.append(mydekad_nbr) dekad_list = sorted(dekad_list) # Compute the 'julian' dakad for the current day today = datetime.date.today() today_str = today.strftime('%Y%m%d') dekad_now = functions.conv_date_2_dekad(today_str) for dekad in dekad_list: # Exclude the current dekad if dekad != dekad_now: file_list = [] my_dekad_str = functions.conv_dekad_2_date(dekad) for input_file in input_files: basename = os.path.basename(input_file) mydate_yyyymmdd = functions.get_date_from_path_filename( basename) mydekad_nbr = functions.conv_date_2_dekad( mydate_yyyymmdd[0:8]) if mydekad_nbr == dekad: file_list.append(input_file) output_file = es_constants.processing_dir + output_subdir_10dmax + os.path.sep + my_dekad_str + out_prod_ident_10dmax yield (file_list, output_file) # @active_if(activate_10dmax_comput) @files(generate_parameters_10dmax) def lsasaf_lst_10dmax(input_file, output_file): # output_file = functions.list_to_element(output_file) functions.check_output_dir(os.path.dirname(output_file)) args = { "input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", "input_nodata": -32768 } raster_image_math.do_max_image(**args) # --------------------------------------------------------------------- # Dekad maximum for every 15min # NOTE: this product is compute w/o re-projection, i.e. on the 'native' mapset output_sprod = proc_lists.proc_add_subprod( "10d15min", "lsasaf-lst", final=False, descriptive_name='10day Maximum over 15 min', description='10day Maximum computed for every 15 min', frequency_id='e15minute', # Is it OK ??????? 
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, native_mapset)

    def generate_parameters_10d15min():

        # Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 15 min times in a day (HHMM strings);
        # the loop variable is not called 'time', to avoid shadowing the time module
        timelist = [datetime.time(h, m).strftime("%H%M")
                    for h, m in itertools.product(range(0, 24), range(0, 60, 15))]

        for hhmm in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + hhmm + in_prod_ident)

            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + \
                                  my_dekad_str + hhmm + out_prod_ident

                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
                        mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)

                    if len(file_list) > 8:
                        yield (file_list, output_file)

    @active_if(activate_10d15min_comput)
    @files(generate_parameters_10d15min)
    def lsasaf_lst_10d15min(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "input_nodata": -32768}
        raster_image_math.do_max_image(**args)

    # Do also the house-keeping, by deleting the files older than 6 months
    number_months_keep = 6
    remove_old_files(prod, starting_sprod, version, native_mapset, 'Ingest', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 10 day minimum (mm)
    # NOTE: this product is computed with re-projection, i.e. on the 'target' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Minimum',
        description='10day minimum',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dmin = functions.set_path_filename_no_date(prod, "10dmin", target_mapset, version, ext)
    output_subdir_10dmin = functions.set_path_sub_directory(prod, "10dmin", 'Derived', version, target_mapset)

    # Define input files
    in_prod_10dmin = '10d15min'
    in_prod_ident_10dmin = functions.set_path_filename_no_date(prod, in_prod_10dmin, native_mapset, version, ext)
    input_dir_10dmin = es_constants.processing_dir + \
        functions.set_path_sub_directory(prod, in_prod_10dmin, 'Derived', version, native_mapset)

    starting_files_10dmin = input_dir_10dmin + "*" + in_prod_ident_10dmin

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dmin
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir_10dmin + "{YYYYMMDD[0]}" + out_prod_ident_10dmin]

    @follows(lsasaf_lst_10d15min)
    @active_if(activate_10dmin_comput)
    @collate(starting_files_10dmin, formatter(formatter_in), formatter_out)
    def lsasaf_lst_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)

        functions.check_output_dir(os.path.dirname(output_file))
        tmpdir = tempfile.mkdtemp(prefix=__name__, suffix='', dir=es_constants.base_tmp_dir)
        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {"input_file": input_file,
                "output_file": tmp_output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "input_nodata": -32768}
        raster_image_math.do_min_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)
        shutil.rmtree(tmpdir)

    # Do also the house-keeping, by deleting the files older than 6 months
    number_months_keep = 6
    remove_old_files(prod, '10d15min', version, native_mapset, 'Ingest', number_months_keep)

    return proc_lists
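# Illustrative only: the real dekad helpers are functions.conv_date_2_dekad() and
# functions.conv_dekad_2_date() in the eStation2 library, which are not reproduced
# here. Assuming the usual dekad convention (days 1-10, 11-20, 21-end of month), a
# minimal stand-alone version showing the grouping logic the generators above rely on:
import datetime

def date_to_dekad_sketch(yyyymmdd):
    """Return a sortable dekad index (3 dekads per month) for a 'YYYYMMDD' string."""
    d = datetime.datetime.strptime(yyyymmdd, '%Y%m%d').date()
    dekad_in_month = min((d.day - 1) // 10, 2)          # 0, 1 or 2
    return (d.year * 12 + (d.month - 1)) * 3 + dekad_in_month

def dekad_to_first_date_sketch(dekad_index):
    """Return the 'YYYYMMDD' of the first day of the dekad produced above."""
    months, dekad_in_month = divmod(dekad_index, 3)
    year, month = divmod(months, 12)
    return '%04d%02d%02d' % (year, month + 1, dekad_in_month * 10 + 1)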
def loop_eumetcast(dry_run=False): global processed_list_filename, processed_list global processed_info_filename, processed_info signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGILL, signal_handler) logger.info("Starting retrieving EUMETCast data.") while True: logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir) if not os.path.exists(input_dir): logger.error( "The EUMETCast input directory : %s is not yet mounted.", input_dir) logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir) if not os.path.exists(output_dir): logger.fatal( "The Ingest Server input directory : %s doesn't exists.", output_dir) exit(1) if not os.path.exists(es_constants.base_tmp_dir): os.mkdir(es_constants.base_tmp_dir) if not os.path.exists(es_constants.processed_list_base_dir): os.mkdir(es_constants.processed_list_base_dir) if not os.path.exists(es_constants.processed_list_eum_dir): os.mkdir(es_constants.processed_list_eum_dir) while 1: try: time_sleep = user_def_sleep logger.debug("Sleep time set to : %s.", time_sleep) except: logger.warning( "Sleep time not defined. Setting to default=1min. Continue." ) time_sleep = 60 # try: logger.debug("Reading active EUMETCAST data sources from database") eumetcast_sources_list = querydb.get_eumetcast_sources() logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list)) # Get the EUMETCast MESA_JRC files try: get_archives_eumetcast() except: logger.error( "Error in executing get_archives_eumetcast. Continue") # Loop over active triggers for eumetcast_source in eumetcast_sources_list: # Define a file_handler logger 'source-specific' (for GUI) logger_spec = log.my_logger('apps.get_eumetcast.' + eumetcast_source.eumetcast_id) logger.info("Processing eumetcast source %s.", eumetcast_source.eumetcast_id) if sys.platform == 'win32': # Pierluigi processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str( eumetcast_source.eumetcast_id).replace(':', '_') + '.list' processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str( eumetcast_source.eumetcast_id).replace(':', '_') + '.info' else: processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str( eumetcast_source.eumetcast_id) + '.list' processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str( eumetcast_source.eumetcast_id) + '.info' # Create objects for list and info processed_list = [] processed_info = { 'length_proc_list': 0, 'time_latest_exec': datetime.datetime.now(), 'time_latest_copy': datetime.datetime.now() } logger.debug("Loading the processed file list for source %s ", eumetcast_source.eumetcast_id) # Restore/Create List processed_list = functions.restore_obj_from_pickle( processed_list, processed_list_filename) # Restore/Create Info processed_info = functions.restore_obj_from_pickle( processed_info, processed_info_filename) # Update processing time (in case it is restored) processed_info['time_latest_exec'] = datetime.datetime.now() logger.debug( "Create current list of file to process for trigger %s.", eumetcast_source.eumetcast_id) current_list = find_files( input_dir, eumetcast_source.filter_expression_jrc) #logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list)) logger_spec.info( "Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list)) if len(current_list) > 0: 
#logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list)) logger_spec.debug( "Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list)) listtoprocess = [] listtoprocess = set(current_list) - set(processed_list) #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess)) logger_spec.debug( "Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess)) if listtoprocess != set([]): logger_spec.debug("Loop on the found files.") for filename in list(listtoprocess): if os.path.isfile(os.path.join( input_dir, filename)): if os.stat(os.path.join( input_dir, filename)).st_mtime < int( time.time()): logger_spec.debug( "Processing file: " + os.path.basename(filename)) if not dry_run: if subprocess.getstatusoutput( "cp " + filename + " " + output_dir + os.sep + os.path.basename(filename) )[0] == 0: logger_spec.info( "File %s copied.", filename) processed_list.append(filename) # Update processing info processed_info[ 'time_latest_copy'] = datetime.datetime.now( ) processed_info[ 'length_proc_list'] = len( processed_list) else: logger_spec.warning( "Problem while copying file: %s.", filename) else: logger_spec.info( 'Dry_run is set: do not get files') else: logger_spec.error( "File %s removed by the system before being processed.", filename) else: logger.debug( "Nothing to process - go to next trigger.") pass for infile in processed_list: if not os.path.exists(infile): processed_list.remove(infile) if not dry_run: functions.dump_obj_to_pickle(processed_list, processed_list_filename) functions.dump_obj_to_pickle(processed_info, processed_info_filename) logger.info("End of Get EUMETCast loop. Sleep") time.sleep(float(time_sleep)) exit(0)
def processing_std_msg_mpe(res_queue, pipeline_run_level=0,
                           pipeline_printout_level=0,
                           pipeline_printout_graph_level=0,
                           prod='',
                           starting_sprod='',
                           native_mapset='',
                           mapset='',
                           version='',
                           starting_dates=None,
                           write2file=None,
                           logfile=None,
                           day_time=None):

    native_mapset = 'MSG-satellite-3km'
    target_mapset = mapset

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_msg_mpe')

    if day_time is None:
        day_time = '0600'

    proc_lists = None
    proc_lists = create_pipeline(prod,
                                 starting_sprod,
                                 native_mapset,
                                 target_mapset,
                                 version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists,
                                 day_time=day_time,
                                 logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_msg_mpe')
        # Option to be added to pipeline_run to force files to appear up-to-date: touch_files_only = True
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(es_constants.log_dir, '.ruffus_history_msg_mpe.sqlite'),
                     checksum_level=0)
        tasks = pipeline_get_task_names()
        spec_logger.info("After running the pipeline %s" % 'processing_std_msg_mpe')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    return True
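# Hedged usage sketch: the processing_std_* entry points take a res_queue as first
# argument and appear designed to be run in a separate process. The product, mapset
# and version values below are placeholders and must match what is configured in the
# eStation2 database; this is not the project's own launching code.
from multiprocessing import Process, Queue

def launch_msg_mpe_sketch():
    res_queue = Queue()
    p = Process(target=processing_std_msg_mpe,
                args=(res_queue,),
                kwargs={'pipeline_run_level': 3,
                        'prod': 'msg-mpe',             # placeholder product code
                        'starting_sprod': 'mpe',       # placeholder subproduct
                        'mapset': 'SPOTV-Africa-1km',  # placeholder target mapset
                        'version': 'undefined'})
    p.start()
    p.join()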
import time

from future import standard_library

# import eStation2 modules
from lib.python import functions
from lib.python import es_logging as log
from config import es_constants
from database import querydb
from apps.es2system.GeoPortal import eStationTools as esTools
from apps.es2system.GeoPortal import geoserverREST
from apps.productmanagement import datasets
from apps.productmanagement import products
from lib.python.daemon import DaemonDryRunnable

standard_library.install_aliases()

logger = log.my_logger(__name__)

local_data_dir = es_constants.es2globals['processing_dir']
remote_data_dir = geoserverREST.restBaseDir


def syncGeoserver():
    #
    # Copy some 'relevant' datasets to GeoServer
    # Selection of datasets is done on the basis of the product.geoserver table
    #

    # Get list of all 'relevant' subproducts (see 2. above)
    list_active_geoserver = esTools.get_activated_geoserver()

    # Loop over existing sub_products
    for geoserver_sprod in list_active_geoserver:
def reproject_output(input_file, native_mapset_id, target_mapset_id, output_dir=None, version=None, logger=None): # Check logger if logger is None: logger = log.my_logger(__name__) # Check output dir if output_dir is None: output_dir = es_constants.es2globals['processing_dir'] # Get the existing dates for the dataset logger.debug("Entering routine %s for file %s" % ('reproject_output', input_file)) ext = es_constants.ES2_OUTFILE_EXTENSION # Test the file/files exists if not os.path.isfile(input_file): logger.error('Input file: %s does not exist' % input_file) return 1 # Instance metadata object (for output_file) sds_meta_out = metadata.SdsMetadata() # Read metadata from input_file sds_meta_in = metadata.SdsMetadata() sds_meta_in.read_from_file(input_file) # Extract info from input file str_date = sds_meta_in.get_item('eStation2_date') product_code = sds_meta_in.get_item('eStation2_product') sub_product_code = sds_meta_in.get_item('eStation2_subProduct') # 22.06.2017 Add the option to force the version if version is None: version = sds_meta_in.get_item('eStation2_product_version') # Define output filename sub_dir = sds_meta_in.get_item('eStation2_subdir') # Fix a bug for 10davg-linearx2 metadata - and make method more robust if re.search('.*derived.*', sub_dir): product_type = 'Derived' elif re.search('.*tif.*', sub_dir): product_type = 'Ingest' # product_type = functions.get_product_type_from_subdir(sub_dir) out_prod_ident = functions.set_path_filename_no_date( product_code, sub_product_code, target_mapset_id, version, ext) output_subdir = functions.set_path_sub_directory(product_code, sub_product_code, product_type, version, target_mapset_id) output_file = output_dir+\ output_subdir +\ str_date +\ out_prod_ident # make sure output dir exists output_dir = os.path.split(output_file)[0] functions.check_output_dir(output_dir) # ------------------------------------------------------------------------- # Manage the geo-referencing associated to input file # ------------------------------------------------------------------------- orig_ds = gdal.Open(input_file, gdal.GA_Update) # Read the data type band = orig_ds.GetRasterBand(1) out_data_type_gdal = band.DataType if native_mapset_id != 'default': native_mapset = MapSet() native_mapset.assigndb(native_mapset_id) orig_cs = osr.SpatialReference( wkt=native_mapset.spatial_ref.ExportToWkt()) # Complement orig_ds info (necessary to Re-project) try: #orig_ds.SetGeoTransform(native_mapset.geo_transform) orig_ds.SetProjection(orig_cs.ExportToWkt()) except: logger.debug('Cannot set the geo-projection .. Continue') else: try: # Read geo-reference from input file orig_cs = osr.SpatialReference() orig_cs.ImportFromWkt(orig_ds.GetProjectionRef()) except: logger.debug('Cannot read geo-reference from file .. Continue') # TODO-M.C.: add a test on the mapset-id in DB table ! 
trg_mapset = MapSet() trg_mapset.assigndb(target_mapset_id) logger.debug('Target Mapset is: %s' % target_mapset_id) # ------------------------------------------------------------------------- # Generate the output file # ------------------------------------------------------------------------- # Prepare output driver out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT) logger.debug('Doing re-projection to target mapset: %s' % trg_mapset.short_name) # Get target SRS from mapset out_cs = trg_mapset.spatial_ref out_size_x = trg_mapset.size_x out_size_y = trg_mapset.size_y # Create target in memory mem_driver = gdal.GetDriverByName('MEM') # Assign mapset to dataset in memory mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1, out_data_type_gdal) mem_ds.SetGeoTransform(trg_mapset.geo_transform) mem_ds.SetProjection(out_cs.ExportToWkt()) # Apply Reproject-Image to the memory-driver orig_wkt = orig_cs.ExportToWkt() res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(), es_constants.ES2_OUTFILE_INTERP_METHOD) logger.debug('Re-projection to target done.') # Read from the dataset in memory out_data = mem_ds.ReadAsArray() # Write to output_file trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0, [es_constants.ES2_OUTFILE_OPTIONS]) trg_ds.GetRasterBand(1).WriteArray(out_data) # ------------------------------------------------------------------------- # Assign Metadata to the ingested file # ------------------------------------------------------------------------- # Close dataset trg_ds = None sds_meta_out.assign_es2_version() sds_meta_out.assign_mapset(target_mapset_id) sds_meta_out.assign_from_product(product_code, sub_product_code, version) sds_meta_out.assign_date(str_date) sds_meta_out.assign_subdir_from_fullpath(output_dir) sds_meta_out.assign_comput_time_now() # Copy the same input files as in the non-reprojected input file_list = sds_meta_in.get_item('eStation2_input_files') sds_meta_out.assign_input_files(file_list) # Write metadata to file sds_meta_out.write_to_file(output_file) # Return the filename return output_file
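# Hedged usage sketch for reproject_output() above: re-project a single ingested file
# from its native mapset to a target mapset, as done by the lsasaf-lst tasks. The
# mapset codes and the input path are placeholders; both mapsets must exist in the
# eStation2 mapset table.
def reproject_one_file_sketch():
    reprojected = reproject_output('/data/processing/example_product/20240101_file.tif',  # placeholder path
                                   native_mapset_id='MSG-satellite-3km',                  # placeholder
                                   target_mapset_id='SPOTV-Africa-1km')                   # placeholder
    print('Re-projected file written to: %s' % reprojected)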
from lib.python import functions
from lib.python import metadata
from lib.python.image_proc import raster_image_math
from lib.python.image_proc import recode
from database import crud
from database import querydb
from lib.python import es_logging as log

# This is temporary .. to be replaced with a DB call
from apps.processing.processing_switches import *

# Import third-party modules
from ruffus import *

logger = log.my_logger(__name__)

# General definitions for this processing chain
prod = "modis-pp"
mapset = 'MODIS-IOC-4km'
ext = '.tif'
version = 'undefined'

# Primary Production Monthly
activate_pp_1mon_comput = 1


def create_pipeline(starting_sprod):

    # ---------------------------------------------------------------------
    # Define input files: Chla is the 'driver', sst, kd and par the 'ancillary inputs'
def processing_std_dmp(res_queue, pipeline_run_level=0,
                       pipeline_printout_level=0,
                       pipeline_printout_graph_level=0,
                       prod='',
                       starting_sprod='',
                       mapset='',
                       version='',
                       starting_dates=None,
                       update_stats=False,
                       nrt_products=True,
                       write2file=None,
                       logfile=None,
                       touch_only=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_dmp')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod,
                                 starting_sprod=starting_sprod,
                                 mapset=mapset,
                                 version=version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists,
                                 update_stats=update_stats,
                                 nrt_products=nrt_products)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_dmp')
        pipeline_run(touch_files_only=touch_only,
                     verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(prod, starting_sprod))
        tasks = pipeline_get_task_names()
        spec_logger.info("Run the pipeline %s" % tasks[0])
        spec_logger.info("After running the pipeline %s" % 'processing_std_dmp')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level,
                          output_stream=fwrite_id,
                          history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(prod, starting_sprod))

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True
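# Hedged usage sketch: with pipeline_run_level=0 and pipeline_printout_level>0 the
# routine above only writes the Ruffus task printout to 'write2file' instead of
# running the chain. Product and path values are placeholders.
def print_dmp_pipeline_sketch():
    processing_std_dmp(None,                              # res_queue is not used in the body
                       pipeline_run_level=0,
                       pipeline_printout_level=3,
                       prod='vgt-dmp',                    # placeholder product code
                       starting_sprod='dmp',              # placeholder subproduct
                       mapset='SPOTV-Africa-1km',         # placeholder mapset
                       version='V2.0',                    # placeholder version
                       write2file='/tmp/dmp_pipeline_printout.txt')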