def processing_std_spi_monthly(res_queue, pipeline_run_level=0,pipeline_printout_level=0,
                          pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
                          starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_spi_monthly')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates, proc_lists=proc_lists)

    if write2file is not None:
        fwrite_id=open(write2file,'w')
    else:
        fwrite_id=None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_spi_monthly')
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger, history_file=os.path.join(es_constants.log_dir,'.ruffus_history_spi_monthly.sqlite'),\
                     checksum_level=0)
        spec_logger.info("After running the pipeline %s" % 'processing_std_precip_spi_monthly')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id, history_file=os.path.join(es_constants.log_dir,'.ruffus_history_spi_monthly.sqlite'),\
                     checksum_level=0)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    #res_queue.put(proc_lists)
    return True
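
# A minimal usage sketch (not part of the original module): these processing
# wrappers are written to be run in their own process, taking a result queue as
# first argument; the product/mapset codes below are placeholders, not real values.
#
#   import multiprocessing
#
#   res_queue = multiprocessing.Queue()
#   proc = multiprocessing.Process(target=processing_std_spi_monthly,
#                                  args=(res_queue,),
#                                  kwargs={'pipeline_run_level': 3,
#                                          'prod': '<product-code>',
#                                          'starting_sprod': '<subproduct-code>',
#                                          'mapset': '<mapset-code>',
#                                          'version': '<version>'})
#   proc.start()
#   proc.join()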

# Example 2

def processing_std_seas_cum(res_queue, pipeline_run_level=0,pipeline_printout_level=0,
                          pipeline_printout_graph_level=0, prod='', starting_sprod='', native_mapset='', version='',
                          mapset=None, starting_dates=None, write2file=None, logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_seas_cum')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, native_mapset=native_mapset, version=version,
                                 mapset=mapset, starting_dates=starting_dates, proc_lists=proc_lists, logger=spec_logger)

    if write2file is not None:
        fwrite_id=open(write2file,'w')
    else:
        fwrite_id=None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_precip')
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, log_exceptions=spec_logger, history_file=os.path.join(es_constants.log_dir,'.ruffus_history.sqlite'))
        tasks = pipeline_get_task_names()
        spec_logger.info("Run the pipeline %s" % tasks[0])
        spec_logger.info("After running the pipeline %s" % 'processing_std_precip')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    return True
def processing_std_gradient(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                                 pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
                                 starting_dates=None, write2file=None, logfile=None,
                                 touch_files_only=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_gradient')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates)

    if write2file is not None:
        fwrite_id=open(write2file,'w')
    else:
        fwrite_id=None

    if pipeline_run_level > 0:
        pipeline_run(verbose=pipeline_run_level, logger=spec_logger, history_file=os.path.join(es_constants.log_dir,'.ruffus_history_std_chla_gradient.sqlite'),touch_files_only=touch_files_only,
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level, output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    #res_queue.put(proc_lists)
    return None

# Example 4

def clean_corrupted_files(check_directory, logger=None, dry_run=False):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    # Get the existing dates for the dataset
    logger.info("Entering routine %s " % 'clean_corrupted_files')

    # Get list of files
    list_files = []
    for root, dirnames, filenames in os.walk(check_directory):
        for filename in fnmatch.filter(filenames, '*.tif'):
            list_files.append(os.path.join(root, filename))

    if len(list_files) > 0:
        for my_file in list_files:
            logger.debug('Checking file: {0}'.format(my_file))

            # Check the file by using gdalinfo
            command = ['gdalinfo', my_file]
            status = subprocess.call(command,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if status:
                logger.info('Error in file: {0}'.format(my_file))
                if not dry_run:
                    os.remove(my_file)
                    logger.info('File removed: {0}'.format(my_file))

                else:
                    logger.info(
                        'Not removing file {0} - Dry Run'.format(my_file))
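
# Usage sketch (hypothetical path, for illustration only): first report the
# corrupted GeoTIFFs with dry_run=True, then re-run to actually remove them.
#
#   clean_corrupted_files('/data/processing/<some-product>', dry_run=True)
#   clean_corrupted_files('/data/processing/<some-product>', dry_run=False)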
def processing_std_monavg(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
                                pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
                                starting_dates=None, write2file=None, logfile=None):
    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_monavg')

    history_file = os.path.join(es_constants.log_dir, '.ruffus_history_{0}_{1}.sqlite'.format(prod, starting_sprod))
    proc_lists = None
    proc_lists = create_pipeline(prod=prod, starting_sprod=starting_sprod, mapset=mapset, version=version,
                                 starting_dates=starting_dates, logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    spec_logger.info("Entering routine %s" % 'processing_monavg')
    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        # Option to be added to pipeline_run to force files to appear up-to-date: touch_files_only = True
        pipeline_run(verbose=pipeline_run_level, history_file=history_file, checksum_level=0, touch_files_only=False)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

# Example 6

def processing_std_lsasaf_et(res_queue,
                             pipeline_run_level=0,
                             pipeline_printout_level=0,
                             pipeline_printout_graph_level=0,
                             prod='',
                             starting_sprod='',
                             native_mapset='',
                             mapset='',
                             version='',
                             starting_dates=None,
                             write2file=None,
                             logfile=None):

    # The native mapset is hard-coded for LSA-SAF ET and overrides the native_mapset argument
    native_mapset = 'MSG-satellite-3km'
    target_mapset = mapset

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_lsasaf_et')

    proc_lists = None
    proc_lists = create_pipeline(prod,
                                 starting_sprod,
                                 native_mapset,
                                 target_mapset,
                                 version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists)
    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_lsasaf_et')
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(
                         es_constants.log_dir,
                         '.ruffus_history_lsasaf_et.sqlite'),
                     checksum_level=0)
        tasks = pipeline_get_task_names()
        spec_logger.info("After running the pipeline %s" %
                         'processing_std_lsasaf_et')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level,
                          output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    #res_queue.put(proc_lists)
    return True

# Example 7

    def delpid(self):
        # Change to deal with forking in processing (otherwise the pidfile is deleted by child process)
        logger = log.my_logger("lib.python.daemon")
        #my_pid=os.getpgid
        #logger.info("My Pid: %i" % my_pid)

        #pid_file=open(self.pidfile)
        #pid = pid_file.read()
        #logger.info("Pid: %i" % pid)
        #if pid == my_pid:
        logger.info("Removing the Pid")
        os.remove(self.pidfile)
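
    # Sketch of the guard described in the comments above (an assumption, not
    # the project's final implementation): only remove the pidfile when the
    # current process is the one recorded in it, so a forked child does not
    # delete the parent's pidfile. Assumes the pidfile stores the PID as text.
    def delpid_guarded(self):
        logger = log.my_logger("lib.python.daemon")
        try:
            with open(self.pidfile) as pid_file:
                stored_pid = int(pid_file.read().strip())
        except (IOError, ValueError):
            stored_pid = None
        if stored_pid is None or stored_pid == os.getpid():
            logger.info("Removing the Pid")
            os.remove(self.pidfile)
        else:
            logger.info("Pidfile belongs to pid %i - not removing" % stored_pid)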

# Example 8

def processing_std_olci_wrr(res_queue,
                            pipeline_run_level=0,
                            pipeline_printout_level=0,
                            pipeline_printout_graph_level=0,
                            prod='',
                            starting_sprod='',
                            mapset='',
                            version='',
                            starting_dates=None,
                            nrt_products=True,
                            write2file=None,
                            logfile=None,
                            touch_files_only=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_olci_wrr')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod,
                                 starting_sprod=starting_sprod,
                                 mapset=mapset,
                                 version=version,
                                 starting_dates=starting_dates,
                                 nrt_products=nrt_products,
                                 logger=spec_logger)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     touch_files_only=touch_files_only,
                     history_file=os.path.join(
                         es_constants.log_dir, '.ruffus_history_' + prod +
                         '_' + version + '.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(
            verbose=pipeline_printout_level)  #, output_stream=fout)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    #res_queue.put(proc_lists)
    return True
class TestFunctionsPickle(TestCase):

    logger = log.my_logger(__name__)
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix+'Test_info'
    processed_info = {'length_proc_list': 0,
                      'time_latest_exec': datetime.datetime.now(),
                      'time_latest_copy': datetime.datetime.now()}


    def test_write_pickle(self):

        self.logger.info('Pickle filename is: %s',  self.processed_info_filename)
        f.dump_obj_to_pickle(self.processed_info, self.processed_info_filename)
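
    # Companion sketch (assumptions: 'f' aliases the same functions module used
    # elsewhere, and f.restore_obj_from_pickle takes a default object plus a
    # filename, as in the rest of this code base): write the pickle and read it
    # back within one test, then check a stored key.
    def test_read_pickle(self):

        f.dump_obj_to_pickle(self.processed_info, self.processed_info_filename)
        restored = f.restore_obj_from_pickle({}, self.processed_info_filename)
        self.logger.info('Restored pickle content is: %s', restored)
        self.assertEqual(restored['length_proc_list'],
                         self.processed_info['length_proc_list'])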
def processing_std_modis_pp(res_queue,
                            pipeline_run_level=0,
                            pipeline_printout_level=0,
                            pipeline_printout_graph_level=0,
                            prod='',
                            starting_sprod='',
                            mapset='',
                            version='',
                            starting_dates=None,
                            write2file=None,
                            logfile=None,
                            nrt_products=True,
                            update_stats=False):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_msg_mpe')

    create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=starting_dates,
                    proc_lists=None,
                    nrt_products=nrt_products,
                    update_stats=update_stats)

    spec_logger.info("Entering routine %s" %
                     'processing modis - Primary Production')
    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(
                         es_constants.log_dir,
                         '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    return True

# Example 11

def processing_std_rain_onset(res_queue,
                              pipeline_run_level=0,
                              pipeline_printout_level=0,
                              pipeline_printout_graph_level=0,
                              prod='',
                              starting_sprod='',
                              mapset='',
                              version='',
                              starting_dates=None,
                              write2file=None,
                              logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_rain_onset')

    create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=starting_dates,
                    proc_lists=None)

    spec_logger.info("Entering routine %s" % 'processing rain onset')
    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(
                         es_constants.log_dir,
                         '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0,
                     one_second_per_job=True,
                     multiprocess=1,
                     multithread=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

# Example 12

def remove_old_files(productcode,
                     subproductcode,
                     version,
                     mapsetcode,
                     product_type,
                     nmonths,
                     logger=None):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    # Get the existing dates for the dataset
    logger.info("Entering routine %s " % 'remove_old_files')

    # Check the installation type
    sysSettings = functions.getSystemSettings()
    if sysSettings['type_installation'] == 'Server':
        logger.info("File housekeeping not done on Server ")
        return

    prod_subdir = functions.set_path_sub_directory(productcode, subproductcode,
                                                   product_type, version,
                                                   mapsetcode)
    prod_dir = es_constants.es2globals[
        'processing_dir'] + os.path.sep + prod_subdir
    list_files = sorted(glob.glob(prod_dir + os.path.sep + '*.tif'))

    # Delete files older than nmonths (age measured in whole calendar months)
    month_now = datetime.date.today().month
    year_now = datetime.date.today().year

    for my_file in list_files:
        # Extract the date from the filename
        date = functions.get_date_from_path_filename(os.path.basename(my_file))
        date_yyyy = int(date[0:4])
        date_month = int(date[4:6])

        # Age of the file in months, accounting for year roll-over
        months_old = (year_now - date_yyyy) * 12 + (month_now - date_month)
        if months_old >= nmonths:
            logger.debug("Deleting file %s " % my_file)
            os.remove(my_file)

# Example 13

def processing_modis_chla(res_queue,
                          pipeline_run_level=0,
                          pipeline_printout_level=0,
                          pipeline_printout_graph_level=0,
                          prod='',
                          starting_sprod='',
                          mapset='',
                          version='',
                          starting_dates=None,
                          write2file=None,
                          logfile=None):

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_fronts')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod,
                                 starting_sprod=starting_sprod,
                                 mapset=mapset,
                                 version=version,
                                 starting_dates=starting_dates)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    spec_logger.info("Entering routine %s" % 'processing_modis')
    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level)

    if pipeline_printout_level > 0:

        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

# Example 14

def loop_get_internet(dry_run=False, test_one_source=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving data from INTERNET.")

    while True:
        output_dir = es_constants.get_internet_output_dir
        logger.debug("Check if the Ingest Server input directory : %s exists.",
                     output_dir)
        if not os.path.exists(output_dir):
            # ToDo: create output_dir - ingest directory
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            exit(1)

        if not os.path.exists(es_constants.processed_list_int_dir):
            os.mkdir(es_constants.processed_list_int_dir)

        while 1:

            # Check internet connection (or continue)
            if not functions.internet_on():
                logger.error(
                    "The computer is not currently connected to the internet. Wait 1 minute."
                )
                time.sleep(60)

            else:
                try:
                    time_sleep = user_def_sleep
                    logger.debug("Sleep time set to : %s.", time_sleep)
                except:
                    logger.warning(
                        "Sleep time not defined. Setting to default=1min. Continue."
                    )
                    time_sleep = 60

                logger.info(
                    "Reading active INTERNET data sources from database")
                internet_sources_list = querydb.get_active_internet_sources()

                # Loop over active triggers
                for internet_source in internet_sources_list:
                    try:

                        if test_one_source and (internet_source.internet_id !=
                                                test_one_source):
                            logger.info(
                                "Running in test mode, and source is not %s. Continue.",
                                test_one_source)
                            continue
                        execute_trigger = True
                        # Get this from the pads database table (move 'pull_frequency' from internet_source to the
                        # pads table, so that it can be exploited by eumetcast triggers as well). It is expressed in minutes.
                        pull_frequency = internet_source.pull_frequency

                        # Manage the case of files to be continuously downloaded (delay < 0)
                        if pull_frequency < 0:
                            do_not_consider_processed_list = True
                            delay_time_source_minutes = -pull_frequency
                        else:
                            do_not_consider_processed_list = False
                            delay_time_source_minutes = pull_frequency

                        if sys.platform == 'win32':
                            internet_id = str(
                                internet_source.internet_id).replace(':', '_')
                        else:
                            internet_id = str(internet_source.internet_id)

                        logger_spec = log.my_logger('apps.get_internet.' +
                                                    internet_id)
                        logger.info("Processing internet source  %s.",
                                    internet_source.descriptive_name)

                        # Create objects for list and info
                        processed_info_filename = es_constants.get_internet_processed_list_prefix + str(
                            internet_id) + '.info'

                        # Restore/Create Info
                        processed_info = None
                        processed_info = functions.restore_obj_from_pickle(
                            processed_info, processed_info_filename)
                        if processed_info is not None:
                            # Check the delay
                            current_delta = datetime.datetime.now(
                            ) - processed_info['time_latest_exec']
                            current_delta_minutes = int(current_delta.seconds /
                                                        60)
                            if current_delta_minutes < delay_time_source_minutes:
                                logger.debug(
                                    "Still within the %i-minute delay since the latest execution. Skip.",
                                    delay_time_source_minutes)
                                execute_trigger = False
                        else:
                            # Create processed_info object
                            processed_info = {
                                'length_proc_list': 0,
                                'time_latest_exec': datetime.datetime.now(),
                                'time_latest_copy': datetime.datetime.now()
                            }
                            execute_trigger = True

                        if execute_trigger:
                            # Restore/Create List
                            processed_list = []
                            if not do_not_consider_processed_list:
                                processed_list_filename = es_constants.get_internet_processed_list_prefix + internet_id + '.list'
                                processed_list = functions.restore_obj_from_pickle(
                                    processed_list, processed_list_filename)

                            processed_info[
                                'time_latest_exec'] = datetime.datetime.now()

                            logger.debug(
                                "Create current list of file to process for source %s.",
                                internet_source.internet_id)
                            if internet_source.user_name is None:
                                user_name = "anonymous"
                            else:
                                user_name = internet_source.user_name

                            if internet_source.password is None:
                                password = "******"
                            else:
                                password = internet_source.password

                            usr_pwd = str(user_name) + ':' + str(password)

                            logger_spec.debug("              Url is %s.",
                                              internet_source.url)
                            logger_spec.debug("              usr/pwd is %s.",
                                              usr_pwd)
                            logger_spec.debug(
                                "              regex   is %s.",
                                internet_source.include_files_expression)

                            internet_type = internet_source.type

                            if internet_type == 'ftp' or internet_type == 'http':
                                # Manage the end_date (added for MODIS_FIRMS)
                                if (internet_source.end_date != ''):
                                    end_date = internet_source.end_date
                                else:
                                    end_date = None
                                # Note that the following list might contain sub-dirs (it reflects full_regex)
                                try:
                                    current_list = get_list_matching_files(
                                        str(internet_source.url),
                                        str(usr_pwd),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_type,
                                        end_date=end_date)
                                except:
                                    logger.error(
                                        "Error in creating file lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'http_tmpl':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_tmpl(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id))
                                except:
                                    logger.error(
                                        "Error in creating date lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'motu_client':
                                # Create the full filename from a 'template' which contains
                                try:
                                    current_list = build_list_matching_files_motu(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression),
                                        internet_source.start_date,
                                        internet_source.end_date,
                                        str(internet_source.frequency_id),
                                        str(internet_source.user_name),
                                        str(internet_source.password),
                                        str(internet_source.
                                            files_filter_expression),
                                    )

                                except:
                                    logger.error(
                                        "Error in creating motu_client lists. Continue"
                                    )
                                    continue

                            # elif internet_type == 'sentinel_sat':
                            #     # Create the full filename from a 'template' which contains
                            #     try:
                            #         current_list = build_list_matching_files_sentinel_sat(str(internet_source.url),
                            #                                                     str(internet_source.include_files_expression),
                            #                                                     internet_source.start_date,
                            #                                                     internet_source.end_date,
                            #                                                     str(internet_source.frequency_id),
                            #                                                     str(internet_source.user_name),
                            #                                                     str(internet_source.password),
                            #                                                     #str(internet_source.files_filter_expression),
                            #                                                       )
                            #
                            #     except:
                            #         logger.error("Error in creating sentinel_sat lists. Continue")
                            #         continue

                            elif internet_type == 'local':
                                logger.info(
                                    "This internet source is meant to copy data on local filesystem"
                                )
                                try:
                                    current_list = get_list_matching_files_dir_local(
                                        str(internet_source.url),
                                        str(internet_source.
                                            include_files_expression))
                                except:
                                    logger.error(
                                        "Error in creating date lists. Continue"
                                    )
                                    continue

                            elif internet_type == 'offline':
                                logger.info(
                                    "This internet source is meant to work offline (GoogleDrive)"
                                )
                                current_list = []
                            else:
                                logger.error(
                                    "No correct type for this internet source type: %s"
                                    % internet_type)
                                current_list = []
                            logger_spec.debug(
                                "Number of files currently available for source %s is %i",
                                internet_id, len(current_list))

                            if len(current_list) > 0:
                                logger_spec.debug(
                                    "Number of files already copied for trigger %s is %i",
                                    internet_id, len(processed_list))
                                listtoprocess = []
                                for current_file in current_list:
                                    if len(processed_list) == 0:
                                        listtoprocess.append(current_file)
                                    else:
                                        #if os.path.basename(current_file) not in processed_list: -> save in .list subdirs as well !!
                                        if current_file not in processed_list:
                                            listtoprocess.append(current_file)

                                logger_spec.debug(
                                    "Number of files to be copied for trigger %s is %i",
                                    internet_id, len(listtoprocess))
                                if len(listtoprocess) > 0:
                                    # # Debug
                                    # toprint=''
                                    # for elem in listtoprocess:
                                    #    toprint+=elem+','
                                    #    logger_spec.info('List in get_list_matching_files: %s' % toprint)

                                    logger_spec.debug(
                                        "Loop on the found files.")
                                    if not dry_run:
                                        for filename in list(listtoprocess):
                                            logger_spec.debug(
                                                "Processing file: " +
                                                str(internet_source.url) +
                                                os.path.sep + filename)
                                            try:
                                                if internet_type == 'local':
                                                    shutil.copyfile(
                                                        str(internet_source.url) +
                                                        os.path.sep + filename,
                                                        es_constants.ingest_dir
                                                        + os.path.basename(
                                                            filename))
                                                    result = 0
                                                elif internet_type == 'motu_client':
                                                    result = get_file_from_motu_command(
                                                        str(filename),
                                                        #target_file=internet_source.files_filter_expression,
                                                        target_dir=es_constants
                                                        .ingest_dir,
                                                        userpwd=str(usr_pwd))

                                                # elif internet_type == 'sentinel_sat':
                                                #     result = get_file_from_sentinelsat_url(str(filename),
                                                #                                            target_dir=es_constants.ingest_dir)
                                                else:
                                                    result = get_file_from_url(
                                                        str(internet_source.url
                                                            ) + os.path.sep +
                                                        filename,
                                                        target_file=os.path.
                                                        basename(filename),
                                                        target_dir=es_constants
                                                        .ingest_dir,
                                                        userpwd=str(usr_pwd))
                                                if not result:
                                                    logger_spec.info(
                                                        "File %s copied.",
                                                        filename)
                                                    processed_list.append(
                                                        filename)
                                                else:
                                                    logger_spec.warning(
                                                        "File %s not copied.",
                                                        filename)
                                            except:
                                                logger_spec.warning(
                                                    "Problem while copying file: %s.",
                                                    filename)
                                    else:
                                        logger_spec.info(
                                            'Dry_run is set: do not get files')

                            if not dry_run:
                                functions.dump_obj_to_pickle(
                                    processed_list, processed_list_filename)
                                functions.dump_obj_to_pickle(
                                    processed_info, processed_info_filename)

                        sleep(float(user_def_sleep))
                    # Loop over sources
                    except Exception as inst:
                        logger.error(
                            "Error while processing source %s: %s. Continue" %
                            (internet_source.descriptive_name, inst))
                sleep(float(user_def_sleep))

    exit(0)

# Example 15

def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset='', logfile=None):

    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir+os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)
    # Fill the processing list -> some fields to be taken from the input products
    output_sprod_group=proc_lists.proc_add_subprod_group("merged")
    output_sprod=proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                             descriptive_name='undefined',
                                             description='undefined',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)

    # Loop over the input products:
    for input_product in input_products:

        # Extract info from input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                   subproductcode=sub_product_code,
                                                   version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print ('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir+my_date+out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
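
# Input/output argument sketch (hypothetical records; only the attribute names
# used above are assumed): input_products and output_product are sequences of
# records exposing productcode, subproductcode, version and mapsetcode, plus
# start_date/end_date for the inputs.
#
#   from collections import namedtuple
#
#   InProd = namedtuple('InProd', 'productcode subproductcode version mapsetcode start_date end_date')
#   OutProd = namedtuple('OutProd', 'productcode subproductcode version mapsetcode')
#
#   processing_merge(input_products=[InProd('<prod-a>', '<sprod-a>', '1.0', '<mapset>', None, None)],
#                    output_product=[OutProd('<prod-merged>', '<sprod-merged>', '1.0', '<mapset>')],
#                    mapset='<mapset>')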

# Example 16

from __future__ import division
from future import standard_library
standard_library.install_aliases()
__author__ = 'analyst'
#
#	purpose: Run the script to copy data from an external disk to /data/processing
#	author:  M.Clerici
#	date:	 13.02.2019
#   descr:	 To be used for feeding an offline computer (e.g. for Training) with a subset from a disk
#
#	history: 1.0
#
import sys, os
import glob
from lib.python import es_logging as log
logger = log.my_logger('apps.es2system.ingest_archive')


def copy_data_disk(input_dir=None, dry_run=False):

    target_dir = '/data/processing/exchange/test_data/'

    #   Define the list  products/version/mapsets

    prod_list = []

    prod_list.append({
        'prod': 'arc2-rain',
        'version': '2.0',
        'mapset': 'ARC2-Africa-11km',
        'regex': '201*',

# Example 17

_author__ = "Marco Clerici"

import sys
# import os, time
from config import es_constants
# from apps.acquisition import ingestion
from apps.acquisition import acquisition
from lib.python import es_logging as log
logger = log.my_logger('apps.acquisition.ingestion')

try:
    command = str(sys.argv[1])
except:
    logger.fatal("An argument should be provided: status/start/stop")
    exit(1)

# Define pid file and create daemon
pid_file = es_constants.ingestion_pid_filename
daemon = acquisition.IngestionDaemon(pid_file, dry_run=0)

if command == "status":
    status = daemon.status()
    print("Current status of the Service: %s" % status)

if command == "start":
    logger.info("Starting ingestion service")
    daemon.start()

if command == "stop":
    logger.info("Stopping ingestion service")
    daemon.stop()

# Example 18

_author__ = "Marco Clerici"

import sys
from config import es_constants
from apps.es2system import es2system
from lib.python import es_logging as log
logger = log.my_logger("apps.es2system.es2system")

try:
    command = str(sys.argv[1])
except: 
    logger.fatal("An argument should be provided: status/start/stop") 
    exit(1)

# Define pid file and create daemon
pid_file = es_constants.system_pid_filename
daemon = es2system.SystemDaemon(pid_file, dry_run=0)

if command == "status":
        status = daemon.status()
        print("Current status of the Service: %s" % status)
    
if command == "start":
        logger.info("Starting System service") 
        daemon.start()
        
if command == "stop":
        logger.info("Stopping System service") 
        daemon.stop()

# Example 19

def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.", input_dir)
        if not os.path.exists(input_dir):
            logger.error("The EUMETCast input directory : %s is not yet mounted.", input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.", output_dir)
        if not os.path.exists(output_dir):
            logger.fatal("The Ingest Server input directory : %s doesn't exists.", output_dir)
            # TODO Jurvtk: Create the Ingest Server output directory if it doesn't exist!
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except:
                logger.warning("Sleep time not defined. Setting to default=1min. Continue.")
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources(echo=echo_query)
            logger.debug("N. %i active EUMETCAST data sources found", len(eumetcast_sources_list))

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.'+eumetcast_source.eumetcast_id)
                logger.debug("Processing eumetcast source  %s.", eumetcast_source.eumetcast_id)

                processed_list_filename = es_constants.get_eumetcast_processed_list_prefix+str(eumetcast_source.eumetcast_id)+'.list'
                processed_info_filename = es_constants.get_eumetcast_processed_list_prefix+str(eumetcast_source.eumetcast_id)+'.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {'length_proc_list': 0,
                                  'time_latest_exec': datetime.datetime.now(),
                                  'time_latest_copy': datetime.datetime.now()}

                logger.debug("Loading the processed file list for source %s ", eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list=functions.restore_obj_from_pickle(processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info=functions.restore_obj_from_pickle(processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec']=datetime.datetime.now()

                logger.debug("Create current list of file to process for trigger %s.", eumetcast_source.eumetcast_id)
                current_list = find_files(input_dir, eumetcast_source.filter_expression_jrc)
                #logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                logger_spec.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                if len(current_list) > 0:

                    #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess != set([]):
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(input_dir, filename)):
                                if os.stat(os.path.join(input_dir, filename)).st_mtime < int(time.time()):
                                    logger_spec.debug("Processing file: "+os.path.basename(filename))
                                    if not dry_run:
                                        if commands.getstatusoutput("cp " + filename + " " + output_dir + os.sep + os.path.basename(filename))[0] == 0:
                                            logger.info("File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info['time_latest_copy']=datetime.datetime.now()
                                            processed_info['length_proc_list']=len(processed_list)
                                        else:
                                            logger_spec.warning("Problem while copying file: %s.", filename)
                                    else:
                                        logger_spec.info('Dry_run is set: do not get files')
                            else:
                                logger_spec.error("File %s removed by the system before being processed.", filename)
                    else:
                        logger.debug("Nothing to process - go to next trigger.")
                        pass

                # Drop from the processed list any file that has disappeared from the filesystem
                # (building a new list avoids removing items while iterating)
                processed_list = [infile for infile in processed_list if os.path.exists(infile)]

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info, processed_info_filename)

            time.sleep(float(10))

        # except Exception, e:
        #     logger.fatal(str(e))
        #     exit(1)
    exit(0)

# Example 20

def processing_modis_pp(res_queue,
                        pipeline_run_level=0,
                        pipeline_printout_level=0,
                        pipeline_printout_graph_level=0,
                        input_products='',
                        output_product='',
                        write2file=None,
                        logfile=None,
                        nrt_products=True,
                        update_stats=True):
    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    create_pipeline(input_products,
                    output_product,
                    logfile=logfile,
                    nrt_products=nrt_products,
                    update_stats=update_stats)

    spec_logger.info("Entering routine %s" %
                     'processing modis - Primary Production')
    if pipeline_run_level > 0:
        spec_logger.info("Now calling pipeline_run")
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(
                         es_constants.log_dir,
                         '.ruffus_history_modis_pp.sqlite'),
                     checksum_level=0)

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')


# def processing_modis_pp_stats_only(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
#                                    pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
#                                    starting_dates=None, write2file=None, logfile=None, input_products='',
#                                    output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=False,
#                                  update_stats=True,
#                                  input_products=input_products,
#                                  output_product=output_product
#                                  )
#
#     return result
#
#
# def processing_modis_pp_only(res_queue, pipeline_run_level=0, pipeline_printout_level=0,
#                              pipeline_printout_graph_level=0, prod='', starting_sprod='', mapset='', version='',
#                              starting_dates=None, write2file=None, logfile=None, input_products='', output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=True,
#                                  update_stats=False,
#                                  input_products=input_products,
#                                  output_product=output_product
#                                  )
#
#     return result
#
#
# def processing_modis_pp_all(res_queue, pipeline_run_level=0, pipeline_printout_level=0, pipeline_printout_graph_level=0,
#                             prod='', starting_sprod='', mapset='', version='', starting_dates=None, write2file=None,
#                             logfile=None, input_products='', output_product=''):
#     result = processing_modis_pp(res_queue, pipeline_run_level=pipeline_run_level,
#                                  pipeline_printout_level=pipeline_printout_level,
#                                  pipeline_printout_graph_level=pipeline_printout_graph_level,
#                                  write2file=write2file,
#                                  logfile=logfile,
#                                  nrt_products=True,
#                                  update_stats=True,
#                                  input_products=input_products,
#                                  output_product=output_product
#                                  )
#
#     return result

# Example 21

def create_pipeline(input_products,
                    output_product,
                    logfile=None,
                    nrt_products=True,
                    update_stats=False):
    proc_lists = None

    if proc_lists is None:
        proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0  # PP from Chla, SST, Kd490 and PAR

    activate_stats_comput = 0  # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    #   switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1  # PP from Chla, SST, Kd490 and PAR

    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    #   ---------------------------------------------------------------------
    #   Create lists

    # my_date='20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #    Parse the arguments and extract the 4 input variables
    #
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:

        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                             functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version,
                                                              chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                            functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                              functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version,
                                                               kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                            functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not (found_chla and found_kd490 and found_par and found_sst):
        spec_logger.error('At least one of the 4 expected inputs is missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapsets must be equal. Exit')
        return 1

    # Read input product nodata

    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define input files
    # if starting_dates is not None:
    #     starting_files = []
    #     for my_date in starting_dates:
    #         starting_files.append(input_dir+my_date+in_prod_ident)
    # else:
    #     starting_files=input_dir+"*"+in_prod_ident

    # Define outputs

    output_nodata = -32767

    old = False

    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed
    #       subprod defined according to the frequency

    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if old:
        # Get the first output -> PP subproduct generated (8daysavg or monavg)
        output_sprod = output_product[0].subproductcode
    else:
        # Define the outputs according to the frequency (method in 'functions' to be created !!)
        if chla_frequency == 'e1month':
            frequency_string = 'monthly'
            output_sprod = 'monavg'
            output_sprod_clim = '1monclim'
            output_sprod_min = '1monmin'
            output_sprod_max = '1monmax'
            sub_product_group = '1monstat'
        elif chla_frequency == 'e1modis8day':
            frequency_string = '8 days'
            output_sprod = '8daysavg'
            activate_pp_stats_clim_comput = 1
            activate_pp_stats_min_comput = 1
            activate_pp_stats_max_comput = 1
            sub_product_group = '8daysstat'
            output_sprod_clim = '8daysclim'
            output_sprod_min = '8daysmin'
            output_sprod_max = '8daysmax'
        else:
            spec_logger.error('Frequency not recognized: %s. Exit!',
                              chla_frequency)
            return
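
    # Possible refactoring (sketch; hypothetical - no such helper exists in 'functions' yet,
    # as the comment above notes): the frequency-to-subproduct mapping could be factored out, e.g.
    #
    #     def get_pp_output_subprods(frequency_id):
    #         mapping = {
    #             'e1month':     ('monthly', 'monavg',   '1monclim',  '1monmin',  '1monmax',  '1monstat'),
    #             'e1modis8day': ('8 days',  '8daysavg', '8daysclim', '8daysmin', '8daysmax', '8daysstat'),
    #         }
    #         return mapping.get(frequency_id)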

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod,
                                                     'Derived', output_version,
                                                     output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():

        # Take kd490 as starting point
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)

            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False

            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par,
                             ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
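        # input_file is the tuple yielded by generate_input_files_pp, in the order
        # (kd_file, chla_file, par_file, sst_file) - hence the indices used below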
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],
                "par_file": input_file[2], \
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata, "chla_nodata": chla_nodata, \
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', \
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)

    #   ---------------------------------------------------------------------
    #   Climatology (inter-annual average)

    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version
    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod,
                                                      'Derived', version,
                                                      mapset)
    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident

    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_clim,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Climatology at ' + frequency_string +
        ' frequency',
        description='Inter-annual Climatology at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_clim = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_clim + "{MMDD[0]}" +
        out_prod_ident_clim
    ]
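
    # The formatter above captures the MMDD part of a YYYYMMDD date, so @collate (used in
    # the 'monthly' branch below) groups the same calendar date across all years into a
    # single climatology output.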

    # Fixes ES2-304
    def generate_input_files_pp_stats():

        # MMDD_nonleap_list = ['0101', '0109', '0117', '0125', '0202', '0210', '0218', '0226', '0306', '0314', '0314',
        #                        '0330', '0407', '0415', '0423', '0501', '0509', '0517', '0525', '0602', '0610', '0618',
        #                        '0626', '0704', '0712', '0720', '0728', '0805', '0813', '0821', '0829', '0906', '0914',
        #                        '0922', '0930', '1008', '1016', '1024', '1101', '1109', '1117', '1125', '1203', '1211',
        #                        '1219', '1227']

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_clim + os.path.sep + MMDD_nonleap + out_prod_ident_clim
            yield (input_files_unique, output_file)
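
        # NOTE (sketch; hypothetical helper): the non-leap/leap MMDD pairs above follow the
        # MODIS 8-day compositing calendar (start days DOY 1, 9, ..., 361) and could be
        # derived instead of hard-coded, e.g.:
        #
        #     def modis_8day_mmdd_pairs():
        #         import datetime
        #         pairs = {}
        #         for doy in range(1, 366, 8):
        #             nonleap = (datetime.date(2001, 1, 1) + datetime.timedelta(doy - 1)).strftime('%m%d')
        #             leap = (datetime.date(2000, 1, 1) + datetime.timedelta(doy - 1)).strftime('%m%d')
        #             pairs[nonleap] = leap
        #         return pairs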

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    # #   ---------------------------------------------------------------------
    # #   Minimum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_min,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Minimum at ' + frequency_string +
        ' frequency',
        description='Inter-annual Minimum at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_min = functions.set_path_filename_no_date(
        prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(
        prod, output_sprod_min, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_min + "{MMDD[0]}" +
        out_prod_ident_min
    ]

    def generate_input_files_pp_stats_min():

        # MMDD_nonleap_list = ['0101', '0109', '0117', '0125', '0202', '0210', '0218', '0226', '0306', '0314', '0314',
        #                        '0330', '0407', '0415', '0423', '0501', '0509', '0517', '0525', '0602', '0610', '0618',
        #                        '0626', '0704', '0712', '0720', '0728', '0805', '0813', '0821', '0829', '0906', '0914',
        #                        '0922', '0930', '1008', '1016', '1024', '1101', '1109', '1117', '1125', '1203', '1211',
        #                        '1219', '1227']

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_min + os.path.sep + MMDD_nonleap + out_prod_ident_min
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_min_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_min_image(**args)

    # #   ---------------------------------------------------------------------
    # #   Maximum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_max,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Maximum at ' + frequency_string +
        ' frequency',
        description='Inter-annual Maximum at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_max = functions.set_path_filename_no_date(
        prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(
        prod, output_sprod_max, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_max + "{MMDD[0]}" +
        out_prod_ident_max
    ]

    def generate_input_files_pp_stats_max():

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_max + os.path.sep + MMDD_nonleap + out_prod_ident_max
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_max_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_max_image(**args)

    # Return the generated processing lists to the caller
    return proc_lists
Exemplo n.º 22
def get_archives_eumetcast_ftp():

    # Ad-hoc definitions (to be copied to settings file)
    source_id = 'MESA:JRC:Archives'
    filter_expression_mesa_jrc = 'MESA_JRC_.*.tif'

    # Get Access credentials
    ftp_eumetcast_url = es_constants.es2globals['ftp_eumetcast_url']
    ftp_eumetcast_userpwd = es_constants.es2globals['ftp_eumetcast_userpwd']

    # Define a file_handler logger 'source-specific' (for GUI)
    logger_spec = log.my_logger('apps.get_archives_eumetcast')
    logger.info("Retrieving MESA_JRC files from PC1.")

    if sys.platform == 'win32':
        source_id = source_id.replace(':', '_')  #Pierluigi
    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.list'
    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
        source_id) + '.info'

    # Create objects for list and info
    processed_list = []
    processed_info = {
        'length_proc_list': 0,
        'time_latest_exec': datetime.datetime.now(),
        'time_latest_copy': datetime.datetime.now()
    }

    logger.debug("Loading the processed file list for source %s ", source_id)

    # Restore/Create List
    processed_list = functions.restore_obj_from_pickle(
        processed_list, processed_list_filename)
    # Restore/Create Info
    processed_info = functions.restore_obj_from_pickle(
        processed_info, processed_info_filename)
    # Update processing time (in case it is restored)
    processed_info['time_latest_exec'] = datetime.datetime.now()

    logger.debug("Create current list of file to process for trigger %s.",
                 source_id)
    try:
        current_list = get_list_matching_files(ftp_eumetcast_url,
                                               ftp_eumetcast_userpwd,
                                               filter_expression_mesa_jrc,
                                               'ftp',
                                               my_logger=logger_spec)
    except:
        logger.error("Cannot connect to the PC1 via ftp. Wait 1 minute")
        current_list = []
        time.sleep(60)

    logger_spec.info("Number of files currently on PC1 for trigger %s is %i",
                     source_id, len(current_list))

    if len(current_list) > 0:

        #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
        logger_spec.debug(
            "Number of files already copied for trigger %s is %i", source_id,
            len(processed_list))
        listtoprocess = []
        listtoprocess = set(current_list) - set(processed_list)
        #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
        logger_spec.info("Number of files to be copied for trigger %s is %i",
                         source_id, len(listtoprocess))
        if listtoprocess != set([]):
            logger_spec.debug("Loop on the found files.")
            for filename in list(listtoprocess):
                try:
                    result = get_file_from_url(
                        str(ftp_eumetcast_url) + os.path.sep + filename,
                        target_file=os.path.basename(filename),
                        target_dir=es_constants.ingest_dir,
                        userpwd=str(ftp_eumetcast_userpwd))
                    if not result:
                        logger_spec.info("File %s copied.", filename)
                        processed_list.append(filename)
                    else:
                        logger_spec.warning("File %s not copied: ", filename)
                except:
                    logger_spec.warning("Problem while copying file: %s.",
                                        filename)
        else:
            logger.debug("Nothing to process - go to next trigger.")
            pass

    # Drop files no longer present on the server (iterate over a copy,
    # since items are removed while looping)
    for infile in list(processed_list):
        if infile not in current_list:
            processed_list.remove(infile)

    functions.dump_obj_to_pickle(processed_list, processed_list_filename)
    functions.dump_obj_to_pickle(processed_info, processed_info_filename)
Exemplo n.º 23
__author__ = "Marco Clerici"

import sys

from config import es_constants
from apps.acquisition import acquisition
from lib.python import es_logging as log
logger = log.my_logger("apps.acquisition.get_eumetcast")

try:
    command = str(sys.argv[1])
except:
    logger.fatal("An argument should be provided: status/start/stop")
    exit(1)

# Define pid file and create daemon
pid_file = es_constants.get_eumetcast_pid_filename
daemon = acquisition.GetEumetcastDaemon(pid_file, dry_run=False)

if command == "status":
    status = daemon.status()
    print("Current status of the Service: %s" % status)

if command == "start":
    logger.info("Starting Get EUMETCast service")
    daemon.start()

if command == "stop":
    logger.info("Stopping Get EUMETCast service")
    daemon.stop()
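
# Typical invocation (the script name below is an assumption):
#   python get_eumetcast_service.py status|start|stop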
Exemplo n.º 24
def push_data_ftp(dry_run=False,
                  user=None,
                  psw=None,
                  url=None,
                  trg_dir=None,
                  masked=True):

    #   Synchronized data towards an ftp server (only for JRC)
    #   It replaces, since the new srv-ies-ftp.jrc.it ftp is set, the bash script: mirror_to_ftp.sh
    #   Configuration:  it looks at all 'non-masked' products and pushes them
    #                   For the mapsets, find what is in the filesystem, and pushes only the 'largest'
    #   It uses a command like:
    #       lftp -e "mirror -RLe /data/processing/vgt-ndvi/sv2-pv2.1/SPOTV-Africa-1km/derived/10dmax-linearx2/
    #                            /narma/eStation_2.0/processing/vgt-ndvi/sv2-pv2.1/SPOTV-Africa-1km/derived/10dmax-linearx2/;exit"
    #                            -u narma:JRCVRw2960 sftp://srv-ies-ftp.jrc.it"" >> /eStation2/log/push_data_ftp.log
    #

    spec_logger = log.my_logger('apps.es2system.push_data_ftp')

    try:
        from config import server_ftp
    except:
        logger.warning('Configuration file for ftp sync not found. Exit')
        return 1

    if user is None:
        user = server_ftp.server['user']
    if psw is None:
        psw = server_ftp.server['psw']
    if url is None:
        url = server_ftp.server['url']
    if trg_dir is None:
        trg_dir = server_ftp.server['data_dir']

    # Create an ad-hoc file for the lftp command output (beside the standard logger)
    logfile = es_constants.es2globals['log_dir'] + 'push_data_ftp.log'
    message = time.strftime(
        "%Y-%m-%d %H:%M") + ' INFO: Running the ftp sync now ... \n'

    logger.debug("Entering routine %s" % 'push_data_ftp')

    # Loop over 'not-masked' products
    products = querydb.get_products(masked=False)
    # products = products[21:23]               # test a subset
    for row in products:

        prod_dict = functions.row2dict(row)
        productcode = prod_dict['productcode']
        version = prod_dict['version']
        spec_logger.info('Working on product {}/{}'.format(
            productcode, version))

        # TEMP - For testing only
        # if productcode!='vgt-ndvi' or version !='sv2-pv2.2':
        #     continue

        # Check it if is in the list of 'exclusions' defined in ./config/server_ftp.py
        key = '{}/{}'.format(productcode, version)
        skip = False
        if key in server_ftp.exclusions:
            skip = True
            logger.debug('Do not sync for {}/{}'.format(productcode, version))

        p = Product(product_code=productcode, version=version)

        all_prod_mapsets = p.mapsets
        all_prod_subproducts = p.subproducts

        # Check there is at least one mapset and one subproduct
        if len(all_prod_mapsets) > 0 and len(all_prod_subproducts) > 0 and not skip:

            # In case of several mapsets, check if there is a 'larger' one
            if len(all_prod_mapsets) > 1:
                mapset_to_use = []
                for my_mapset in all_prod_mapsets:
                    mapset_info = querydb.get_mapset(mapsetcode=my_mapset,
                                                     allrecs=False)
                    if hasattr(mapset_info, "mapsetcode"):
                        my_mapobj = MapSet()
                        my_mapobj.assigndb(my_mapset)

                        larger_mapset = my_mapobj.get_larger_mapset()
                        if larger_mapset is not None:
                            if larger_mapset not in mapset_to_use:
                                mapset_to_use.append(larger_mapset)
                        else:
                            if my_mapset not in mapset_to_use:
                                mapset_to_use.append(my_mapset)
            else:
                mapset_to_use = all_prod_mapsets
            # Loop over existing mapset
            for mapset in mapset_to_use:
                all_mapset_datasets = p.get_subproducts(mapset=mapset)

                # Loop over existing subproducts
                for subproductcode in all_mapset_datasets:
                    # Get info - and ONLY for NOT masked products
                    dataset_info = querydb.get_subproduct(
                        productcode=productcode,
                        version=version,
                        subproductcode=subproductcode,
                        masked=masked)  # -> TRUE means only NOT masked sprods

                    if dataset_info is not None:
                        dataset_dict = functions.row2dict(dataset_info)
                        dataset_dict['mapsetcode'] = mapset

                        logger.debug('Working on {}/{}/{}/{}'.format(
                            productcode, version, mapset, subproductcode))

                        subdir = functions.set_path_sub_directory(
                            productcode, subproductcode,
                            dataset_dict['product_type'], version, mapset)
                        source = data_dir + subdir
                        target = trg_dir + subdir

                        # command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> {}'.format(source,target,user,psw,url,logfile)
                        command = 'lftp -e "mirror -RLe {} {};exit" -u {}:{} {}"" >> /dev/null'.format(
                            source, target, user, psw, url)
                        logger.debug("Executing %s" % command)
                        spec_logger.info(
                            'Working on mapset/subproduct {}/{} \n'.format(
                                mapset, subproductcode))

                        # return
                        try:
                            status = os.system(command)
                            if status:
                                logger.error("Error in executing %s" % command)
                                spec_logger.error("Error in executing %s" %
                                                  command)
                        except:
                            logger.error(
                                'Error in executing command: {}'.format(
                                    command))
                            spec_logger.error(
                                'Error in executing command: {}'.format(
                                    command))
Exemplo n.º 25
from builtins import str
from builtins import object

import sys
import os
import time
import atexit
import io
# import signal
import psutil

if sys.platform != 'win32':
    from signal import SIGKILL, SIGTERM
from lib.python import es_logging as log

logger = log.my_logger("lib.python.daemon")
from config import es_constants

if not os.path.isdir(es_constants.pid_file_dir):
    os.makedirs(es_constants.pid_file_dir)
if not os.path.isdir(es_constants.processed_list_base_dir):
    os.makedirs(es_constants.processed_list_base_dir)
if not os.path.isdir(es_constants.processed_list_eum_dir):
    os.makedirs(es_constants.processed_list_eum_dir)
if not os.path.isdir(es_constants.processed_list_int_dir):
    os.makedirs(es_constants.processed_list_int_dir)


class Daemon(object):
    """
    A generic daemon class.
Exemplo n.º 26
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    # Create Logger
    logger = log.my_logger('log.lst')
    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dmax_comput = 1
    activate_10dmax_comput = 1
    activate_10d15min_comput = 1
    activate_10dmin_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files ('lst' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    logger.info("starting_files %s" % starting_files)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dmax
    # Daily maximum from 15min lst, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("1dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='1d Maximum',
                                               description='Daily Maximum',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dmax'
    out_prod_ident_1dmax = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dmax = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    formatter_in_1dmax = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident
    formatter_out_1dmax = "{subpath[0][5]}" + os.path.sep + output_subdir_1dmax + "{YYYYMMDD[0]}" + out_prod_ident_1dmax
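    # The regex above captures the YYYYMMDD part of the 15-min timestamp (YYYYMMDDHHMM),
    # so @collate below bundles all slots of the same day into one daily-maximum output.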
    #
    @active_if(activate_1dmax_comput)
    @collate(starting_files, formatter(formatter_in_1dmax),
             formatter_out_1dmax)
    def lsasaf_lst_1dmax(input_file, output_file):
        #
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_max_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    # 10dmax
    # 10 Day maximum from daily max, on target mapset
    output_sprod = proc_lists.proc_add_subprod("10dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='10d Maximum',
                                               description='10d Maximum',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod_10dmax = '10dmax'
    out_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, output_sprod_10dmax, target_mapset, version, ext)
    output_subdir_10dmax = functions.set_path_sub_directory(
        prod, output_sprod_10dmax, 'Derived', version, target_mapset)

    # #   Define input files
    in_prod_10dmax = '1dmax'
    in_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, in_prod_10dmax, target_mapset, version, ext)
    #
    input_dir_10dmax = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, in_prod_10dmax, 'Derived', version, target_mapset)
    #
    starting_files_10dmax = input_dir_10dmax + "*" + in_prod_ident_10dmax

    #
    def generate_parameters_10dmax():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dmax)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'Julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                # Build the output name once per dekad (not inside the file loop)
                output_file = es_constants.processing_dir + output_subdir_10dmax + os.path.sep + my_dekad_str + out_prod_ident_10dmax
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                yield (file_list, output_file)


    @active_if(activate_10dmax_comput)
    @files(generate_parameters_10dmax)
    def lsasaf_lst_10dmax(input_file, output_file):
        #
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   Dekad maximum for every 15min
    #   NOTE: this product is compute w/o re-projection, i.e. on the 'native' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10d15min",
        "lsasaf-lst",
        final=False,
        descriptive_name='10day Maximum over 15 min',
        description='10day Maximum computed for every 15 min',
        frequency_id='e15minute',  # Is it OK ???????
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d15min():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'Julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 15-min time slots in a day ('0000', '0015', ..., '2345')
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(range(0, 24), range(0, 60, 15))
        ]

        for hhmm in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + hhmm +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + hhmm + out_prod_ident

                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    if len(file_list) > 8:
                        yield (file_list, output_file)

    @active_if(activate_10d15min_comput)
    @files(generate_parameters_10d15min)
    def lsasaf_lst_10d15min(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata":-32768}

        raster_image_math.do_max_image(**args)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, starting_sprod, version, native_mapset,
                         'Ingest', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    #   10 day minimum (mm)
    #   NOTE: this product is compute with re-projection, i.e. on the 'target' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Minimum',
        description='10day minimum',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, "10dmin", target_mapset, version, ext)
    output_subdir_10dmin = functions.set_path_sub_directory(
        prod, "10dmin", 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dmin = '10d15min'
    in_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, in_prod_10dmin, native_mapset, version, ext)

    input_dir_10dmin = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, in_prod_10dmin, 'Derived', version, native_mapset)

    starting_files_10dmin = input_dir_10dmin + "*" + in_prod_ident_10dmin

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dmin
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dmin +
        "{YYYYMMDD[0]}" + out_prod_ident_10dmin
    ]
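
    # Grouping is on the dekad date (YYYYMMDD) of the 10d15min filenames, so the minimum
    # is taken over the (up to 96) quarter-hour dekadal maxima of the same dekad.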

    @follows(lsasaf_lst_10d15min)
    @active_if(activate_10dmin_comput)
    @collate(starting_files_10dmin, formatter(formatter_in), formatter_out)
    def lsasaf_lst_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_min_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

        # Do also the house-keeping, by deleting the files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, '10d15min', version, native_mapset, 'Ingest',
                         number_months_keep)

    return proc_lists
Exemplo n.º 27
def loop_eumetcast(dry_run=False):

    global processed_list_filename, processed_list
    global processed_info_filename, processed_info

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGILL, signal_handler)

    logger.info("Starting retrieving EUMETCast data.")

    while True:
        logger.debug("Check if the EUMETCast input directory : %s exists.",
                     input_dir)
        if not os.path.exists(input_dir):
            logger.error(
                "The EUMETCast input directory : %s is not yet mounted.",
                input_dir)

        logger.debug("Check if the Ingest Server input directory : %s exists.",
                     output_dir)
        if not os.path.exists(output_dir):
            logger.fatal(
                "The Ingest Server input directory : %s doesn't exists.",
                output_dir)
            exit(1)

        if not os.path.exists(es_constants.base_tmp_dir):
            os.mkdir(es_constants.base_tmp_dir)

        if not os.path.exists(es_constants.processed_list_base_dir):
            os.mkdir(es_constants.processed_list_base_dir)

        if not os.path.exists(es_constants.processed_list_eum_dir):
            os.mkdir(es_constants.processed_list_eum_dir)

        while 1:
            try:
                time_sleep = user_def_sleep
                logger.debug("Sleep time set to : %s.", time_sleep)
            except:
                logger.warning(
                    "Sleep time not defined. Setting to default=1min. Continue."
                )
                time_sleep = 60

            # try:
            logger.debug("Reading active EUMETCAST data sources from database")
            eumetcast_sources_list = querydb.get_eumetcast_sources()
            logger.debug("N. %i active EUMETCAST data sources found",
                         len(eumetcast_sources_list))

            # Get the EUMETCast MESA_JRC files
            try:
                get_archives_eumetcast()
            except:
                logger.error(
                    "Error in executing get_archives_eumetcast. Continue")

            # Loop over active triggers
            for eumetcast_source in eumetcast_sources_list:

                # Define a file_handler logger 'source-specific' (for GUI)
                logger_spec = log.my_logger('apps.get_eumetcast.' +
                                            eumetcast_source.eumetcast_id)
                logger.info("Processing eumetcast source  %s.",
                            eumetcast_source.eumetcast_id)

                if sys.platform == 'win32':  # Pierluigi
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id).replace(':',
                                                               '_') + '.info'

                else:
                    processed_list_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.list'
                    processed_info_filename = es_constants.get_eumetcast_processed_list_prefix + str(
                        eumetcast_source.eumetcast_id) + '.info'

                # Create objects for list and info
                processed_list = []
                processed_info = {
                    'length_proc_list': 0,
                    'time_latest_exec': datetime.datetime.now(),
                    'time_latest_copy': datetime.datetime.now()
                }

                logger.debug("Loading the processed file list for source %s ",
                             eumetcast_source.eumetcast_id)

                # Restore/Create List
                processed_list = functions.restore_obj_from_pickle(
                    processed_list, processed_list_filename)
                # Restore/Create Info
                processed_info = functions.restore_obj_from_pickle(
                    processed_info, processed_info_filename)
                # Update processing time (in case it is restored)
                processed_info['time_latest_exec'] = datetime.datetime.now()

                logger.debug(
                    "Create current list of file to process for trigger %s.",
                    eumetcast_source.eumetcast_id)
                current_list = find_files(
                    input_dir, eumetcast_source.filter_expression_jrc)
                #logger.debug("Number of files currently on PC1 for trigger %s is %i", eumetcast_source.eumetcast_id, len(current_list))
                logger_spec.info(
                    "Number of files currently on PC1 for trigger %s is %i",
                    eumetcast_source.eumetcast_id, len(current_list))
                if len(current_list) > 0:

                    #logger.debug("Number of files already copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(processed_list))
                    logger_spec.debug(
                        "Number of files already copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(processed_list))
                    listtoprocess = []
                    listtoprocess = set(current_list) - set(processed_list)
                    #logger.debug("Number of files to be copied for trigger %s is %i", eumetcast_source.eumetcast_id, len(listtoprocess))
                    logger_spec.debug(
                        "Number of files to be copied for trigger %s is %i",
                        eumetcast_source.eumetcast_id, len(listtoprocess))
                    if listtoprocess != set([]):
                        logger_spec.debug("Loop on the found files.")
                        for filename in list(listtoprocess):
                            if os.path.isfile(os.path.join(
                                    input_dir, filename)):
                                if os.stat(os.path.join(
                                        input_dir, filename)).st_mtime < int(
                                            time.time()):
                                    logger_spec.debug(
                                        "Processing file: " +
                                        os.path.basename(filename))
                                    if not dry_run:
                                        if subprocess.getstatusoutput(
                                                "cp " + filename + " " +
                                                output_dir + os.sep +
                                                os.path.basename(filename)
                                        )[0] == 0:
                                            logger_spec.info(
                                                "File %s copied.", filename)
                                            processed_list.append(filename)
                                            # Update processing info
                                            processed_info[
                                                'time_latest_copy'] = datetime.datetime.now(
                                                )
                                            processed_info[
                                                'length_proc_list'] = len(
                                                    processed_list)
                                        else:
                                            logger_spec.warning(
                                                "Problem while copying file: %s.",
                                                filename)
                                    else:
                                        logger_spec.info(
                                            'Dry_run is set: do not get files')
                            else:
                                logger_spec.error(
                                    "File %s removed by the system before being processed.",
                                    filename)
                    else:
                        logger.debug(
                            "Nothing to process - go to next trigger.")

                # Drop entries that no longer exist on disk. Use a list
                # comprehension: removing items from a list while iterating
                # over it skips elements.
                processed_list = [infile for infile in processed_list
                                  if os.path.exists(infile)]

                if not dry_run:
                    functions.dump_obj_to_pickle(processed_list,
                                                 processed_list_filename)
                    functions.dump_obj_to_pickle(processed_info,
                                                 processed_info_filename)

            logger.info("End of Get EUMETCast loop. Sleep")
            time.sleep(float(time_sleep))

    exit(0)
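
# --- Illustrative sketch (not part of the original module) ------------------
# The restore/dump helpers used above are assumed to behave as thin pickle
# wrappers: restore returns the passed-in default object when no pickle file
# exists yet, so the first run starts from an empty state. The real eStation2
# implementations in lib.python.functions may differ in detail.
import os
import pickle


def restore_obj_from_pickle(obj, filename):
    # Load the object from the pickle file if present, else keep the default
    if os.path.isfile(filename):
        with open(filename, 'rb') as fid:
            obj = pickle.load(fid)
    return obj


def dump_obj_to_pickle(obj, filename):
    # Persist the object so the next run can resume from the same state
    with open(filename, 'wb') as fid:
        pickle.dump(obj, fid)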
Exemplo n.º 28
0
def processing_std_msg_mpe(res_queue,
                           pipeline_run_level=0,
                           pipeline_printout_level=0,
                           pipeline_printout_graph_level=0,
                           prod='',
                           starting_sprod='',
                           native_mapset='',
                           mapset='',
                           version='',
                           starting_dates=None,
                           write2file=None,
                           logfile=None,
                           day_time=None):

    native_mapset = 'MSG-satellite-3km'
    target_mapset = mapset

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_msg_mpe')

    if day_time is None:
        day_time = '0600'

    proc_lists = None
    proc_lists = create_pipeline(prod,
                                 starting_sprod,
                                 native_mapset,
                                 target_mapset,
                                 version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists,
                                 day_time=day_time,
                                 logger=spec_logger)
    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_msg_mpe')
        # Option to be added to pipeline_run to force files to appear up-to-date: touch_files_only = True
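        # checksum_level=0 (ruffus): decide whether to re-run jobs from file
        # timestamps only, ignoring parameter/source-code checksums.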
        pipeline_run(verbose=pipeline_run_level,
                     logger=spec_logger,
                     log_exceptions=spec_logger,
                     history_file=os.path.join(
                         es_constants.log_dir,
                         '.ruffus_history_msg_mpe.sqlite'),
                     checksum_level=0)
        tasks = pipeline_get_task_names()
        spec_logger.info("After running the pipeline %s" %
                         'processing_std_msg_mpe')

    if pipeline_printout_level > 0:
        pipeline_printout(verbose=pipeline_printout_level,
                          output_stream=fwrite_id)

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    return True
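
# --- Usage sketch (not part of the original example) ------------------------
# The processing_std_* routines take a result queue as first argument and are
# typically launched in a child process. The product/mapset values below are
# placeholders, not taken from the original code.
if __name__ == '__main__':
    from multiprocessing import Process, Queue

    res_queue = Queue()
    proc = Process(target=processing_std_msg_mpe,
                   args=(res_queue,),
                   kwargs={'pipeline_run_level': 3,
                           'prod': 'msg-mpe',             # placeholder product
                           'starting_sprod': 'msg-mpe',   # placeholder sub-product
                           'mapset': 'SPOTV-Africa-1km',  # placeholder target mapset
                           'version': 'undefined'})
    proc.start()
    proc.join()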
Exemplo n.º 29
0
import time
from future import standard_library  # provides install_aliases() called below

# import eStation2 modules
from lib.python import functions
from lib.python import es_logging as log
from config import es_constants
from database import querydb
from apps.es2system.GeoPortal import eStationTools as esTools
from apps.es2system.GeoPortal import geoserverREST
from apps.productmanagement import datasets
from apps.productmanagement import products
from lib.python.daemon import DaemonDryRunnable

standard_library.install_aliases()

logger = log.my_logger(__name__)
local_data_dir = es_constants.es2globals['processing_dir']
remote_data_dir = geoserverREST.restBaseDir


def syncGeoserver():
    #
    #   Copy some 'relevant' datasets to GeoServer
    #   Selection of datasets is done on the basis of the product.geoserver table
    #

    # Get list of all 'relevant' subproducts (see 2. above)
    list_active_geoserver = esTools.get_activated_geoserver()

    # Loop over existing sub_products
    for geoserver_sprod in list_active_geoserver:
Exemplo n.º 30
0
# Imports assumed by this example (the module header is not shown in the
# listing); the lib.python/config paths follow the other eStation2 examples.
import os
import re

from osgeo import gdal, osr

from lib.python import es_logging as log
from lib.python import functions
from lib.python import metadata
from lib.python.mapset import MapSet  # import path for MapSet is an assumption
from config import es_constants

def reproject_output(input_file,
                     native_mapset_id,
                     target_mapset_id,
                     output_dir=None,
                     version=None,
                     logger=None):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    # Check output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['processing_dir']

    # Get the existing dates for the dataset
    logger.debug("Entering routine %s for file %s" %
                 ('reproject_output', input_file))
    ext = es_constants.ES2_OUTFILE_EXTENSION

    # Test the file/files exists
    if not os.path.isfile(input_file):
        logger.error('Input file: %s does not exist' % input_file)
        return 1

    # Instance metadata object (for output_file)
    sds_meta_out = metadata.SdsMetadata()

    # Read metadata from input_file
    sds_meta_in = metadata.SdsMetadata()
    sds_meta_in.read_from_file(input_file)

    # Extract info from input file
    str_date = sds_meta_in.get_item('eStation2_date')
    product_code = sds_meta_in.get_item('eStation2_product')
    sub_product_code = sds_meta_in.get_item('eStation2_subProduct')
    # 22.06.2017 Add the option to force the version
    if version is None:
        version = sds_meta_in.get_item('eStation2_product_version')

    # Define output filename
    sub_dir = sds_meta_in.get_item('eStation2_subdir')
    # Fix a bug for 10davg-linearx2 metadata - and make method more robust
    if re.search('.*derived.*', sub_dir):
        product_type = 'Derived'
    elif re.search('.*tif.*', sub_dir):
        product_type = 'Ingest'
    else:
        # Fallback to avoid an unbound product_type if neither pattern matches
        product_type = functions.get_product_type_from_subdir(sub_dir)

    out_prod_ident = functions.set_path_filename_no_date(
        product_code, sub_product_code, target_mapset_id, version, ext)
    output_subdir = functions.set_path_sub_directory(product_code,
                                                     sub_product_code,
                                                     product_type, version,
                                                     target_mapset_id)

    output_file = output_dir + output_subdir + str_date + out_prod_ident

    # make sure output dir exists
    output_dir = os.path.split(output_file)[0]
    functions.check_output_dir(output_dir)

    # -------------------------------------------------------------------------
    # Manage the geo-referencing associated to input file
    # -------------------------------------------------------------------------
    orig_ds = gdal.Open(input_file, gdal.GA_Update)

    # Read the data type
    band = orig_ds.GetRasterBand(1)
    out_data_type_gdal = band.DataType

    if native_mapset_id != 'default':
        native_mapset = MapSet()
        native_mapset.assigndb(native_mapset_id)
        orig_cs = osr.SpatialReference(
            wkt=native_mapset.spatial_ref.ExportToWkt())

        # Complement orig_ds info (necessary to Re-project)
        try:
            #orig_ds.SetGeoTransform(native_mapset.geo_transform)
            orig_ds.SetProjection(orig_cs.ExportToWkt())
        except Exception:
            logger.debug('Cannot set the geo-projection .. Continue')
    else:
        try:
            # Read geo-reference from input file
            orig_cs = osr.SpatialReference()
            orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
        except Exception:
            logger.debug('Cannot read geo-reference from file .. Continue')

    # TODO-M.C.: add a test on the mapset-id in DB table !
    trg_mapset = MapSet()
    trg_mapset.assigndb(target_mapset_id)
    logger.debug('Target Mapset is: %s' % target_mapset_id)

    # -------------------------------------------------------------------------
    # Generate the output file
    # -------------------------------------------------------------------------
    # Prepare output driver
    out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)

    logger.debug('Doing re-projection to target mapset: %s' %
                 trg_mapset.short_name)
    # Get target SRS from mapset
    out_cs = trg_mapset.spatial_ref
    out_size_x = trg_mapset.size_x
    out_size_y = trg_mapset.size_y

    # Create target in memory
    mem_driver = gdal.GetDriverByName('MEM')

    # Assign mapset to dataset in memory
    mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1,
                               out_data_type_gdal)

    mem_ds.SetGeoTransform(trg_mapset.geo_transform)
    mem_ds.SetProjection(out_cs.ExportToWkt())

    # Apply Reproject-Image to the memory-driver
    orig_wkt = orig_cs.ExportToWkt()
    res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(),
                              es_constants.ES2_OUTFILE_INTERP_METHOD)
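    # gdal.ReprojectImage() returns 0 (CE_None) on success; the returned code
    # is currently not checked.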

    logger.debug('Re-projection to target done.')

    # Read from the dataset in memory
    out_data = mem_ds.ReadAsArray()

    # Write to output_file
    trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0,
                                   [es_constants.ES2_OUTFILE_OPTIONS])
    trg_ds.GetRasterBand(1).WriteArray(out_data)

    # -------------------------------------------------------------------------
    # Assign Metadata to the ingested file
    # -------------------------------------------------------------------------
    # Close dataset
    trg_ds = None

    sds_meta_out.assign_es2_version()
    sds_meta_out.assign_mapset(target_mapset_id)
    sds_meta_out.assign_from_product(product_code, sub_product_code, version)
    sds_meta_out.assign_date(str_date)
    sds_meta_out.assign_subdir_from_fullpath(output_dir)
    sds_meta_out.assign_comput_time_now()
    # Copy the same input files as in the non-reprojected input
    file_list = sds_meta_in.get_item('eStation2_input_files')
    sds_meta_out.assign_input_files(file_list)

    # Write metadata to file
    sds_meta_out.write_to_file(output_file)

    # Return the filename
    return output_file
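
# --- Usage sketch (not part of the original example) ------------------------
# Illustrative call only: the input file path and the two mapset identifiers
# are placeholders, not values taken from the original code.
if __name__ == '__main__':
    reprojected_file = reproject_output(
        '/data/processing/example_product.tif',  # placeholder input file
        'MSG-satellite-3km',                     # placeholder native mapset id
        'SPOTV-Africa-1km')                      # placeholder target mapset id
    print(reprojected_file)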
Exemplo n.º 31
0
from lib.python import functions
from lib.python import metadata
from lib.python.image_proc import raster_image_math
from lib.python.image_proc import recode
from database import crud
from database import querydb
from lib.python import es_logging as log

# This is temporary .. to be replace with a DB call
from apps.processing.processing_switches import *

# Import third-party modules
from ruffus import *

logger = log.my_logger(__name__)

#   General definitions for this processing chain
prod="modis-pp"
mapset='MODIS-IOC-4km'
ext='.tif'
version='undefined'

# Primary Production Monthly
activate_pp_1mon_comput = 1


def create_pipeline(starting_sprod):

    #   ---------------------------------------------------------------------
    #   Define input files: Chla is the 'driver', sst,kd and par 'ancillary inputs'
Exemplo n.º 32
0
def processing_std_dmp(res_queue,
                       pipeline_run_level=0,
                       pipeline_printout_level=0,
                       pipeline_printout_graph_level=0,
                       prod='',
                       starting_sprod='',
                       mapset='',
                       version='',
                       starting_dates=None,
                       update_stats=False,
                       nrt_products=True,
                       write2file=None,
                       logfile=None,
                       touch_only=False):
    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_std_dmp')

    proc_lists = None
    proc_lists = create_pipeline(prod=prod,
                                 starting_sprod=starting_sprod,
                                 mapset=mapset,
                                 version=version,
                                 starting_dates=starting_dates,
                                 proc_lists=proc_lists,
                                 update_stats=update_stats,
                                 nrt_products=nrt_products)

    if write2file is not None:
        fwrite_id = open(write2file, 'w')
    else:
        fwrite_id = None

    if pipeline_run_level > 0:
        spec_logger.info("Run the pipeline %s" % 'processing_std_dmp')
        pipeline_run(
            touch_files_only=touch_only,
            verbose=pipeline_run_level,
            logger=spec_logger,
            log_exceptions=spec_logger,
            history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(
                prod, starting_sprod))
        tasks = pipeline_get_task_names()
        spec_logger.info("Run the pipeline %s" % tasks[0])
        spec_logger.info("After running the pipeline %s" %
                         'processing_std_dmp')

    if pipeline_printout_level > 0:
        pipeline_printout(
            verbose=pipeline_printout_level,
            output_stream=fwrite_id,
            history_file='/eStation2/log/.ruffus_history_{0}_{1}.sqlite'.format(
                prod, starting_sprod))

    if pipeline_printout_graph_level > 0:
        pipeline_printout_graph('flowchart.jpg')

    if write2file is not None:
        fwrite_id.close()

    # res_queue.put(proc_lists)
    return True