Example #1
    def test_get_list_dates_for_dataset(self):
        productcode = 'lsasaf-et'
        productversion = 'undefined'
        subproductcode = '10d30min'
        input_file = self.test_procfunc_dir+os.path.sep+productcode+os.path.sep+subproductcode+os.path.sep+'202004010000_lsasaf-et_10d30min_MSG-satellite-3km_undefined.tif'
        start_date = 202004010000
        end_date = 202004010120
        proc_functions.get_list_dates_for_dataset(productcode, subproductcode, productversion,start_date, end_date)

        self.assertEqual(1, 1)
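
The test above only checks that the call completes (the assertion is a placeholder). As a rough, self-contained illustration of the kind of value get_list_dates_for_dataset() is expected to return for a sub-daily product, the sketch below generates 'YYYYMMDDHHMM' strings over the same range; the 30-minute step and the string format are assumptions inferred from the '10d30min' subproduct name and the arguments used in the test, not taken from the eStation2 sources.

# Minimal sketch (not eStation2 code): build a list of 'YYYYMMDDHHMM' strings.
# The 30-minute step is an assumption; the real function derives the step from
# the product frequency stored in the database.
import datetime


def make_date_list(start_date, end_date, step_minutes=30):
    fmt = '%Y%m%d%H%M'
    current = datetime.datetime.strptime(str(start_date), fmt)
    end = datetime.datetime.strptime(str(end_date), fmt)
    dates = []
    while current <= end:
        dates.append(current.strftime(fmt))
        current += datetime.timedelta(minutes=step_minutes)
    return dates


print(make_date_list(202004010000, 202004010120))
# -> ['202004010000', '202004010030', '202004010100']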
Example #2
def my_proc_std_modis_firms(start_date=None,
                            end_date=None,
                            pipe_run=0,
                            pipe_print=3,
                            start_date_stats=None,
                            end_date_stats=None,
                            touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'modis-firms',
            '1day',
            'v6.0',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    if start_date_stats is not None and end_date_stats is not None:
        starting_dates_stats = proc_functions.get_list_dates_for_dataset(
            'modis-firms',
            '10dcount',
            'v6.0',
            start_date=start_date_stats,
            end_date=end_date_stats)
    else:
        starting_dates_stats = None

    target_mapset = 'SPOTV-Africa-1km'

    # Forced off here, overriding the touch_files_only argument
    touch_files_only = False

    args = {
        'pipeline_run_level': pipe_run,
        'pipeline_printout_level': pipe_print,
        'pipeline_printout_graph_level': 0,
        'prod': 'modis-firms',
        'starting_sprod': '1day',
        'starting_dates': starting_dates,
        'starting_dates_stats': starting_dates_stats,
        'mapset': target_mapset,
        'version': 'v6.0',
        'logfile': 'log-modis-firms.log',
        'update_stats': True,
        'nrt_products': True,
        'touch_files_only': touch_files_only
    }

    res_queue = None
    proc_lists = processing_std_modis_firms(res_queue, **args)
    print(proc_lists)
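
A convention shared by most of the examples below: when starting_dates is None the pipeline does not fail, it falls back to every matching file already on disk (see the glob fallback in Example #20). A minimal sketch of that fallback, with hypothetical directory and identifier values:

# Minimal sketch (hypothetical paths/identifiers) of the starting_dates convention:
# an explicit date list selects specific files, None means "use whatever is ingested".
import glob
import os


def build_input_files(input_dir, prod_ident, starting_dates=None):
    if starting_dates is not None:
        # One file per requested date, e.g. '20200401' + prod_ident
        return [os.path.join(input_dir, d + prod_ident) for d in starting_dates]
    # No date list: pick up all existing files for this product identifier
    return sorted(glob.glob(os.path.join(input_dir, '*' + prod_ident)))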
Example #3
def my_proc_std_spi_monthly(pipe_run=0,
                            pipe_print=3,
                            start_date=None,
                            end_date=None,
                            touch_files_only=False):
    # Hard-coded test dates override the function arguments
    start_date = '19830301'
    end_date = '19830310'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'arc2-rain', '1day', '2.0', start_date=start_date, end_date=end_date)
    # Discarded: with starting_dates=None the pipeline falls back to all existing files
    starting_dates = None
    mapset = 'ARC2-Africa-11km'
    # #
    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'arc2-rain',\
            'starting_sprod':'1mon',\
            'starting_dates': starting_dates,\
            'mapset': mapset,\
            'version':'2.0',
            'logfile':'log-arc2-rain.log'}

    res_queue = None
    proc_lists = processing_std_spi_monthly(res_queue, **args)
    print(proc_lists)
Example #4
def my_proc_std_rain_onset(pipe_run=0,
                           pipe_print=3,
                           start_date=None,
                           end_date=None,
                           touch_files_only=False):

    #   ---------------------------------------------------------------------
    # onset computation
    #   ---------------------------------------------------------------------

    start_date = '20160901'
    end_date = '20161011'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'fewsnet-rfe', '10d', '2.0', start_date=start_date, end_date=end_date)

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'fewsnet-rfe',\
            'starting_sprod':'10d',\
            'mapset': 'FEWSNET-Africa-8km',\
            'version':'2.0',
            'logfile':'rain-onset',
            'starting_dates':starting_dates
            }
    res_queue = None
    processing_std_rain_onset(res_queue, **args)
Example #5
def my_proc_std_lsasaf_et(pipe_run=3,
                          pipe_print=0,
                          start_date=None,
                          end_date=None,
                          touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date or end_date:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'lsasaf-et',
            'et',
            'undefined',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    native_mapset = 'MSG-satellite-3km'
    target_mapset = 'SPOTV-Africa-1km'

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'lsasaf-et',\
            'starting_sprod':'et',\
            'starting_dates': starting_dates,\
            'native_mapset': native_mapset,\
            'mapset': target_mapset,\
            'version':'undefined',
            'logfile':'log-lsasaf-et.log'}

    res_queue = None
    proc_lists = processing_std_lsasaf_et(res_queue, **args)
    print(proc_lists)
Example #6
def my_proc_fewsnet_rfe(pipe_run=0,
                        pipe_print=3,
                        start_date=None,
                        end_date=None,
                        touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'fewsnet-rfe',
            '10d',
            '2.0',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'fewsnet-rfe',\
            'starting_sprod':'10d',\
            'starting_dates': starting_dates,\
            'mapset': 'FEWSNET-Africa-8km',\
            'version':'2.0',
            'logfile':'log-fewsnet.log'}

    res_queue = None
    proc_lists = processing_std_precip_stats_only(res_queue, **args)
    print(proc_lists)
Example #7
def my_proc_std_ndvi(pipe_run=0, pipe_print=3, touch_files_only=False):
    #(pipe_run=0, pipe_print=3, start_date=None, end_date=None, touch_files_only=False):

    productcode = 'vgt-ndvi'
    subproductcode = 'ndv'
    version = 'sv2-pv2.2'
    start_date = '20180101'
    end_date = None

    list_dates = proc_functions.get_list_dates_for_dataset(
        productcode,
        subproductcode,
        version,
        start_date=start_date,
        end_date=end_date)

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': productcode,\
            'starting_sprod':subproductcode,\
            'mapset': 'SPOTV-Africa-1km',\
            'version': version,
            'starting_dates': list_dates,
            'logfile':'test_processing_ndvi',
            'touch_files_only':touch_files_only}

    #res_queue = Queue()
    res_queue = None
    proc_lists = processing_std_ndvi_prods_only(res_queue, **args)
Example #8
def test_subprocess_vgt_lai(pipe_run=4, pipe_print=0, touch_files_only=False):
    # Create the list of dates -> returns empty if start==end==None
    start_date = '19990101'
    end_date = '20181221'

    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-lai', 'lai', 'V2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'vgt-lai',\
            'starting_sprod':'lai',\
            'mapset': 'SPOTV-Africa-1km',\
            'version':'V2.0',
            'logfile':'vgt-lai',
            'starting_dates':starting_dates,
            'upsert_db' : False,
            'touch_only':touch_files_only
            }
    res_queue = None

    processing_std_vgt_stats_only(res_queue, **args)
Example #9
def my_proc_msg_mpe(start_date=None,
                    end_date=None,
                    pipe_run=0,
                    pipe_print=3,
                    start_date_stats=None,
                    end_date_stats=None,
                    touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'msg-mpe',
            '10dcum',
            'undefined',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'msg-mpe',\
            'starting_sprod':'10dcum',\
            'starting_dates': starting_dates,\
            'mapset': 'SPOTV-Africa-1km',\
            'version':'undefined',
            'logfile':'ruffus-chirps',
            'touch_only':touch_files_only}

    res_queue = None
    proc_lists = processing_std_msg_mpe(res_queue, **args)
    print(proc_lists)
Example #10
def my_proc_olci_wrr(start_date=None,
                     end_date=None,
                     pipe_run=0,
                     pipe_print=3,
                     start_date_stats=None,
                     end_date_stats=None,
                     touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        #starting_dates = proc_functions.get_list_dates_for_dataset('olci-wrr', 'chl-nn', 'V02.0', start_date=start_date, end_date=end_date)
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'olci-wrr',
            'chl-oc4me',
            'V02.0',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'olci-wrr',\
            'starting_sprod':'chl-oc4me',\
            'starting_dates': starting_dates,\
            'mapset': 'SPOTV-Africa-1km',\
            'version':'V02.0',
            'logfile':'ruffus-chirps'}

    res_queue = None
    proc_lists = processing_std_olci_wrr(res_queue, **args)
    print(proc_lists)
Example #11
def my_proc_vgt_dmp(pipe_run=0,
                    pipe_print=3,
                    start_date=None,
                    end_date=None,
                    touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-dmp', 'dmp', 'V2.0', start_date=start_date, end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'vgt-dmp',\
            'starting_sprod':'dmp',\
            'starting_dates': starting_dates,\
            'mapset': 'SPOTV-Africa-1km',\
            'version':'V2.0',
            'logfile':'ruffus-chirps',
            'touch_only':touch_files_only}

    request_queue = Queue()
    proc_lists = processing_std_dmp_all(request_queue, **args)
Example #12
def test_subprocess_vgt_fapar(pipe_run=4,
                              pipe_print=0,
                              touch_files_only=False):
    start_date = None
    end_date = None

    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'vgt-fapar',
            'fapar',
            'V2.0',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None
    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'vgt-fapar',\
            'starting_sprod':'fapar',\
            'mapset': 'SPOTV-Africa-1km',\
            'version':'V2.0',
            'starting_dates': starting_dates,
            'logfile':'vgt-fapar',
            'upsert_db' : False,
            'touch_only':touch_files_only
            }
    res_queue = None

    processing_std_vgt_prods_only(res_queue, **args)
Example #13
def my_proc_arc2rain_dekad(pipe_run=0,
                           pipe_print=3,
                           start_date=None,
                           end_date=None,
                           upsert_db=False,
                           touch_files_only=False):

    # Create the list of dates -> returns empty if start==end==None
    if start_date is not None and end_date is not None:
        starting_dates = proc_functions.get_list_dates_for_dataset(
            'chirps-dekad',
            '10d',
            '2.0',
            start_date=start_date,
            end_date=end_date)
    else:
        starting_dates = None

    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'arc2-rain',\
            'starting_sprod':'10d',\
            'starting_dates': starting_dates,\
            'mapset': 'CHIRP-Africa-5km',\
            'version':'2.0',
            'logfile':'ruffus-chirps',
            'upsert_db': upsert_db,
            'touch_only':touch_files_only}

    request_queue = Queue()
    proc_lists = processing_std_precip_stats_only(request_queue, **args)
Example #14
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset=''):


    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir+os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                   subproductcode=sub_product_code,
                                                   version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir+my_date+out_prod_ident

                # Create the link
                functions.create_sym_link(in_file_path, out_file_path, force=False)

    return list_subprods, list_subprod_groups
Example #15
    def generate_parameters_cum():

        starting_files.sort()
        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:
            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year_t0 = int(date_t0[0:4])
            in_season = False

            # Check if season goes across two years -> define year1/2
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year_sos = year_t0
                    in_season = True
            else:
                if dekad_t0 >= dekad_start:
                    year_sos = year_t0
                    in_season = True
                if dekad_t0 <= dekad_end:
                    year_sos = year_t0 - 1
                    in_season = True

            # Detect the end of the season and trigger processing
            if in_season:

                # Define output filename
                output_file = es2_data_dir + subdir_ident_cum + date_t0 + prod_ident_cum

                # Get list of dates from start of season to end of season
                list_dates = proc_functions.get_list_dates_for_dataset(
                    prod,
                    starting_sprod,
                    version,
                    start_date=str(year_sos) + start_season,
                    end_date=date_t0)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning(
                            'Missing file for date {0}. Season not computed.'.
                            format(ldate))
                        missing_file = True
                        break

                if not missing_file:
                    yield (input_files, output_file)
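
The in-season test above has to handle seasons that straddle the year boundary (dekad_start > dekad_end). A self-contained sketch of that check, using the same integer 'MMDD' encoding of a dekad (function name and values are illustrative, not from the eStation2 sources):

# Sketch of the cross-year season check used in generate_parameters_cum (illustrative).
def season_start_year(dekad_mmdd, year, season_start, season_end):
    """Return the start-of-season year for a dekad, or None if it is out of season."""
    if season_start < season_end:
        # Season contained in a single calendar year
        if season_start <= dekad_mmdd <= season_end:
            return year
    else:
        # Season wraps across 31 December
        if dekad_mmdd >= season_start:
            return year          # first part of the season
        if dekad_mmdd <= season_end:
            return year - 1      # second part, season started the year before
    return None


# Example: a September -> March season
print(season_start_year(1011, 2016, 901, 311))   # -> 2016
print(season_start_year(211, 2017, 901, 311))    # -> 2016 (same season, next year)
print(season_start_year(601, 2017, 901, 311))    # -> None (out of season)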
Example #16
def my_proc_std_gsod(pipe_run=0,
                     pipe_print=3,
                     start_date=None,
                     end_date=None,
                     touch_files_only=False):
    start_date = '20160503'
    end_date = '20160505'
    starting_dates = proc_functions.get_list_dates_for_dataset(
        'gsod-rain', '1dmeas', '1.0', start_date=start_date, end_date=end_date)
    args = {'pipeline_run_level':pipe_run, \
            'pipeline_printout_level':pipe_print, \
            'pipeline_printout_graph_level': 0, \
            'prod': 'gsod-rain',\
            'starting_sprod':'1dmeas',\
            'mapset': 'SPOTV-SADC-1km',\
            'version':'1.0',
            'logfile':'gsod.log',
            'starting_dates':starting_dates
            }
    res_queue = None
    processing_std_gsod(res_queue, **args)
Example #17
def build_date_list_from_datasource(datasource_descr, product_in_info,
                                    ingest_mapset):
    dates_list = []

    start_datetime = datetime.datetime.strptime(
        str(datasource_descr.start_date), "%Y%m%d")
    if datasource_descr.end_date is None:
        end_datetime = datetime.date.today()
    else:
        end_datetime = datetime.datetime.strptime(
            str(datasource_descr.end_date), "%Y%m%d")

    all_starting_dates = proc_functions.get_list_dates_for_dataset(product_in_info.productcode, \
                                                                   product_in_info.subproductcode, \
                                                                   product_in_info.version, \
                                                                   start_date=datasource_descr.start_date,
                                                                   end_date=datasource_descr.end_date)

    my_dataset = products.Dataset(product_in_info.productcode,
                                  product_in_info.subproductcode,
                                  ingest_mapset,
                                  version=product_in_info.version,
                                  from_date=start_datetime,
                                  to_date=end_datetime)
    my_dates = my_dataset.get_dates()

    my_formatted_dates = []
    for my_date in my_dates:
        my_formatted_dates.append(my_dataset._frequency.format_date(my_date))

    my_missing_dates = []
    for curr_date in all_starting_dates:
        if curr_date not in my_formatted_dates:
            my_missing_dates.append(curr_date)

    dates_list = sorted(my_missing_dates, reverse=False)

    return dates_list
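
Stripped of the eStation Dataset and frequency objects, the core of build_date_list_from_datasource is a set difference: all dates the product should have minus the dates already present, sorted ascending. A self-contained sketch with illustrative values:

# Pure-Python sketch of the missing-dates logic (illustrative dates).
def missing_dates(expected_dates, available_dates):
    available = set(available_dates)
    return sorted(d for d in expected_dates if d not in available)


expected = ['20200101', '20200111', '20200121', '20200201']
available = ['20200101', '20200121']
print(missing_dates(expected, available))   # -> ['20200111', '20200201']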
Example #18
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset='', logfile=None):

    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir+os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)
    # Fill the processing list -> some fields to be taken from the input products
    output_sprod_group=proc_lists.proc_add_subprod_group("merged")
    output_sprod=proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                             descriptive_name='undefined',
                                             description='undefined',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                   subproductcode=sub_product_code,
                                                   version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print ('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir+my_date+out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
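
processing_merge links each dated input file into the output product tree instead of copying it. A minimal sketch of that per-date symlink step with hypothetical paths (the real code builds the paths with set_path_sub_directory and set_path_filename_no_date, and logs each created link):

# Minimal sketch (hypothetical paths) of the per-date symlink step in processing_merge.
import os


def link_dates(list_dates, in_dir, in_ident, out_dir, out_ident, force=False):
    os.makedirs(out_dir, exist_ok=True)
    for my_date in list_dates:
        src = os.path.join(in_dir, my_date + in_ident)
        dst = os.path.join(out_dir, my_date + out_ident)
        if force and os.path.lexists(dst):
            os.remove(dst)
        if not os.path.lexists(dst):
            os.symlink(src, dst)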
Example #19
def loop_processing(dry_run=False):
    """
    Driver of the process service:
    - Reads configuration from the database
    - Creates the pipelines for the active processing chains
    - Calls the active pipelines with the relevant arguments
    Arguments: dry_run -> if > 0, it triggers pipeline_printout() rather than pipeline_run()
                       -> if < 0, it triggers pipeline_printout_graph() rather than pipeline_run()
    """

    # Clean dir with locks
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)
    logger.info("Entering routine %s" % 'loop_processing')
    echo_query = False
    functions.check_output_dir(es_constants.processing_tasks_dir)
    while True :

        logger.debug("Entering infinite loop")
        # Get all active processing chains from the database.
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        for chain in active_processing_chains:

            logger.debug("Processing Chain N.:%s" % str(chain.process_id))

            derivation_method = chain.derivation_method             # name of the method in the module
            algorithm = chain.algorithm                             # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            # Get input products
            input_products = querydb.get_processing_chain_products(chain.process_id,type='input')

            # Case of a 'std_' (i.e. ruffus) processing -> get all info from 1st INPUT and manage dates
            if re.search('^std_.*',algorithm):

                product_code = input_products[0].productcode
                sub_product_code = input_products[0].subproductcode
                version = input_products[0].version
                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version, start_date=start_date, end_date=end_date)

                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level, \
                        'pipeline_printout_level':pipeline_printout_level,\
                        'starting_sprod': sub_product_code, \
                        'prod': product_code, \
                        'mapset':mapset,\
                        'starting_dates': list_dates,\
                        'version':version}

            # Case of a non-'std_' (i.e. non-ruffus) processing -> get output products and pass everything to the function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id,type='output')
                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level, \
                        'pipeline_printout_level':pipeline_printout_level,\
                        'input_products': input_products, \
                        'output_product': output_products}

            # Define an id from a combination of fields
            processing_unique_id='ID='+str(process_id)+'_METHOD='+derivation_method+'_ALGO='+algorithm+'.lock'
            processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id

            if not os.path.isfile(processing_unique_lock):
                logger.debug("Launching processing for ID: %s" % processing_unique_id)
                open(processing_unique_lock,'a').close()

                # Define the module name and function()
                module_name = 'processing_'+algorithm
                function_name = 'processing_'+derivation_method
                # Enter the module and walk until to the name of the function() to be executed
                proc_dir = __import__("apps.processing")
                proc_pck = getattr(proc_dir, "processing")
                proc_mod = getattr(proc_pck, module_name)
                proc_func= getattr(proc_mod, function_name)

                # fork and call the std_precip 'generic' processing
                pid = os.fork()
                if pid == 0:
                    # Call to the processing pipeline
                    [list_subprods, list_subprod_groups] = proc_func(**args)
                    # Simulate longer processing (TEMP)
                    logger.info("Going to sleep for a while - to be removed")
                    time.sleep(50)
                    os.remove(processing_unique_lock)
                    sys.exit(0)
                else:
                    # This is the parent process: just continue the loop
                    pass
                    #os.wait()
            else:
                logger.debug("Processing already running for ID: %s " % processing_unique_id)

        logger.info("End of the loop ... wait a while")
        time.sleep(5)
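
Each chain in loop_processing is guarded by a lock file and then run in a forked child process while the parent keeps looping. A condensed, self-contained sketch of the lock-file guard only (the fork and the dynamic module lookup are omitted; directory and naming are illustrative):

# Condensed sketch of the lock-file guard in loop_processing (illustrative paths).
import os


def run_once(lock_dir, unique_id, work):
    """Run work() only if no lock exists for unique_id; always release the lock."""
    os.makedirs(lock_dir, exist_ok=True)
    lock_path = os.path.join(lock_dir, unique_id + '.lock')
    if os.path.isfile(lock_path):
        return False                    # another run is already in progress
    open(lock_path, 'a').close()        # take the lock
    try:
        work()
    finally:
        os.remove(lock_path)            # release it even if work() fails
    return True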
Example #20
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    starting_dates_stats=None,
                    update_stats=False,
                    nrt_products=True):
    #   ---------------------------------------------------------------------
    #   Create lists to store definition of the derived products, and their
    #   groups.
    #   Two starting-date ranges are passed:
    #
    #       starting_dates: range - 1d frequency - for 1day -> 10dcount
    #                       Normally not used: only for tests (the number of 1day files is large!)
    #
    #       starting_dates_stats: range - 10d frequency - for 10dcount -> 10dcountmin/max/avg
    #                             Used to define a specific range for stats, normally 20030101 -> <prev-year>1221
    #
    #   For the 10d products anomalies (both 1km and 10km) ALL available files are used for anomaly computation
    #
    #   ---------------------------------------------------------------------

    if proc_lists is None:
        proc_lists = functions.ProcLists()

    #   ---------------------------------------------------------------------
    #   Define and assign the flags to control the individual derived products
    #   and the groups. NOT to be changed by the User
    #   ---------------------------------------------------------------------

    # Set DEFAULTS: all off
    activate_10dcount_comput = 0  # 2.a - 10d count
    activate_10dstats_comput = 0  # 2.b - 10d stats
    activate_10danomalies_comput = 0  # 2.c - 10d anomalies
    activate_10d_10k_comput = 0  # 3.a - 10d on 10km cells
    activate_10d_10k_stats_comput = 0  # 3.b - 10d on 10km statistics
    activate_10d_10k_anom_comput = 0  # 3.c - 10d on 10km anomalies

    #   switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1  # 10d count
        activate_10danomalies_comput = 1  # 10d anomalies
        activate_10d_10k_comput = 1  # 10d on 10k
        activate_10d_10k_anom_comput = 1  # 10d on 10km anomalies

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats
        activate_10d_10k_stats_comput = 1  # 10d on 10km statistics

    #   Switch wrt single products: not to be changed !!

    # 2.b -> 10d stats
    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1

    # 2.c -> 10d anomalies
    activate_10ddiff_comput = 1

    # 3.a -> 10d on 10 km
    activate_10dcount10k_comput = 1

    # 3.b -> 10d on 10 km stats
    activate_10dcount10kavg_comput = 1
    activate_10dcount10kmin_comput = 1
    activate_10dcount10kmax_comput = 1

    # 3.c -> 10d on 10 km anomalies
    activate_10dcount10kdiff_comput = 1
    activate_10dcount10kperc_comput = 1
    activate_10dcount10kratio_comput = 1

    #   ---------------------------------------------------------------------
    #   Define the 'grid' file for the 10k count conversion
    #   If it does not exist, disable computation
    #   ---------------------------------------------------------------------

    grid_mapset_name = 'SPOTV-Africa-1km'
    # grid_file='/eStation2/layers/Mask_Africa_SPOTV_10km.tif'
    grid_file = es_constants.es2globals[
        'estation2_layers_dir'] + os.path.sep + 'Mask_Africa_SPOTV_10km.tif'

    if not os.path.isfile(grid_file):
        activate_10d_10k_comput = 0  # 10d on 10km
        activate_10d_10k_anom_comput = 0  # 10d on 10km anomalies
        activate_10d_10k_stats_comput = 0  # 10d on 10km statistics

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files from the starting_sprod and starting_dates arguments
    #   ---------------------------------------------------------------------

    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    # logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # starting_dates -> 1 day
    if starting_dates is not None:
        starting_files_1day = []
        for my_date in starting_dates:
            starting_files_1day.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files_1day = glob.glob(input_dir + "*" + in_prod_ident)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount
    #   ---------------------------------------------------------------------

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount",
        "10dcount",
        final=False,
        descriptive_name='10d Count',
        description='Fire Count for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcount():

        #   Look for all input files in input_dir, and sort them
        input_files = starting_files_1day
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                # The output filename depends only on the dekad, not on the single input file
                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount

                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_fire_10dcount(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcountavg
    #   ---------------------------------------------------------------------

    if starting_dates_stats is not None:
        files_10dcount_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount_4stats.append(es2_data_dir +
                                         output_subdir_10dcount + my_date +
                                         out_prod_ident_10dcount)
    else:
        files_10dcount_4stats = es2_data_dir + output_subdir_10dcount + "*" + out_prod_ident_10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountavg",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Average',
        description='Average fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount)
    def std_fire_10dcountavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':0}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcountmin
    #   ---------------------------------------------------------------------

    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmin",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Minimum',
        description='Minimum Fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountavg)
    def std_fire_10dcountmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # The coded value (nodata=0) leads to the wrong result
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcountmax
    #   ---------------------------------------------------------------------
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum rainfall for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(files_10dcount_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountmin)
    def std_fire_10dcountmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dDiff
    #   ---------------------------------------------------------------------

    #   Define the input files for conversion to 10k on the basis of the 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount_4anom = []
        use_dates_10dcount = proc_functions.get_list_dates_for_dataset(
            prod,
            '10dcount',
            version,
            start_date=starting_dates[0],
            end_date=starting_dates[-1])

        for my_date in use_dates_10dcount:
            files_10dcount_4anom.append(es2_data_dir + output_subdir_10dcount +
                                        my_date + out_prod_ident_10dcount)
    else:
        files_10dcount_4anom = glob.glob(es2_data_dir +
                                         output_subdir_10dcount + "*" +
                                         out_prod_ident_10dcount)

    output_sprod_group = proc_lists.proc_add_subprod_group("10danomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountdiff",
        "10danomalies",
        final=False,
        descriptive_name='10d Absolute Difference',
        description='10d Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcountavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    # @follows(std_fire_10dcountavg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcountmax)
    def std_fire_10dcountdiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        raster_image_math.do_oper_subtraction(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10km
    #   ---------------------------------------------------------------------
    #
    target_mapset_name = 'SPOTV-Africa-10km'

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10k")
    output_sprod_10dcount10k = proc_lists.proc_add_subprod(
        "10dcount10k",
        "10dcount10k",
        final=False,
        descriptive_name='10d Gridded at 10 km',
        description='10d Count Gridded at 10 km',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount10k = functions.set_path_filename_no_date(
        prod, output_sprod_10dcount10k, target_mapset_name, version, ext)
    output_subdir_10dcount10k = functions.set_path_sub_directory(
        prod, output_sprod_10dcount10k, 'Derived', version, target_mapset_name)

    #   Starting files + avg
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + out_prod_ident_10dcount
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir_10dcount10k + "{YYYYMMDD[0]}" + out_prod_ident_10dcount10k

    @active_if(activate_10d_10k_comput, activate_10dcount10k_comput)
    @transform(files_10dcount_4anom, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcountdiff)
    def std_fire_10dcount10k(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='_' + os.path.basename(output_file),
                                  dir=es_constants.base_tmp_dir)

        # Temporary (not masked) file
        output_file_temp = tmpdir + os.path.sep + os.path.basename(output_file)
        input_mapset_name = mapset

        operation = 'sum'
        args = {
            "input_file": input_file,
            "grid_file": grid_file,
            "output_file": output_file_temp,
            "operation": operation,
            "input_mapset_name": input_mapset_name,
            "grid_mapset_name": grid_mapset_name,
            "output_format": None,
            'nodata': -32768,
            "options": "compress=lzw",
            "output_type": 'Int16'
        }

        raster_image_math.do_stats_4_raster(**args)

        args = {
            "inputfile": output_file_temp,
            "output_file": output_file,
            "native_mapset_name": grid_mapset_name,
            "target_mapset_name": target_mapset_name
        }

        raster_image_math.do_reproject(**args)

        shutil.rmtree(tmpdir)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kavg
    #   ---------------------------------------------------------------------

    if starting_dates_stats is not None:
        files_10dcount10k_4stats = []
        for my_date in starting_dates_stats:
            files_10dcount10k_4stats.append(es2_data_dir +
                                            output_subdir_10dcount10k +
                                            my_date +
                                            out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4stats = es2_data_dir + output_subdir_10dcount10k + "*" + out_prod_ident_10dcount10k

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kavg",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km Average',
        description='10d Fire count 10km Average',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kavg_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10k)
    def std_fire_10dcount10kavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Float32', 'input_nodata': 0}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kmin
    #   ---------------------------------------------------------------------

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kmin",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km minimum',
        description='10d Fire count 10km minimum',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmin_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kavg)
    def std_fire_10dcount10kmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Int16',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kmax
    #   ---------------------------------------------------------------------

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount10kstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kmax",
        "10dcount10kstats",
        final=False,
        descriptive_name='10d Fire count 10km maximum',
        description='10d Fire count 10km maximum',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_10d_10k_stats_comput, activate_10dcount10kmax_comput)
    @collate(files_10dcount10k_4stats, formatter(formatter_in), formatter_out)
    @follows(std_fire_10dcount10kmin)
    def std_fire_10dcount10kmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Int16',
            'input_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type': 'Int16', 'input_nodata': 0}
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kdiff
    #   ---------------------------------------------------------------------

    #   Define the input files for conversion to 10k on the basis of the 'starting_dates' (not 'starting_dates_stats')
    if starting_dates is not None:
        files_10dcount10k_4anom = []
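        # Note: taking starting_dates[0]/[-1] as the interval assumes the list is
        # sorted in ascending order (YYYYMMDD strings sort lexicographically)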
        use_dates_10dcount10k = proc_functions.get_list_dates_for_dataset(
            prod,
            '10dcount10k',
            version,
            start_date=starting_dates[0],
            end_date=starting_dates[-1])

        for my_date in use_dates_10dcount10k:
            files_10dcount10k_4anom.append(es2_data_dir +
                                           output_subdir_10dcount10k +
                                           my_date +
                                           out_prod_ident_10dcount10k)
    else:
        files_10dcount10k_4anom = glob.glob(es2_data_dir +
                                            output_subdir_10dcount10k + "*" +
                                            out_prod_ident_10dcount10k)

    output_sprod_group = proc_lists.proc_add_subprod_group(
        "10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kdiff",
        "10dcount10kanomalies",
        final=False,
        descriptive_name='10d 10 km Absolute Difference',
        description='10d 10 km Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, target_mapset_name, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
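    #   The anomaly output keeps the full YYYYMMDD date, while the ancillary avg file
    #   is matched on MMDD only: each dekad is compared against its long-term average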

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kdiff_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kmax)
    def std_fire_10dcount10kdiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        raster_image_math.do_oper_subtraction(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kperc
    #   ---------------------------------------------------------------------

    output_sprod_group = proc_lists.proc_add_subprod_group(
        "10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kperc",
        "10dcount10kanomalies",
        final=False,
        descriptive_name='10d 10 km Percent Difference',
        description='10d 10 km Percent Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, target_mapset_name, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kperc_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kdiff)
    def std_fire_10dcount10kperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    #   ---------------------------------------------------------------------
    #   Derived product: 10dcount10kratio
    #   ---------------------------------------------------------------------

    output_sprod_group = proc_lists.proc_add_subprod_group(
        "10dcount10kanomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount10kratio",
        "10dcount10kanomalies",
        final=False,
        descriptive_name='10d 10 km Ratio with AVG',
        description='10d 10 km Ratio with LTA AVG',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset_name, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     target_mapset_name)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount10k
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10dcount10kavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, target_mapset_name, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, target_mapset_name)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10d_10k_anom_comput, activate_10dcount10kratio_comput)
    @transform(files_10dcount10k_4anom, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    @follows(std_fire_10dcount10kperc)
    def std_fire_10dcount10kratio(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw", 'output_type':'Float32', 'input_nodata':-32768}
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32768,
            'output_nodata': -32768
        }
        raster_image_math.do_oper_division_perc(**args)

    #
    # End of pipeline definition
    return proc_lists
Example #21
def loop_processing(dry_run=False, serialize=False):

    """
    Driver of the process service.
    Reads the configuration from the database, creates the pipelines for the
    active processing chains and calls them with the relevant arguments.
    Arguments: dry_run   -> if True, triggers pipeline_printout() rather than pipeline_run()
               serialize -> False (default): detach the processes and work in parallel
                            True: do NOT detach the processes and work in series (mainly for debugging)
    """

    # Clean dir with locks
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)
    logger.info("Entering routine %s" % 'loop_processing')
    echo_query = False
    functions.check_output_dir(es_constants.processing_tasks_dir)
    while True:

        logger.debug("Entering infinite loop")
        # Get all active processing chains from the database.
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        for chain in active_processing_chains:

            logger.debug("Processing Chain N.:%s" % str(chain.process_id))

            derivation_method = chain.derivation_method             # name of the method in the module
            algorithm = chain.algorithm                             # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            # Get input products
            input_products = querydb.get_processing_chain_products(chain.process_id,type='input')
            product_code = input_products[0].productcode
            sub_product_code = input_products[0].subproductcode
            version = input_products[0].version

            # Get product metadata for output products (from first input)
            input_product_info = querydb.get_product_out_info(productcode=product_code,
                                                              subproductcode=sub_product_code,
                                                              version=version)

            # Case of a 'std_' (i.e. ruffus with 1 input) processing -> get all info from 1st INPUT and manage dates
            if re.search('^std_.*',algorithm):

                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version, start_date=start_date, end_date=end_date)

                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level, \
                        'pipeline_printout_level':pipeline_printout_level,\
                        'starting_sprod': sub_product_code, \
                        'prod': product_code, \
                        'mapset':mapset,\
                        'starting_dates': list_dates,\
                        'version':version}

            # Case of no 'std' (e.g. merge processing) -> get output products and pass everything to function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id,type='output')
                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level, \
                        'pipeline_printout_level':pipeline_printout_level,\
                        'input_products': input_products, \
                        'output_product': output_products}

            # Define an id from a combination of fields
            processing_unique_id='ID='+str(process_id)+'_METHOD='+derivation_method+'_ALGO='+algorithm+'.lock'
            processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id
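            # The .lock file prevents the same chain from being launched again while a
            # previous run is still active; it is removed once the processing finishes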

            if not os.path.isfile(processing_unique_lock):
                logger.debug("Launching processing for ID: %s" % processing_unique_id)
                open(processing_unique_lock,'a').close()

                # Define the module name and function()
                module_name = 'processing_'+algorithm
                function_name = 'processing_'+derivation_method
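                # e.g. algorithm 'std_precip' and derivation_method 'std_precip' resolve to
                # apps.processing.processing_std_precip.processing_std_precip (illustrative)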
                # Enter the module and walk down to the function() to be executed
                proc_dir = __import__("apps.processing")
                proc_pck = getattr(proc_dir, "processing")
                proc_mod = getattr(proc_pck, module_name)
                proc_func= getattr(proc_mod, function_name)

                #  Fork and call the std_precip 'generic' processing
                if serialize==False:
                    pid = os.fork()
                    if pid == 0:
                        # Here I'm the child process ->  call to the processing pipeline
                        proc_lists = proc_func(**args)
                        # Upsert database
                        upsert_database(process_id, product_code, version, mapset, proc_lists, input_product_info)
                        # Simulate longer processing (TEMP)
                        logger.info("Going to sleep for a while - to be removed")
                        time.sleep(2)
                        logger.info("Waking-up now, and removing the .lock")
                        os.remove(processing_unique_lock)
                        sys.exit(0)
                    else:
                        # Here I'm the parent process -> just go on ..
                        pass
                # Do NOT detach process (work in series)
                else:
                    proc_lists = proc_func(**args)
                    logger.info("Going to sleep for a while - to be removed")
                    # Upsert database
                    upsert_database(process_id, product_code, version, mapset, proc_lists, input_product_info)
                    time.sleep(2)
                    logger.info("Waking-up now, and removing the .lock")
                    os.remove(processing_unique_lock)
            else:
                logger.debug("Processing already running for ID: %s " % processing_unique_id)
        #
        logger.info("End of the loop ... wait a while")
        time.sleep(5)
Example #22
def processing_merge(pipeline_run_level=0,
                     pipeline_printout_level=0,
                     input_products='',
                     output_product='',
                     mapset=''):

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code,
                                                  out_sub_product_code,
                                                  'Ingest', out_version,
                                                  out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(
        out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)
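    # The merge itself does not recompute anything: for each date of each input
    # product a symbolic link is created in the output product's Ingest directory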

    # Loop over the input products:
    for input_product in input_products:

        # Extract info from input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date
        product_info = querydb.get_product_out_info(
            productcode=product_code,
            subproductcode=sub_product_code,
            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code,
                                                     sub_product_code,
                                                     prod_type, version,
                                                     out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(
            out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(
            product_code,
            sub_product_code,
            version,
            start_date=start_date,
            end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir + my_date + out_prod_ident

                # Create the link
                print(in_file_path)
                print(out_file_path)
                functions.create_sym_link(in_file_path,
                                          out_file_path,
                                          force=False)
Example #23
def loop_processing(dry_run=False, serialize=False, test_one_product=None):

    """
    Driver of the process service.
    Reads the configuration from the database, creates the pipelines for the
    active processing chains and calls them with the relevant arguments.
    Arguments: dry_run   -> if True, triggers pipeline_printout() rather than pipeline_run()
               serialize -> False (default): detach the processes and work in parallel
                            True: do NOT detach the processes and work in series (mainly for debugging)
               test_one_product -> if set, only the chain whose process_id matches it is executed
    """

    # Clean dir with locks at restart
    if os.path.isdir(es_constants.processing_tasks_dir):
        shutil.rmtree(es_constants.processing_tasks_dir)

    logger.info("Entering routine %s" % 'loop_processing')
    functions.check_output_dir(es_constants.processing_tasks_dir)

    # Read sleep time (used by each processing chain)
    sleep_time=es_constants.processing_sleep_time_sec

    while True:

        logger.debug("Entering infinite loop")
        # Get all active processing chains from the database.
        active_processing_chains = querydb.get_active_processing_chains()

        # Manage dry_run
        if dry_run:
            pipeline_run_level = 0
            pipeline_printout_level = 3
        else:
            pipeline_run_level = 3
            pipeline_printout_level = 0

        logger.debug("Pipeline run level: %i" % pipeline_run_level)
        logger.debug("Pipeline printout level: %i" % pipeline_printout_level)

        for chain in active_processing_chains:

            derivation_method = chain.derivation_method             # name of the method in the module
            algorithm = chain.algorithm                             # name of the .py module
            mapset = chain.output_mapsetcode
            process_id = chain.process_id

            skip_this_chain = False
            if test_one_product:
                if process_id != test_one_product:
                    skip_this_chain = True

            if skip_this_chain:
                continue

            # Get input products
            input_products = querydb.get_processing_chain_products(chain.process_id,type='input')
            product_code = input_products[0].productcode
            sub_product_code = input_products[0].subproductcode
            version = input_products[0].version
            native_mapset=input_products[0].mapsetcode

            logger.info("Algorithm %s applied to [%s]/[%s]" % (str(algorithm), str(product_code),str(sub_product_code)))

            # Get product metadata for output products (from first input)
            input_product_info = querydb.get_product_out_info(productcode=product_code,
                                                              subproductcode=sub_product_code,
                                                              version=version)

            # Define a standard logfile associated to the processing chain
            processing_unique_id='ID='+str(process_id)+'_PROD='+product_code+'_METHOD='+derivation_method+'_ALGO='+algorithm
            logfile='apps.processing.'+processing_unique_id

            # Case of a 'std_' processing (i.e. ruffus with 1 input) -> get all info from 1st INPUT and manage dates
            if re.search('^std_.*',algorithm):
                logger.debug("Processing Chain is standard type")

                # Define dates interval from input product
                start_date = input_products[0].start_date
                end_date = input_products[0].end_date

                # Manage the dates
                list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version, start_date=start_date, end_date=end_date)

                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level, \
                        'pipeline_printout_level':pipeline_printout_level,\
                        'starting_sprod': sub_product_code, \
                        'prod': product_code, \
                        'mapset':mapset,\
                        'starting_dates': list_dates,\
                        'version':version,
                        'logfile':logfile}
                        # 'native_mapset':native_mapset}

                logger.debug('RL:{pipeline_run_level}; PL:{pipeline_printout_level},prod:{prod}, sprod:{starting_sprod},mapset:{mapset},\
                            dates:{starting_dates},version:{version}'.format(**args))

                # Define an id from a combination of fields
                processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id+'.lock'

                # Check the processing chain is not locked
                if not os.path.isfile(processing_unique_lock):

                    # # Perform sanity check on the output files
                    # processing_base_directory = es_constants.es2globals['processing_dir']+\
                    #                             os.path.sep+product_code+\
                    #                             os.path.sep+version+\
                    #                             os.path.sep+mapset+os.path.sep+'derived'
                    #
                    # proc_functions.clean_corrupted_files(processing_base_directory, dry_run=True)

                    open(processing_unique_lock,'a').close()
                    logger.debug("Unique lock created: % s" % processing_unique_id)
                    # Define the module name and function()
                    module_name = 'processing_'+algorithm
                    function_name = 'processing_'+derivation_method
                    # Enter the module and walk down to the function() to be executed
                    proc_dir = __import__("apps.processing")
                    try:
                        proc_pck = getattr(proc_dir, "processing")
                    except Exception:
                        logger.error("Error in loading module apps.processing.processing")
                        return
                    try:
                        proc_mod = getattr(proc_pck, module_name)
                    except Exception:
                        logger.error("Error in loading module [%s]" % module_name)
                        return
                    try:
                        proc_func = getattr(proc_mod, function_name)
                    except Exception:
                        logger.error("Error in loading algorithm [%s] for module [%s]" % (function_name, module_name))
                        return

                    #  Check serialize option
                    if serialize==False:

                        # Call to the processing pipeline
                        logger.debug("Launching the pipeline")

                        #proc_lists = proc_func(**args)
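                        # Run the pipeline in a detached process; as noted in the
                        # non-std branch below, this avoids the ruffus
                        # 'error_duplicate_task_name' error when several chains are run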
                        results_queue = Queue()
                        p = Process(target=proc_func, args=(results_queue,), kwargs=args)
                        #p.daemon = True
                        logger.debug("Before starting the process .. %i", p.is_alive())

                        p.start()
                        logger.debug("After start  .. %i", p.is_alive())
                        #proc_lists=results_queue.get()
                        p.join()
                        logger.debug("After join  .. %i", p.is_alive())
                        # Sleep time to be read from processing
                        time.sleep(float(sleep_time))
                        logger.debug("Execution finished - remove lock")
                        try:
                            os.remove(processing_unique_lock)
                        except OSError:
                            logger.warning("Lock not removed: %s" % processing_unique_lock)

                    # Do NOT detach process (work in series)
                    else:
                        logger.info("Work in series - do not detach process")
                        results_queue = Queue()
                        proc_lists = proc_func(results_queue, **args)
                        os.remove(processing_unique_lock)
                        time.sleep(float(sleep_time))
                else:
                    logger.debug("Lock already exist: %s" % processing_unique_id)


            # Case of no 'std' (e.g. merge processing - or more than 1 input) -> get output products and pass everything to function
            else:
                output_products = querydb.get_processing_chain_products(chain.process_id,type='output')
                # Prepare arguments
                args = {'pipeline_run_level':pipeline_run_level,
                        'pipeline_printout_level':pipeline_printout_level,
                        'input_products': input_products,
                        'output_product': output_products,
                        'logfile': logfile}

                # Define an id from a combination of fields
                processing_unique_id='ID='+str(process_id)+'_METHOD='+derivation_method+'_ALGO='+algorithm+'.lock'
                processing_unique_lock=es_constants.processing_tasks_dir+processing_unique_id

                if not os.path.isfile(processing_unique_lock):
                    logger.debug("Launching processing for ID: %s" % processing_unique_id)
                    open(processing_unique_lock,'a').close()

                    # Define the module name and function()
                    module_name = 'processing_'+algorithm
                    function_name = 'processing_'+derivation_method
                    # Enter the module and walk down to the function() to be executed
                    proc_dir = __import__("apps.processing")
                    proc_pck = getattr(proc_dir, "processing")
                    proc_mod = getattr(proc_pck, module_name)
                    proc_func= getattr(proc_mod, function_name)

                    if re.search('.*merge.*',algorithm):
                        logger.debug("Processing Chain is merge type")
                        # Do NOT detach process (work in series)
                        proc_lists = proc_func(**args)

                        time.sleep(float(sleep_time))
                        logger.info("Waking-up now, and removing the .lock")
                        os.remove(processing_unique_lock)
                    else:
                        logger.info("Processing Chain is more-inputs type (e.g. modis-pp)")

                        # We have to 'detach' the process for avoiding ruffus exception 'error_duplicate_task_name'
                        results_queue = Queue()
                        p = Process(target=proc_func, args=(results_queue,), kwargs=args)
                        p.start()
                        p.join()

                        # Sleep time to be read from processing
                        time.sleep(float(sleep_time))
                        logger.debug("Execution finished - remove lock")
                        try:
                            os.remove(processing_unique_lock)
                        except OSError:
                            logger.warning("Lock not removed: %s" % processing_unique_lock)

                else:
                    logger.debug("Processing already running for ID: %s " % processing_unique_id)

        logger.info("End of the loop ... wait a while")
        time.sleep(1)
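
# A minimal, illustrative invocation of the driver above (not part of the scraped
# examples): dry_run=True prints the pipelines (run level 0, printout level 3)
# instead of running them, and serialize=True keeps everything in a single process,
# which is the easiest setup for debugging. Note that loop_processing() loops forever.
if __name__ == '__main__':
    loop_processing(dry_run=True, serialize=True)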