Exemplo n.º 1
0
    def build_task(self, context, task):
        '''
        Register the available HIRS_CTP_DAILY products for one month as
        inputs of ``task``.

        The search interval starts at context['granule'] and extends by the
        number of days in that granule's calendar month.  Every daily context
        found in it whose 'out' product exists in SPC is added to the task as
        'CTPD-<index>', where <index> is the position of the context in the
        list returned by find_contexts().

        Raises WorkflowNotReady when find_contexts() returns no contexts.
        '''

        LOG.debug("Running build_task()")

        granule = context['granule']

        # The daily computation is used both to enumerate the candidate
        # contexts and to resolve each of them to a product.
        daily_comp = hirs_ctp_daily.HIRS_CTP_DAILY()

        # monthrange() returns (weekday of first day, days in month)
        days_in_month = monthrange(granule.year, granule.month)[1]
        month_span = TimeInterval(granule,
                                  granule + timedelta(days_in_month),
                                  False, True)

        # Delivery ids, in the positional order find_contexts() receives them
        delivery_keys = (
            'hirs2nc_delivery_id',
            'hirs_avhrr_delivery_id',
            'hirs_csrb_daily_delivery_id',
            'hirs_csrb_monthly_delivery_id',
            'hirs_ctp_orbital_delivery_id',
            'hirs_ctp_daily_delivery_id',
        )
        satellite = context['satellite']
        delivery_ids = [context[key] for key in delivery_keys]

        daily_contexts = daily_comp.find_contexts(month_span, satellite,
                                                  *delivery_ids)

        if len(daily_contexts) == 0:
            raise WorkflowNotReady(
                'No HIRS_CTP_DAILY inputs available for {}'.format(granule))

        # Only products already present in the catalog become task inputs;
        # the index of a missing day is simply skipped.
        for position, daily_context in enumerate(daily_contexts):
            daily_prod = daily_comp.dataset('out').product(daily_context)
            if not SPC.exists(daily_prod):
                continue
            task.input('CTPD-{}'.format(position), daily_prod, True)
Exemplo n.º 2
0
    def file(self, sensor, sat, file_type, begin_time):
        '''
        Return the single file of the given sensor / satellite / file type
        whose data interval starts exactly at ``begin_time``.

        Raises WorkflowNotReady when no such file is found.
        '''

        instant = TimeInterval(begin_time, begin_time)
        candidates = self.files(sensor, sat, file_type, instant)
        LOG.debug("file_list: {}".format(candidates))

        # Some inputs overlap in time, so the lookup above can return more
        # than one file; keep only the one that begins at the requested time.
        for candidate in candidates:
            if candidate.data_interval.left == begin_time:
                return candidate

        raise WorkflowNotReady('No files for {} {} {} {}'.format(
            sensor, sat, file_type, begin_time))
Exemplo n.º 3
0
    def hirs_to_time_interval(self, filename):
        '''
        Build the TimeInterval spanned by a HIRS file from its name.

        The name is split on '.'; field 3 carries the date ('D' + two-digit
        year + day of year), field 4 the start time ('S' + HHMM) and field 5
        the end time ('E' + HHMM), e.g.
        NSS.GHRR.M1.D13140.S0029.E0127.B0347172.SV.level2.hdf
        '''

        parts = filename.split('.')

        start_stamp = '.'.join(parts[3:5])
        end_stamp = '.'.join((parts[3], parts[5]))

        begin_time = datetime.strptime(start_stamp, 'D%y%j.S%H%M')
        end_time = datetime.strptime(end_stamp, 'D%y%j.E%H%M')

        # The name holds only one date, so an end time earlier than the start
        # time means the file runs past midnight into the following day.
        if end_time < begin_time:
            end_time = end_time + timedelta(days=1)

        return TimeInterval(begin_time, end_time)
Exemplo n.º 4
0
    def process_metadata(self, file_type):
        '''
        Read the metadata listing registered for ``file_type`` and index its
        records in ``self.file_data``.

        The listing is found at ``self.input_data[file_type]``.  Each
        non-blank line holds nine comma-separated fields:

            size, mod_time, begin_time, end_time, sensor, sat, file_type,
            file_system, relative_path

        where begin_time / end_time are epoch timestamps.  Records are stored
        as ``self.file_data[sensor][sat][file_type][name]``, a dict with the
        keys 'data_interval', 'name' and 'path'; ``name`` is the basename of
        relative_path.  The first record seen for a given name wins.

        On completion ``self.indexed_file_types[file_type]`` is set to 1 for
        the *requested* file_type.
        '''
        LOG.debug("Reading the metadata file...")

        metadata_file = self.input_data[file_type]
        LOG.debug("For file_type = '{}', metadata_file = {}".format(
            file_type, metadata_file))

        # Open the metadata file and read each line, splitting into the
        # required metadata for each file.
        with open(metadata_file) as metadata:
            for line in metadata:
                # A blank line (e.g. a trailing newline at the end of the
                # listing) carries no record and would break the unpacking.
                if not line.strip():
                    continue

                # The per-record file type gets its own name: unpacking into
                # `file_type` would overwrite the argument, and the final
                # indexed_file_types update would then be keyed by whatever
                # the last line contained.
                (size, mod_time, begin_time, end_time, sensor, sat,
                 record_file_type, file_system,
                 relative_path) = line.split(',')
                begin_time = datetime.fromtimestamp(float(begin_time))
                end_time = datetime.fromtimestamp(float(end_time))
                # Last field: keep the first whitespace-delimited token,
                # which also drops the trailing newline.
                relative_path = relative_path.split()[0]
                name = os.path.basename(relative_path)

                # Create the sensor / satellite / file-type levels on demand
                files_by_name = self.file_data.setdefault(
                    sensor, {}).setdefault(sat, {}).setdefault(
                        record_file_type, {})

                if name in files_by_name:
                    continue

                if end_time < begin_time:
                    LOG.warning(
                        "{}: End time {} is before begin time {}, adding a day."
                        .format(name, end_time, begin_time))
                    end_time += timedelta(days=1)

                files_by_name[name] = {
                    'data_interval': TimeInterval(begin_time, end_time),
                    'name': name,
                    'path': relative_path
                }

        self.indexed_file_types[file_type] = 1
Exemplo n.º 5
0
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context.

        For each hirs2nc context found in the day starting at
        context['granule'], the HIRS2NC and HIRS_AVHRR 'out' products are
        looked up in a StoredProductCatalog.  When both exist they are added
        to the task as 'HIR1B-<n>' and 'COLLO-<n>', together with the matching
        'PTMSX-<n>' file obtained from delta_catalog; <n> counts the accepted
        contexts from zero.  A lookup of CFSR_PGRBHANL files for the same day
        is then attempted against dawg_catalog.

        Raises WorkflowNotReady when no hirs2nc contexts are found.
        Returns None early, after logging a warning, when no context has both
        products available.
        '''
        global delta_catalog

        LOG.debug("Running build_task()")
        LOG.debug("context:  {}".format(context))

        # Initialize the hirs2nc and hirs_avhrr modules with the data locations
        hirs2nc.delta_catalog = delta_catalog
        hirs_avhrr.delta_catalog = delta_catalog

        # Instantiate the hirs and hirs_avhrr computations
        hirs2nc_comp = hirs2nc.HIRS2NC()
        hirs_avhrr_comp = hirs_avhrr.HIRS_AVHRR()

        SPC = StoredProductCatalog()

        # The day being processed, ending one second before the next day
        day = TimeInterval(
            context['granule'],
            (context['granule'] + timedelta(days=1) - timedelta(seconds=1)))

        hirs2nc_contexts = hirs2nc_comp.find_contexts(
            day, context['satellite'], context['hirs2nc_delivery_id'])

        if len(hirs2nc_contexts) == 0:
            raise WorkflowNotReady('NO HIRS Data For {}'.format(
                context['granule']))

        # Input Counter.
        ic = 0

        for hirs2nc_context in hirs2nc_contexts:

            # Making Input contexts: the HIRS_AVHRR context is the hirs2nc
            # context plus the requested hirs_avhrr delivery id
            hirs_avhrr_context = hirs2nc_context.copy()
            hirs_avhrr_context['hirs_avhrr_delivery_id'] = context[
                'hirs_avhrr_delivery_id']

            LOG.debug("HIRS context:        {}".format(hirs2nc_context))
            LOG.debug("HIRS_AVHRR context:  {}".format(hirs_avhrr_context))

            # Confirming we have HIRS1B and COLLO products...
            hirs2nc_prod = hirs2nc_comp.dataset('out').product(hirs2nc_context)
            hirs_avhrr_prod = hirs_avhrr_comp.dataset('out').product(
                hirs_avhrr_context)

            # If HIRS1B and COLLO products exist, add them and the Patmos-X
            # file for this context to the list of input files to be
            # downloaded to the workspace...
            if SPC.exists(hirs2nc_prod) and SPC.exists(hirs_avhrr_prod):
                # Its safe to require all three inputs
                task.input('HIR1B-{}'.format(ic), hirs2nc_prod)
                task.input('COLLO-{}'.format(ic), hirs_avhrr_prod)
                task.input(
                    'PTMSX-{}'.format(ic),
                    delta_catalog.file('avhrr', hirs2nc_context['satellite'],
                                       'PTMSX', hirs2nc_context['granule']))
                ic += 1

        LOG.debug(
            "There are {} valid HIR1B/COLLO/PTMSX contexts in ({} -> {})".
            format(ic, day.left, day.right))

        if ic == 0:
            LOG.warning(
                "There are no valid HIR1B/COLLO/PTMSX contexts in ({} -> {}), aborting..."
                .format(day.left, day.right))
            return

        # CFSR files are searched over the full day (no one-second trim)
        interval = TimeInterval(context['granule'],
                                context['granule'] + timedelta(days=1))

        num_cfsr_files = 0

        # Search for the old style pgbhnl.gdas.*.grb2 files from the PEATE
        # NOTE(review): num_cfsr_files is always 0 here and cfsr_files is not
        # consumed in the visible part of this method -- confirm against the
        # rest of the file.
        if num_cfsr_files == 0:
            LOG.debug(
                "Trying to retrieve CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG..."
            )
            try:
                cfsr_files = dawg_catalog.files('', 'CFSR_PGRBHANL', interval)
                num_cfsr_files = len(cfsr_files)
                if num_cfsr_files == 0:
                    LOG.debug("\tpgbhnl.gdas.*.grb2 CFSR files from DAWG : {}".
                              format(cfsr_files))
            # "except ... as" is valid on Python 2.6+ and Python 3; the
            # comma form only parses on Python 2.  A failed retrieval is
            # deliberately best-effort: it is logged, not re-raised.
            except Exception as err:
                LOG.error("{}.".format(err))
                LOG.warning(
                    "Retrieval of CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG failed"
                )
Exemplo n.º 6
0
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context.

        HIRS_TPW_ORBITAL contexts are searched in the day starting at
        context['granule'], padded by one hour on each side.  The contexts
        are then trimmed to the run from the first to the last one whose
        granule falls on that day (by day of month), and for each remaining
        context the 'shift' and 'noshift' products that exist in a
        StoredProductCatalog are added to the task as 'TPWO_shift-<nn>' and
        'TPWO_noshift-<nn>', <nn> being the zero-padded position of the
        context in the trimmed list.

        Raises WorkflowNotReady when no HIRS_TPW_ORBITAL contexts are found.
        '''
        global delta_catalog

        LOG.debug("Running build_task()")

        # Initialize the hirs_tpw_orbital module with the data locations
        hirs_tpw_orbital.delta_catalog = delta_catalog

        # Instantiate the hirs_tpw_orbital computation
        hirs_tpw_orbital_comp = hirs_tpw_orbital.HIRS_TPW_ORBITAL()

        SPC = StoredProductCatalog()

        # TPW Orbital Input

        granule = context['granule']
        wedge = timedelta(seconds=1)
        hour = timedelta(hours=1)
        day = timedelta(days=1)

        # Add an hour to each end of the day to make sure the day is
        # completely covered
        interval = TimeInterval(granule - 1 * hour, granule + day + 1 * hour)

        hirs_tpw_orbital_contexts = hirs_tpw_orbital_comp.find_contexts(
            interval, context['satellite'], context['hirs2nc_delivery_id'],
            context['hirs_avhrr_delivery_id'],
            context['hirs_csrb_daily_delivery_id'],
            context['hirs_csrb_monthly_delivery_id'],
            context['hirs_ctp_orbital_delivery_id'],
            context['hirs_ctp_daily_delivery_id'],
            context['hirs_ctp_monthly_delivery_id'],
            context['hirs_tpw_orbital_delivery_id'])

        if len(hirs_tpw_orbital_contexts) == 0:
            raise WorkflowNotReady(
                'No HIRS_TPW_ORBITAL inputs available for {}'.format(granule))

        LOG.debug("There are {} TPW Orbital contexts for {}.".format(
            len(hirs_tpw_orbital_contexts), interval))

        # Loop variables are named orbital_context so that the `context`
        # argument is never rebound.
        for orbital_context in hirs_tpw_orbital_contexts:
            LOG.debug(orbital_context)

        this_day = granule.day
        previous_day = (granule - day + wedge).day
        next_day = (granule + day + wedge).day
        LOG.debug("previous_day: {}".format(previous_day))
        LOG.debug("this_day: {}".format(this_day))
        LOG.debug("next_day: {}".format(next_day))

        # Number of extra contexts to keep on each side of this day's run.
        # Have this set to zero unless we need to set it otherwise (say for
        # Metop-B)
        interval_pad = 0

        num_contexts = len(hirs_tpw_orbital_contexts)
        this_day_indices = [
            position
            for position, candidate in enumerate(hirs_tpw_orbital_contexts)
            if candidate['granule'].day == this_day
        ]

        # Trim to [first, last] of this day's contexts (plus the pad), using
        # a non-negative, exclusive stop index.  A negative end index of -1
        # would turn "end + 1" into 0 and produce an empty slice whenever the
        # last context belongs to this day.
        start_idx = 0
        stop_idx = 0  # no context on this day -> empty selection
        if this_day_indices:
            start_idx = max(this_day_indices[0] - interval_pad, 0)
            stop_idx = min(this_day_indices[-1] + interval_pad + 1,
                           num_contexts)
            LOG.debug("Breaking: start_idx = {}, granule = {}".format(
                start_idx,
                hirs_tpw_orbital_contexts[start_idx]['granule']))
            LOG.debug("Breaking: end_idx = {}, granule = {}".format(
                stop_idx - 1,
                hirs_tpw_orbital_contexts[stop_idx - 1]['granule']))

        hirs_tpw_orbital_contexts = hirs_tpw_orbital_contexts[
            start_idx:stop_idx]

        for orbital_context in hirs_tpw_orbital_contexts:
            LOG.debug("{}".format(orbital_context))

        # All 'shift' inputs are registered first, then all 'noshift' inputs
        for dataset_name, input_label in (('shift', 'TPWO_shift-{}'),
                                          ('noshift', 'TPWO_noshift-{}')):
            for idx, orbital_context in enumerate(hirs_tpw_orbital_contexts):
                hirs_tpw_orbital_prod = hirs_tpw_orbital_comp.dataset(
                    dataset_name).product(orbital_context)
                if SPC.exists(hirs_tpw_orbital_prod):
                    task.input(input_label.format(str(idx).zfill(2)),
                               hirs_tpw_orbital_prod)
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(4,13) ]
#for years in range(2010, 2018):
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(1,13) ]
#years = 2018
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(1,2) ]

# Satellite to process, and the list of calendar-month intervals to submit.
satellite = 'metop-b'
#granule = datetime(2015,5,14, 9, 23)
#intervals = [TimeInterval(granule, granule + hour - wedge)]
# Data record for this satellite:
# NSS.GHRR.M1.D13140.S0029.E0127.B0347172.SV.level2.hdf --> NSS.GHRR.M1.D15365.S2307.E0004.B1705253.SV.level2.hdf
# datetime(2013, 5, 20, 0, 0) --> datetime(2017, 12, 31, 0, 0)
intervals = []

# Each interval runs from the first day of a month up to `wedge` before the
# start of the following month.
# 2013 is a partial year: May through December only.
years = 2013
for month in range(5, 13):
    intervals.append(
        TimeInterval(
            datetime(years, month, 1),
            datetime(years, month,
                     calendar.monthrange(years, month)[1]) + day - wedge))

# 2014 through 2017: every month of the year.
for years in range(2014, 2018):
    for month in range(1, 13):
        intervals.append(
            TimeInterval(
                datetime(years, month, 1),
                datetime(years, month,
                         calendar.monthrange(years, month)[1]) + day - wedge))

satellite_choices = [
    'noaa-06', 'noaa-07', 'noaa-08', 'noaa-09', 'noaa-10', 'noaa-11',
    'noaa-12', 'noaa-14', 'noaa-15', 'noaa-16', 'noaa-17', 'noaa-18',
# Satellite specific information

#satellite = 'noaa-19'
#granule = datetime(2015, 4, 17, 0, 20)
#intervals = [TimeInterval(granule, granule + day - wedge)]
# NSS.GHRR.NP.D09108.S2301.E0050.B0100809.GC.level2.hdf --> NSS.GHRR.NP.D17365.S2238.E2359.B4585757.GC.level2.hdf
# datetime(2009, 4, 18, 0, 0) --> datetime(2017, 12, 31, 0, 0)
#intervals = [TimeInterval(datetime(years,1,1), datetime(years+1,1,1)-wedge) for years in range(2009, 2018) ]

# Satellite to process, and the list of calendar-year intervals to submit.
satellite = 'metop-b'
#intervals = [TimeInterval(datetime(2015,1,1), datetime(2015,2,1)-wedge)]
# Data record for this satellite:
# NSS.GHRR.M1.D13140.S0029.E0127.B0347172.SV.level2.hdf --> NSS.GHRR.M1.D15365.S2307.E0004.B1705253.SV.level2.hdf
# datetime(2013, 5, 20, 0, 0) --> datetime(2017, 12, 31, 0, 0)

# One interval per year, 2013 through 2017, each ending `wedge` before the
# following January 1st.
intervals = []
for years in range(2013, 2018):
    intervals.append(
        TimeInterval(datetime(years, 1, 1),
                     datetime(years + 1, 1, 1) - wedge))

# Satellite names known to this script
satellite_choices = [
    'noaa-06',
    'noaa-07',
    'noaa-08',
    'noaa-09',
    'noaa-10',
    'noaa-11',
    'noaa-12',
    'noaa-14',
    'noaa-15',
    'noaa-16',
    'noaa-17',
    'noaa-18',
    'noaa-19',
    'metop-a',
    'metop-b',
]

# Instantiate the daily and monthly CSRB computations
hirs_csrb_daily_comp = hirs_csrb_daily.HIRS_CSRB_DAILY()
comp = hirs_csrb_monthly.HIRS_CSRB_MONTHLY()

LOG.info("Submitting intervals...")
Exemplo n.º 9
0
#intervals = []
#years = 2009
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(4,13) ]
#for years in range(2010, 2018):
    #intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(1,13) ]
#years = 2018
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(1,2) ]

# Satellite to process, and the list of calendar-month intervals to submit.
satellite = 'metop-b'
#granule = datetime(2015,5,14, 9, 23)
#intervals = [TimeInterval(granule, granule + hour - wedge)]
# Data record for this satellite:
# NSS.GHRR.M1.D13140.S0029.E0127.B0347172.SV.level2.hdf --> NSS.GHRR.M1.D15365.S2307.E0004.B1705253.SV.level2.hdf
# datetime(2013, 5, 20, 0, 0) --> datetime(2017, 12, 31, 0, 0)
intervals = []

# Each interval covers one calendar month, ending `wedge` before the start
# of the next one.  2013 contributes May-December only.
years = 2013
for month in range(5, 13):
    intervals.append(
        TimeInterval(
            datetime(years, month, 1),
            datetime(years, month,
                     calendar.monthrange(years, month)[1]) + day - wedge))

# Full years 2014-2017
for years in range(2014, 2018):
    for month in range(1, 13):
        intervals.append(
            TimeInterval(
                datetime(years, month, 1),
                datetime(years, month,
                         calendar.monthrange(years, month)[1]) + day - wedge))

# Satellite names known to this script
satellite_choices = [
    'noaa-06',
    'noaa-07',
    'noaa-08',
    'noaa-09',
    'noaa-10',
    'noaa-11',
    'noaa-12',
    'noaa-14',
    'noaa-15',
    'noaa-16',
    'noaa-17',
    'noaa-18',
    'noaa-19',
    'metop-a',
    'metop-b',
]

def setup_computation(satellite):

    input_data = {'HIR1B': '/mnt/software/flo/hirs_l1b_datalists/{0:}/HIR1B_{0:}_latest'.format(satellite),
                  'CFSR':  '/mnt/cephfs_data/geoffc/hirs_data_lists/CFSR.out',
                  'PTMSX': '/mnt/software/flo/hirs_l1b_datalists/{0:}/PTMSX_{0:}_latest'.format(satellite)}

    # Data locations
    collection = {'HIR1B': 'ILIAD',