def test_globify_known_lengths(self):
    # Run
    result = globify('{directory}/{platform:4s}{satnum:2d}/{orbit:05d}',
                     {'directory': 'otherdir',
                      'platform': 'noaa'})
    # Assert
    self.assertEqual(result, 'otherdir/noaa??/?????')
def process_IN_OPEN(self, event):
    """When the file opens."""
    fname = os.path.basename(event.pathname)
    if not fnmatch(fname, globify(self._pattern)):
        logger.debug("Ignoring %s", event.pathname)
        return False
    if self.current_event is None:
        self.current_event = event
    elif event.pathname != self.current_event.pathname:
        self.clean_up(self.current_event)
        self.current_event = event
    if self._fp is None:
        self._fp = open(event.pathname)
        self._current_pass = self._schedule_reader.next_pass
        info = parse(self._pattern, fname)
        try:
            self.sat = " ".join((info["platform"], info["number"]))
            self.time = info["utctime"]
        except KeyError:
            logger.info("Could not retrieve satellite name from filename")
    self.set_reception_active(event)
    return self._fp is not None
def test_globify_unknown_lengths(self):
    # Run
    result = globify('hrpt_{platform_and_num}_' +
                     '{date}_{time}_{orbit}.l1b',
                     {'platform_and_num': 'noaa16'})
    # Assert
    self.assertEqual(result, 'hrpt_noaa16_*_*_*.l1b')
def test_globify_datetime(self):
    # Run
    result = globify('hrpt_{platform}{satnum}_' +
                     '{time:%Y%m%d_%H%M}_{orbit}.l1b',
                     {'platform': 'noaa',
                      'time': dt.datetime(2014, 2, 10, 12, 12)})
    # Assert
    self.assertEqual(result, 'hrpt_noaa*_20140210_1212_*.l1b')
def test_globify_datetime_nosub(self):
    # Run
    result = globify('hrpt_{platform:4s}{satnum:2d}_' +
                     '{time:%Y%m%d_%H%M}_{orbit}.l1b',
                     {'platform': 'noaa'})
    # Assert
    self.assertEqual(result, 'hrpt_noaa??_????????_????_*.l1b')
def match_filenames(filenames, pattern):
    """Get the filenames matching *pattern*."""
    matching = []
    for filename in filenames:
        if fnmatch(get_filebase(filename, pattern), globify(pattern)):
            matching.append(filename)
    return matching
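# A minimal usage sketch for the helper above. get_filebase (not shown here)
# strips leading directories that are not part of the pattern; for flat
# basenames the core idiom is fnmatch against globify(pattern). The filenames
# and pattern below are invented for illustration:

from fnmatch import fnmatch

from trollsift import globify

filenames = ['hrpt_noaa19_20140210_1212_12345.l1b',
             'hrpt_noaa18_20140210_1300_54321.l1b',
             'unrelated.txt']
pattern = 'hrpt_{platform}{satnum:2d}_{time:%Y%m%d_%H%M}_{orbit:05d}.l1b'

matching = [fname for fname in filenames
            if fnmatch(fname, globify(pattern))]
# -> the two .l1b files; 'unrelated.txt' is filtered out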
def test_globify_partial_datetime(self):
    # Run
    result = globify('hrpt_{platform:4s}{satnum:2d}_' +
                     '{time:%Y%m%d_%H%M}_{orbit}.l1b',
                     {'platform': 'noaa',
                      'time': (dt.datetime(2014, 2, 10, 12, 12), 'Ymd')})
    # Assert
    self.assertEqual(result, 'hrpt_noaa??_20140210_????_*.l1b')
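# The tests above pin down globify's substitution rules: keys supplied in the
# dict are substituted literally, unfilled fixed-width specs become runs of
# '?', and unconstrained fields become '*'. A minimal sketch, assuming
# trollsift is installed (outputs taken from the tests above):

import datetime as dt

from trollsift import globify

pattern = 'hrpt_{platform:4s}{satnum:2d}_{time:%Y%m%d_%H%M}_{orbit}.l1b'

# Substituted key appears literally; the rest collapse to wildcards.
globify(pattern, {'platform': 'noaa'})
# -> 'hrpt_noaa??_????????_????_*.l1b'

# A full datetime fills its whole format spec.
globify('hrpt_{platform}{satnum}_{time:%Y%m%d_%H%M}_{orbit}.l1b',
        {'platform': 'noaa', 'time': dt.datetime(2014, 2, 10, 12, 12)})
# -> 'hrpt_noaa*_20140210_1212_*.l1b'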
def folder_get_first_last(
        root,
        fmt="SMAP_L3_SM_P_{time:%Y%m%d}_R{orbit:05d}_{proc_number:03d}.h5",
        subpaths=['{:%Y.%m.%d}']):
    """
    Get the first and last product that exist under the root folder.

    Parameters
    ----------
    root : string
        Root folder on the local filesystem.
    fmt : string, optional
        Formatting string for the product filenames.
    subpaths : list, optional
        Format of the subdirectories under root.

    Returns
    -------
    start : datetime.datetime
        Datetime of the first product found.
    end : datetime.datetime
        Datetime of the last product found.
    """
    start = None
    end = None
    first_folder = get_first_folder(root, subpaths)
    last_folder = get_last_folder(root, subpaths)
    if first_folder is not None:
        files = sorted(glob.glob(os.path.join(
            first_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']
    if last_folder is not None:
        files = sorted(glob.glob(os.path.join(
            last_folder, parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']
    return start, end
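# The function above relies on a globify/parse round trip: globify(fmt) turns
# the template into a glob to list candidate files, and parse(fmt, basename)
# recovers the timestamp from a hit. A short sketch with trollsift.parser
# (the basename below is hypothetical):

from trollsift import parser

fmt = "SMAP_L3_SM_P_{time:%Y%m%d}_R{orbit:05d}_{proc_number:03d}.h5"

parser.globify(fmt)
# -> 'SMAP_L3_SM_P_????????_R?????_???.h5'

info = parser.parse(fmt, "SMAP_L3_SM_P_20150401_R13080_001.h5")
info['time']
# -> datetime.datetime(2015, 4, 1, 0, 0)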
def select_files_from_directory(self, directory=None):
    """Find files for this reader in *directory*.

    If directory is None or '', look in the current directory.
    """
    filenames = []
    if directory is None:
        directory = ''
    for pattern in self.file_patterns:
        matching = glob.iglob(os.path.join(directory, globify(pattern)))
        filenames.extend(matching)
    return filenames
def find_filenames(self, directory, file_patterns=None):
    if file_patterns is None:
        file_patterns = self.file_patterns
        # file_patterns.extend(item['file_patterns'] for item in
        #                      self.config['file_types'])
    filelist = []
    if directory is None:
        directory = ''
    for pattern in file_patterns:
        filelist.extend(glob.iglob(os.path.join(directory,
                                                globify(pattern))))
    return filelist
def gldas_folder_get_first_last(
        root,
        fmt="GLDAS_NOAH025SUBP_3H.A{time:%Y%j.%H%M}.001.{production_time:%Y%j%H%M%S}.grb",
        subpaths=['{:%Y}', '{:%j}']):
    """
    Get the first and last product that exist under the root folder.

    Parameters
    ----------
    root : string
        Root folder on the local filesystem.
    fmt : string, optional
        Formatting string for the product filenames.
    subpaths : list, optional
        Format of the subdirectories under root.

    Returns
    -------
    start : datetime.datetime
        Datetime of the first product found.
    end : datetime.datetime
        Datetime of the last product found.
    """
    start = None
    end = None
    first_folder = get_first_gldas_folder(root, subpaths)
    last_folder = get_last_gldas_folder(root, subpaths)
    if first_folder is not None:
        files = sorted(glob.glob(os.path.join(first_folder,
                                              parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[0])[1])
        start = data['time']
    if last_folder is not None:
        files = sorted(glob.glob(os.path.join(last_folder,
                                              parser.globify(fmt))))
        data = parser.parse(fmt, os.path.split(files[-1])[1])
        end = data['time']
    return start, end
def select_files(self, base_dir=None, filenames=None, sensor=None):
    file_set, info_filenames = super(xRITFile, self).select_files(
        base_dir, filenames, sensor)

    # for pattern in self.file_patterns:
    #     for filename in filenames:
    #         parse(pattern, os.path.basename(filename))

    matching_filenames = []

    # Organize filenames into file types and create file handlers
    remaining_filenames = set(self.info['filenames'])
    start_times = []
    end_times = []
    for filetype, filetype_info in self.config['file_types'].items():
        patterns = filetype_info['file_patterns']
        for pattern in patterns:
            used_filenames = set()
            for filename in remaining_filenames:
                if fnmatch(os.path.basename(filename), globify(pattern)):
                    # we know how to use this file (even if we may not use
                    # it later)
                    used_filenames.add(filename)
                    filename_info = parse(pattern,
                                          os.path.basename(filename))
                    # Only add this file handler if it is within the time
                    # we want
                    file_start = filename_info['start_time']
                    file_end = filename_info.get('end_time', file_start)
                    if self._start_time and file_start < self._start_time:
                        continue
                    if self._end_time and file_end > self._end_time:
                        continue

                    start_times.append(file_start)
                    end_times.append(file_end)
                    matching_filenames.append(filename)
                    # TODO: Area filtering

            remaining_filenames -= used_filenames

    if matching_filenames:
        # Assign the start time and end time
        self._start_time = min(start_times)
        self._end_time = max(end_times)
        self.info['filenames'] = matching_filenames
    return file_set, info_filenames
def match_filenames(self, filenames, base_dir=None):
    result = []
    for file_pattern in self.file_patterns:
        if base_dir is not None:
            file_pattern = os.path.join(base_dir, file_pattern)
        pattern = globify(file_pattern)
        if not filenames:
            return result
        for filename in list(filenames):
            if fnmatch(os.path.basename(filename),
                       os.path.basename(pattern)):
                result.append(filename)
                filenames.remove(filename)
    return result
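# Design note: this variant claims files by mutating the caller's list
# (filenames.remove(...)), so a file matched by one pattern is never offered
# to a later one; iterating over list(filenames) copies the list, making
# removal during iteration safe. Hypothetical illustration ('reader' and the
# filenames are assumed, not taken from the source):

files = ['H-000-MSG3__-MSG3________-IR_108___-000008___-201504011200-__',
         'notes.txt']
claimed = reader.match_filenames(files, base_dir='/data')
# 'files' now holds only the filenames no pattern claimed.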
def select_files(self, base_dir=None, filenames=None, sensor=None):
    res = super(FileYAMLReader, self).select_files(base_dir, filenames,
                                                   sensor)

    # Organize filenames into file types and create file handlers
    remaining_filenames = set(self.info['filenames'])
    for filetype, filetype_info in self.config['file_types'].items():
        filetype_cls = filetype_info['file_reader']
        patterns = filetype_info['file_patterns']
        file_handlers = []
        for pattern in patterns:
            used_filenames = set()
            levels = len(pattern.split('/'))
            for filename in remaining_filenames:
                filebase = os.path.join(
                    *filename.split(os.path.sep)[-levels:])
                if fnmatch(filebase, globify(pattern)):
                    # we know how to use this file (even if we may not use
                    # it later)
                    used_filenames.add(filename)
                    filename_info = parse(pattern, filebase)
                    file_handler = filetype_cls(filename, filename_info,
                                                filetype_info)

                    # Only add this file handler if it is within the time
                    # we want
                    if self._start_time and file_handler.start_time < self._start_time:
                        continue
                    if self._end_time and file_handler.end_time > self._end_time:
                        continue

                    # TODO: Area filtering
                    file_handlers.append(file_handler)
            remaining_filenames -= used_filenames

        # Only create an entry in the file handlers dictionary if
        # we have those files
        if file_handlers:
            # Sort the file handlers by start time
            file_handlers.sort(key=lambda fh: fh.start_time)
            self.file_handlers[filetype] = file_handlers
    return res
def get_lrit_filenames(scene, area_name):
    """Get the set of lrit filenames for the given scene."""
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, scene.fullname + ".cfg"))
    filename = conf.get(scene.instrument_name + "-level4",
                        "filename",
                        raw=True,
                        vars=os.environ)
    directory = conf.get(scene.instrument_name + "-level4",
                         "dir",
                         vars=os.environ)
    pathname_tmpl = os.path.join(directory, filename)
    LOG.debug("Path = " + str(pathname_tmpl))
    fparser = parser.Parser(pathname_tmpl)
    lrit_files = glob(
        parser.globify(pathname_tmpl, {'nominal_time': scene.time_slot}))

    prologue = None
    segmfiles = []
    segm_numbers = []
    for item in lrit_files:
        p__ = fparser.parse(item)
        segm = p__['segment'].strip('_')
        if segm == 'PRO':
            prologue = item
        else:
            segm_numbers.append(int(segm))
            segmfiles.append(item)

    if not prologue:
        LOG.warning("No prologue file found for timeslot")
    segm_numbers.sort()
    # list(...) is needed so the comparison also holds on Python 3, where a
    # range object never compares equal to a list.
    if list(range(1, 11)) == segm_numbers:
        LOG.info("All ten segment files found")
    else:
        LOG.warning("Less than 10 segments found: %s", str(segm_numbers))

    return prologue, segmfiles
def clean_up(self, event):
    """Clean up."""
    fname = os.path.basename(event.pathname)
    if not fnmatch(fname, globify(self._pattern)):
        return
    if self._fp is not None:
        self._fp.close()
    else:
        logger.warning("File descriptor is None for %s", event.pathname)
    self._fp = None
    self._schedule_reader.get_next_pass()
    self.stop_receiving()
    try:
        del self._readers[event.pathname]
    except KeyError:
        logger.info("No reader defined for %s", str(event.pathname))
def process_IN_MODIFY(self, event):
    """File has been modified, read it!"""
    if not self.process_IN_OPEN(event):
        return
    logger.debug("File modified! %s", event.pathname)
    fname = os.path.basename(event.pathname)
    if not fnmatch(fname, globify(self._pattern)):
        return
    self.set_reception_active(event)
    for sat, key, elevation, qual, data in self._reader(
            event.pathname, self._current_pass):
        if qual > 0:
            self._holder.add(sat, key, elevation, qual, data)
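# The handlers above follow the pyinotify callback convention: methods named
# process_IN_<EVENT> on a ProcessEvent subclass are called as matching
# inotify events arrive. A minimal wiring sketch, assuming pyinotify is
# installed and '/data' is the watched directory (both illustrative):

import pyinotify

class _Handler(pyinotify.ProcessEvent):
    # process_IN_OPEN / process_IN_MODIFY as defined above would live here.
    pass

wm = pyinotify.WatchManager()
notifier = pyinotify.Notifier(wm, _Handler())
wm.add_watch('/data', pyinotify.IN_OPEN | pyinotify.IN_MODIFY)
notifier.loop()  # blocks, dispatching events to the handler methods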
def process_all_scans_in_dname(dname, out_path, ok_dates=None):
    """Make level 1c files for all files in directory dname."""
    fl_ = glob(os.path.join(dname, globify(hrit_file_pattern)))
    dates = [p__.parse(os.path.basename(p))['start_time'] for p in fl_]
    unique_dates = np.unique(dates).tolist()
    for uqdate in unique_dates:
        date_formated = uqdate.strftime("%Y%m%d%H%M")
        if ok_dates is not None and date_formated not in ok_dates.keys():
            print("Skipping date {date}".format(date=date_formated))
            continue
        # Every hour only:
        # if uqdate.minute != 0:
        #     continue
        tslot_files = [f for f in fl_
                       if p__.parse(os.path.basename(f))['start_time'] == uqdate]
        try:
            process_one_scan(tslot_files, out_path)
        except Exception:
            # Deliberately keep going if one time slot fails; catching
            # Exception (rather than a bare "except:") avoids swallowing
            # KeyboardInterrupt and SystemExit.
            pass
def get_product_statistics_files(pps_control_path, scene,
                                 product_statistics_filename,
                                 max_abs_deviation_minutes):
    """From directory path, sensor and platform name get possible product statistics filenames."""
    platform_id = SATELLITE_NAME.get(scene['platform_name'],
                                     scene['platform_name'])
    platform_id = METOP_NAME_LETTER.get(platform_id, platform_id)
    product_stat_flist = []
    scene_start_time = scene['starttime']
    possible_filetimes = [scene_start_time]
    for nmin in range(1, max_abs_deviation_minutes + 1):
        possible_filetimes.append(scene_start_time - timedelta(seconds=60 * nmin))
        possible_filetimes.append(scene_start_time + timedelta(seconds=60 * nmin))
    for product_name in ['CMA', 'CT', 'CTTH', 'CPP', 'CMAPROB']:
        for start_time in possible_filetimes:
            glbify = globify(product_statistics_filename,
                             {'product': product_name,
                              'satellite': platform_id,
                              'orbit': '%.5d' % scene['orbit_number'],
                              'starttime': start_time})
            product_stat_flist = product_stat_flist + glob(
                os.path.join(pps_control_path, glbify))
    return product_stat_flist
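# For each product/start-time pair, the substitution above produces one
# concrete glob. A sketch with a hypothetical statistics template (the
# template and values are invented for illustration, not taken from PPS):

from datetime import datetime

from trollsift import globify

tmpl = 'S_NWC_{product}_{satellite}_{orbit}_{starttime:%Y%m%dT%H%M%S}_statistics.xml'
globify(tmpl, {'product': 'CMA',
               'satellite': 'noaa19',
               'orbit': '%.5d' % 12345,
               'starttime': datetime(2015, 4, 1, 12, 0)})
# -> 'S_NWC_CMA_noaa19_12345_20150401T120000_statistics.xml'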
def test_globify_empty(self):
    # Run
    result = globify('{a}_{b:4d}.end', {})
    # Assert
    self.assertEqual(result, '*_????.end')
def test_globify_noarg(self):
    # Run
    result = globify('{a}_{b:4d}.end')
    # Assert
    self.assertEqual(result, '*_????.end')
def test_globify_simple(self):
    # Run
    result = globify('{a}_{b}.end', {'a': 'a', 'b': 'b'})
    # Assert
    self.assertEqual(result, 'a_b.end')
def select_files(self, base_dir=None, filenames=None, sensor=None):
    res = super(FileYAMLReader, self).select_files(base_dir, filenames,
                                                   sensor)

    # Organize filenames into file types and create file handlers
    remaining_filenames = set(self.info['filenames'])
    for filetype, filetype_info in self.config['file_types'].items():
        filetype_cls = filetype_info['file_reader']
        patterns = filetype_info['file_patterns']
        file_handlers = []
        for pattern in patterns:
            used_filenames = set()
            levels = len(pattern.split('/'))
            # correct separator if needed
            pattern = os.path.join(*pattern.split('/'))
            for filename in remaining_filenames:
                filebase = os.path.join(
                    *filename.split(os.path.sep)[-levels:])
                if fnmatch(filebase, globify(pattern)):
                    # we know how to use this file (even if we may not use
                    # it later)
                    used_filenames.add(filename)
                    filename_info = parse(pattern, filebase)
                    file_handler = filetype_cls(filename, filename_info,
                                                filetype_info)

                    # Only add this file handler if it is within the time
                    # we want
                    if self._start_time and file_handler.start_time < self._start_time:
                        continue
                    if self._end_time and file_handler.end_time > self._end_time:
                        continue

                    if self._area:
                        from trollsched.boundary import AreaDefBoundary, Boundary
                        from satpy.resample import get_area_def
                        try:
                            gbb = Boundary(*file_handler.get_bounding_box())
                        except NotImplementedError:
                            pass
                        else:
                            abb = AreaDefBoundary(get_area_def(self._area),
                                                  frequency=1000)
                            intersection = gbb.contour_poly.intersection(
                                abb.contour_poly)
                            if not intersection:
                                continue

                    file_handlers.append(file_handler)
            remaining_filenames -= used_filenames

        # Only create an entry in the file handlers dictionary if
        # we have those files
        if file_handlers:
            # Sort the file handlers by start time
            file_handlers.sort(key=lambda fh: fh.start_time)
            self.file_handlers[filetype] = file_handlers
    return res
def load_dataset(self, satscene, filename=None, *args, **kwargs):
    """Read data from file and load it into *satscene*."""
    del args
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    options = dict(conf.items(satscene.instrument_name + "-level2",
                              raw=True))
    options["resolution"] = 1000
    options["geofile"] = os.path.join(options["dir"], options["geofile"])
    options.update(kwargs)

    fparser = Parser(options.get("filename"))
    gparser = Parser(options.get("geofile"))

    if isinstance(filename, (list, set, tuple)):
        # we got the entire dataset.
        for fname in filename:
            if fnmatch(os.path.basename(fname), fparser.globify()):
                metadata = fparser.parse(os.path.basename(fname))
                resolution = self.res[metadata["resolution"]]
                self.datafiles[resolution] = fname
            elif fnmatch(os.path.basename(fname), gparser.globify()):
                self.geofile = fname
    elif ((filename is not None) and
          fnmatch(os.path.basename(options["filename"]),
                  fparser.globify())):
        # read just one file
        logger.debug("Reading from file: " + str(options["filename"]))
        filename = options["filename"]
        resolution = self.res[os.path.basename(filename)[5]]
        self.datafiles[resolution] = filename

    if not self.datafiles:
        # find files according to config
        logger.debug(
            "Didn't get any valid file as input, looking in defined places")
        resolution = int(options["resolution"]) or 1000

        for res in [250, 500, 1000]:
            datafile = globify(os.path.join(options['dir'],
                                            options["filename"]),
                               {'resolution': self.inv_res[res],
                                'start_time': satscene.time_slot})
            try:
                self.datafiles[res] = check_filename(datafile)
            except IOError:
                self.datafiles[res] = None
                logger.warning("Can't find file for resolution %s with template: %s",
                               str(res), datafile)

        try:
            self.geofile = check_filename(globify(
                options["geofile"], {'start_time': satscene.time_slot}))
        except IOError:
            self.geofile = None
            logger.warning("Can't find geofile with template: %s",
                           options['geofile'])

    resolution = options["resolution"]
    # Integer division keeps the Python 2 behaviour under Python 3.
    cores = options.get("cores", max(multiprocessing.cpu_count() // 4, 1))

    datadict = {
        1000: ['EV_250_Aggr1km_RefSB',
               'EV_500_Aggr1km_RefSB',
               'EV_1KM_RefSB',
               'EV_1KM_Emissive'],
        500: ['EV_250_Aggr500_RefSB',
              'EV_500_RefSB'],
        250: ['EV_250_RefSB']}

    loaded_bands = []

    # process by dataset, reflective and emissive datasets separately
    resolutions = [250, 500, 1000]

    for res in resolutions:
        if res < resolution:
            continue
        logger.debug("Working on resolution %d", res)
        self.filename = self.datafiles[res]

        logger.debug("Using " + str(cores) + " cores for interpolation")

        try:
            self.data = SD(str(self.filename))
        except HDF4Error as err:
            logger.warning("Could not load data from " + str(self.filename)
                           + ": " + str(err))
            continue

        datasets = datadict[res]
        for dataset in datasets:
            subdata = self.data.select(dataset)
            band_names = subdata.attributes()["band_names"].split(",")
            if len(satscene.channels_to_load & set(band_names)) > 0:
                # get the relative indices of the desired channels
                indices = [i for i, band in enumerate(band_names)
                           if band in satscene.channels_to_load]
                uncertainty = self.data.select(dataset + "_Uncert_Indexes")
                if dataset.endswith('Emissive'):
                    array = calibrate_tb(subdata, uncertainty, indices,
                                         band_names)
                else:
                    array = calibrate_refl(subdata, uncertainty, indices)
                for (i, idx) in enumerate(indices):
                    if band_names[idx] in loaded_bands:
                        continue
                    satscene[band_names[idx]] = array[i]
                    # fix the resolution to match the loaded data.
                    satscene[band_names[idx]].resolution = res
                    loaded_bands.append(band_names[idx])

    # Get the orbit number
    if not satscene.orbit:
        mda = self.data.attributes()["CoreMetadata.0"]
        orbit_idx = mda.index("ORBITNUMBER")
        satscene.orbit = int(mda[orbit_idx + 111:orbit_idx + 116])

    # Get the geolocation
    # if resolution != 1000:
    #     logger.warning("Cannot load geolocation at this resolution (yet).")
    #     return

    for band_name in loaded_bands:
        lon, lat = self.get_lonlat(satscene[band_name].resolution,
                                   satscene.time_slot, cores)
        area = geometry.SwathDefinition(lons=lon, lats=lat)
        satscene[band_name].area = area

    # Trimming out dead sensor lines (detectors) on aqua:
    # (in addition channel 21 is noisy)
    if satscene.satname == "aqua":
        for band in ["6", "27", "36"]:
            if not satscene[band].is_loaded() or satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    # Trimming out dead sensor lines (detectors) on terra:
    # (in addition channels 27, 30, 34, 35, and 36 are noisy)
    if satscene.satname == "terra":
        for band in ["29"]:
            if not satscene[band].is_loaded() or satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    for band_name in loaded_bands:
        band_uid = hashlib.sha1(satscene[band_name].data.mask).hexdigest()
        satscene[band_name].area.area_id = (
            "swath_" + satscene.fullname + "_" +
            str(satscene.time_slot) + "_" +
            str(satscene[band_name].shape) + "_" +
            str(band_uid))
        satscene[band_name].area_id = satscene[band_name].area.area_id
def __call__(self, message):
    urlobj = urlparse(message.data['uri'])

    if 'start_time' in message.data:
        start_time = message.data['start_time']
    else:
        raise InconsistentMessage("No start time in message!")

    if message.data['instruments'] == self.instrument:
        path, fname = os.path.split(urlobj.path)
        LOG.debug("path " + str(path) + " filename = " + str(fname))
        instrument = str(message.data['instruments'])
        LOG.debug("Instrument %r supported!", instrument)
        platform_name = METOPS.get(message.data['satellite'],
                                   message.data['satellite'])
        filepath = os.path.join(path, fname)
    else:
        LOG.debug("Scene is not supported")
        raise SceneNotSupported("platform and instrument: " +
                                str(message.data['platform_name']) + " " +
                                str(message.data['instruments']))

    if 'end_time' in message.data:
        end_time = message.data['end_time']
    else:
        LOG.warning("No end time in message!")
        end_time = start_time + timedelta(seconds=self.passlength_seconds)
        LOG.info("End time set to: %s", str(end_time))

    # Check that the input file really exists:
    if not os.path.exists(filepath):
        raise IOError("File %s does not exist. Don't do anything..." % filepath)

    LOG.info("Sat and Instrument: %s %s", platform_name, instrument)

    # Make sure tle_dirs is always defined, also when self.tle_dirs is
    # already a list (the original only assigned it in the scalar case).
    tle_dirs = self.tle_dirs
    if not isinstance(tle_dirs, list):
        tle_dirs = [tle_dirs]
    tle_files = []
    for tledir in tle_dirs:
        tle_files = tle_files + glob(
            os.path.join(tledir, globify(self.tlefilename)))

    tlep = Parser(self.tlefilename)

    time_thr = timedelta(days=5)
    utcnow = datetime.utcnow()
    valid_tle_file = None
    for tlefile in tle_files:
        fname = os.path.basename(tlefile)
        res = tlep.parse(fname)
        dtobj = res['time']
        delta_t = abs(utcnow - dtobj)
        if delta_t < time_thr:
            time_thr = delta_t
            valid_tle_file = tlefile

    if not valid_tle_file:
        raise NoValidTles("Failed finding a valid tle file!")
    else:
        LOG.debug("Valid TLE file: %s", valid_tle_file)

    if not isinstance(self.areaids, list):
        self.areaids = [self.areaids]

    inside = False
    for areaid in self.areaids:
        area_def = load_area(self.area_def_file, areaid)
        inside = self.granule_inside_area(start_time, end_time,
                                          platform_name, area_def,
                                          valid_tle_file)
        if inside:
            return True

    return False