def test_number_files(self):
    kwargs = self.kwargs.copy()
    kwargs.update({'to_date': datetime.date(2014, 12, 31)})
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: self.files_dekad
    number = dataset.get_number_files()
    # With get_filenames mocked, the reported count should match the mocked list.
    self.assertEqual(number, len(self.files_dekad))
def test_intervals(self):
    kwargs = self.kwargs.copy()
    kwargs.update({'to_date': datetime.date(2018, 12, 31)})
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: self.files_dekad
    intervals = dataset.intervals
    self.assertEqual(len(intervals), 5)
    self.assertEqual(intervals[0].interval_type, INTERVAL_TYPE.PRESENT)
    self.assertEqual(intervals[1].interval_type, INTERVAL_TYPE.PERMANENT_MISSING)
    self.assertEqual(intervals[2].interval_type, INTERVAL_TYPE.PRESENT)
    self.assertEqual(intervals[3].interval_type, INTERVAL_TYPE.MISSING)
    self.assertEqual(intervals[4].interval_type, INTERVAL_TYPE.PRESENT)
def test_normalized_info_hour(self):
    kwargs = self.kwargs.copy()
    kwargs.update({
        'from_date': datetime.datetime(2021, 1, 1),
        'to_date': datetime.datetime(2021, 1, 20),
        'product_code': "era5-hourly-2mt",
        'version': '1.0',
        'sub_product_code': "2mt",
        'mapset': 'CDS-Africa-25km'
    })
    dataset = Dataset(**kwargs)
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 457)
    self.assertEqual(completeness['missingfiles'], 457)
def test_get_filenames_over_a_period(self):
    kwargs = {
        'product_code': "vgt-dmp",
        'version': 'V2.0',
        'sub_product_code': "dmp",
        'mapset': 'SPOTV-Africa-1km'
    }
    dataset = Dataset(**kwargs)
    fromdate = datetime.date(1981, 1, 1)  # '19810101'
    todate = datetime.date(2016, 1, 20)   # '20160120'
    regex = dataset.get_regex_from_range(fromdate, todate)
    filenames = dataset.get_filenames(regex)
    # Assert on the retrieved filenames rather than on a constant;
    # the expected count reflects the data available in the test environment.
    self.assertEqual(len(filenames), 36)
def test_normalized_info_15_minutes(self):
    kwargs = self.kwargs.copy()
    # ES2-596: had to change 2016 -> 2026 since on stations some data are present.
    kwargs.update({
        'from_date': datetime.datetime(2026, 2, 1),
        'to_date': datetime.datetime(2026, 2, 20),
        'product_code': "lsasaf-et",
        'version': 'undefined',
        'sub_product_code': "et",
        'mapset': 'MSG-satellite-3km'
    })
    dataset = Dataset(**kwargs)
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 913)
    self.assertEqual(completeness['missingfiles'], 913)
def test_find_gaps(self):
    # ES2-596: had to change 2020 -> 2032 since on stations some data are present.
    from_date = datetime.date(2032, 1, 1)
    to_date = datetime.date(2032, 12, 31)
    kwargs = {
        'product_code': "modis-pp",
        'version': "v2013.1",
        'sub_product_code': "8daysmax",  # "lst"
        'mapset': 'MODIS-Africa-4km',
        'from_date': from_date,
        'to_date': to_date
    }
    dataset = Dataset(**kwargs)
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 46)
    self.assertEqual(completeness['missingfiles'], 46)
def test_product_vgt_fapar(self):
    kwargs = self.kwargs.copy()
    kwargs.update({
        'from_date': datetime.date(2014, 1, 1),
        'to_date': datetime.date(2014, 12, 21),
        'product_code': "vgt-fapar",
        'sub_product_code': "fapar",
        'version': "V2.0",
        'mapset': 'SPOTV-Africa-1km'
    })
    files = [
        "20140621_vgt-fapar_fapar_SPOTV-Africa-1km_V2.0.tif",
    ]
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: files
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 36)
    self.assertEqual(completeness['missingfiles'], 35)
def test_class_no_product(self):
    kwargs = {
        'product_code': "---prod---",
        'sub_product_code': "---subprod---",
        'mapset': '---mapset---'
    }
    # The Dataset class was modified not to raise an exception; it now returns
    # an object with empty fields instead.
    # self.assertRaisesRegexp(NoProductFound, "(?i).*found.*product.*", Dataset, **kwargs)
    dataset = Dataset(**kwargs)
    self.assertEqual(dataset.frequency_id, 'undefined')
def test_normalized_info(self):
    kwargs = self.kwargs.copy()
    kwargs.update({'to_date': datetime.date(2018, 2, 1)})
    files_dekad = [
        "20180101_fewsnet-rfe_10d_FEWSNET-Africa-8km_2.0.tif",
        "20180101_fewsnet-rfe_10d_FEWSNET-Africa-8km_2.0.xml",
        "20180111_fewsnet-rfe_10d_FEWSNET-Africa-8km_2.0.tif",
        # Here 1 hole: the 20180121 dekad is missing
        "20180201_fewsnet-rfe_10d_FEWSNET-Africa-8km_2.0.tif",
    ]
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: files_dekad
    segments = dataset.get_dataset_normalized_info()['intervals']
    total = 0
    for segment in segments:
        total += segment['intervalpercentage']
    self.assertEqual(int(total), 100)
    self.assertEqual(segments[0]['intervalpercentage'], 50.0)
    self.assertEqual(segments[1]['intervalpercentage'], 25.0)
    self.assertEqual(segments[2]['intervalpercentage'], 25.0)
def create_archive_eumetcast(product, version, subproducts, mapset, start_date=None, end_date=None,
                             target_dir=None, overwrite=False, tgz=False):
    # Rename and copy to target_dir (/data/archives by default) the eStation2 files

    # Check target_dir
    if target_dir is None:
        target_dir = es_constants.es2globals['archive_dir']

    # Normalize subproducts to a list
    if not isinstance(subproducts, list):
        subproducts_list = [subproducts]
    else:
        subproducts_list = subproducts

    # Loop over subproducts
    for subproduct in subproducts_list:
        # Identify all existing files.
        # Check if dates have to be disregarded (i.e. get all files)
        if start_date == -1 or end_date == -1:
            my_dataset = Dataset(product, subproduct, mapset, version=version)
            filenames = my_dataset.get_filenames()
        else:
            my_dataset = Dataset(product, subproduct, mapset, version=version,
                                 from_date=start_date, to_date=end_date)
            filenames = my_dataset.get_filenames_range()
        filenames.sort()

        for filename in filenames:
            # Derive the MESA_JRC_ archive filename
            archive_name = functions.convert_name_to_eumetcast(filename, tgz=tgz)
            # Skip files already in target_dir unless overwrite is requested
            if not os.path.isfile(target_dir + os.path.sep + archive_name) or overwrite:
                target_file = target_dir + os.path.sep + archive_name
                if not tgz:
                    # Copy only to target_dir
                    shutil.copyfile(filename, target_file)
                else:
                    command = 'tar -cvzf ' + target_file + ' -C ' + \
                              os.path.dirname(filename) + ' ' + os.path.basename(filename)
                    status = os.system(command)

        logger.info("Files copied for product [%s]/version [%s]/subproduct [%s]/mapset [%s]"
                    % (product, version, subproduct, mapset))
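# A minimal usage sketch for create_archive_eumetcast. The product/version/mapset values
# are taken from the tests in this file; the date range is illustrative only, not from
# the source:
#
#   create_archive_eumetcast('vgt-fapar', 'V2.0', ['fapar'], 'SPOTV-Africa-1km',
#                            start_date=datetime.date(2014, 1, 1),
#                            end_date=datetime.date(2014, 12, 21),
#                            tgz=True)
#
# Passing start_date=-1 or end_date=-1 disregards the date range and archives all
# existing files for the subproduct.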
def test_product_only_month_day(self):
    kwargs = self.kwargs.copy()
    kwargs.update({
        # Note: it goes wrong with the definition of the dates ...
        # 'from_date': datetime.date(2014, 1, 1),
        # 'to_date': datetime.date(2014, 12, 1),
        'product_code': "fewsnet-rfe",
        'sub_product_code': "1monmax",
        'mapset': 'FEWSNET-Africa-8km'
    })
    files = [
        "0101_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0201_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0301_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0401_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0501_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0601_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0701_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0801_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0901_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1001_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1101_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1201_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
    ]
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: files
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 12)
    self.assertEqual(completeness['missingfiles'], 0)
    self.assertEqual(completeness['intervals'][0]['todate'], '12-01')
    self.assertEqual(completeness['intervals'][0]['fromdate'], '01-01')
    self.assertEqual(completeness['firstdate'], '01-01')
    self.assertEqual(completeness['lastdate'], '12-01')
    current_date = datetime.date(2014, 1, 1)
    last_date = datetime.date(2015, 1, 1)
    for i in range(12):
        current_date = dataset.next_date(current_date)
    self.assertEqual(last_date, current_date)
def test_get_dates(self):
    kwargs = {
        'product_code': "vgt-ndvi",
        'version': 'sv2-pv2.2',
        'sub_product_code': "ndvi-linearx2",
        'mapset': 'SPOTV-Africa-1km'
    }
    dataset = Dataset(**kwargs)
    if dataset._db_product.frequency_id == 'singlefile':
        dates = 'nodate'
    else:
        dates = dataset._frequency.get_dates(datetime.date(2018, 1, 1),
                                             datetime.date(2018, 12, 21))
    # Dates must be returned in strictly ascending order
    last = None
    for date in dates:
        if last:
            self.assertTrue(last < date)
        last = date
    self.assertEqual(len(dates), 36)
def test_with_xml(self):
    kwargs = self.kwargs.copy()
    kwargs.update({
        'from_date': datetime.date(2018, 1, 1),
        'to_date': datetime.date(2018, 12, 31)
    })
    dataset = Dataset(**kwargs)
    files_dekad = sorted(self.files_dekad[:])
    # Add an .xml file at the beginning and at the end
    files_dekad = [files_dekad[0][:-3] + 'xml'] + files_dekad + [files_dekad[-1][:-3] + 'xml']
    dataset.get_filenames = lambda **my_kwargs: files_dekad
    dataset._clean_cache()
    # Note: the .missing (permanent missing) files are not counted as 'missing'
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['missingfiles'], 3)
def test_product_no_dates(self):
    kwargs = {
        'product_code': "fewsnet-rfe",
        'version': "2.0",
        'sub_product_code': "1monmax",
        'mapset': 'FEWSNET-Africa-8km',
    }
    files = [
        "0101_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0201_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0301_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0401_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0501_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0601_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0701_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0801_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "0901_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1001_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1101_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
        "1201_fewsnet-rfe_1monmax_FEWSNET-Africa-8km_2.0.tif",
    ]
    dataset = Dataset(**kwargs)
    dataset.get_filenames = lambda **my_kwargs: files
    dataset._clean_cache()
    completeness = dataset.get_dataset_normalized_info()
    self.assertEqual(completeness['totfiles'], 12)
    self.assertEqual(completeness['missingfiles'], 0)
    self.assertEqual(completeness['intervals'][0]['todate'], '12-01')
    self.assertEqual(completeness['intervals'][0]['fromdate'], '01-01')
    self.assertEqual(completeness['firstdate'], '01-01')
    self.assertEqual(completeness['lastdate'], '12-01')
    current_date = datetime.date(2014, 1, 1)
    last_date = datetime.date(2015, 1, 1)
    for i in range(12):
        current_date = dataset.next_date(current_date)
    self.assertEqual(last_date, current_date)
def convert_driver(output_dir=None):
    # Definitions
    input_dir = es_constants.es2globals['processing_dir']

    # Instance metadata object
    sds_meta = metadata.SdsMetadata()

    # Check base output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['spirits_output_dir']
    functions.check_output_dir(output_dir)

    # Read the spirits table and convert all existing files
    spirits_list = querydb.get_spirits()
    for entry in spirits_list:
        use_range = False
        product_code = entry['productcode']
        sub_product_code = entry['subproductcode']
        version = entry['version']
        out_data_type = entry['out_data_type']
        out_scale_factor = entry['out_scale_factor']
        out_offset = entry['out_offset']
        out_nodata = entry['data_ignore_value']

        # Prepare the naming dict
        naming_spirits = {'sensor_filename_prefix': entry['sensor_filename_prefix'],
                          'frequency_filename_prefix': entry['frequency_filename_prefix'],
                          'pa_filename_prefix': entry['product_anomaly_filename_prefix']}

        metadata_spirits = {'values': entry['prod_values'],
                            'flags': entry['flags'],
                            'data_ignore_value': entry['data_ignore_value'],
                            'days': entry['days'],
                            'sensor_type': entry['sensor_type'],
                            'comment': entry['comment'],
                            'date': ''}

        # Manage mapsets: if defined, use it; else read the existing ones from the filesystem
        my_mapsets = []
        if entry['mapsetcode']:
            my_mapsets.append(entry['mapsetcode'])
        else:
            prod = Product(product_code, version=version)
            for mp in prod.mapsets:
                my_mapsets.append(mp)

        # Manage dates
        if entry['start_date']:
            from_date = datetime.datetime.strptime(str(entry['start_date']), '%Y%m%d').date()
            use_range = True
        else:
            from_date = None
        if entry['end_date']:
            to_date = datetime.datetime.strptime(str(entry['end_date']), '%Y%m%d').date()
            use_range = True
        else:
            to_date = None

        for my_mapset in my_mapsets:
            # Manage output dirs
            out_sub_dir = my_mapset + os.path.sep + \
                          product_code + os.path.sep + \
                          entry['product_anomaly_filename_prefix'] + \
                          entry['frequency_filename_prefix'] + \
                          str(entry['days']) + os.path.sep

            logger.info('Working on [%s]/[%s]/[%s]/[%s]'
                        % (product_code, version, my_mapset, sub_product_code))
            ds = Dataset(product_code, sub_product_code, my_mapset, version=version,
                         from_date=from_date, to_date=to_date)
            product_info = ds._db_product
            in_scale_factor = product_info.scale_factor
            in_offset = product_info.scale_offset
            in_nodata = product_info.nodata
            mask_min = product_info.mask_min
            mask_max = product_info.mask_max
            productcode = product_info.productcode
            subproductcode = product_info.subproductcode
            if productcode == 'vgt-ndvi' and subproductcode == 'ndv':
                mask_min = 0

            if use_range:
                available_files = ds.get_filenames_range()
            else:
                available_files = ds.get_filenames()

            # Convert input products
            if len(available_files) > 0:
                for input_file in available_files:
                    # Check it is a .tif file (not .missing)
                    path, ext = os.path.splitext(input_file)
                    if ext == '.tif':
                        functions.check_output_dir(output_dir + out_sub_dir)
                        str_date = functions.get_date_from_path_filename(os.path.basename(input_file))

                        # Check input file exists
                        if os.path.isfile(input_file):
                            if len(naming_spirits['frequency_filename_prefix']) > 1:
                                my_str_date = naming_spirits['frequency_filename_prefix'][1:5] + str_date
                                metadata_spirits['date'] = my_str_date
                            else:
                                metadata_spirits['date'] = str_date

                            # Read metadata from the file and differentiate chirps prelim and final data
                            sds_meta.read_from_file(input_file)
                            input_file_name = sds_meta.get_item('eStation2_input_files')
                            if productcode == 'chirps-dekad' and input_file_name.endswith(".tif;"):
                                metadata_spirits['comment'] = 'Prelim ' + entry['comment']
                            elif productcode == 'chirps-dekad' and input_file_name.endswith(".gz;"):
                                metadata_spirits['comment'] = 'Final ' + entry['comment']

                            # Check output file exists
                            # convert_geotiff_file(input_file, output_dir+out_sub_dir, str_date,
                            #                      naming_spirits, metadata_spirits)
                            convert_geotiff_datatype_rescaled(input_file, output_dir + out_sub_dir,
                                                              str_date, naming_spirits, metadata_spirits,
                                                              in_scale_factor, in_offset, in_nodata,
                                                              out_scale_factor, out_offset, out_nodata,
                                                              out_data_type, mask_min, mask_max)
                        else:
                            logger.debug('Input file does not exist: %s' % input_file)
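# Minimal usage sketch for convert_driver: it takes no mandatory arguments, reads the
# spirits table via querydb.get_spirits(), and converts every matching file. The custom
# path below is hypothetical; by default output goes to es2globals['spirits_output_dir'].
#
#   convert_driver()
#   convert_driver(output_dir='/my/spirits/out/')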
def test_class(self):
    self.assertIsInstance(Dataset(**self.kwargs), Dataset)