def test_aqua_pds_label(self):
    """Check the GA label produced by the raw driver for an AQUA MODIS PDS dataset."""
    acquisition = ptype.AcquisitionMetadata(
        aos=datetime.datetime(2014, 8, 7, 3, 16, 28, 750910),
        los=datetime.datetime(2014, 8, 7, 3, 16, 30, 228023),
        platform_orbit=65208,
    )
    lineage = ptype.LineageMetadata(
        machine=ptype.MachineMetadata(),
        source_datasets={},
    )
    dataset = ptype.DatasetMetadata(
        id_=UUID('d083fa45-1edd-11e5-8f9e-1040f381a756'),
        product_type='satellite_telemetry_data',
        creation_dt=datetime.datetime(2015, 6, 11, 5, 51, 50),
        platform=ptype.PlatformMetadata(code='AQUA'),
        instrument=ptype.InstrumentMetadata(name='MODIS'),
        format_=ptype.FormatMetadata(name='PDS'),
        rms_string='S1A1C1D1R1',
        acquisition=acquisition,
        image=ptype.ImageMetadata(day_percentage_estimate=100.0),
        lineage=lineage,
    )

    # The label embeds the orbit, the rms string and the second-truncated aos/los times.
    expected_label = "AQUA_MODIS_STD-PDS_P00_65208.S1A1C1D1R1_0_0_20140807T031628Z20140807T031630"
    actual_label = drivers.RawDriver().get_ga_label(dataset)

    self.assertEqual(expected_label, actual_label)
def test_extract_md(_run_pdsinfo_exe): input_dir = write_files({'P1540064AAAAAAAAAAAAAA14219032341001.PDS': ''}) # def run_pdsinfo(file_): # assert file_ == input_dir # # return _run_pdsinfo_exe.return_value = b"""APID 64: count 610338 invalid 0 missing 6255 first packet: 2014/08/07 03:16:28.750910 last packet: 2014/08/07 03:21:28.604695 missing seconds: 2 day packets: 545223/64311 night packets: 0/0 engineering packets: 804/0 """ md = pds.extract_md(ptype.DatasetMetadata(), input_dir) expected = ptype.DatasetMetadata( platform=ptype.PlatformMetadata(code='AQUA'), instrument=ptype.InstrumentMetadata(name='MODIS'), format_=ptype.FormatMetadata(name='PDS'), acquisition=ptype.AcquisitionMetadata( aos=datetime.datetime(2014, 8, 7, 3, 16, 28, 750910), los=datetime.datetime(2014, 8, 7, 3, 21, 28, 604695) ), image=ptype.ImageMetadata( day_percentage_estimate=100.0 ) ) md.id_, expected.id_ = None, None assert_same(expected, md)
def borrow_single_sourced_fields(dataset, source_dataset):
    """
    Fill common metadata on a dataset from its one and only source dataset.

    Only meaningful for non-composite datasets (a single source).

    Platform and instrument are always taken from the source. Extent and
    acquisition are merged in via their ``steal_fields_from`` methods. The
    satellite reference points are copied only when the dataset has no
    start point of its own.

    :type dataset: ptype.DatasetMetadata
    :type source_dataset: ptype.DatasetMetadata
    :rtype: ptype.DatasetMetadata
    """
    if not dataset.image:
        dataset.image = ptype.ImageMetadata(bands={})

    if not dataset.extent:
        dataset.extent = ptype.ExtentMetadata()
    dataset.extent.steal_fields_from(source_dataset.extent)

    # These two are overwritten unconditionally.
    for section in ('platform', 'instrument'):
        setattr(dataset, section, getattr(source_dataset, section))

    if not dataset.acquisition:
        dataset.acquisition = ptype.AcquisitionMetadata()
    dataset.acquisition.steal_fields_from(source_dataset.acquisition)

    target_image = dataset.image
    if not target_image.satellite_ref_point_start:
        source_image = source_dataset.image
        target_image.satellite_ref_point_start = source_image.satellite_ref_point_start
        target_image.satellite_ref_point_end = source_image.satellite_ref_point_end

    return dataset
def _build_ls8_raw():
    """
    Construct the raw LANDSAT-8 telemetry dataset metadata.

    The runtime id is reset first, before any metadata objects are built.
    """
    _reset_runtime_id()

    groundstation = ptype.GroundstationMetadata(
        code='ASA',
        antenna_coord=ptype.Coord(lat=-23.759, lon=133.8824, height=579.312),
    )
    acquisition = ptype.AcquisitionMetadata(
        aos=dateutil.parser.parse('2014-10-12T00:52:52'),
        los=dateutil.parser.parse('2014-10-12T00:58:37'),
        groundstation=groundstation,
        heading='D',
        platform_orbit=8846,
    )
    # Path 101, rows 70 through 83.
    image = ptype.ImageMetadata(
        satellite_ref_point_start=ptype.Point(101, 70),
        satellite_ref_point_end=ptype.Point(101, 83),
    )
    lineage = ptype.LineageMetadata(machine=ptype.MachineMetadata())

    return ptype.DatasetMetadata(
        id_=uuid.UUID('1c76a8ca-51ae-11e4-8644-0050568d59ac'),
        creation_dt=dateutil.parser.parse("2014-10-12 04:18:01"),
        size_bytes=5680940 * 1024,
        ga_label='MD_P00_LC81010700832014285LGN00_101_070-083_20141012T032336Z20141012T032910_1',
        product_type='satellite_telemetry_data',
        usgs=ptype.UsgsMetadata(interval_id='LC81010782014285LGN00'),
        platform=ptype.PlatformMetadata(code='LANDSAT-8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        format_=ptype.FormatMetadata(name='MD'),
        acquisition=acquisition,
        extent=None,
        grid_spatial=None,
        browse=None,
        image=image,
        lineage=lineage,
    )
def _extract_mdf_id_fields(base_md, mdf_usgs_id): """ V I N ppp RRR rrr YYYY ddd GSI vv :type base_md: ptype.DatasetMetadata :type mdf_usgs_id: str :rtype: ptype.DatasetMetadata """ m = re.search((r"(?P<vehicle>L)" r"(?P<instrument>[OTC])" r"(?P<vehicle_number>\d)" r"(?P<path>\d{3})" r"(?P<row_start>\d{3})" r"(?P<row_end>\d{3})" r"(?P<acq_date>\d{7})" r"(?P<gsi>\w{3})" r"(?P<version>\d{2})"), mdf_usgs_id) fields = m.groupdict() if not base_md.usgs: base_md.usgs = ptype.UsgsMetadata() base_md.usgs.interval_id = mdf_usgs_id if not base_md.platform: base_md.platform = ptype.PlatformMetadata() base_md.platform.code = "LANDSAT_" + fields["vehicle_number"] if not base_md.instrument: base_md.instrument = ptype.InstrumentMetadata() base_md.instrument.name = LS8_SENSORS[fields["instrument"]] path = int(fields["path"]) if not base_md.image: base_md.image = ptype.ImageMetadata() base_md.image.satellite_ref_point_start = ptype.Point( path, int(fields["row_start"])) base_md.image.satellite_ref_point_end = ptype.Point( path, int(fields["row_end"])) # base_md.version = int(fields["version"]) or base_md.version # Probably less than useful without time. # if not base_md.extent: # base_md.extent = ptype.ExtentMetadata() # # base_md.extent.center_dt = base_md.extent.center_dt or datetime.strptime(fields["acq_date"], "%Y%j").date() if not base_md.acquisition: base_md.acquisition = ptype.AcquisitionMetadata() base_md.acquisition.groundstation = ptype.GroundstationMetadata( code=fields["gsi"]) return base_md
def _extract_rcc_filename_fields(base_md, filename): """ Landsat 5 and 7 RCC format specifications: http://landsat.usgs.gov/documents/LS_DFCB_01.pdf http://landsat.usgs.gov/documents/LS_DFCB_06.pdf :type base_md: ptype.DatasetMetadata :type filename: str :rtype: ptype.DatasetMetadata """ m = re.search( r'(?P<satsens>\w{4})(?P<date>\d{13})(?P<gsi>[^\d]+).*?(?P<version>\d\d)?\.data', filename) fields = m.groupdict() if not base_md.platform or not base_md.platform.code: # TODO: Do we have a cleaner way to do this? A list of mappings? satsens_ = fields['satsens'] vehicle = satsens_[0] vehicle_num = satsens_[1] instrument_short = satsens_[2] smode_short = satsens_[3] platform_code, instrument_name, operation_mode = _expand_platform_info( vehicle, vehicle_num, instrument_short, smode_short) if not base_md.platform: base_md.platform = ptype.PlatformMetadata() if not base_md.instrument: base_md.instrument = ptype.InstrumentMetadata() base_md.platform.code = platform_code base_md.instrument.name = instrument_name base_md.instrument.operation_mode = operation_mode if not base_md.acquisition: base_md.acquisition = ptype.AcquisitionMetadata() if not base_md.acquisition.aos: base_md.acquisition.aos = datetime.datetime.strptime( fields['date'], "%Y%j%H%M%S") base_md.gsi = fields['gsi'] if not base_md.acquisition.groundstation: base_md.acquisition.groundstation = ptype.GroundstationMetadata( code=fields['gsi']) if not base_md.usgs: base_md.usgs = ptype.UsgsMetadata() base_md.usgs.interval_id = _usgs_id_from_filename(filename) # RCC is raw: P00 base_md.ga_level = 'P00' version = int(fields['version']) if fields.get('version') else None base_md.format_ = ptype.FormatMetadata(name='RCC', version=version) return base_md
def _extract_mdf_file_fields(base_md, mdf_file_names): """ From http://landsat.usgs.gov/documents/LDCM-DFCB-001.pdf... RRR.ZZZ.YYYYDOYHHMMSS.sss.XXX where RRR = the root file directory number on the SSR the data was stored (001-511) ZZZ = the sequence (or sub-file) of the file within the root file (000-127) YYYY = the year the data was received (2012-2999) DOY = the day of the year the data was received (001-366) HH = the hour of the day the data was received (00-23) MM = the minute of the hour the data was received (00-59) SS = the second of the minute the data was received (00-60) sss = the fraction of the second the data was received (000-999) XXX = the ground station identifier (e.g. ASA) for example 383.000.2013137232105971.ASA :type base_md: ptype.DatasetMetadata :type mdf_file_names: list of str :rtype: ptype.DatasetMetadata """ times = [] for f_name in mdf_file_names: m = re.search((r"(?P<root_file_number>\d{3})" r"\." r"(?P<root_file_sequence>\d{3})" r"\." r"(?P<date_time>\d{16})" r"\." r"(?P<gsi>\w{3})"), f_name) fields = m.groupdict() t = datetime.datetime.strptime(fields["date_time"], "%Y%j%H%M%S%f") times.append(t) # TODO: This calculation comes from the old jobmanger code. Is it desirable? start = min(times) - datetime.timedelta( seconds=60 ) # 60 seconds before first segment's acquisition complete time stop = max(times) # the last segment's acquisition complete time if not base_md.acquisition: base_md.acquisition = ptype.AcquisitionMetadata() base_md.acquisition.aos = base_md.acquisition.aos or start base_md.acquisition.los = base_md.acquisition.los or stop return base_md
def test_eods_fill_metadata(self):
    """
    Check the metadata the EODS driver fills in for an NBAR dataset folder.

    Builds a folder containing a ``metadata.xml`` and one (empty) band file
    under ``scene01``, runs ``EODSDriver.fill_metadata`` on it and compares
    the result with the expected dataset.
    """
    dataset_folder = "LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012"
    bandname = '10'
    bandfile = dataset_folder + '_B' + bandname + '.tif'
    input_folder = write_files({
        dataset_folder: {
            # NOTE(review): the XML appears on a single line here; confirm
            # the layout against the real file.
            'metadata.xml': """<EODS_DATASET> <ACQUISITIONINFORMATION> <EVENT> <AOS>20141012T03:23:36</AOS> <LOS>20141012T03:29:10</LOS> </EVENT> </ACQUISITIONINFORMATION> <EXEXTENT> <TEMPORALEXTENTFROM>20141012 00:55:54</TEMPORALEXTENTFROM> <TEMPORALEXTENTTO>20141012 00:56:18</TEMPORALEXTENTTO> </EXEXTENT> </EODS_DATASET>""",
            'scene01': {
                bandfile: ''
            }
        }
    })
    # Label, level, product type, path/row and platform/instrument mirror the
    # parts of the folder name; aos/los and the extent times mirror the XML.
    # center_dt is the midpoint of the from/to extent times.
    # The 'LGS' groundstation is presumably looked up from the '015' code in
    # the folder name -- TODO confirm against the driver.
    expected = ptype.DatasetMetadata(
        id_=_EXPECTED_NBAR.id_,
        ga_label=dataset_folder,
        ga_level='P54',
        product_type='EODS_NBAR',
        platform=ptype.PlatformMetadata(code='LANDSAT_8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        format_=ptype.FormatMetadata(name='GeoTiff'),
        acquisition=ptype.AcquisitionMetadata(aos=datetime.datetime(2014, 10, 12, 3, 23, 36),
                                              los=datetime.datetime(2014, 10, 12, 3, 29, 10),
                                              groundstation=ptype.GroundstationMetadata(code='LGS')),
        extent=ptype.ExtentMetadata(
            center_dt=datetime.datetime(2014, 10, 12, 0, 56, 6),
            from_dt=datetime.datetime(2014, 10, 12, 0, 55, 54),
            to_dt=datetime.datetime(2014, 10, 12, 0, 56, 18)
        ),
        image=ptype.ImageMetadata(satellite_ref_point_start=ptype.Point(x=101, y=78),
                                  satellite_ref_point_end=ptype.Point(x=101, y=78),
                                  bands={bandname: ptype.BandMetadata(number=bandname,
                                                                      path=Path(input_folder, dataset_folder,
                                                                                'scene01', bandfile))})
    )
    # The input dataset carries only the id, so ids match in the comparison.
    dataset = ptype.DatasetMetadata(
        id_=_EXPECTED_NBAR.id_
    )
    received = drivers.EODSDriver().fill_metadata(dataset, input_folder.joinpath(dataset_folder))
    self.assert_same(expected, received)
def _populate_from_mtl_dict(md, mtl_, folder):
    """
    Fill a dataset's metadata from the contents of a parsed MTL file.

    :param mtl_: Parsed mtl file
    :param folder: Folder containing imagery (and mtl). For fixing relative paths in the MTL.
        (Not referenced within this function body.)
    :type md: eodatasets.type.DatasetMetadata
    :type mtl_: dict of (str, obj)
    :rtype: eodatasets.type.DatasetMetadata
    """

    def file_info(key):
        # Value from the METADATA_FILE_INFO group.
        return _get(mtl_, 'METADATA_FILE_INFO', key)

    def product_info(key):
        # Value from the PRODUCT_METADATA group.
        return _get(mtl_, 'PRODUCT_METADATA', key)

    if not md.usgs:
        md.usgs = ptype.UsgsMetadata()
    md.usgs.scene_id = file_info('landsat_scene_id')
    md.creation_dt = file_info('file_date')

    # TODO: elsewhere we've used 'GAORTHO01' etc. Here it's 'L1T' etc.
    md.product_level = _translate_to_old_usgs_code(product_info('data_type'))

    # md.size_bytes=None,
    spacecraft = product_info('spacecraft_id')
    if not md.platform:
        md.platform = ptype.PlatformMetadata()
    md.platform.code = spacecraft

    md.format_ = ptype.FormatMetadata(name=product_info('output_format'))

    product_section = _get(mtl_, 'PRODUCT_METADATA')
    sensor = product_info('sensor_id')
    if not md.instrument:
        md.instrument = ptype.InstrumentMetadata()
    md.instrument.name = sensor
    # md.instrument.type_
    md.instrument.operation_mode = _get(product_section, 'sensor_mode')

    if not md.acquisition:
        md.acquisition = ptype.AcquisitionMetadata()
    md.acquisition.groundstation = ptype.GroundstationMetadata(
        code=file_info("station_id")
    )
    # md.acquisition.groundstation.antenna_coord
    # aos, los, groundstation, heading, platform_orbit

    # The extent is read from the product section; the rest take the whole MTL.
    _populate_extent(md, product_section)
    _populate_grid_spatial(md, mtl_)
    _populate_image(md, mtl_)
    _populate_lineage(md, mtl_)

    return md
def test_expand_metadata(self):
    """
    Check that ``expand_common_metadata`` fills in band details and
    standardises the groundstation of a minimal LANDSAT_8 dataset.
    """
    id_ = uuid.uuid1()
    create_dt = datetime.datetime.utcnow()
    band_file = '/tmp/fake-folder/LC81010782014285LGN00_B4.TIF'

    d = ptype.DatasetMetadata(
        id_=id_,
        creation_dt=create_dt,
        platform=ptype.PlatformMetadata(code='LANDSAT_8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        image=ptype.ImageMetadata(
            bands={
                '4': BandMetadata(path=PosixPath(band_file), number='4'),
            }
        ),
        acquisition=ptype.AcquisitionMetadata(
            groundstation=ptype.GroundstationMetadata('ALSP')
        ),
    )
    expanded = metadata.expand_common_metadata(d)

    expected_band = BandMetadata(
        path=PosixPath(band_file),
        type_='reflective',
        label='Visible Red',
        number='4',
        cell_size=25.0,
    )
    # Should have translated groundstation 'ALSP' to standard GSI form 'ASA'
    expected_groundstation = ptype.GroundstationMetadata(
        code='ASA',
        label='Alice Springs',
        eods_domain_code='002',
    )
    expected = ptype.DatasetMetadata(
        id_=id_,
        creation_dt=create_dt,
        platform=ptype.PlatformMetadata(code='LANDSAT_8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        image=ptype.ImageMetadata(bands={'4': expected_band}),
        acquisition=ptype.AcquisitionMetadata(groundstation=expected_groundstation),
    )

    assert_same(expanded, expected)
def extract_md(base_md, directory_path):
    """
    Extract metadata from a directory of PDS files.

    If no PDS file is found the metadata is returned untouched. Otherwise
    the format, platform and instrument are overwritten, the acquisition
    start/stop times are taken from ``get_pdsinfo``, and the day percentage
    is estimated from the day and night packet counts.

    :type base_md: ptype.DatasetMetadata
    :type directory_path: pathlib.Path
    :rtype: ptype.DatasetMetadata
    """
    pds_file = find_pds_file(directory_path)
    if not pds_file:
        _LOG.debug('No PDS files found')
        return base_md

    # Extract PDS info.
    _LOG.info('Using PDS file %r', pds_file)

    base_md.format_ = ptype.FormatMetadata(name='PDS')
    base_md.platform = ptype.PlatformMetadata(code=_pds_satellite(pds_file.stem))
    base_md.instrument = ptype.InstrumentMetadata(name='MODIS')

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()

    # NOTE(review): this filename-derived start time is overwritten just
    # below by the pdsinfo value; it only survives if get_pdsinfo raises.
    base_md.acquisition.aos = _pds_date(pds_file.stem)

    start, end, day, night = get_pdsinfo(pds_file)
    base_md.acquisition.aos = start
    base_md.acquisition.los = end

    if not base_md.image:
        base_md.image = ptype.ImageMetadata()

    total_packets = day + night
    if total_packets:
        base_md.image.day_percentage_estimate = (float(day) / total_packets) * 100.0
    else:
        # Without packets there is nothing to estimate from (and dividing
        # would raise ZeroDivisionError), so leave the field as it was.
        _LOG.debug('No day or night packets reported; day percentage not estimated')

    return base_md
def _parse_passinfo_md(base_md, lines):
    """
    Populate metadata from the lines of a passinfo file.

    Each line is split on whitespace: the first token is the (case
    insensitive) field name, the second its value, which is upper-cased.
    Lines with fewer than two tokens are ignored.

    Platform code and instrument name are always overwritten. Orbit and
    start/stop times are only set when present; an existing groundstation
    is kept.

    :type base_md: ptype.DatasetMetadata
    :type lines: list of str
    :rtype: ptype.DatasetMetadata
    :raises ValueError: if an ORBIT value is present but not an integer.
    """
    fields = {}
    for line in lines:
        tokens = line.split()
        if len(tokens) >= 2:
            key, value = tokens[:2]
            fields[key.upper()] = value.upper()

    if not base_md.platform:
        base_md.platform = ptype.PlatformMetadata()
    base_md.platform.code = standardise_satellite(fields.get('SATELLITE'))

    if not base_md.instrument:
        base_md.instrument = ptype.InstrumentMetadata()
    base_md.instrument.name = fields.get('SENSOR')

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()

    # ORBIT is optional like the other fields: int(None) would raise TypeError.
    orbit = fields.get('ORBIT')
    if orbit is not None:
        base_md.acquisition.platform_orbit = int(orbit)

    start_dt = _parse_common_date(fields.get('START'))
    if start_dt:
        base_md.acquisition.aos = start_dt

    stop_dt = _parse_common_date(fields.get('STOP'))
    if stop_dt:
        base_md.acquisition.los = stop_dt

    if not base_md.acquisition.groundstation:
        gsi = station_to_gsi(fields.get('STATION'))
        if gsi is not None:
            base_md.acquisition.groundstation = ptype.GroundstationMetadata(code=gsi)

    return base_md
def extract_md(base_md, directory_path):
    """
    Extract metadata from the name of an NPP HDF5 file, if the directory has one.

    The NPP directory should contain an ".h5" (HDF5) data file whose name
    carries the acquisition details. The filename will be of the form:

        NPP: RNSCA-RVIRS_npp_d20130422_t0357358_e0410333_b07686_c20130422041225898000_nfts_drl.h5

    where:
        d: start date (YYYYMMDD)
        t: start time (hhmmss.s)
        e: end time (hhmmss.s)
        b: orbit number
        c: stop date/time (YYYYMMDDhhmmss.ssssss)

    Only the first file found is used. When none is found the metadata is
    returned untouched.

    :type base_md: ptype.DatasetMetadata
    :type directory_path: pathlib.Path
    :rtype: ptype.DatasetMetadata
    """
    hdf5_files = find_hdf5_files(directory_path)
    if not hdf5_files:
        _LOG.debug("No NPP HDF5 file found")
        return base_md

    base_md = _extract_hdf5_filename_fields(base_md, hdf5_files[0].name)

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()

    # HDF5 is raw: P00
    base_md.ga_level = 'P00'
    base_md.format_ = ptype.FormatMetadata(name='HDF5')

    return base_md
def _extract_hdf5_filename_fields(base_md, filename): """ NPP VIRS format specifications: ??? :type base_md: ptype.DatasetMetadata :type filename: str :rtype: ptype.DatasetMetadata """ m = re.search(r'(?P<satsens>.{15})' r'_d(?P<date>\d{8})' r'_t(?P<startTime>\d{7})' r'_e(?P<endTime>\d{7})' r'_b(?P<orbit>\d{5})' r'_c(?P<enddatetime>\d{20})' r'_nfts_drl.h5', filename) fields = m.groupdict() satellite, sensor = _split_sat_sen(fields['satsens']) if satellite: base_md.platform = ptype.PlatformMetadata(code=satellite) if sensor: base_md.instrument = ptype.InstrumentMetadata(name=sensor) if not base_md.acquisition: base_md.acquisition = ptype.AcquisitionMetadata() start_time = fields['date'] + fields['startTime'] base_md.acquisition.aos = datetime.datetime.strptime(start_time[:-1], "%Y%m%d%H%M%S") base_md.acquisition.los = datetime.datetime.strptime(fields['enddatetime'][:14], "%Y%m%d%H%M%S") if int(fields['orbit']) > 0: base_md.acquisition.platform_orbit = int(fields['orbit']) return base_md
def extract_md(md, directory):
    """
    Extract metadata from typical ads3 directory names.

    Folder names contain orbit numbers.

    Eg:
        LANDSAT-7.76773.S3A1C2D2R2
        AQUA.60724.S1A1C2D2R2
        TERRA.73100.S1A2C2D4R4
        LANDSAT-8.3108
        NPP.VIIRS.10014.ALICE

    Each value is looked for in the directory's own name first, then in its
    parent's name. An existing orbit or groundstation on ``md`` is kept.

    :type md: ptype.DatasetMetadata
    :type directory: pathlib.Path
    :rtype: ptype.DatasetMetadata
    """
    directory = directory.absolute()
    own_name = directory.name
    parent_name = directory.parent.name

    def from_names(extractor):
        # Prefer the directory's own name, falling back to the parent's.
        return extractor(own_name) or extractor(parent_name)

    orbit = from_names(_extract_orbit)
    rms_string = from_names(_extract_rms_string)
    gsi = from_names(_extract_gsi)

    if rms_string:
        md.rms_string = rms_string

    if not md.acquisition:
        md.acquisition = ptype.AcquisitionMetadata()

    acquisition = md.acquisition
    if not acquisition.platform_orbit:
        acquisition.platform_orbit = orbit

    if gsi and not acquisition.groundstation:
        acquisition.groundstation = ptype.GroundstationMetadata(code=gsi)

    return md
def test_parse_variations(self):
    """
    Check RCC filename parsing across a range of Landsat 5 and 7 file names.

    Every example is expected to be level 'P00', format 'RCC' and
    groundstation 'ASA'; the rest varies per row of the table below.
    """
    # (file name, interval id, platform, instrument, operation mode,
    #  format version or None when the name has no version suffix, aos)
    rows = (
        ('L7EB2007303000923ASA222Q.data', 'L7EB2007303000923ASA222',
         'LANDSAT_7', 'ETM', 'BUMPER', None, datetime.datetime(2007, 10, 30, 0, 9, 23)),
        ('L7EB2015118010116ASA213Q00.data', 'L7EB2015118010116ASA213',
         'LANDSAT_7', 'ETM', 'BUMPER', 0, datetime.datetime(2015, 4, 28, 1, 1, 16)),
        ('L7EB2011239021036ASA111Q.data', 'L7EB2011239021036ASA111',
         'LANDSAT_7', 'ETM', 'BUMPER', None, datetime.datetime(2011, 8, 27, 2, 10, 36)),
        ('L5TB2005120001242ASA111I.data', 'L5TB2005120001242ASA111',
         'LANDSAT_5', 'TM', 'BUMPER', None, datetime.datetime(2005, 4, 30, 0, 12, 42)),
        ('L5TT1995117002206ASA111I00.data', 'L5TT1995117002206ASA111',
         'LANDSAT_5', 'TM', 'SAM', 0, datetime.datetime(1995, 4, 27, 0, 22, 6)),
        ('L5TT1990118013106ASA111I00.data', 'L5TT1990118013106ASA111',
         'LANDSAT_5', 'TM', 'SAM', 0, datetime.datetime(1990, 4, 28, 1, 31, 6)),
        ('L7ET2005302020634ASA123Q.data', 'L7ET2005302020634ASA123',
         'LANDSAT_7', 'ETM', 'SAM', None, datetime.datetime(2005, 10, 29, 2, 6, 34)),
        ('L5TB2011299000126ASA111I00.data', 'L5TB2011299000126ASA111',
         'LANDSAT_5', 'TM', 'BUMPER', 0, datetime.datetime(2011, 10, 26, 0, 1, 26)),
        ('L5TB2010119010045ASA214I.data', 'L5TB2010119010045ASA214',
         'LANDSAT_5', 'TM', 'BUMPER', None, datetime.datetime(2010, 4, 29, 1, 0, 45)),
        ('L7ET2000296234136ASA111Q.data', 'L7ET2000296234136ASA111',
         'LANDSAT_7', 'ETM', 'SAM', None, datetime.datetime(2000, 10, 22, 23, 41, 36)),
    )

    def build_expected(interval_id, platform_code, instrument_name, mode, version, aos):
        # The version is only passed when the file name carries one.
        format_args = {'name': 'RCC'}
        if version is not None:
            format_args['version'] = version
        return ptype.DatasetMetadata(
            ga_level='P00',
            usgs=ptype.UsgsMetadata(interval_id=interval_id),
            platform=ptype.PlatformMetadata(code=platform_code),
            instrument=ptype.InstrumentMetadata(name=instrument_name, operation_mode=mode),
            format_=ptype.FormatMetadata(**format_args),
            acquisition=ptype.AcquisitionMetadata(
                aos=aos,
                groundstation=ptype.GroundstationMetadata(code='ASA'),
            ),
        )

    new_examples = {row[0]: build_expected(*row[1:]) for row in rows}

    for file_name, expected_output in new_examples.items():
        output = rccfile._extract_rcc_filename_fields(ptype.DatasetMetadata(), file_name)

        # The ids will be different — clear them before comparison.
        output.id_ = None
        expected_output.id_ = None

        self.assert_same(expected_output, output)
id_=uuid.UUID('3ff71eb0-d5c5-11e4-aebb-1040f381a756'), product_level='L1G', creation_dt=datetime.datetime(2015, 4, 7, 1, 58, 25), platform=ptype.PlatformMetadata( code='LANDSAT_7' ), instrument=ptype.InstrumentMetadata( name='ETM', operation_mode='SAM' ), format_=ptype.FormatMetadata( name='GeoTIFF' ), acquisition=ptype.AcquisitionMetadata( groundstation=ptype.GroundstationMetadata( code='ASA' ) ), usgs=ptype.UsgsMetadata( scene_id='LE71140732005007ASA00' ), extent=ptype.ExtentMetadata( coord=ptype.CoordPolygon( ul=ptype.Coord( lat=-17.82157, lon=115.58472 ), ur=ptype.Coord( lat=-17.82497, lon=117.82111 ),
from tests import write_files, TestCase from tests.metadata.mtl import test_ls8, test_ls7_definitive, test_ls5_definitive _LS5_RAW = ptype.DatasetMetadata( id_=UUID('c86809b3-e894-11e4-8958-1040f381a756'), ga_level='P00', product_type='satellite_telemetry_data', creation_dt=datetime.datetime(2015, 4, 22, 0, 7, 48), size_bytes=5871413760, checksum_path=Path('package.sha1'), platform=ptype.PlatformMetadata(code='LANDSAT_5'), instrument=ptype.InstrumentMetadata(name='TM', operation_mode='BUMPER'), format_=ptype.FormatMetadata(name='RCC'), acquisition=ptype.AcquisitionMetadata( aos=datetime.datetime(2005, 6, 1, 1, 51, 10), los=datetime.datetime(2005, 6, 1, 2, 0, 25), groundstation=ptype.GroundstationMetadata(code='ASA'), platform_orbit=113025 ), usgs=ptype.UsgsMetadata( interval_id='L5TB2005152015110ASA111' ), lineage=ptype.LineageMetadata( machine=ptype.MachineMetadata( hostname='niggle.local', runtime_id=UUID('b2af5545-e894-11e4-b3b0-1040f381a756'), type_id='jobmanager', version='2.4.0', uname='Darwin niggle.local 14.3.0 Darwin Kernel Version 14.3.0: Mon Mar 23 11:59:05 PDT 2015; ' 'root:xnu-2782.20.48~5/RELEASE_X86_64 x86_64' ), source_datasets={}
def fill_metadata(self, dataset, path, additional_files=()):
    """
    Populate dataset metadata from an EODS dataset folder.

    The folder name supplies the product type, level, label, platform,
    instrument, path/row and groundstation code. Band files are read from
    the ``scene01`` sub-folder. Acquisition and extent times come from the
    folder's ``metadata.xml``.

    :type additional_files: tuple[Path]
    :type dataset: ptype.DatasetMetadata
    :type path: Path
    :rtype: ptype.DatasetMetadata
    :raises ValueError: if the folder name is not a recognised EODS name,
        if ``metadata.xml`` has no temporal extent, or if the extent start
        is a day or more away from the date in the folder name.
    """
    name_match = re.match((r"(?P<vehicle>LS[578])"
                           r"_(?P<instrument>OLI_TIRS|OLI|TIRS|TM|ETM)"
                           r"_(?P<type>NBAR|PQ|FC)"
                           r"_(?P<level>[^-_]*)"
                           r"_(?P<product>[^-_]*)"
                           r"-(?P<groundstation>[0-9]{3})"
                           r"_(?P<path>[0-9]{3})"
                           r"_(?P<row>[0-9]{3})"
                           r"_(?P<date>[12][0-9]{7})"
                           r"(_(?P<version>[0-9]+))?"
                           "$"), path.stem)
    if name_match is None:
        # Fail with the offending value rather than an AttributeError on None.
        raise ValueError('Unrecognised EODS dataset folder name: %r' % (path.stem,))
    fields = name_match.groupdict()

    dataset.product_type = "EODS_" + fields["type"]
    dataset.ga_level = fields["level"]
    dataset.ga_label = path.stem
    dataset.format_ = ptype.FormatMetadata(name='GeoTiff')

    if not dataset.platform:
        dataset.platform = ptype.PlatformMetadata()
    # The third character of the vehicle (e.g. 'LS8') is the satellite number.
    dataset.platform.code = "LANDSAT_" + fields["vehicle"][2]

    if not dataset.instrument:
        dataset.instrument = ptype.InstrumentMetadata()
    dataset.instrument.name = fields["instrument"]

    if not dataset.image:
        dataset.image = ptype.ImageMetadata(bands={})
    # A single path/row: start and end reference points are the same.
    dataset.image.satellite_ref_point_start = ptype.Point(int(fields["path"]), int(fields["row"]))
    dataset.image.satellite_ref_point_end = ptype.Point(int(fields["path"]), int(fields["row"]))

    for image_path in path.joinpath("scene01").iterdir():
        band = self.to_band(dataset, image_path)
        if band:
            dataset.image.bands[band.number] = band
    md_image.populate_from_image_metadata(dataset)

    if not dataset.acquisition:
        dataset.acquisition = ptype.AcquisitionMetadata()

    # The folder name carries the groundstation's EODS domain code.
    for _station in _GROUNDSTATION_LIST:
        if _station["eods_domain_code"] == fields["groundstation"]:
            dataset.acquisition.groundstation = ptype.GroundstationMetadata(code=_station["code"])
            break

    if not dataset.extent:
        dataset.extent = ptype.ExtentMetadata()

    def els2date(els):
        # Date of the first matching element, or None when there is none.
        if not els:
            return None
        return parse(els[0].text)

    doc = ElementTree.parse(str(path.joinpath('metadata.xml')))
    start_time = els2date(doc.findall("./EXEXTENT/TEMPORALEXTENTFROM"))
    end_time = els2date(doc.findall("./EXEXTENT/TEMPORALEXTENTTO"))
    if start_time is None or end_time is None:
        # Both are needed for the checks and extent arithmetic below.
        raise ValueError('EODS metadata.xml is missing temporal extent information')

    # check if the dates in the metadata file are at least as accurate as what we have
    filename_time = datetime.datetime.strptime(fields["date"], "%Y%m%d")
    time_diff = start_time - filename_time

    # Is the EODS metadata extremely off?
    if abs(time_diff).days != 0:
        raise ValueError('EODS time information differs too much from source files: %s' % time_diff)

    dataset.acquisition.aos = els2date(doc.findall("./ACQUISITIONINFORMATION/EVENT/AOS"))
    dataset.acquisition.los = els2date(doc.findall("./ACQUISITIONINFORMATION/EVENT/LOS"))

    dataset.extent.center_dt = start_time + (end_time - start_time) / 2
    dataset.extent.from_dt = start_time
    dataset.extent.to_dt = end_time

    return dataset