def test_good_file(self):
    """Test that a good file passes validation.

    Loads ``tests/data/UV617FEB.woudc``, runs the reader's
    ``metadata_validator()`` and expects a truthy ``'status'`` entry.
    """
    contents = get_file_string('tests/data/UV617FEB.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertTrue(result['status'])
def test_non_ascii_file(self):
    """Test that a non-ASCII file passes validation.

    Loads ``tests/data/test-non-ascii.TO1``, runs the reader's
    ``metadata_validator()`` and expects a truthy ``'status'`` entry.
    """
    contents = get_file_string('tests/data/test-non-ascii.TO1')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertTrue(result['status'])
def test_bad_location(self):
    """Test that locations off by >= 1 degree are caught.

    The joined validator errors must equal the expected latitude
    correction message exactly.
    """
    contents = get_file_string('tests/data/UV617FEB-bad-location.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Location Latitude of -46.038 does \
not match database. Please change it to -45.0379981995.')
def test_bad_platform_name(self):
    """Test that bad platform names are resolved using platform ID.

    The joined validator errors must equal the expected platform-name
    correction message exactly.
    """
    contents = get_file_string('tests/data/UV617FEB-bad-platform.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Platform name of Sapporo does not \
match database. Please change it to Lauder')
def test_bad_platform_id(self):
    """Test that bad platform IDs are resolved using platform name.

    The joined validator errors must equal the expected platform-ID
    correction message exactly.
    """
    contents = get_file_string('tests/data/UV617FEB-bad-platform-id.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Platform ID of 024 does not match \
database. Please change it to 256')
def test_agency_name(self):
    """Test that agency names are resolved to acronyms.

    The joined validator errors must equal the expected
    "use the acronym" message exactly.
    """
    contents = get_file_string('tests/data/UV617FEB-agency-name.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Please use the Agency \
acronym of NIWA-LAU.')
def test_bad_agency(self):
    """Test that platform info is used to resolve bad agencies.

    Only checks that the expected message prefix appears somewhere in
    the joined validator errors.
    """
    contents = get_file_string('tests/data/UV617FEB-bad-agency.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn('The following agencies match the given \
platform name and/or ID:', ''.join(result['errors']))
def test_bad_content_level(self):
    """Test that a bad content level is reported.

    The joined validator errors must equal the expected
    "must be 1.0" message for the Spectral category exactly.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-bad-content-level.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Level for category Spectral \
must be 1.0')
def test_trailing_commas(self):
    """Test that trailing commas are detected.

    Only checks that the expected message appears somewhere in the
    joined validator errors.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-trailing-commas.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn('This file has extra trailing commas. \
Please remove them before submitting.', ''.join(result['errors']))
def test_bad_platform_country(self):
    """Test that country names are resolved to country codes.

    The joined validator errors must equal the expected country
    correction message exactly.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-bad-platform-country.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Platform country of New Zealand \
does not match database. Please change it to NZL')
def test_no_matches(self):
    """Test that a bad platform name and ID produces no matches.

    The joined validator errors must equal the expected
    "could not find a record" message exactly.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-no-platform-match.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Could not find a record for \
either the platform name or ID. If this is a new \
station, please notify WOUDC.')
def test_no_agency_matches(self):
    """Test that bad agency and platform information produces
    tentative new agency error.

    The joined validator errors must equal the expected
    "agency acronym not found" message exactly.
    """
    contents = get_file_string('tests/data/UV617FEB-no-match.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    self.assertEqual(''.join(result['errors']),
                     'Agency acronym of ZZZZZZ not \
found in the woudc database. If this is a new agency, \
please notify WOUDC')
def test_bad_instrument_model(self):
    """Test that unknown instrument model produces
    tentative new model error.

    Only checks that the expected message appears somewhere in the
    joined validator errors.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-bad-instrument-model.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn('Instrument Model is not in database. \
Please verify that it is correct.', ''.join(result['errors']))
def test_different_agency(self):
    """Test that a valid (but wrong) agency is distinct
    from a bad agency.

    Only checks that the expected message prefix appears somewhere in
    the joined validator errors.
    """
    contents = get_file_string(
        'tests/data/UV617FEB-different-agency.woudc')
    reader = loads(contents)
    # named ``result`` (not ``dict``) so the builtin is not shadowed
    result = reader.metadata_validator()
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn('Agency and Platform information do not \
match. These agencies are valid for this \
platform:', ''.join(result['errors']))
def get_extcsv(url):
    """Get an Extended CSV from WOUDC WAF.

    The request is retried (after a 5 second pause) for as long as it
    fails with a connection reset, connection refused or timeout error.

    :param url: URL of the Extended CSV file
    :returns: the result of ``loads()`` on the downloaded content, or
              ``None`` when the request fails with any other
              socket/URL error
    """
    retryable = (errno.ECONNRESET, errno.ECONNREFUSED, errno.ETIMEDOUT)

    # '%' is in the safe set, so existing escapes are not double-quoted
    url = quote(url, "%/:=&?~#+!$,;'@()*[]|")

    # Loop instead of recursing so that a long outage cannot exhaust
    # the interpreter's recursion limit.
    while True:
        try:
            content = urlopen(url).read()
            return loads(content)
        except (SocketError, URLError) as err:
            LOGGER.warning(str(err))
            code = getattr(err, 'errno', None)
            if code is None:
                # URLError carries the underlying socket error in
                # ``reason``; its own ``errno`` is typically unset.
                code = getattr(getattr(err, 'reason', None), 'errno', None)
            if code not in retryable:
                return None
            time.sleep(5)
            LOGGER.info('Retrying...')
def qa(file_content, file_path=None, rule_path=None):
    """
    Parse incoming file content, invoke dataset handlers,
    and invoke quality checker

    :param file_content: file as string
    :param file_path: path to file (optional)
    :param rule_path: optional; presumably the path to the quality
                      rules -- TODO confirm against callers
    :raises Exception: whatever ``loads()`` raises on unparseable
                       content; it is logged and then re-raised
    """

    # parse incoming file content
    try:
        ecsv = loads(file_content)
    except Exception as err:
        msg = 'Unable to parse file. Due to: %s' % str(err)
        LOGGER.error(msg)
        # bare ``raise`` re-raises the active exception and keeps its
        # original traceback
        raise
def test_writer_reader(self):
    """
    Produce woudc_extcsv.Writer object,
    use woudc_extcsv.Reader to check values

    Builds a TotalOzone file with the Writer, serializes it with
    ``dumps()``, parses it back with ``loads()`` and checks tables,
    fields, values and the order/content of the DAILY rows.  Finally
    checks that ``dump()`` without a target raises ``TypeError``.
    """

    # produce extcsv object
    extcsv = Writer(template=True)

    # add data here
    extcsv.add_comment('This file was generated by\
 WODC_TO_CSX v1.0 using WODC 80-column formatted data.')
    extcsv.add_comment('\'na\' is used where Instrument\
 Model or Number are not available.')
    extcsv.add_data('CONTENT', 'WOUDC,TotalOzone,1.0,1')
    extcsv.add_data('DATA_GENERATION', '2014-08-28,NOAA-CMDL,0.0')
    extcsv.add_data('PLATFORM', 'STN,031,MAUNA LOA,USA')
    extcsv.add_data('INSTRUMENT', 'Dobson,Beck,076')
    extcsv.add_data('LOCATION', '19.533,-155.574,3405')
    extcsv.add_data('TIMESTAMP', '+00:00:00,2014-04-01')
    extcsv.add_data('DAILY',
                    '2014-04-01,0,2,283,,,,18',
                    field='Date,WLCode,ObsCode,ColumnO3,StdDevO3,\
UTC_Begin,UTC_End,UTC_Mean,nObs,mMu,ColumnSO2')
    extcsv.add_data('DAILY', '2014-04-08,0,0,288,,,,23')
    extcsv.add_data('DAILY', '2014-04-09,0,0,279,,,,23')
    extcsv.add_data('DAILY', '2014-04-10,0,0,273,,,,24')
    extcsv.add_data('DAILY', '2014-04-11,0,0,274,,,,21')
    extcsv.add_data('DAILY', '2014-04-12,0,2,271,,,,18')
    extcsv.add_data('DAILY', '2014-04-13,0,2,274,,,,18')
    extcsv.add_data('DAILY', '2014-04-14,0,0,283,,,,23')
    extcsv.add_data('DAILY', '2014-04-15,0,0,285,,,,23')
    extcsv.add_data('DAILY', '2014-04-16,0,0,284,,,,23')
    extcsv.add_data('DAILY', '2014-04-17,0,0,280,,,,22')
    extcsv.add_data('DAILY', '2014-04-18,0,2,268,,,,18')
    extcsv.add_data('DAILY', '2014-04-19,0,2,271,,,,18')
    extcsv.add_data('DAILY', '2014-04-20,0,2,264,,,,18')
    extcsv.add_data('DAILY', '2014-04-21,0,0,278,,,,23')
    extcsv.add_data('DAILY', '2014-04-22,0,0,276,,,,21')
    extcsv.add_data('DAILY', '2014-04-23,0,0,280,,,,23')
    extcsv.add_data('DAILY', '2014-04-24,0,0,269,,,,22')
    extcsv.add_data('DAILY', '2014-04-25,0,0,275,,,,21')
    extcsv.add_data('DAILY', '2014-04-26,0,2,278,,,,18')
    extcsv.add_data('DAILY', '2014-04-28,0,0,296,,,,21')
    extcsv.add_data('DAILY', '2014-04-29,0,0,291,,,,23')
    extcsv.add_data('DAILY',
                    '2014-04-30,0,0,294,,,,21',
                    table_comment=' 1992 Coefficients in use')
    # index=2 adds a second TIMESTAMP table (checked below as TIMESTAMP2)
    extcsv.add_data('TIMESTAMP',
                    '+00:00:00,2014-04-30',
                    field='UTCOffset,Date,Time', index=2)
    extcsv.add_data('MONTHLY',
                    '2014-04-01,279,8.3,23',
                    field='Date,ColumnO3,StdDevO3,Npts')

    extcsv_s = dumps(extcsv)

    # load my extcsv into Reader
    my_extcsv_to = loads(extcsv_s)

    # check tables
    self.assertIn('DAILY', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('PLATFORM', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('LOCATION', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('TIMESTAMP', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('DATA_GENERATION', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('TIMESTAMP2', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('MONTHLY', my_extcsv_to.sections,
                  'check totalozone in my extcsv')

    # check fields
    self.assertIn('Level', my_extcsv_to.sections['CONTENT'],
                  'check totalozone field in my extcsv')
    self.assertIn('UTCOffset', my_extcsv_to.sections['TIMESTAMP'],
                  'check totalozone field in my extcsv')
    self.assertIn('ScientificAuthority',
                  my_extcsv_to.sections['DATA_GENERATION'],
                  'check totalozone field in my extcsv')
    self.assertIn('Time', my_extcsv_to.sections['TIMESTAMP2'],
                  'check totalozone field in my extcsv')
    self.assertIn('ColumnO3', my_extcsv_to.sections['MONTHLY'],
                  'check totalozone field in my extcsv')

    # check values
    self.assertEqual('19.533',
                     my_extcsv_to.sections['LOCATION']['Latitude'],
                     'check totalozone value in my extcsv')
    self.assertEqual('NOAA-CMDL',
                     my_extcsv_to.sections['DATA_GENERATION']['Agency'],
                     'check totalozone value in my extcsv')
    self.assertEqual('1',
                     my_extcsv_to.sections['CONTENT']['Form'],
                     'check totalozone value in my extcsv')
    self.assertEqual('23',
                     my_extcsv_to.sections['MONTHLY']['Npts'],
                     'check totalozone value in my extcsv')

    # the DAILY table is kept as raw CSV text; parse it row by row
    my_to_daily = StringIO(my_extcsv_to.sections['DAILY']['_raw'])
    my_daily_rows = csv.reader(my_to_daily)
    # builtin next() works on both Python 2.6+ and Python 3 iterators
    my_daily_header = next(my_daily_rows)
    self.assertIn('WLCode', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('nObs', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('ColumnO3', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('ColumnSO2', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertEqual(0, my_daily_header.index('Date'),
                     'check totalozone daily field order in my extcsv')
    self.assertEqual(len(my_daily_header) - 1,
                     my_daily_header.index('ColumnSO2'),
                     'check totalozone daily field order in my extcsv')

    my_daily_row = None
    # seek: advance 5 data rows, landing on the 2014-04-11 record
    for _ in range(5):
        my_daily_row = next(my_daily_rows)
    self.assertEqual('274',
                     my_daily_row[my_daily_header.index('ColumnO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('',
                     my_daily_row[my_daily_header.index('StdDevO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('',
                     my_daily_row[my_daily_header.index('UTC_Begin')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('21',
                     my_daily_row[my_daily_header.index('UTC_Mean')],
                     'check totalozone daily value in my extcsv')

    # advance 17 more data rows, landing on the 2014-04-29 record
    for _ in range(17):
        my_daily_row = next(my_daily_rows)
    self.assertEqual('291',
                     my_daily_row[my_daily_header.index('ColumnO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('23',
                     my_daily_row[my_daily_header.index('UTC_Mean')],
                     'check totalozone daily value in my extcsv')

    # dump() called without its remaining argument(s) must fail
    with self.assertRaises(TypeError):
        dump(extcsv)
def test_writer_reader(self):
    """
    Produce woudc_extcsv.Writer object,
    use woudc_extcsv.Reader to check values

    Builds a TotalOzone file with the Writer, serializes it with
    ``dumps()``, parses it back with ``loads()`` and checks tables,
    fields, values and the order/content of the DAILY rows.  Finally
    checks that ``dump()`` without a target raises ``TypeError``.
    """

    # produce extcsv object
    extcsv = Writer(template=True)

    # add data here
    extcsv.add_comment('This file was generated by\
 WODC_TO_CSX v1.0 using WODC 80-column formatted data.')
    extcsv.add_comment('\'na\' is used where Instrument\
 Model or Number are not available.')
    extcsv.add_data('CONTENT', 'WOUDC,TotalOzone,1.0,1')
    extcsv.add_data('DATA_GENERATION', '2014-08-28,NOAA-CMDL,0.0')
    extcsv.add_data('PLATFORM', 'STN,031,MAUNA LOA,USA')
    extcsv.add_data('INSTRUMENT', 'Dobson,Beck,076')
    extcsv.add_data('LOCATION', '19.533,-155.574,3405')
    extcsv.add_data('TIMESTAMP', '+00:00:00,2014-04-01')
    extcsv.add_data('DAILY',
                    '2014-04-01,0,2,283,,,,18',
                    field='Date,WLCode,ObsCode,ColumnO3,StdDevO3,\
UTC_Begin,UTC_End,UTC_Mean,nObs,mMu,ColumnSO2')
    extcsv.add_data('DAILY', '2014-04-08,0,0,288,,,,23')
    extcsv.add_data('DAILY', '2014-04-09,0,0,279,,,,23')
    extcsv.add_data('DAILY', '2014-04-10,0,0,273,,,,24')
    extcsv.add_data('DAILY', '2014-04-11,0,0,274,,,,21')
    extcsv.add_data('DAILY', '2014-04-12,0,2,271,,,,18')
    extcsv.add_data('DAILY', '2014-04-13,0,2,274,,,,18')
    extcsv.add_data('DAILY', '2014-04-14,0,0,283,,,,23')
    extcsv.add_data('DAILY', '2014-04-15,0,0,285,,,,23')
    extcsv.add_data('DAILY', '2014-04-16,0,0,284,,,,23')
    extcsv.add_data('DAILY', '2014-04-17,0,0,280,,,,22')
    extcsv.add_data('DAILY', '2014-04-18,0,2,268,,,,18')
    extcsv.add_data('DAILY', '2014-04-19,0,2,271,,,,18')
    extcsv.add_data('DAILY', '2014-04-20,0,2,264,,,,18')
    extcsv.add_data('DAILY', '2014-04-21,0,0,278,,,,23')
    extcsv.add_data('DAILY', '2014-04-22,0,0,276,,,,21')
    extcsv.add_data('DAILY', '2014-04-23,0,0,280,,,,23')
    extcsv.add_data('DAILY', '2014-04-24,0,0,269,,,,22')
    extcsv.add_data('DAILY', '2014-04-25,0,0,275,,,,21')
    extcsv.add_data('DAILY', '2014-04-26,0,2,278,,,,18')
    extcsv.add_data('DAILY', '2014-04-28,0,0,296,,,,21')
    extcsv.add_data('DAILY', '2014-04-29,0,0,291,,,,23')
    extcsv.add_data('DAILY',
                    '2014-04-30,0,0,294,,,,21',
                    table_comment=' 1992 Coefficients in use')
    # index=2 adds a second TIMESTAMP table (checked below as TIMESTAMP2)
    extcsv.add_data('TIMESTAMP',
                    '+00:00:00,2014-04-30',
                    field='UTCOffset,Date,Time', index=2)
    extcsv.add_data('MONTHLY',
                    '2014-04-01,279,8.3,23',
                    field='Date,ColumnO3,StdDevO3,Npts')

    extcsv_s = dumps(extcsv)

    # load my extcsv into Reader
    my_extcsv_to = loads(extcsv_s)

    # check tables
    self.assertIn('DAILY', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('PLATFORM', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('LOCATION', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('TIMESTAMP', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('DATA_GENERATION', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('TIMESTAMP2', my_extcsv_to.sections,
                  'check totalozone table in my extcsv')
    self.assertIn('MONTHLY', my_extcsv_to.sections,
                  'check totalozone in my extcsv')

    # check fields
    self.assertIn('Level', my_extcsv_to.sections['CONTENT'],
                  'check totalozone field in my extcsv')
    self.assertIn('UTCOffset', my_extcsv_to.sections['TIMESTAMP'],
                  'check totalozone field in my extcsv')
    self.assertIn('ScientificAuthority',
                  my_extcsv_to.sections['DATA_GENERATION'],
                  'check totalozone field in my extcsv')
    self.assertIn('Time', my_extcsv_to.sections['TIMESTAMP2'],
                  'check totalozone field in my extcsv')
    self.assertIn('ColumnO3', my_extcsv_to.sections['MONTHLY'],
                  'check totalozone field in my extcsv')

    # check values
    self.assertEqual('19.533',
                     my_extcsv_to.sections['LOCATION']['Latitude'],
                     'check totalozone value in my extcsv')
    self.assertEqual('NOAA-CMDL',
                     my_extcsv_to.sections['DATA_GENERATION']['Agency'],
                     'check totalozone value in my extcsv')
    self.assertEqual('1',
                     my_extcsv_to.sections['CONTENT']['Form'],
                     'check totalozone value in my extcsv')
    self.assertEqual('23',
                     my_extcsv_to.sections['MONTHLY']['Npts'],
                     'check totalozone value in my extcsv')

    # the DAILY table is kept as raw CSV text; parse it row by row
    my_to_daily = StringIO(my_extcsv_to.sections['DAILY']['_raw'])
    my_daily_rows = csv.reader(my_to_daily)
    # builtin next() works on both Python 2.6+ and Python 3 iterators
    my_daily_header = next(my_daily_rows)
    self.assertIn('WLCode', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('nObs', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('ColumnO3', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertIn('ColumnSO2', my_daily_header,
                  'check totalozone daily field in my extcsv')
    self.assertEqual(0, my_daily_header.index('Date'),
                     'check totalozone daily field order in my extcsv')
    self.assertEqual(len(my_daily_header) - 1,
                     my_daily_header.index('ColumnSO2'),
                     'check totalozone daily field order in my extcsv')

    my_daily_row = None
    # seek: advance 5 data rows, landing on the 2014-04-11 record
    for _ in range(5):
        my_daily_row = next(my_daily_rows)
    self.assertEqual('274',
                     my_daily_row[my_daily_header.index('ColumnO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('',
                     my_daily_row[my_daily_header.index('StdDevO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('',
                     my_daily_row[my_daily_header.index('UTC_Begin')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('21',
                     my_daily_row[my_daily_header.index('UTC_Mean')],
                     'check totalozone daily value in my extcsv')

    # advance 17 more data rows, landing on the 2014-04-29 record
    for _ in range(17):
        my_daily_row = next(my_daily_rows)
    self.assertEqual('291',
                     my_daily_row[my_daily_header.index('ColumnO3')],
                     'check totalozone daily value in my extcsv')
    self.assertEqual('23',
                     my_daily_row[my_daily_header.index('UTC_Mean')],
                     'check totalozone daily value in my extcsv')

    # dump() called without its remaining argument(s) must fail
    with self.assertRaises(TypeError):
        dump(extcsv)
def loads(content):
    """Delegate to ``woudc_extcsv.loads`` and hand back its result."""
    parsed = woudc_extcsv.loads(content)
    return parsed