def test_esm_flatfile(self):
    """Parses the ESM 2018 SA flatfile and checks the parse log (counts and
    missing-value stats), then checks open/close and filter semantics of the
    resulting :class:`GroundMotionTable`."""
    input_file = os.path.join(os.path.dirname(self.input_file),
                              'esm_sa_flatfile_2018.csv')
    log = EsmParser.parse(input_file, output_path=self.output_file)
    # all 98 records of the flatfile must have been written:
    self.assertEqual(log['total'], 98)
    self.assertEqual(log['written'], 98)
    missingvals = log['missing_values']
    # distance metrics are missing on all but one record:
    self.assertTrue(missingvals['rjb'] == missingvals['rrup'] ==
                    missingvals['rupture_length'] == missingvals['ry0'] ==
                    missingvals['rx'] == missingvals['rupture_width'] == 97)
    # rupture-plane and duration_5_75 columns are missing everywhere:
    self.assertTrue(missingvals['strike_1'] == missingvals['dip_1'] ==
                    missingvals['rake_1'] ==
                    missingvals['duration_5_75'] == 98)
    # ground-motion values are never missing (keys absent from the stats):
    self.assertTrue(all(_ not in missingvals
                        for _ in ('pga', 'pgv', 'sa', 'duration_5_95')))
    self.assertTrue(all(_ + '_components' not in missingvals
                        for _ in ('pga', 'pgv', 'sa', 'duration_5_95')))
    self.assertEqual(missingvals['duration_5_75'], 98)
    self.assertTrue(missingvals['magnitude'] ==
                    missingvals['magnitude_type'] == 13)
    gmdb = GroundMotionTable(self.output_file, 'esm_sa_flatfile_2018')
    with self.assertRaises(ValueError):
        # trying to filter inside a with statement
        with gmdb:
            gmdb.filter('magnitude <= 4')
    gmdb2 = gmdb.filter('magnitude <= 4')
    # underlying HDF5 file not open (ValueError):
    with self.assertRaises(ValueError):
        for rec in gmdb2.records:
            pass
    # check that we correctly wrote default attrs:
    with gmdb2:
        tbl = gmdb2.table.attrs
        self.assertTrue(isinstance(tbl.parser_stats, dict))
        self.assertEqual(tbl.filename, 'template_basic_flatfile.hd5')
        self.assertEqual(len(gmdb2.attrnames()), 6)
    # now it works:
    with gmdb2:
        mag_le_4 = 0
        for rec in gmdb2.records:
            self.assertTrue(rec['magnitude'] <= 4)
            mag_le_4 += 1
    gmdb2 = gmdb.filter('magnitude > 4')
    with gmdb2:
        mag_gt_4 = 0
        for rec in gmdb2.records:
            self.assertTrue(rec['magnitude'] > 4)
            mag_gt_4 += 1
    # the two filters partition the records with a defined magnitude
    # (98 total minus the 13 with missing magnitude):
    self.assertTrue(mag_le_4 + mag_gt_4 == 98 - 13)
def testing(params):
    '''Core method to compute testing data

    :param params: dict with the request parameters
    :return: json serializable dict to be passed into a Response object
    '''
    # params['gmdb'] is the tuple (hdf file name, table name):
    gmdb_base = GroundMotionTable(*params['gmdb'], mode='r')
    config = params.get('config', {})
    # accumulators ("Measure of fit" -> imt -> gsim -> value, and so on):
    fit_measures = {}
    db_records = defaultdict(int)
    skipped = {}
    for gsim in params['gsim']:
        try:
            residuals = Residuals([gsim], params['imt'])
            selexpr = _get_selexpr(gsim, params.get('selexpr', ''))
            # we have some record to be used, compute residuals:
            numrecords = _gmdb_records(residuals, gmdb_base.filter(selexpr))
            db_records[gsim] = numrecords
            if not numrecords:
                skipped[gsim] = 'No matching db record found'
                continue
            gsim_values = []
            for key, name, func in params['fit_measure']:
                gsim_values.extend(
                    _itervalues(gsim, key, name, func(residuals, config)))
            for moffit, imt, value in gsim_values:
                # note: value is a numpy scalar, and not all numpy scalars
                # are json serializable: convert to the Python equivalent
                # via item():
                imt_dict = fit_measures.setdefault(moffit, {})
                imt_dict.setdefault(imt, {})[gsim] = value.item()
        except Exception as exc:  # pylint: disable=broad-except
            skipped[gsim] = str(exc)
    return {
        'Measure of fit': fit_measures,
        'Db records': db_records,
        'Gsim skipped': skipped
    }
def check_gsim_defined_for_current_db(testdata):
    '''no test function, it is used to inspect in debug mode in order to
    get gsims with records in the current gmdb used for tests

    :param testdata: object whose ``path`` method resolves a test-data file
        name to an absolute path
    '''
    for gsim in OQ.gsims():
        try:
            residuals = Residuals([gsim], ['PGA', 'PGV', 'SA(0.1)'])
            gmdbpath = testdata.path('esm_sa_flatfile_2018.csv.hd5')
            gm_table = GroundMotionTable(gmdbpath, 'esm_sa_flatfile_2018',
                                         mode='r')
            selexpr = get_selexpr(gsim)
            # `num` is intentionally unused: place a breakpoint here and
            # inspect it in the debugger
            num = gmdb_records(residuals, gm_table.filter(selexpr))
        except Exception:  # pylint: disable=broad-except
            # best-effort inspection: skip gsims that fail. Note: a bare
            # `except:` here would also swallow KeyboardInterrupt/SystemExit,
            # making this loop impossible to interrupt in the debugger
            pass
def get_residuals(params):
    '''Core method to compute residuals plots data

    :param params: dict with the request parameters
    :return: json serializable dict to be passed into a Response object
    '''
    plot_func, plot_kwargs = params['plot_type']
    residuals = Residuals(params['gsim'], params['imt'])
    # Compute residuals.
    # params['gmdb'] is the tuple (hdf file name, table name):
    gmdb = GroundMotionTable(*params['gmdb'], mode='r')
    selexpr = params.get('selexpr')
    if selexpr:
        gmdb = gmdb.filter(selexpr)
    residuals.get_residuals(gmdb)
    # returned dict: imt -> residual type -> gsim -> plot data:
    ret = defaultdict(lambda: defaultdict(lambda: {}))
    # extend keyword arguments:
    plot_kwargs = dict(plot_kwargs, residuals=residuals, as_json=True)
    for gsim in residuals.residuals:
        for imt in residuals.residuals[gsim]:
            plot_kwargs['gmpe'] = gsim
            plot_kwargs['imt'] = imt
            imt2 = _relabel_sa(imt)
            for res_type, res_plot in plot_func(**plot_kwargs).items():
                for stat in RESIDUALS_STATS:
                    res_plot.setdefault(stat, None)
                if imt2 != imt:
                    res_plot['xlabel'] = _relabel_sa(res_plot['xlabel'])
                    res_plot['ylabel'] = _relabel_sa(res_plot['ylabel'])
                # make also x and y keys consistent with trellis response:
                res_plot['xvalues'] = res_plot.pop('x')
                res_plot['yvalues'] = res_plot.pop('y')
                ret[imt2][res_type][gsim] = res_plot
    return ret
def setUpClass(cls):
    """
    Setup constructs the database from the ESM test data
    """
    ifile = os.path.join(BASE_DATA_PATH, "residual_tests_esm_data.csv")
    cls.out_location = os.path.join(BASE_DATA_PATH, "residual_tests")
    # start from a clean output directory:
    if os.path.exists(cls.out_location):
        shutil.rmtree(cls.out_location)
    parser = ESMFlatfileParser.autobuild("000", "ESM ALL",
                                         cls.out_location, ifile)
    del parser
    # load the pickled metadata database produced by the parser:
    cls.database_file = os.path.join(cls.out_location,
                                     "metadatafile.pkl")
    cls.database = None
    with open(cls.database_file, "rb") as f:
        cls.database = pickle.load(f)
    cls.gsims = ["AkkarEtAlRjb2014", "ChiouYoungs2014"]
    cls.imts = ["PGA", "SA(1.0)"]
    # create the sm table:
    cls.out_location2 = cls.out_location + '_table'
    EsmParser.parse(ifile, cls.out_location2, delimiter=';')
    # table name is the flatfile base name without extension:
    cls.dbtable = \
        GroundMotionTable(cls.out_location2,
                          os.path.splitext(os.path.basename(ifile))[0])
def test_esm_flatfile(self):
    """Parses the ESM 2018 SA flatfile and checks the parse log (counts
    and missing-value stats) and filtering on the resulting
    :class:`GroundMotionTable`."""
    input_file = os.path.join(os.path.dirname(self.input_file),
                              'esm_sa_flatfile_2018.csv')
    log = EsmParser.parse(input_file, output_path=self.output_file)
    # all 98 records of the flatfile must have been written:
    self.assertEqual(log['total'], 98)
    self.assertEqual(log['written'], 98)
    missingvals = log['missing_values']
    # distance metrics missing on all but one record:
    self.assertTrue(
        missingvals['rjb'] == missingvals['rrup'] ==
        missingvals['rupture_length'] == missingvals['ry0'] ==
        missingvals['rx'] == missingvals['rupture_width'] == 97)
    # rupture-plane and duration_5_75 columns missing everywhere:
    self.assertTrue(
        missingvals['strike_1'] == missingvals['dip_1'] ==
        missingvals['rake_1'] == missingvals['duration_5_75'] == 98)
    # ground-motion values never missing (keys absent from the stats):
    self.assertTrue(
        all(_ not in missingvals
            for _ in ('pga', 'pgv', 'sa', 'duration_5_95')))
    self.assertTrue(
        all(_ + '_components' not in missingvals
            for _ in ('pga', 'pgv', 'sa', 'duration_5_95')))
    self.assertEqual(missingvals['duration_5_75'], 98)
    self.assertTrue(
        missingvals['magnitude'] == missingvals['magnitude_type'] == 13)
    gmdb = GroundMotionTable(self.output_file, 'esm_sa_flatfile_2018')
    gmdb2 = gmdb.filter('magnitude <= 4')
    # check that we correctly wrote default attrs:
    with gmdb2.table as tbl:
        self.assertTrue(isinstance(tbl.attrs.parser_stats, dict))
        self.assertEqual(tbl.attrs.flatfilename,
                         'template_basic_flatfile.hd5')
        # self.assertEqual(len(gmdb2.attrnames()), 6)
    mag_le_4 = 0
    for rec in gmdb2.records:
        self.assertTrue(rec['magnitude'] <= 4)
        mag_le_4 += 1
    gmdb2 = gmdb.filter('magnitude > 4')
    mag_gt_4 = 0
    for rec in gmdb2.records:
        self.assertTrue(rec['magnitude'] > 4)
        mag_gt_4 += 1
    # the two filters partition the records with a defined magnitude
    # (98 total minus the 13 with missing magnitude):
    self.assertTrue(mag_le_4 + mag_gt_4 == 98 - 13)
def records_iter(params):
    '''Yields the database records matching the selection given in the
    already validated `params`

    :param params: dict with the request parameters. params['gmdb'] is the
        tuple (hdf file name, table name); params.get('selexpr') is the
        optional selection expression
    '''
    # open the table read-only for the whole duration of the iteration:
    with GroundMotionTable(*params['gmdb'], mode='r') as gmdb:
        yield from records_where(gmdb.table, params.get('selexpr'))
def test_reading_concurrentcy(self):
    '''Tests that it is ok to open an HDF table twice
    (NOTE: this is currently NOT YET SUPPORTED)
    '''
    # A bare `return` (as previously used here) makes the disabled test
    # show up as passing; skipTest reports it as skipped so the missing
    # coverage stays visible in the test output:
    self.skipTest('opening an HDF table twice is not yet supported')
    # the test file has a comma delimiter. Test that we raise with
    # the default semicolon:
    # now should be ok:
    log = UserDefinedParser.parse(self.input_file,
                                  output_path=self.output_file,
                                  delimiter=',')
    dbname = os.path.splitext(os.path.basename(self.output_file))[0]
    rec1 = []
    rec2 = []
    gmdb = GroundMotionTable(self.output_file, dbname)
    # iterate the records and, while the first iteration is still open,
    # start a second concurrent iteration on the same table:
    for r in gmdb.records:
        rec1.append(r['record_id'])
        if not rec2:
            for r2 in gmdb.records:
                rec2.append(r2['record_id'])
    # both iterations must have seen the same records in the same order:
    self.assertEqual(rec1, rec2)
def test_template_basic_file_selection(self):
    '''parses a sample flatfile and tests some selection syntax on it'''
    # the test file has a comma delimiter. Test that we raise with
    # the default semicolon:
    with self.assertRaises(ValueError):
        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file)
    # now should be ok:
    log = UserDefinedParser.parse(self.input_file,
                                  output_path=self.output_file,
                                  delimiter=',')
    dbname = os.path.splitext(os.path.basename(self.output_file))[0]
    with GroundMotionTable(self.output_file, dbname) as gmdb:
        table = gmdb.table
        total = table.nrows
        selection = 'pga <= %s' % 100.75
        ids = [r['record_id'] for r in records_where(table, selection)]
        ids_len = len(ids)
        # test that read where gets the same number of records:
        ids = [r['record_id'] for r in read_where(table, selection)]
        self.assertEqual(len(ids), ids_len)
        # test with limit given:
        ids = [r['record_id'] for r in records_where(table, selection,
                                                     ids_len-1)]
        self.assertEqual(len(ids), ids_len-1)
        # test by negating the selection condition and expect the remaining
        # records to be found:
        ids = [r['record_id'] for r in records_where(table,
                                                     "~(%s)" % selection)]
        self.assertEqual(len(ids), total - ids_len)
        # same should happen for read_where:
        ids = [r['record_id'] for r in read_where(table,
                                                  "~(%s)" % selection)]
        self.assertEqual(len(ids), total - ids_len)
        # test with limit 0 (expected: no record yielded):
        ids = [r['record_id'] for r in records_where(table,
                                                     "~(%s)" % selection, 0)]
        self.assertEqual(len(ids), 0)
        # restrict the search:
        # note that we must pass strings to event_time,
        # either 1935-01-01, 1935-01-01T00:00:00, or simply the year:
        selection2 = "(%s) & (%s)" % \
            (selection, '(event_time >= "1935") & '
                        '(event_time < \'1936-01-01\')')
        ids = [r['record_id'] for r in records_where(table, selection2)]
        ids_len2 = len(ids)
        # test that the search was restricted:
        self.assertTrue(ids_len2 < ids_len)
        # now negate the search on event_time and test that we get all
        # remaining records:
        selection2 = "(%s) & ~(%s)" % \
            (selection, '(event_time >= "1935") & '
                        '(event_time < "1936-01-01")')
        ids = [r['record_id'] for r in records_where(table, selection2)]
        self.assertEqual(len(ids) + ids_len2, ids_len)
        # test truthy condition (isaval on bool col returns True):
        selection = 'vs30_measured == vs30_measured'
        ids = read_where(table, selection)
        self.assertEqual(len(ids), total)
        # test with limit exceeding the available records (should get
        # all records as if limit was not given):
        ids = read_where(table, selection, total+1)
        self.assertEqual(len(ids), total)
        # records_where should get the same results as read_where:
        ids = [r['record_id'] for r in records_where(table, selection)]
        self.assertEqual(len(ids), total)
        # test falsy condition (isaval on bool col returns True):
        ids = read_where(table, "~(%s)" % selection)
        self.assertEqual(len(ids), 0)
        ids = read_where(table, selection, total+1)
        self.assertEqual(len(ids), total)
        ids = [r['record_id'] for r in records_where(table,
                                                     "~(%s)" % selection)]
        self.assertEqual(len(ids), 0)
def test_template_basic_file(self):
    '''parses sample flatfile and performs some tests'''
    # test a file not found
    with self.assertRaises(IOError):
        with GroundMotionTable(self.output_file + 'what',
                               dbname='whatever', mode='r') as gmdb:
            pass
    log = UserDefinedParser.parse(self.input_file,
                                  output_path=self.output_file,
                                  delimiter=',')
    dbname = os.path.splitext(os.path.basename(self.output_file))[0]
    # the flatfile parsed has:
    # 1. an event latitude out of bound (row 0)
    # 2. an event longitude out of bound (row 1)
    # 3. a pga with extremely high value (row 2)
    # 4. a sa[0] with extremely high value (row 3)
    total = log['total']
    written = total - 2  # row 2 and 3 not written
    self.assertEqual(log['total'], 99)
    self.assertEqual(log['written'], written)
    self.assertEqual(sorted(log['error']), [2, 3])
    self.assertEqual(len(log['outofbound_values']), 2)  # rows 0 and 1
    self.assertEqual(log['outofbound_values']['event_latitude'], 1)  # 0
    self.assertEqual(log['outofbound_values']['event_longitude'], 1)  # 1
    # self.assertEqual(log['missing_values']['pga'], 0)
    self.assertEqual(log['missing_values']['pgv'], log['written'])
    self.assertEqual(log['missing_values']['pgv'], log['written'])
    # assert auto generated ids are not missing:
    self.assertFalse('record_id' in log['missing_values'])
    self.assertFalse('event_id' in log['missing_values'])
    self.assertFalse('station_id' in log['missing_values'])
    # PYTABLES. IMPORTANT
    # seems that this is NOT possible:
    # list(table.iterrows())  # returns N times the LAST row
    # seems also that we should NOT break inside a iterrows or where loop
    # (see here: https://github.com/PyTables/PyTables/issues/8)
    # open HDF5 and check for incremental ids:
    test_col = 'event_name'
    test_col_oldval, test_col_newval = None, b'dummy'
    test_cols_found = 0
    with GroundMotionTable(self.output_file, dbname, 'a') as gmdb:
        tbl = gmdb.table
        ids = list(r['event_id'] for r in tbl.iterrows())
        # assert record ids are the number of rows
        self.assertTrue(len(ids) == written)
        # assert we have some event shared across records:
        self.assertTrue(len(set(ids)) < written)
        # modify one row
        for row in tbl.iterrows():
            if test_col_oldval is None:
                test_col_oldval = row[test_col]
            if row[test_col] == test_col_oldval:
                row[test_col] = test_col_newval
                test_cols_found += 1
                row.update()
        tbl.flush()
        # all written columns have the same value of row[test_col]:
        self.assertTrue(test_cols_found == 1)
    # assert that we modified the event name
    with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
        tbl = gmdb.table
        count = 0
        for row in tbl.where('%s == %s' % (test_col, test_col_oldval)):
            # we should never be here (no row with the old value):
            count += 1
        self.assertTrue(count == 0)
        count = 0
        for row in tbl.where('%s == %s' % (test_col, test_col_newval)):
            count += 1
        self.assertTrue(count == test_cols_found)
    # now re-write, with append mode
    log = UserDefinedParser.parse(self.input_file,
                                  output_path=self.output_file,
                                  delimiter=',')
    # . NOT SUPPORTED, COMMENTED:
    # open HDF5 with append='a' (the default)
    # and check that we wrote stuff twice
    # with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
    #     tbl = gmdb.table
    #     self.assertTrue(tbl.nrows == written * 2)
    #     # assert the old rows are there
    #     oldrows = list(row[test_col] for row in
    #                    tbl.where('%s == %s' % (test_col,
    #                                            test_col_oldval)))
    #     self.assertTrue(len(oldrows) == test_cols_found)
    #     # assert the new rows are added:
    #     newrows = list(row[test_col] for row in
    #                    tbl.where('%s == %s' % (test_col,
    #                                            test_col_newval)))
    #     self.assertTrue(len(newrows) == test_cols_found)
    # now re-write, with no mode='w'
    log = UserDefinedParser.parse(self.input_file,
                                  output_path=self.output_file,
                                  delimiter=',')
    with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
        tbl = gmdb.table
        self.assertTrue(tbl.nrows == written)
        # assert the old rows are not there anymore
        oldrows = list(row[test_col] for row in
                       tbl.where('%s == %s' % (test_col, test_col_oldval)))
        self.assertTrue(len(oldrows) == test_cols_found)
        # assert the new rows are added:
        newrows = list(row[test_col] for row in
                       tbl.where('%s == %s' % (test_col, test_col_newval)))
        self.assertTrue(not newrows)
    # get db names:
    dbnames = get_dbnames(self.output_file)
    self.assertTrue(len(dbnames) == 1)
    name = os.path.splitext(os.path.basename(self.output_file))[0]
    self.assertTrue(dbnames[0] == name)
    # now a delete
    names = get_dbnames(self.output_file)
    assert len(names) > 0
    GroundMotionTable(self.output_file, name, 'w').delete()
    names = get_dbnames(self.output_file)
    assert len(names) == 0