def get_weights(_dict, _ky, _d):
    """Return normalized per-object weights for a validation test.

    Parameters
    ----------
    _dict : dict
        Test-configuration dictionary (a test block loaded from yaml).
    _ky : str
        Key in ``_dict`` naming the weights column of ``_d`` (e.g. 'weights').
    _d : table-like
        Data with ``len(_d)`` objects, indexable by column name.

    Returns
    -------
    numpy.ndarray
        Per-object weights normalized so they sum to 1. Equal weights are
        used when no weight column is configured.
    """
    # set all objects equal weight, unless defined in the test config
    if pval.key_not_none(_dict, _ky) is False:
        # no weights configured for this test -> continue with weights = 1
        weights = np.ones(len(_d))
    else:
        # BUG FIX: the original read the global `tst['weights']`, ignoring
        # the `_dict`/`_ky` arguments entirely. Use the passed-in config.
        # Cast to float so normalization below cannot hit integer division
        # (consistent with perform_tests_fast, which does the same cast).
        weights = np.array(_d[_dict[_ky]], dtype=float)
    return weights / np.sum(weights)
#poplate the lists of files for point predictions, and pdf predictions files = {'point': [], 'pdf': []} #load the files we will use for arg in args: # are these standard .fits and .hdf5 files? files = fileType(arg, files) #do we also have a yaml configuration file? if '.yaml' in arg: config = load_yaml(arg) if 'filePaths' in config: if pval.key_not_none(config, 'filePaths'): for i in config['filePaths']: f = glob.glob(i) for ii in f: files = fileType(ii, files) if len(files['point']) + len(files['pdf']) < 1: print "DES photoz validation code" print "usage like" print "photoz_metrics.py data/PointPredictions1.fits data/PointPredictions*.fits" print "or" print "photoz_metrics.py data/pdfPredictions*.hdf5" print "or a mix of the two" print "photoz_metrics.py data/pdfPredictions*.hdf5 data/PointPredictions*.fits" print "or you can make more fine tuned validations using a configuration yaml file"
def perform_tests_fast(d, tst): """perform_tests_fast performs a fast set of tests without boot strap resampling, or any error resampling """ #results dictionary res = {} #get all the columns we are gonna test on reqcols = tst['metrics'].keys() for i in reqcols: if i not in d.keys(): print "missing column ", i sys.exit() #which redshift do we need for this metric test? for photoz in tst['metrics']: res[photoz] = {} z_truth = np.array(d[tst['truths']]) z_pred = np.array(d[photoz]) #what is the metric test? for metric in tst['metrics'][photoz]: res[photoz][metric] = {} #convert metric name to metric function metric_function = pval.get_function(metric) #do I have to pass any additional arguments to this function? extra_params = pval.get_extra_params(tst, metric) #what weighting scheme shall we apply? for wght in tst['weights']: res[photoz][metric][wght] = {} #get the data weights weights = np.array(d[wght], dtype=float) res[photoz][metric][wght]['value'] = vlfn.process_function( metric_function, z_truth, z_pred, weights=weights, extra_params=extra_params) #shall we calculate binning statiscs? if pval.key_not_none(tst, 'bins'): binning = tst['bins'] res[photoz][metric][wght]['bins'] = {} for ky in binning: bin_vals = binning[ky] res[photoz][metric][wght]['bins'][ky] = {} res[photoz][metric][wght]['bins'][ky]['bin_center'] = [] res[photoz][metric][wght]['bins'][ky]['value'] = [] for bbn in range(len(bin_vals) - 1): ind_bn = (d[ky] <= bin_vals[bbn + 1]) * (d[ky] > bin_vals[bbn]) if np.sum(ind_bn) > 1 and np.sum(weights[ind_bn]) > 0: res[photoz][metric][wght]['bins'][ky][ 'bin_center'].append(np.mean(d[ky][ind_bn])) res[photoz][metric][wght]['bins'][ky][ 'value'].append( vlfn.process_function( metric_function, z_truth[ind_bn], z_pred[ind_bn], weights=weights[ind_bn], extra_params=extra_params)) return res
# NOTE(review): `arg` is presumably bound by an enclosing/earlier loop over
# the command-line arguments that is outside this view — confirm before
# reformatting; classify the path into files['point'] / files['pdf']
files = fileType(arg, files)

# nothing usable was supplied -> show usage and (presumably) exit in help()
if len(files['point']) + len(files['pdf']) < 1:
    help()

#which sets of metrics + tests shall we perform
testProperties = {'point': [], 'pdf': []}

# NOTE(review): mid-script imports — PEP 8 puts these at the top of the
# file; left in place since the rest of the file is not visible here
import string
import random

#nothing is specified, using the standard tests
test_path = path + '/testConfig/' + SCIENCE_SAMPLE + '.yaml'
p = load_yaml(test_path)
# copy the configured test blocks ('point' and/or 'pdf') out of the yaml
for ptype in testProperties:
    if pval.key_not_none(p, ptype):
        testProperties[ptype] = p[ptype]

#First point predictions
ptype = 'point'

#do we have any files of this type to work with?
if len(files[ptype]) > 0:
    #results dictionary
    res = {'test_config': testProperties[ptype]}
    #obtain the tests and required cols
    tst = testProperties[ptype]
    #get all the columns we are gonna test on