def __init__(self, params, numtrajectories, levels, gridjump,
             response=False, iteration_cb=None):
    if response:
        print('Morris method does not support creating response surfaces')
        raise ValueError

    # compare the state of the seeds to the ones set in the control script
    a = pickle(random.getstate())
    b = pickle(np.random.get_state())
    f = open('random.getstate.2.txt', 'w')
    f.write(a)
    f.close()
    f = open('np.random.get_state.2.txt', 'w')
    f.write(b)
    f.close()

    # The output of a puq run is one elementary effect per parameter per trajectory.
    # Therefore, each parameter will have *numtrajectories* elementary effects.
    PSweep.__init__(self, iteration_cb)
    self.params = params
    num = int(numtrajectories)
    self.num = num * (len(params) + 1)  # total number of model runs for Morris
    self.response = response
    self._start_at = 0
    self.levels = levels
    self.gridjump = gridjump
    self._hf = None

    self._salib_paramFile = '==SALib_morris_params==.txt'
    self._salib_realizationsFile = '==SALib_morris_realizations==.txt'
    self._salib_realizationsFile_verify = '==0SALib_morris_realizations==.txt'
    self._salib_analysisFile = '==SALib_morris_outputs==.txt'

    # Generate Morris samples: an N(D+1) x D numpy array. Rows are realizations,
    # columns are params. Each column is independent in the range [0,1].
    # TODO: allow for correlation
    self._samples = SAs.morris_oat.sample(N=num, D=len(params),
                                          num_levels=levels, grid_jump=gridjump)

    # puq will evaluate the output by picking a sample from each parameter in the
    # order specified in p.values
    i = 0
    f = open(self._salib_paramFile, 'w')
    for p in self.params:
        # map each column of _samples to a parameter, using the inverse cdf to
        # transform it into the appropriate distribution.
        p.values = p.pdf.ppf(self._samples[:, i])
        i += 1
        f.write('{}\t{}\t{}\n'.format(p.name, p.pdf.range[0], p.pdf.range[1]))
    f.close()

    # Save the samples as constructed by SALib for verification later.
    # --removed. Save the file directly instead, since it was verified
    # that puq evaluates the output in the order specified in p.values for each param.
    # np.savetxt(self._salib_realizationsFile_verify, self._samples)
    np.savetxt(self._salib_realizationsFile, self._samples)
def _save_hdf5(self):
    debug('')
    h5 = h5py.File(self.fname + '.hdf5')

    # write HDF5 header information, once only
    if 'version' not in h5.attrs:
        h5.attrs['MEMOSA_UQ'] = 'MEMOSA'
        h5.attrs['version'] = 201
        # h5.attrs['id'] = self.id
        h5.attrs['date'] = time.strftime("%b %d %H:%M %Z %Y", time.localtime())
        h5.attrs['hostname'] = gethostname()
        h5.attrs['username'] = getpass.getuser()
        h5.attrs['UQtype'] = self.psweep.__class__.__name__.lower()
        h5.attrs['description'] = self.description

    # overwrite previous
    if 'input' in h5:
        del h5['input']
    if 'private' in h5:
        del h5['private']

    hp = h5.require_group('private')
    hp['sweep'] = pickle(self)

    # in /input write the input params in json and regular arrays
    h = h5.require_group('input')

    # basic parameter table for non-python reading of the hdf5 file
    h['param_array'] = np.column_stack([p.values for p in self.psweep.params])
    h['param_array'].attrs['name'] = [str(p.name) for p in self.psweep.params]
    h['param_array'].attrs['description'] = [str(p.description) for p in self.psweep.params]

    # json-pickled parameters
    h = h.require_group('params')
    for p in self.psweep.params:
        h[p.name] = pickle(p)
        h[p.name].attrs['description'] = p.description
        h[p.name].attrs['label'] = p.label

    if hasattr(self.psweep, 'kde'):
        h5['input/kde'] = pickle(self.psweep.kde)

    # input script
    if hasattr(self, 'input_script'):
        h5['input/scriptname'] = str(self.input_script)
        try:
            h5['input/script'] = open(self.input_script).read()
        except:
            h5['input/script'] = "Source was unavailable."
    h5.close()
def _save_hdf5(self):
    debug('')
    h5 = h5py.File(self.fname + '.hdf5')

    # write HDF5 header information, once only
    if 'version' not in h5.attrs:
        h5.attrs['MEMOSA_UQ'] = b'MEMOSA'
        h5.attrs['version'] = 201
        # h5.attrs['id'] = self.id
        h5.attrs['date'] = time.strftime("%b %d %H:%M %Z %Y", time.localtime())
        h5.attrs['hostname'] = gethostname()
        h5.attrs['username'] = getpass.getuser()
        h5.attrs['UQtype'] = self.psweep.__class__.__name__.lower()
        h5.attrs['description'] = self.description

    # overwrite previous
    if 'input' in h5:
        del h5['input']
    if 'private' in h5:
        del h5['private']

    hp = h5.require_group('private')
    hp['sweep'] = pickle(self)

    # in /input write the input params in json and regular arrays
    h = h5.require_group('input')

    # basic parameter table for non-python reading of the hdf5 file
    h['param_array'] = np.column_stack([p.values for p in self.psweep.params])
    if py3:
        h['param_array'].attrs['name'] = [bytes(p.name, 'UTF-8') for p in self.psweep.params]
        h['param_array'].attrs['description'] = [bytes(p.description, 'UTF-8') for p in self.psweep.params]
    else:
        h['param_array'].attrs['name'] = [str(p.name) for p in self.psweep.params]
        h['param_array'].attrs['description'] = [str(p.description) for p in self.psweep.params]

    # json-pickled parameters
    h = h.require_group('params')
    for p in self.psweep.params:
        h[p.name] = pickle(p)
        h[p.name].attrs['description'] = p.description
        h[p.name].attrs['label'] = p.label

    if hasattr(self.psweep, 'kde'):
        h5['input/kde'] = pickle(self.psweep.kde)

    # input script
    if hasattr(self, 'input_script'):
        h5['input/scriptname'] = str(self.input_script)
        try:
            h5['input/script'] = open(self.input_script).read()
        except:
            h5['input/script'] = "Source was unavailable."
    h5.close()
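# --- Illustration (not part of the original source).  _save_hdf5() above writes a
# plain /input/param_array dataset specifically so that non-puq tools can read the
# parameter samples.  A minimal sketch of doing that with plain h5py; the filename
# 'sweep.hdf5' and the function name demo_read_param_array are hypothetical.
import h5py


def demo_read_param_array(fname='sweep.hdf5'):
    with h5py.File(fname, 'r') as h5:
        values = h5['input/param_array'][:]                 # N x num_params array
        names = list(h5['input/param_array'].attrs['name'])  # one name per column
    return names, values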
def _do_pdf(self, hf, data):
    mean = np.mean(data)
    dev = np.std(data)
    print "Mean = %s" % mean
    print "StdDev = %s" % dev
    if self.response:
        rsd = np.vstack(([p.values for p in self.params], data))
        try:
            rs = pickle(SampledFunc(*rsd, params=self.params))
            return [('response', rs), ('mean', mean), ('dev', dev)]
        except:
            pass
    return [('mean', mean), ('dev', dev)]
def _do_pdf(self, hf, data):
    if self.response:
        # The response surface was built using Uniform distributions.
        # We are interested in the mean and deviation of the data
        # that would have been produced using the real PDFs. For this,
        # we need to compute a weighted mean and deviation.
        weights = np.prod([p.pdf.pdf(p.values) for p in self.params], 0)
        tweight = np.sum(weights)

        mean = np.average(data, weights=weights)
        dev = np.sqrt(np.dot(weights, (data - mean)**2) / tweight)

        rsd = np.vstack(([p.values for p in self.params], data))
        rs = pickle(SampledFunc(*rsd, params=self.params))

        print "Mean = %s" % mean
        print "StdDev = %s" % dev
        return [('response', rs), ('mean', mean), ('dev', dev)]
    else:
        pdf = ExperimentalPDF(data, fit=0)
        mean = np.mean(data)
        dev = np.std(data)
        print "Mean = %s" % mean
        print "StdDev = %s" % dev
        return [('pdf', pickle(pdf)), ('samples', data), ('mean', mean), ('dev', dev)]
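# --- Illustration (not part of the original source).  The response branch of
# _do_pdf() above reweights uniformly-sampled outputs by the product of the
# parameters' PDF values, then takes a weighted mean and deviation.  A minimal
# self-contained sketch of that arithmetic; the toy numbers and the name
# demo_weighted_stats are made up.
import numpy as np


def demo_weighted_stats():
    data = np.array([1.0, 2.0, 3.0, 4.0])       # outputs sampled on a uniform grid
    weights = np.array([0.1, 0.4, 0.4, 0.1])    # product of pdf values at each sample
    tweight = np.sum(weights)
    mean = np.average(data, weights=weights)                          # 2.5 here
    dev = np.sqrt(np.dot(weights, (data - mean) ** 2) / tweight)      # weighted std
    return mean, dev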
def _do_rs(self, hf, data):
    rs = self._uhat(data)
    sens = self.sensitivity(data)
    return [('response', pickle(rs)), ('sensitivity', sens)]
def sensitivity(self, data):
    """
    Elementary Effects Screening
    see http://en.wikipedia.org/wiki/Elementary_effects_method
    """
    vprint(1, "\nSENSITIVITY:")
    ee = {}

    # strip out weight column and rescale to [0,1]
    grid = (self.grid[:, :-1] + 1) / 2
    vgrid = np.column_stack((grid, data))
    numcols = grid.shape[1]

    # Each parameter's value is in a column, so for each column, create a new
    # grid without that column, then look for duplicate rows.
    for coln in range(0, numcols):
        ee[coln] = []
        newgrid = grid[:, [x for x in range(0, numcols) if x != coln]]
        # (alternative) newgrid = np.delete(grid, coln, axis=1)
        rowlist = defaultdict(list)
        for n, row in enumerate(newgrid):
            rowlist[tuple(row)].append(n)
        for r in list(rowlist.keys()):
            if len(rowlist[r]) < 2:
                del rowlist[r]

        # For each list of duplicate rows, create an array with all the
        # parameter values and the output value.  Iterate through it to
        # compute the elementary effects (finite differences along the
        # varied parameter).
        for r in rowlist:
            rdata = None
            for rr in rowlist[r]:
                if rdata is None:
                    rdata = vgrid[rr]
                else:
                    rdata = np.vstack((rdata, vgrid[rr]))
            rdata = rdata[rdata[:, coln].argsort()]
            Y = None
            for d in rdata:
                if Y is not None:
                    ee[coln] = np.append(ee[coln], (d[-1] - Y) / (d[coln] - X))
                    # print (d[-1] - Y) / (d[coln] - X)
                Y = d[-1]
                X = d[coln]

    max_name_len = max(map(len, [p.name for p in self.params]))
    sens = {}
    for n, p in enumerate(self.params):
        std = np.std(ee[n])
        ustar = np.mean(np.abs(ee[n]))
        sens[p.name] = {'std': std, 'ustar': ustar}
        # p.sensitivity_ustar = ustar
        # p.sensitivity_dev = std

    sorted_list = sorted(list(sens.items()), key=lambda a: a[1]['ustar'], reverse=True)

    vprint(1, "Var%s u* dev" % (' ' * (max_name_len)))
    vprint(1, '-' * (28 + max_name_len))
    for item in sorted_list:
        pad = ' ' * (max_name_len - len(item[0]))
        vprint(1, "%s%s %.4e %.4e" % (pad, item[0], item[1]['ustar'], item[1]['std']))
    return pickle(sorted_list)
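# --- Illustration (not part of the original source).  An elementary effect, as
# accumulated in ee[coln] above, is just the finite difference between two runs
# that differ in a single parameter; u* is then the mean of |ee| and dev their
# standard deviation over all trajectories.  A minimal sketch with a made-up
# linear model; the name demo_elementary_effect is hypothetical.
def demo_elementary_effect():
    def model(x1, x2):
        return 3.0 * x1 + 10.0 * x2   # toy model in [0,1]^2 space

    delta = 0.25                      # grid jump in rescaled [0,1] coordinates
    y_base = model(0.25, 0.50)
    y_pert = model(0.25 + delta, 0.50)    # only x1 moves
    ee_x1 = (y_pert - y_base) / delta     # = 3.0, the sensitivity to x1
    return ee_x1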
def dump_hdf5(name, v, desc=''):
    np.set_printoptions(threshold=np.nan)
    line = pickle({'name': name, 'desc': desc, 'value': v})
    print 'HDF5:%s:5FDH' % line
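# --- Illustration (not part of the original source).  dump_hdf5() above wraps each
# json-pickled value in 'HDF5:...:5FDH' markers on stdout so a harvesting step can
# recover it from captured job output.  A minimal sketch of pulling the payloads
# back out of a log with a regex; the name extract_hdf5_payloads is hypothetical
# and the real puq harvester may work differently.
import re


def extract_hdf5_payloads(stdout_text):
    # non-greedy match so several markers in one stream are split correctly
    return re.findall(r'HDF5:(.*?):5FDH', stdout_text, re.DOTALL)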
def __init__(self, params, numtrajectories, levels, gridjump,
             response=False, iteration_cb=None):
    if response:
        print('Morris method does not support creating response surfaces')
        raise ValueError

    # compare the state of the seeds to the ones set in the control script
    a = pickle(random.getstate())
    b = pickle(np.random.get_state())
    f = open('random.getstate.2.txt', 'w')
    f.write(a)
    f.close()
    f = open('np.random.get_state.2.txt', 'w')
    f.write(b)
    f.close()

    # The output of a puq run is one elementary effect per parameter per trajectory.
    # Therefore, each parameter will have *numtrajectories* elementary effects.
    PSweep.__init__(self, iteration_cb)
    self.params = params
    num = int(numtrajectories)
    self.num = num * (len(params) + 1)  # total number of model runs for Morris
    self.response = response
    self._start_at = 0
    self.levels = levels
    self.gridjump = gridjump
    self._hf = None

    self._salib_paramFile = '==SALib_morris_params==.txt'
    self._salib_realizationsFile = '==SALib_morris_realizations==.txt'
    self._salib_realizationsFile_verify = '==0SALib_morris_realizations==.txt'
    self._salib_analysisFile = '==SALib_morris_outputs==.txt'

    # Generate Morris samples: an N(D+1) x D numpy array. Rows are realizations,
    # columns are params. Each column is independent in the range [0,1].
    # TODO: allow for correlation
    self._samples = SAs.morris_oat.sample(N=num, D=len(params),
                                          num_levels=levels, grid_jump=gridjump)

    # puq will evaluate the output by picking a sample from each parameter in the
    # order specified in p.values
    i = 0
    f = open(self._salib_paramFile, 'w')
    for p in self.params:
        # map each column of _samples to a parameter, using the inverse cdf to
        # transform it into the appropriate distribution.
        p.values = p.pdf.ppf(self._samples[:, i])
        i += 1
        f.write('{}\t{}\t{}\n'.format(p.name, p.pdf.range[0], p.pdf.range[1]))
    f.close()

    # Save the samples as constructed by SALib for verification later.
    # --removed. Save the file directly instead, since it was verified
    # that puq evaluates the output in the order specified in p.values for each param.
    # np.savetxt(self._salib_realizationsFile_verify, self._samples)
    np.savetxt(self._salib_realizationsFile, self._samples)
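# --- Illustration (not part of the original source).  The loop above pushes each
# uniform [0,1] Morris column through the parameter's inverse CDF (p.pdf.ppf) so the
# realizations follow that parameter's actual distribution.  A minimal sketch of the
# same transform using scipy.stats; the normal(10, 2) distribution is a made-up
# stand-in for p.pdf and demo_ppf_transform is a hypothetical name.
import numpy as np
from scipy.stats import norm


def demo_ppf_transform():
    column = np.array([0.05, 0.25, 0.50, 0.75, 0.95])   # one Morris column in [0,1]
    values = norm(loc=10.0, scale=2.0).ppf(column)      # mapped realizations
    return values   # roughly [6.71, 8.65, 10.0, 11.35, 13.29]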
def plot(sweep, h5, opt, params=[]):
    if opt.v:
        opt.v = [s.strip() for s in opt.v.split(',')]
    if not opt.l:
        opt.k = True
    method = string.lower(sweep.psweep.__class__.__name__)

    if opt.r:
        for vname in h5[method]:
            if not opt.v or vname in opt.v:
                print "Plotting Response Surface for %s" % vname
                desc = h5[method][vname].attrs['description']
                rsv = h5[method][vname]['response'].value
                rs = unpickle(rsv)
                p = rs.plot()
                if desc and desc != vname:
                    plt.title(desc)
                else:
                    plt.title("Response Function for %s" % vname)
                plot_customize(opt, p, 'response-%s' % vname)
    else:
        if opt.psamples:
            psamples = get_psamples_from_csv(sweep, h5, opt.samples)
        else:
            psamples = None
        for vname in h5[method]:
            if not opt.v or vname in opt.v:
                print "plotting PDF for %s" % vname
                desc = h5[method][vname].attrs['description']
                if 'samples' in h5[method][vname]:
                    # response surface already sampled. Just calculate pdf.
                    data = h5[method][vname]['samples'].value
                    if opt.k:
                        p = ExperimentalPDF(data, fit=True).plot()
                    if opt.l:
                        p = ExperimentalPDF(data, fit=False).plot()
                else:
                    rsv = h5[method][vname]['response'].value
                    rs = unpickle(rsv)
                    data = None
                    if opt.k:
                        pdf, data = rs.pdf(fit=True, params=params,
                                           psamples=psamples, return_samples=True)
                        p = pdf.plot()
                    if opt.l:
                        if data is not None:
                            p = ExperimentalPDF(data, fit=False).plot()
                        else:
                            pdf, data = rs.pdf(fit=False, params=params,
                                               psamples=psamples, return_samples=True)
                            p = pdf.plot()
                    h5[method][vname]['samples'] = data
                    if not 'pdf' in h5[method][vname]:
                        h5[method][vname]['pdf'] = pickle(pdf)
                plt.xlabel(vname)
                if desc and desc != vname:
                    plt.title("PDF for %s" % desc)
                else:
                    plt.title("PDF for %s" % vname)
                plot_customize(opt, p, 'pdf-%s' % vname)
def plot(sweep, h5, opt, params=[]):
    if opt.v:
        opt.v = [s.strip() for s in opt.v.split(',')]
    if not opt.l:
        opt.k = True
    method = string.lower(sweep.psweep.__class__.__name__)

    if opt.r:
        for vname in h5[method]:
            if not opt.v or vname in opt.v:
                print("Plotting Response Surface for %s" % vname)
                desc = h5[method][vname].attrs['description']
                rsv = h5[method][vname]['response'].value
                rs = unpickle(rsv)
                p = rs.plot()
                if desc and desc != vname:
                    plt.title(desc)
                else:
                    plt.title("Response Function for %s" % vname)
                plot_customize(opt, p, 'response-%s' % vname)
    else:
        if opt.psamples:
            psamples = get_psamples_from_csv(sweep, h5, opt.samples)
        else:
            psamples = None
        for vname in h5[method]:
            if not opt.v or vname in opt.v:
                print("plotting PDF for %s" % vname)
                desc = h5[method][vname].attrs['description']
                if 'samples' in h5[method][vname]:
                    # response surface already sampled. Just calculate pdf.
                    data = h5[method][vname]['samples'].value
                    if opt.k:
                        p = ExperimentalPDF(data, fit=True).plot()
                    if opt.l:
                        p = ExperimentalPDF(data, fit=False).plot()
                else:
                    rsv = h5[method][vname]['response'].value
                    rs = unpickle(rsv)
                    data = None
                    if opt.k:
                        pdf, data = rs.pdf(fit=True, params=params,
                                           psamples=psamples, return_samples=True)
                        p = pdf.plot()
                    if opt.l:
                        if data is not None:
                            p = ExperimentalPDF(data, fit=False).plot()
                        else:
                            pdf, data = rs.pdf(fit=False, params=params,
                                               psamples=psamples, return_samples=True)
                            p = pdf.plot()
                    h5[method][vname]['samples'] = data
                    if 'pdf' not in h5[method][vname]:
                        h5[method][vname]['pdf'] = pickle(pdf)
                plt.xlabel(vname)
                if desc and desc != vname:
                    plt.title("PDF for %s" % desc)
                else:
                    plt.title("PDF for %s" % vname)
                plot_customize(opt, p, 'pdf-%s' % vname)
def _do_pdf(self, hf, data):
    if self.response:
        print('Morris method does not support creating response surfaces')
        raise ValueError
    else:
        pdf = ExperimentalPDF(data, fit=0)
        mean = np.mean(data)
        dev = np.std(data)
        print "Mean = %s" % mean
        print "StdDev = %s" % dev

        #############
        # analyze results
        #############

        # get the inputs
        inputs = hf['/input/params']
        numparams = len(inputs)

        # N(D+1) x D
        # realizations = np.empty((np.size(data, 0), numparams))
        # f = open(self._salib_paramFile, 'w')
        # i = 0
        # for p in inputs:
        #     aParam = unpickle(hf['/input/params'][p].value)
        #     print(aParam.name)
        #     # we now have a parameter
        #     f.write('{}\t{}\t{}\n'.format(aParam.name, aParam.pdf.range[0], aParam.pdf.range[1]))
        #     # get the values
        #     realizations[:, i] = aParam.values
        #     i += 1
        # f.close()

        # check to make sure the order in which the parameters were initially sampled
        # by SALib was the order in which they were actually sampled by puq
        # np.savetxt(self._salib_realizationsFile, realizations)
        # if os.path.getsize(self._salib_realizationsFile_verify) == os.path.getsize(self._salib_realizationsFile):
        #     if not filecmp.cmp(self._salib_realizationsFile_verify, self._salib_realizationsFile, shallow=False):
        #         raise Exception('The order in which the parameter samples were constructed is different than the sampled order!')
        # else:
        #     raise Exception('The order in which the parameter samples were constructed is different than the sampled order!')

        # get the outputs
        outputs = hf['/output/data']
        numoutputs = len(outputs)

        # SALib expects each output variable in a single column
        np.savetxt(self._salib_analysisFile, data)

        # Note: the delimiters for all the files passed to the analyze function must be the same
        s = SAa.morris.analyze(self._salib_paramFile, self._salib_realizationsFile,
                               self._salib_analysisFile, column=0)

        # put things in the same format as the smolyak module
        sens = {}
        for key, val in s.iteritems():
            sens[key] = {'u': val[0], 'std': val[1], 'ustar': val[2], 'ustar_conf95': val[3]}
            # senstxt += '{}\t{}\t{}\t{}'.format(key, val[0], val[1], val[2], val[3])
        sorted_list = sorted(sens.items(), lambda x, y: cmp(y[1]['ustar'], x[1]['ustar']))

        # os.remove(salib_paramFile)
        return [('pdf', pickle(pdf)), ('samples', data), ('mean', mean),
                ('dev', dev), ('sensitivity', pickle(sorted_list))]
def _do_pdf(self, hf, data):
    if self.response:
        print('Morris method does not support creating response surfaces')
        raise ValueError
    else:
        pdf = ExperimentalPDF(data, fit=0)
        mean = np.mean(data)
        dev = np.std(data)
        print "Mean = %s" % mean
        print "StdDev = %s" % dev

        #############
        # analyze results
        #############

        # get the inputs
        inputs = hf['/input/params']
        numparams = len(inputs)

        # N(D+1) x D
        # realizations = np.empty((np.size(data, 0), numparams))
        # f = open(self._salib_paramFile, 'w')
        # i = 0
        # for p in inputs:
        #     aParam = unpickle(hf['/input/params'][p].value)
        #     print(aParam.name)
        #     # we now have a parameter
        #     f.write('{}\t{}\t{}\n'.format(aParam.name, aParam.pdf.range[0], aParam.pdf.range[1]))
        #     # get the values
        #     realizations[:, i] = aParam.values
        #     i += 1
        # f.close()

        # check to make sure the order in which the parameters were initially sampled
        # by SALib was the order in which they were actually sampled by puq
        # np.savetxt(self._salib_realizationsFile, realizations)
        # if os.path.getsize(self._salib_realizationsFile_verify) == os.path.getsize(self._salib_realizationsFile):
        #     if not filecmp.cmp(self._salib_realizationsFile_verify, self._salib_realizationsFile, shallow=False):
        #         raise Exception('The order in which the parameter samples were constructed is different than the sampled order!')
        # else:
        #     raise Exception('The order in which the parameter samples were constructed is different than the sampled order!')

        # get the outputs
        outputs = hf['/output/data']
        numoutputs = len(outputs)

        # SALib expects each output variable in a single column
        np.savetxt(self._salib_analysisFile, data)

        # Note: the delimiters for all the files passed to the analyze function must be the same
        s = SAa.morris.analyze(self._salib_paramFile, self._salib_realizationsFile,
                               self._salib_analysisFile, column=0)

        # put things in the same format as the smolyak module
        sens = {}
        for key, val in s.iteritems():
            sens[key] = {'u': val[0], 'std': val[1], 'ustar': val[2], 'ustar_conf95': val[3]}
            # senstxt += '{}\t{}\t{}\t{}'.format(key, val[0], val[1], val[2], val[3])
        sorted_list = sorted(sens.items(), lambda x, y: cmp(y[1]['ustar'], x[1]['ustar']))

        # os.remove(salib_paramFile)
        return [('pdf', pickle(pdf)), ('samples', data), ('mean', mean),
                ('dev', dev), ('sensitivity', pickle(sorted_list))]
def sensitivity(self, data):
    """
    Elementary Effects Screening
    see http://en.wikipedia.org/wiki/Elementary_effects_method
    """
    vprint(1, "\nSENSITIVITY:")
    ee = {}

    # strip out weight column and rescale to [0,1]
    grid = (self.grid[:, :-1] + 1) / 2
    vgrid = np.column_stack((grid, data))
    numcols = grid.shape[1]

    # Each parameter's value is in a column, so for each column, create a new
    # grid without that column, then look for duplicate rows.
    for coln in range(0, numcols):
        ee[coln] = []
        newgrid = grid[:, [x for x in range(0, numcols) if x != coln]]
        # (alternative) newgrid = np.delete(grid, coln, axis=1)
        rowlist = defaultdict(list)
        for n, row in enumerate(newgrid):
            rowlist[tuple(row)].append(n)
        for r in rowlist.keys():
            if len(rowlist[r]) < 2:
                del rowlist[r]

        # For each list of duplicate rows, create an array with all the
        # parameter values and the output value.  Iterate through it to
        # compute the elementary effects (finite differences along the
        # varied parameter).
        for r in rowlist:
            rdata = None
            for rr in rowlist[r]:
                if rdata is None:
                    rdata = vgrid[rr]
                else:
                    rdata = np.vstack((rdata, vgrid[rr]))
            rdata = rdata[rdata[:, coln].argsort()]
            Y = None
            for d in rdata:
                if Y is not None:
                    ee[coln] = np.append(ee[coln], (d[-1] - Y) / (d[coln] - X))
                    # print (d[-1] - Y) / (d[coln] - X)
                Y = d[-1]
                X = d[coln]

    max_name_len = max(map(len, [p.name for p in self.params]))
    sens = {}
    for n, p in enumerate(self.params):
        std = np.std(ee[n])
        ustar = np.mean(np.abs(ee[n]))
        sens[p.name] = {'std': std, 'ustar': ustar}
        # p.sensitivity_ustar = ustar
        # p.sensitivity_dev = std

    sorted_list = sorted(sens.items(), lambda x, y: cmp(y[1]['ustar'], x[1]['ustar']))

    vprint(1, "Var%s u* dev" % (' ' * (max_name_len)))
    vprint(1, '-' * (28 + max_name_len))
    for item in sorted_list:
        pad = ' ' * (max_name_len - len(item[0]))
        vprint(1, "%s%s %.4e %.4e" % (pad, item[0], item[1]['ustar'], item[1]['std']))
    return pickle(sorted_list)