def calc_chisq(datacuts, synobs):
    """
    Compare the I/F values in the datacut objects with the I/F values in
    the synthetic observations and return a chi-square score.

    Parameters
    ----------
    datacuts : list of Datacut
        Datacut instances (observation module), each an extracted portion
        of real data with its attributes (avg. I/F in a specific spectral
        channel, lat/lon, etc.).  The list is assembled "manually" while
        running the script (e.g. datacuts = [cut1, cut2, cut3]), which is
        why the harvesting/comparison lives here rather than as a method
        on Datacut -- a single cut knows nothing about the collection.
    synobs : Synthetic
        Synthetic instance (observation module) holding the parsed output
        of a single run of the fortran rad-tran adding/doubling code; one
        output file describes multiple synthetic observations, stored as
        a list attribute rather than a list of objects.

    Returns
    -------
    chisq : float
        sum((syn - real)**2 / real) over all observations.

    Raises
    ------
    ValueError
        If len(datacuts) does not match synobs.nangles.
    """
    # Determine number of observations for comparison (this also determines
    # the number of observations to extract from the synthetic observation).
    num_obs = len(datacuts)
    # Raise rather than assert so the check is not stripped under 'python -O'.
    if num_obs != synobs.nangles:
        raise ValueError("Number of observations in datacuts != "
                         "Number of observations in synthetic data.")

    real_obs_data = observation.concat(datacuts)
    syn_obs_data = synobs.concat(num_obs)

    # divide real observations by 10000
    # real_obs_data /= 10000.0

    # Named 'chisq' to avoid conflicts with scipy.stats.chisquare and perhaps
    # other modules that use 'chisquare'/'chisquared'.
    tmp = (syn_obs_data - real_obs_data)**2 / real_obs_data
    chisq = tmp.sum()

    # NOTE (12-Nov-2012): the observational uncertainties 'sigma' may be
    # missing from this chisq -- see chisqd.pro.  TODO: confirm.
    return chisq
# Write out outputfile_iter as the text file required by the fortran code;
# the format is a zero-padded 3-digit integer.  Use a context manager so the
# file is closed even if the write raises.
with open('filenum.dat', 'w') as f:
    f.write('{:0>3d}'.format(outputfile_iter))

# Evaluate model using fortran code to get synthetic observation
interface.exec_rtmod()

# Read result file into python
syn_obs = observation.Synthetic(filename = outputfile)

# below is testing ... remove when find_best_model.py is finished
initial_csq = compare.calc_chisq(datacuts, syn_obs)
# Parenthesized single-argument form works identically in Python 2 and 3.
print("Chi-square: {}".format(initial_csq))

# diagnostic:
real = observation.concat(datacuts)
syn = syn_obs.concat(3)

# remove temporary working model files
# os.system('rm -f workmodlb*')
# os.system('rm -f workmodl*')
# os.system('rm -f workrslt.*')
def adj_model_lstsq(data, synobs, atm_model):
    """
    Adjust the atmospheric model via least-squares fitting of the
    derivative matrix w/r/t chi-square minimization, given a set of real
    observational data, a set of synthetic observations, and an
    atmospheric model.

    Parameters
    ----------
    data : list of Datacut
        Real observational data cuts; data[0].er_list supplies the
        uncertainty vector (er_list is an attribute of each datacut).
    synobs : Synthetic
        Synthetic observations; synobs.nangles is the number of
        observation angles.
    atm_model : atmospheric model object
        Model with nlayers layers exposed as attributes l1..lN; the
        caller's object is never mutated (a deep copy is adjusted).

    Returns
    -------
    new_model : same type as atm_model
        Deep copy of atm_model with every flagged free parameter adjusted
        toward the least-squares solution.
    """
    logging.info("Adjusting atmospheric model.")

    # Work on a deep copy -- copy.deepcopy avoids referencing confusion
    # (mutating the caller's model through a shared reference).
    new_model = copy.deepcopy(atm_model)

    # Derivative of model results w/r/t small changes to each free param.
    deriv = calc_deriv(new_model, synobs)

    # Uncertainty vector: expand er_list by repeating each element once per
    # observation angle so it lines up with the derivative rows.
    sig = data[0].er_list
    sig = np.asarray([i for i in sig for _ in range(synobs.nangles)])
    # assert sig.shape == deriv[0,:].shape, \
    #     "Error in uncertainty vector / derivative vector shapes."

    # 'am' is the derivative matrix: how the synthetic observations change
    # w/r/t changes in the free parameters.  Taking the SVD of am shows
    # where the changes are most significant / correlated with one another.
    # 24 Nov 2012: for simplicity, division by the uncertainty is removed
    # for now in light of the modification to the deriv array.
    # am = deriv / sig
    am = deriv

    # 'b' is the difference vector between real and synthetic observations;
    # the broad objective: figure out what changes to the free parameters
    # would produce the difference between real and synthetic observations.
    real_obs_data = observation.concat(data)
    syn_obs_data = synobs.concat(synobs.nangles)
    # b = (syn_obs_data - real_obs_data)/sig
    b = syn_obs_data - real_obs_data

    # np.linalg.lstsq uses SVD internally; the underscores discard the
    # residuals, rank, and singular values.  The transpose is needed to
    # match dimensions (derivation in lab notebook).
    param_diff, _, _, _ = np.linalg.lstsq(am.T, b)

    # Apply the changes layer by layer; p tracks which element of
    # param_diff corresponds to the current flagged parameter.
    p = 0
    for i in range(new_model.nlayers):
        # Extract layer i+1 (layers are attributes named l1, l2, ...).
        l = getattr(new_model, 'l' + str(i + 1))
        # Index locations of the flagged (free) parameters in this layer:
        # a flag is marked by a string entry in flaglist.
        flaglocs = [a for a, flag in enumerate(l.flaglist)
                    if isinstance(flag, str)]
        # The negative sign on delta is crucial for driving the model
        # towards convergence.
        for j in flaglocs:
            old_value = getattr(l, l.param_names[j])
            delta = -param_diff[p]
            logging.info("Adjusting parameter {param} in level {level:d}. "
                         "Old Value: {old:5.3f} Delta: {delta:5.3f}".format(
                             param=l.param_names[j], level=i + 1,
                             old=old_value, delta=delta))
            # Safety valve to prevent negative parameter values.
            if (old_value + delta) < 0:
                logging.info("Delta produces negative values. "
                             "Dampening adjustment.")
                # Instead, implement a random +/- 10% adjustment.
                l.adj_param_epsilon(l.param_names[j],
                                    epsilon=copysign(0.1, uniform(-1.0, 1.0)))
            else:
                # Reuse delta (already -param_diff[p]) instead of
                # recomputing it, keeping the sign logic in one place.
                l.adj_param_delta(l.param_names[j], delta=delta)
            p += 1
        new_model.replace_layer(i + 1, l)

    return new_model