def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For each group of replicated spectra, the standard deviation of the peak intensities is calculated per spin and the resulting variances are averaged over all selected spins having data for every spectrum of the group.  This averaged variance is stored in cdp.var_I for each spectrum of the group.  If at least one spectrum is not replicated, the non-zero variances of the subset are averaged and the average is assigned to every spectrum of the subset.  Finally cdp.sigma_I is set to the square root of each cdp.var_I entry and is attached to every selected spin as peak_intensity_err.

    Spins without a peak_intensity attribute are deselected as a side effect.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.  If not supplied (or empty), all of cdp.spectrum_ids are used.
    @type subset:       list of str
    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has complete data for a group of replicated spectra, or if no non-zero variance is available for the averaging across spectra.
    """

    # Replicated spectra flags, indexed by spectrum ID.
    repl = replicated_flags()

    # Are all spectra replicated?
    all_repl = False not in list(repl.values())
    if all_repl:
        print("All spectra replicated: Yes.")
    else:
        print("All spectra replicated: No.")

    # Initialise.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset, defaulting to all spectra.
    use_all_spectra = not subset
    if use_all_spectra:
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for spectrum_id in subset:
        # Skip non-replicated spectra.
        if not repl[spectrum_id]:
            continue

        # Skip replicated spectra which already have been used.
        if spectrum_id in cdp.var_I and cdp.var_I[spectrum_id] != 0.0:
            continue

        # The replicated spectra (if several groups match, the last one is used).
        for group in cdp.replicates:
            if spectrum_id in group:
                spectra = group

        # Printout.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Sum the per-spin variances.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Skip spins missing the intensity of any spectrum of the group.
            if any(spectrum not in spin.peak_intensity for spectrum in spectra):
                continue

            # The peak intensities.
            values = [spin.peak_intensity[spectrum] for spectrum in spectra]

            # The standard deviation.
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for average).
            if spectrum_id not in cdp.var_I:
                cdp.var_I[spectrum_id] = 0.0
            cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] + sd**2
            count += 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance.
        cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] / float(count)

        # Set all spectra variances.
        for spectrum in spectra:
            cdp.var_I[spectrum] = cdp.var_I[spectrum_id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[spectrum_id]))

    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out (the subset message is only for a user supplied subset).
        if use_all_spectra:
            print("\nVariance averaging over all spectra.")
        else:
            print("\nVariance averaging over the spectra subset.")

        # Initialise.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points.
        for spectrum_id in cdp.var_I:
            # Only use IDs defined in the subset.
            if spectrum_id not in subset:
                continue

            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[spectrum_id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[spectrum_id]
            num_dups += 1

        # No variance catch (avoids an uninformative ZeroDivisionError).
        if not num_dups:
            raise RelaxError("No non-zero variances are present, unable to average the errors across the spectra.")

        # Average value.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for spectrum_id in subset:
            cdp.var_I[spectrum_id] = var_I

        # Print out.
        print("Standard deviation for all spins: " + repr(sqrt(var_I)))

    # Create the standard deviation data structure.
    for spectrum_id in cdp.var_I:
        cdp.sigma_I[spectrum_id] = sqrt(cdp.var_I[spectrum_id])

    # Set the spin specific errors (selected spins only).
    for spin in spin_loop():
        if spin.select:
            spin.peak_intensity_err = cdp.sigma_I
def monte_carlo_error_analysis():
    r"""Function for calculating errors from the Monte Carlo simulations.

    The standard deviation formula used to calculate the errors is the square root of the bias-corrected variance, given by the formula::

                       __________________________
                      /   1
            sd  =    /  ----- * sum({Xi - Xav}^2)
                   \/   n - 1

    where
        - n is the total number of simulations.
        - Xi is the parameter value for simulation i.
        - Xav is the mean parameter value for all simulations.

    For each model and parameter returned by the specific analysis API, the standard deviation over the simulations is calculated and stored via the API set_error() method.  Dictionary and list type parameters are handled per key or per element respectively, and parameters whose first simulation value is None are given an error of None.  The cdp.sim_state flag is set to False at the end.

    @raises RelaxError: If the Monte Carlo simulations have not been set up.
    """

    # Test if the current data pipe exists.
    check_pipe()

    # Test if simulations have been set up.
    if not hasattr(cdp, 'sim_state'):
        raise RelaxError("Monte Carlo simulations have not been set up.")

    # The specific analysis API object.
    api = return_api()

    # Loop over the models.
    for model_info in api.model_loop():
        # Get the selected simulation array.
        select_sim = api.sim_return_selected(model_info=model_info)

        # Loop over the parameters.
        index = 0
        while True:
            # Get the array of simulation parameters for the index.
            param_array = api.sim_return_param(index, model_info=model_info)

            # Break (no more parameters).  An identity test is used as '== None' is ambiguous for array types.
            if param_array is None:
                break

            # The value from the first simulation determines the parameter type.
            first = param_array[0]

            # Handle dictionary type parameters.
            if isinstance(first, dict):
                # The standard deviation structure is a dictionary with one SD per key.
                sd = {}
                for key in first:
                    data = [sim_value[key] for sim_value in param_array]
                    sd[key] = statistics.std(values=data, skip=select_sim)

            # Handle list type parameters.
            elif isinstance(first, list):
                # The standard deviation structure is a list with one SD per element.
                sd = []
                for j in range(len(first)):
                    data = [sim_value[j] for sim_value in param_array]
                    sd.append(statistics.std(values=data, skip=select_sim))

            # SD of simulation parameters with values (ie not None).
            elif first is not None:
                sd = statistics.std(values=param_array, skip=select_sim)

            # Simulation parameters with the value None.
            else:
                sd = None

            # Set the parameter error.
            api.set_error(index, sd, model_info=model_info)

            # Increment the parameter index.
            index += 1

    # Turn off the Monte Carlo simulation state, as the MC analysis is now finished.
    cdp.sim_state = False
def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For each group of replicated spectra, the standard deviation of the peak intensities is calculated per spin and the resulting variances are averaged over all selected spins having data for every spectrum of the group.  This averaged variance is stored in cdp.var_I for each spectrum of the group.  If at least one spectrum is not replicated, the non-zero variances of the subset are averaged and the average is assigned to every spectrum of the subset.  Finally cdp.sigma_I is set to the square root of each cdp.var_I entry and is attached to every selected spin as peak_intensity_err.

    Spins without a peak_intensity attribute are deselected as a side effect.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.  If not supplied (or empty), all of cdp.spectrum_ids are used.
    @type subset:       list of str
    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has complete data for a group of replicated spectra, or if no non-zero variance is available for the averaging across spectra.
    """

    # Replicated spectra flags, indexed by spectrum ID.
    repl = replicated_flags()

    # Are all spectra replicated?
    all_repl = False not in list(repl.values())
    if all_repl:
        print("All spectra replicated: Yes.")
    else:
        print("All spectra replicated: No.")

    # Initialise.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset, defaulting to all spectra.
    use_all_spectra = not subset
    if use_all_spectra:
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for spectrum_id in subset:
        # Skip non-replicated spectra.
        if not repl[spectrum_id]:
            continue

        # Skip replicated spectra which already have been used.
        if spectrum_id in cdp.var_I and cdp.var_I[spectrum_id] != 0.0:
            continue

        # The replicated spectra (if several groups match, the last one is used).
        for group in cdp.replicates:
            if spectrum_id in group:
                spectra = group

        # Printout.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Sum the per-spin variances.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Skip spins missing the intensity of any spectrum of the group.
            if any(spectrum not in spin.peak_intensity for spectrum in spectra):
                continue

            # The peak intensities.
            values = [spin.peak_intensity[spectrum] for spectrum in spectra]

            # The standard deviation.
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for average).
            if spectrum_id not in cdp.var_I:
                cdp.var_I[spectrum_id] = 0.0
            cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] + sd**2
            count += 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance.
        cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] / float(count)

        # Set all spectra variances.
        for spectrum in spectra:
            cdp.var_I[spectrum] = cdp.var_I[spectrum_id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[spectrum_id]))

    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out (the subset message is only for a user supplied subset).
        if use_all_spectra:
            print("\nVariance averaging over all spectra.")
        else:
            print("\nVariance averaging over the spectra subset.")

        # Initialise.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points.
        for spectrum_id in cdp.var_I:
            # Only use IDs defined in the subset.
            if spectrum_id not in subset:
                continue

            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[spectrum_id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[spectrum_id]
            num_dups += 1

        # No variance catch (avoids an uninformative ZeroDivisionError).
        if not num_dups:
            raise RelaxError("No non-zero variances are present, unable to average the errors across the spectra.")

        # Average value.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for spectrum_id in subset:
            cdp.var_I[spectrum_id] = var_I

        # Print out.
        print("Standard deviation for all spins: " + repr(sqrt(var_I)))

    # Create the standard deviation data structure.
    for spectrum_id in cdp.var_I:
        cdp.sigma_I[spectrum_id] = sqrt(cdp.var_I[spectrum_id])

    # Set the spin specific errors (selected spins only).
    for spin in spin_loop():
        if spin.select:
            spin.peak_intensity_err = cdp.sigma_I
# Calculate R2eff and store it. minimise.calculate() r2eff_indiv[j, i] = spin.r2eff['800.0_%.1f' % spin_lock[j]] # Randomise I0 once. int['ref'] = gauss(i0, ERR) # Calculate all R2eff and store them. minimise.calculate() for j in range(len(spin_lock)): r2eff_group[j, i] = spin.r2eff['800.0_%.1f' % spin_lock[j]] # The errors. for j in range(len(spin_lock)): sigma_r2eff_indiv[j] = std(r2eff_indiv[j]) sigma_r2eff_group[j] = std(r2eff_group[j]) # Plot the data. file = open('error_plot.agr', 'w') # Header. file.write("@with g0\n") file.write("@ s0 legend \"Full error formula\"\n") file.write("@ s1 legend \"Reduced error formula\"\n") file.write("@ s2 legend \"Bootstrap individual dispersion points\"\n") file.write("@ s3 legend \"Bootstrap group\"\n") # The full error formula. file.write("@target G0.S0\n@type xy\n") for i in range(len(spin_lock)):