class Stereochem_analysis:
    """Class for performing the relative stereochemistry analysis.

    The analysis is split into stages which are executed one at a time via the run() method:

        1.  Random sampling of the snapshots to generate the ensembles (sample()).
        2.  NOE violation analysis (noe_viol()).
        3.  Superimposition of the ensembles with Molmol (superimpose()).
        4.  RDC Q-factor analysis (rdc_analysis()).
        5.  Grace plot creation (grace_plots()).
        6.  Final combined Q-factor ordering (combined_q()).
    """

    def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
        """Set up for the stereochemistry analysis.

        @keyword stage:             Stage of analysis (1 to 6, see the class docstring and run()).
        @type stage:                int
        @keyword results_dir:       The optional directory to place all results files into.  If not given, the current working directory is used.
        @type results_dir:          None or str
        @keyword num_ens:           Number of ensembles.
        @type num_ens:              int
        @keyword num_models:        Ensemble size.
        @type num_models:           int
        @keyword configs:           All the configurations.
        @type configs:              list of str
        @keyword snapshot_dir:      Snapshot directories (corresponding to the configurations).  A single string is used for all configurations.
        @type snapshot_dir:         list of str or str
        @keyword snapshot_min:      The number of the first snapshots (corresponding to the configurations).
        @type snapshot_min:         list of int
        @keyword snapshot_max:      The number of the last snapshots (corresponding to the configurations).
        @type snapshot_max:         list of int
        @keyword pseudo:            The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
        @type pseudo:               list of list of str and list of str
        @keyword noe_file:          The name of the NOE restraint file.
        @type noe_file:             str
        @keyword noe_norm:          The NOE normalisation factor (equal to the sum of all NOEs squared).
        @type noe_norm:             float
        @keyword rdc_name:          The label for this RDC data set.
        @type rdc_name:             str
        @keyword rdc_file:          The name of the RDC file.
        @type rdc_file:             str
        @keyword rdc_spin_id1_col:  The spin ID column of the first spin in the RDC file.
        @type rdc_spin_id1_col:     None or int
        @keyword rdc_spin_id2_col:  The spin ID column of the second spin in the RDC file.
        @type rdc_spin_id2_col:     None or int
        @keyword rdc_data_col:      The data column of the RDC file.
        @type rdc_data_col:         int
        @keyword rdc_error_col:     The error column of the RDC file.
        @type rdc_error_col:        int
        @keyword bond_length:       The bond length value in meters.  This overrides the bond_length_file argument.
        @type bond_length:          float or None
        @keyword bond_length_file:  The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
        @type bond_length_file:     str or None
        @keyword log:               Log file output flag (only for certain stages).
        @type log:                  bool
        @keyword bucket_num:        Number of buckets for the distribution plots.
        @type bucket_num:           int
        @keyword lower_lim_noe:     Lower limit of the NOE violation distribution plot.
        @type lower_lim_noe:        float
        @keyword upper_lim_noe:     Upper limit of the NOE violation distribution plot.
        @type upper_lim_noe:        float
        @keyword lower_lim_rdc:     Lower limit of the RDC Q-factor distribution plot.
        @type lower_lim_rdc:        float
        @keyword upper_lim_rdc:     Upper limit of the RDC Q-factor distribution plot.
        @type upper_lim_rdc:        float
        """

        # Execution lock.
        status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')

        # Safely perform the set up, so that the lock is always released.
        registered = False
        try:
            # Set up the analysis status object.
            status.init_auto_analysis('stereochem', type='stereochem')
            registered = True
            status.current_analysis = 'auto stereochem analysis'

            # Store all the args.
            self.stage = stage
            self.results_dir = results_dir
            self.num_ens = num_ens
            self.num_models = num_models
            self.configs = configs
            self.snapshot_dir = snapshot_dir
            self.snapshot_min = snapshot_min
            self.snapshot_max = snapshot_max
            self.pseudo = pseudo
            self.noe_file = noe_file
            self.noe_norm = noe_norm
            self.rdc_name = rdc_name
            self.rdc_file = rdc_file
            self.rdc_spin_id1_col = rdc_spin_id1_col
            self.rdc_spin_id2_col = rdc_spin_id2_col
            self.rdc_data_col = rdc_data_col
            self.rdc_error_col = rdc_error_col
            self.bond_length = bond_length
            self.bond_length_file = bond_length_file
            self.log = log
            self.bucket_num = bucket_num
            self.lower_lim_noe = lower_lim_noe
            self.upper_lim_noe = upper_lim_noe
            self.lower_lim_rdc = lower_lim_rdc
            self.upper_lim_rdc = upper_lim_rdc

            # No STDOUT log file is open yet (see _redirect_stdout()).
            self._stdout_log = None

            # Load the interpreter.
            self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
            self.interpreter.populate_self()
            self.interpreter.on(verbose=False)

            # Create the results directory.
            if self.results_dir:
                mkdir_nofail(self.results_dir)

            # Or use the current working directory.
            else:
                self.results_dir = getcwd()

            # Create a directory for log files.
            if self.log:
                mkdir_nofail(self.results_dir + sep + "logs")

        # Finish and unlock execution, even if the set up failed.
        finally:
            if registered:
                status.auto_analysis['stereochem'].fin = True
            status.current_analysis = None
            status.exec_lock.release()


    def _format_ens_count(self, num_ens):
        """Format the ensemble count with comma thousands separators.

        For example 10000 is converted to '10,000' and 1050 to '1,050'.

        @param num_ens: The number of ensembles.
        @type num_ens:  int
        @return:        The formatted number.
        @rtype:         str
        """

        # Peel off groups of three digits from the right, zero padding all but the leading group.
        num = int(num_ens)
        groups = []
        while num >= 1000:
            groups.insert(0, "%03i" % (num % 1000))
            num = num // 1000
        groups.insert(0, "%i" % num)

        # Join the groups.
        return ','.join(groups)


    def _read_values(self, path):
        """Read the second column of a results file, skipping the header line.

        @param path:    The full path of the results file.
        @type path:     str
        @return:        The values of the second column, in file order.
        @rtype:         list of float
        """

        # Read all the lines, making sure the file is closed.
        handle = open(path)
        try:
            lines = handle.readlines()
        finally:
            handle.close()

        # Convert the data (the first line is the header).
        return [float(line.split()[1]) for line in lines[1:]]


    def _redirect_stdout(self, file_name):
        """Redirect STDOUT to a file in the logs directory.

        The file object is stored so that run() can close it once the stage has finished.

        @param file_name:   The base name of the log file.
        @type file_name:    str
        """

        self._stdout_log = open(self.results_dir + sep + "logs" + sep + file_name, 'w')
        sys.stdout = self._stdout_log


    def _write_curve_and_dist(self, prefix, curve_name, dist_name, title, label, lower, upper, subtitle, colours):
        """Create the sorted curve and distribution Grace plots for one type of result.

        @param prefix:      The start of the results file names ('NOE_viol_' or 'Q_factors_').
        @type prefix:       str
        @param curve_name:  The name of the Grace file for the sorted curve.
        @type curve_name:   str
        @param dist_name:   The name of the Grace file for the distribution.
        @type dist_name:    str
        @param title:       The graph title.
        @type title:        str
        @param label:       The axis label for the plotted value.
        @type label:        str
        @param lower:       The lower limit of the distribution.
        @type lower:        float
        @param upper:       The upper limit of the distribution.
        @type upper:        float
        @param subtitle:    The graph subtitle.
        @type subtitle:     str
        @param colours:     The Grace colour of each configuration.
        @type colours:      list of int
        """

        # The number of configs.
        n = len(self.configs)

        # Loop over the configurations, building the graph sets.
        data = []
        dist = []
        for config in self.configs:
            # The sorted values.
            values = self._read_values(self.results_dir+sep+prefix + config + "_sorted")

            # The curve, numbering the sorted ensembles from 1.
            data.append([[j+1, value] for j, value in enumerate(values)])

            # Calculate the distribution.
            dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=upper, lower=lower))

        # Open the output files.
        grace_curve = open(self.results_dir+sep+curve_name, 'w')
        try:
            grace_dist = open(self.results_dir+sep+dist_name, 'w')
            try:
                # Headers.
                write_xy_header(file=grace_curve, title=title, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', label]], legend_pos=[[0.3, 0.8]])
                write_xy_header(file=grace_dist, title=title, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[[label, 'Frequency']], legend_pos=[[1.1, 0.8]])

                # Write the data.
                write_xy_data([data], file=grace_curve, graph_type='xy')
                write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            finally:
                grace_dist.close()
        finally:
            grace_curve.close()


    def run(self):
        """Execute the given stage of the analysis.

        STDOUT is restored, and any log file it was redirected to is closed, even if the stage fails.

        @raises RelaxError: If the stage number is unknown.
        """

        # Store the original STDOUT.
        self.stdout_orig = sys.stdout

        try:
            # Sampling of snapshots.
            if self.stage == 1:
                self.sample()

            # NOE violation analysis.
            elif self.stage == 2:
                self.noe_viol()

            # Ensemble superimposition.
            elif self.stage == 3:
                self.superimpose()

            # RDC Q-factor analysis.
            elif self.stage == 4:
                self.rdc_analysis()

            # Grace plot creation.
            elif self.stage == 5:
                self.grace_plots()

            # Final combined Q ordering.
            elif self.stage == 6:
                self.combined_q()

            # Unknown stage.
            else:
                raise RelaxError("The stage number %s is unknown." % self.stage)

        finally:
            # Restore STDOUT.
            sys.stdout = self.stdout_orig

            # Close the log file, if a stage opened one.
            log_file = getattr(self, '_stdout_log', None)
            if log_file is not None:
                log_file.close()
                self._stdout_log = None


    def combined_q(self):
        """Calculate the combined Q-factor.

        The combined Q is defined as::

            Q_total^2 = Q_NOE^2 + Q_RDC^2,

        and the NOE Q-factor as::

            Q^2 = U / sum(NOE_i^2),

        where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.

        For each configuration the files 'Q_total_<config>' (in ensemble order) and 'Q_total_<config>_sorted' (ordered by the combined Q) are written to the results directory.

        @raises RelaxError: If the sorted NOE or RDC results file of the first configuration is missing.
        """

        # Checks (the full path is built first, as '%' binds tighter than '+').
        noe_check = self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"
        if not access(noe_check, F_OK):
            raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % noe_check)
        rdc_check = self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"
        if not access(rdc_check, F_OK):
            raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % rdc_check)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("Creating the combined Q-factor file for configuration '%s'." % config)

            # Open the NOE results file and read the data.
            file = open(self.results_dir+sep+"NOE_viol_" + config)
            try:
                noe_lines = file.readlines()
            finally:
                file.close()

            # Open the RDC results file and read the data.
            file = open(self.results_dir+sep+"Q_factors_" + config)
            try:
                rdc_lines = file.readlines()
            finally:
                file.close()

            # The combined Q-factor files.
            out = open(self.results_dir+sep+"Q_total_%s" % config, 'w')
            try:
                out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % config, 'w')
                try:
                    # Loop over the data (skipping the header line).
                    data = []
                    for j in range(1, len(noe_lines)):
                        # Split the lines.
                        noe_cols = noe_lines[j].split()
                        ens = int(noe_cols[0])
                        noe_viol = float(noe_cols[1])
                        q_rdc = float(rdc_lines[j].split()[1])

                        # The NOE Q-factor.
                        q_noe = sqrt(noe_viol/self.noe_norm)

                        # Combined Q.
                        q = sqrt(q_noe**2 + q_rdc**2)

                        # Write out the unsorted list.
                        out.write("%-20i%20.15f\n" % (ens, q))

                        # Store the values.
                        data.append([q, ens])

                    # Sort the combined Q.
                    data.sort()

                    # Write the data.
                    for q, ens in data:
                        out_sorted.write("%-20i%20.15f\n" % (ens, q))

                # Close the files.
                finally:
                    out_sorted.close()
            finally:
                out.close()


    def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
        """Create the distribution data structure.

        The range from lower to upper is split into inc equally sized bins.  Values outside of this range are reported and not counted in any bin, but they are still included in the total used to normalise the frequencies.

        @param values:  The values to bin.
        @type values:   list of float
        @keyword lower: The lower limit of the distribution.
        @type lower:    float
        @keyword upper: The upper limit of the distribution.
        @type upper:    float
        @keyword inc:   The number of bins.  If None, the bucket_num value of the analysis is used.
        @type inc:      int or None
        @return:        The distribution as a list of [bin start, frequency] pairs.
        @rtype:         list of list of float
        """

        # Default to the number of buckets of the analysis.
        if inc is None:
            inc = self.bucket_num

        # The bin width.
        bin_width = (upper - lower)/float(inc)

        # Init the dist object.
        dist = []
        for i in range(inc):
            dist.append([bin_width*i+lower, 0])

        # Loop over the values.
        for val in values:
            # Below the lower limit (int() truncates towards zero, so this must be caught before the bin is calculated).
            if val < lower:
                print("Outside of the limits: '%s'" % val)
                continue

            # The bin.
            index = int((val - lower)/bin_width)

            # Above the upper limit.
            if index >= inc:
                print("Outside of the limits: '%s'" % val)
                continue

            # Increment the count.
            dist[index][1] = dist[index][1] + 1

        # Convert the counts to frequencies (all zero if there are no values).
        total_pr = 0.0
        num_values = float(len(values))
        for i in range(inc):
            if num_values:
                dist[i][1] = dist[i][1] / num_values
            else:
                dist[i][1] = 0.0
            total_pr = total_pr + dist[i][1]

        print("Total Pr: %s" % total_pr)

        # Return the dist.
        return dist


    def grace_plots(self):
        """Generate grace plots of the results.

        The NOE violation and RDC Q-factor plots are each created if the corresponding sorted results file of the first configuration exists, and the NOE-RDC correlation plots if both exist.
        """

        # The number of configs.
        n = len(self.configs)

        # The colours for the different configs (blue and red, then black for the rest).
        defaults = [4, 2]
        colours = (defaults + [0]*n)[:n]

        # Subtitle for all graphs.
        subtitle = '%s ensembles of %s' % (self._format_ens_count(self.num_ens), self.num_models)

        # Which results are present?
        have_noe = access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK)
        have_rdc = access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK)

        # NOE violations.
        if have_noe:
            # Print out.
            print("Generating NOE violation Grace plots.")

            # Create the plots.
            self._write_curve_and_dist(prefix="NOE_viol_", curve_name="NOE_viol_curve.agr", dist_name="NOE_viol_dist.agr", title='NOE violation comparison', label='NOE violation (Angstrom\\S2\\N)', lower=self.lower_lim_noe, upper=self.upper_lim_noe, subtitle=subtitle, colours=colours)

        # RDC Q-factors.
        if have_rdc:
            # Print out.
            print("Generating RDC Q-factor Grace plots.")

            # Create the plots.
            self._write_curve_and_dist(prefix="Q_factors_", curve_name="RDC_%s_curve.agr" % self.rdc_name, dist_name="RDC_%s_dist.agr" % self.rdc_name, title='%s RDC Q-factor comparison' % self.rdc_name, label='%s RDC Q-factor (pales format)' % self.rdc_name, lower=self.lower_lim_rdc, upper=self.upper_lim_rdc, subtitle=subtitle, colours=colours)

        # NOE-RDC correlation plots.
        if have_noe and have_rdc:
            # Print out.
            print("Generating NOE-RDC correlation Grace plots.")

            # Grace data.
            data = []
            data_scaled = []
            for config in self.configs:
                # Read the unsorted NOE and RDC results, so that the ensembles match up.
                noe_viols = self._read_values(self.results_dir+sep+"NOE_viol_" + config)
                q_factors = self._read_values(self.results_dir+sep+"Q_factors_" + config)

                # Add a new graph set.
                pairs = []
                pairs_scaled = []
                data.append(pairs)
                data_scaled.append(pairs_scaled)

                # Loop over the data, adding the xy pairs.
                for j in range(len(noe_viols)):
                    pairs.append([noe_viols[j], q_factors[j]])
                    pairs_scaled.append([sqrt(noe_viols[j]/self.noe_norm), q_factors[j]])

            # Open the Grace output files.
            grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
            try:
                grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
                try:
                    # Write the data.
                    write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
                    write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
                    write_xy_data([data], file=grace_file, graph_type='xy')
                    write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')

                # Close the files.
                finally:
                    grace_file_scaled.close()
            finally:
                grace_file.close()


    def noe_viol(self):
        """NOE violation calculations.

        For each configuration and ensemble, the NOE violations are summed and written to the files 'NOE_viol_<config>' (in ensemble order) and 'NOE_viol_<config>_sorted' (ordered by violation).  The relax results for each ensemble are saved in the 'NOE_results' directory.
        """

        # Redirect STDOUT to a log file.
        if self.log:
            self._redirect_stdout("NOE_viol.log")

        # Create a directory for the save files.
        save_dir = self.results_dir + sep + "NOE_results"
        mkdir_nofail(dir=save_dir)

        # The model numbers of each ensemble.
        model_nums = list(range(1, self.num_models+1))

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
            try:
                out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
                try:
                    # The headers.
                    out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
                    out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))

                    # Create the data pipe.
                    self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")

                    # Read the first structure.
                    self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=model_nums)

                    # Load all protons as the sequence.
                    self.interpreter.structure.load_spins("@H*", ave_pos=False)

                    # Create the pseudo-atoms (if any).
                    for pseudo_atom in (self.pseudo or []):
                        self.interpreter.spin.create_pseudo(spin_name=pseudo_atom[0], members=pseudo_atom[1], averaging="linear")
                    self.interpreter.sequence.display()

                    # Read the NOE list.
                    self.interpreter.noe.read_restraints(file=self.noe_file)

                    # Set up the N-state model.
                    self.interpreter.n_state_model.select_model(model="fixed")

                    # Print out.
                    print("\n"*2 + "# Set up complete #" + "\n"*10)

                    # Loop over each ensemble.
                    noe_viol = []
                    for ens in range(self.num_ens):
                        # Print out the ensemble to both the log and screen.
                        if self.log:
                            sys.stdout.write(config + repr(ens) + "\n")
                        sys.stderr.write(config + repr(ens) + "\n")

                        # Delete the old structures.
                        self.interpreter.structure.delete()

                        # Read the ensemble.
                        self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=model_nums)

                        # Get the atomic positions.
                        self.interpreter.structure.get_pos(ave_pos=False)

                        # Calculate the average NOE potential.
                        self.interpreter.calc()

                        # Sum the violations.
                        cdp.sum_viol = 0.0
                        for i in range(len(cdp.ave_dist)):
                            if cdp.quad_pot[i][2]:
                                cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]

                        # Write out the NOE violation.
                        noe_viol.append([cdp.sum_viol, ens])
                        out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))

                        # Save the state.
                        self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

                    # Sort the NOE violations.
                    noe_viol.sort()

                    # Write the data.
                    for viol, ens in noe_viol:
                        out_sorted.write("%-20i%20.15f\n" % (ens, viol))

                # Close the files.
                finally:
                    out_sorted.close()
            finally:
                out.close()


    def rdc_analysis(self):
        """Perform the RDC part of the analysis.

        For each configuration and superimposed ensemble, the alignment tensor is minimised and the Q-factors written to the files 'Q_factors_<config>' (in ensemble order) and 'Q_factors_<config>_sorted' (ordered by Q-factor).  The relax results for each ensemble are saved in the 'RDC_<rdc_name>_results' directory.
        """

        # Redirect STDOUT to a log file.
        if self.log:
            self._redirect_stdout("RDC_%s_analysis.log" % self.rdc_name)

        # The fixed bond length flag.
        fixed_length = self.bond_length is not None

        # The dipolar constant.
        d = 0.0
        if fixed_length:
            d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)

        # Create a directory for the save files.
        save_dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
        mkdir_nofail(dir=save_dir)

        # The model numbers of each ensemble.
        model_nums = list(range(1, self.num_models+1))

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
            try:
                out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
                try:
                    # The headers.
                    out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
                    out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))

                    # Create the data pipe.
                    self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")

                    # Read the first structure.
                    self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=model_nums)

                    # Load all spins as the sequence.
                    self.interpreter.structure.load_spins(ave_pos=False)

                    # Create the pseudo-atoms (if any).
                    for pseudo_atom in (self.pseudo or []):
                        self.interpreter.spin.create_pseudo(spin_name=pseudo_atom[0], members=pseudo_atom[1], averaging="linear")
                    self.interpreter.sequence.display()

                    # Read the RDC data.
                    self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)

                    # Define the magnetic dipole-dipole relaxation interaction.
                    if fixed_length:
                        self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                        self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
                    else:
                        self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)

                    # Set the nuclear isotope.
                    self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
                    self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
                    self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')

                    # Set up the model.
                    self.interpreter.n_state_model.select_model(model="fixed")

                    # Print out.
                    print("\n"*2 + "# Set up complete #" + "\n"*10)

                    # Loop over each ensemble.
                    q_factors = []
                    for ens in range(self.num_ens):
                        # Print out the ensemble to both the log and screen.
                        if self.log:
                            sys.stdout.write(config + repr(ens) + "\n")
                        sys.stderr.write(config + repr(ens) + "\n")

                        # Delete the old structures.
                        self.interpreter.structure.delete()

                        # Read the ensemble.
                        self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=model_nums)

                        # Get the positional information, then load the CH vectors.
                        # NOTE(review): unlike the set up above, the '@Q*' distances are not reset here - confirm this is intended.
                        self.interpreter.structure.get_pos(ave_pos=False)
                        if fixed_length:
                            self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                        else:
                            self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
                        self.interpreter.interatom.unit_vectors(ave=False)

                        # Minimisation.
                        self.interpreter.minimise("simplex", constraints=False)

                        # Store and write out the Q-factors.
                        q_factors.append([cdp.q_rdc, ens])
                        out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))

                        # Calculate the alignment tensor in Hz, and store it for reference.
                        cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
                        cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D

                        # Save the state.
                        self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

                    # Sort the RDC Q-factors.
                    q_factors.sort()

                    # Write the data.
                    for q_rdc, ens in q_factors:
                        out_sorted.write("%-20i%20.15f\n" % (ens, q_rdc))

                # Close the files.
                finally:
                    out_sorted.close()
            finally:
                out.close()


    def sample(self):
        """Generate the ensembles by random sampling of the snapshots.

        For each configuration, num_ens ensemble files are written into the 'ensembles' directory.  Each is the concatenation of num_models snapshot PDB files whose numbers are randomly drawn between the snapshot_min and snapshot_max values of that configuration, preceded by a REM line listing those numbers.
        """

        # Create the directory for the ensembles, if needed.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles")

        # Loop over the configurations.
        for conf_index in range(len(self.configs)):
            # Alias the configuration.
            config = self.configs[conf_index]

            # The snapshot directory (a single string applies to all configurations, rather than being indexed per character).
            if isinstance(self.snapshot_dir, str):
                snapshot_dir = self.snapshot_dir
            else:
                snapshot_dir = self.snapshot_dir[conf_index]

            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # Random sampling.
                rand = []
                for j in range(self.num_models):
                    rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))

                # Print out.
                print("Generating ensemble %s%s from structures %s." % (config, ens, rand))

                # The file name.
                file_name = "ensembles" + sep + config + repr(ens) + ".pdb"

                # Open the output file.
                out = open(self.results_dir+sep+file_name, 'w')
                try:
                    # Header.
                    out.write("REM Structures: " + repr(rand) + "\n")

                    # Concatenate the files.
                    for num in rand:
                        # The random file.
                        rand_name = snapshot_dir + sep + config + repr(num) + ".pdb"

                        # Append the file, closing the snapshot immediately.
                        snapshot = open(rand_name)
                        try:
                            out.write(snapshot.read())
                        finally:
                            snapshot.close()

                # Close the file.
                finally:
                    out.close()


    def superimpose(self):
        """Superimpose the ensembles using fit to first in Molmol.

        Each 'ensembles/<config><ens>.pdb' file in the results directory is superimposed and written to 'ensembles_superimposed/<config><ens>.pdb'.  Superimposed files which already exist are skipped, so that a failed run can be restarted.
        """

        # Create the output directory inside the results directory, where all the files are read from and written to.
        out_dir = self.results_dir + sep + "ensembles_superimposed"
        mkdir_nofail(out_dir)

        # Logging turned on.
        log = None
        if self.log:
            log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
            self._redirect_stdout("superimpose.log")

        try:
            # Loop over the configurations.
            for config in self.configs:
                # Loop over each ensemble.
                for ens in range(self.num_ens):
                    # The file names.
                    file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
                    file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"

                    # Print out.
                    sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
                    if log is not None:
                        log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))

                    # Failure handling (if a failure occurred and this is rerun, skip all existing files).
                    if access(self.results_dir+sep+file_out, F_OK):
                        continue

                    # Open the Molmol pipe (in text mode, so that strings can be written and read in all Python versions).
                    pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False, universal_newlines=True)

                    # Init all.
                    pipe.stdin.write("InitAll yes\n")

                    # Read the PDB.
                    pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")

                    # Fitting to mean.
                    pipe.stdin.write("Fit to_first 'selected'\n")
                    pipe.stdin.write("Fit to_mean 'selected'\n")

                    # Write the result.
                    pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")

                    # End Molmol.
                    pipe.stdin.close()

                    # Get STDOUT and STDERR.
                    sys.stdout.write(pipe.stdout.read())
                    if log is not None:
                        log.write(pipe.stderr.read())

                    # Close the pipe.
                    pipe.stdout.close()
                    pipe.stderr.close()

                    # Open the superimposed file in relax.
                    self.interpreter.reset()
                    self.interpreter.pipe.create('out', 'N-state')
                    self.interpreter.structure.read_pdb(file_out, dir=self.results_dir)

                    # Fix the Molmol proton naming by moving the first character of the name to the end.
                    for model in cdp.structure.structural_data:
                        # Alias.
                        mol = model.mol[0]

                        # Loop over all atoms.
                        for i in range(len(mol.atom_name)):
                            # A proton.
                            if search('H', mol.atom_name[i]):
                                mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]

                    # Replace the superimposed file.
                    self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=out_dir, force=True)

        # Close the Molmol STDERR log.
        finally:
            if log is not None:
                log.close()
class Stereochem_analysis:
    """Class for performing the relative stereochemistry analysis."""

    def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
        """Set up for the stereochemistry analysis.

        All arguments are stored on the instance, the relax interpreter is loaded and the results and log directories are created.  The analysis itself is only performed by calling the run() method.

        @keyword stage:             Stage of analysis (see the module docstring above for the options).
        @type stage:                int
        @keyword results_dir:       The optional directory to place all results files into.  If not given, the current working directory is used.
        @type results_dir:          None or str
        @keyword num_ens:           Number of ensembles.
        @type num_ens:              int
        @keyword num_models:        Ensemble size.
        @type num_models:           int
        @keyword configs:           All the configurations.
        @type configs:              list of str
        @keyword snapshot_dir:      Snapshot directories (corresponding to the configurations).  A single directory name is used for all configurations.
        @type snapshot_dir:         str or list of str
        @keyword snapshot_min:      The number of the first snapshots (corresponding to the configurations).
        @type snapshot_min:         list of int
        @keyword snapshot_max:      The number of the last snapshots (corresponding to the configurations).
        @type snapshot_max:         list of int
        @keyword pseudo:            The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
        @type pseudo:               list of list of str and list of str
        @keyword noe_file:          The name of the NOE restraint file.
        @type noe_file:             str
        @keyword noe_norm:          The NOE normalisation factor (equal to the sum of all NOEs squared).
        @type noe_norm:             float
        @keyword rdc_name:          The label for this RDC data set.
        @type rdc_name:             str
        @keyword rdc_file:          The name of the RDC file.
        @type rdc_file:             str
        @keyword rdc_spin_id1_col:  The spin ID column of the first spin in the RDC file.
        @type rdc_spin_id1_col:     None or int
        @keyword rdc_spin_id2_col:  The spin ID column of the second spin in the RDC file.
        @type rdc_spin_id2_col:     None or int
        @keyword rdc_data_col:      The data column of the RDC file.
        @type rdc_data_col:         int
        @keyword rdc_error_col:     The error column of the RDC file.
        @type rdc_error_col:        int
        @keyword bond_length:       The bond length value in meters.  This overrides the bond_length_file argument.
        @type bond_length:          float or None
        @keyword bond_length_file:  The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
        @type bond_length_file:     str or None
        @keyword log:               Log file output flag (only for certain stages).
        @type log:                  bool
        @keyword bucket_num:        Number of buckets for the distribution plots.
        @type bucket_num:           int
        @keyword lower_lim_noe:     Lower limit of the NOE violation distribution plot.
        @type lower_lim_noe:        float
        @keyword upper_lim_noe:     Upper limit of the NOE violation distribution plot.
        @type upper_lim_noe:        float
        @keyword lower_lim_rdc:     Lower limit of the RDC Q factor distribution plot.
        @type lower_lim_rdc:        float
        @keyword upper_lim_rdc:     Upper limit of the RDC Q factor distribution plot.
        @type upper_lim_rdc:        float
        """

        # Initial printout.
        title(file=sys.stdout, text="Stereochemistry auto-analysis")

        # Safely execute the full protocol.
        try:
            # Execution lock.
            status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')

            # Set up the analysis status object.
            status.init_auto_analysis('stereochem', type='stereochem')
            status.current_analysis = 'auto stereochem analysis'

            # Store all the args.
            self.stage = stage
            self.results_dir = results_dir
            self.num_ens = num_ens
            self.num_models = num_models
            self.configs = configs
            self.snapshot_dir = snapshot_dir
            self.snapshot_min = snapshot_min
            self.snapshot_max = snapshot_max
            self.pseudo = pseudo
            self.noe_file = noe_file
            self.noe_norm = noe_norm
            self.rdc_name = rdc_name
            self.rdc_file = rdc_file
            self.rdc_spin_id1_col = rdc_spin_id1_col
            self.rdc_spin_id2_col = rdc_spin_id2_col
            self.rdc_data_col = rdc_data_col
            self.rdc_error_col = rdc_error_col
            self.bond_length = bond_length
            self.bond_length_file = bond_length_file
            self.log = log
            self.bucket_num = bucket_num
            self.lower_lim_noe = lower_lim_noe
            self.upper_lim_noe = upper_lim_noe
            self.lower_lim_rdc = lower_lim_rdc
            self.upper_lim_rdc = upper_lim_rdc

            # Load the interpreter.
            self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
            self.interpreter.populate_self()
            self.interpreter.on(verbose=False)

            # Create the results directory.
            if self.results_dir:
                mkdir_nofail(self.results_dir)

            # Or use the current working directory.
            else:
                self.results_dir = getcwd()

            # Create a directory for log files.
            if self.log:
                mkdir_nofail(self.results_dir + sep + "logs")

        # Clean up.
        finally:
            # Final printout.
            title(file=sys.stdout, text="Completion of the stereochemistry auto-analysis")
            print_elapsed_time(time() - status.start_time)

            # Finish and unlock execution.
            status.auto_analysis['stereochem'].fin = True
            status.current_analysis = None
            status.exec_lock.release()


    def run(self):
        """Execute the given stage of the analysis.

        STDOUT is restored to its original stream once the stage finishes, even if the stage fails.

        @raise RelaxError:  If the stage number is unknown.
        """

        # Store the original STDOUT (some stages redirect it to a log file).
        self.stdout_orig = sys.stdout

        try:
            # Sampling of snapshots.
            if self.stage == 1:
                self.sample()

            # NOE violation analysis.
            elif self.stage == 2:
                self.noe_viol()

            # Ensemble superimposition.
            elif self.stage == 3:
                self.superimpose()

            # RDC Q factor analysis.
            elif self.stage == 4:
                self.rdc_analysis()

            # Grace plot creation.
            elif self.stage == 5:
                self.grace_plots()

            # Final combined Q ordering.
            elif self.stage == 6:
                self.combined_q()

            # Unknown stage.
            else:
                raise RelaxError("The stage number %s is unknown." % self.stage)

        # Restore STDOUT, so that a failed stage does not leave it pointing at a log file.
        finally:
            sys.stdout = self.stdout_orig


    @staticmethod
    def _read_lines(file_name):
        """Return all lines of the given text file, always closing the file.

        @param file_name:   The path of the file to read.
        @type file_name:    str
        @return:            The lines of the file, including the line endings.
        @rtype:             list of str
        """

        handle = open(file_name)
        try:
            return handle.readlines()
        finally:
            handle.close()


    @staticmethod
    def _format_ensemble_count(num_ens):
        """Format the number of ensembles with commas as thousands separators.

        @param num_ens: The number of ensembles.
        @type num_ens:  int
        @return:        The formatted number, e.g. '10,050' for 10050.  Zero gives the empty string.
        @rtype:         str
        """

        text = ''
        remainder = num_ens
        dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
        for i, divider in enumerate(dividers):
            # The number in this group of three digits.
            num = int(remainder / divider)

            # Groups after the leading one must be zero padded (otherwise 10050 becomes '10,50').
            if text:
                group = '%03i' % num
            elif num:
                group = repr(num)
            else:
                continue

            # Update the text.
            text = text + group

            # A comma.
            if i < len(dividers) - 1:
                text = text + ','

            # Remove the front part of the number.
            remainder = remainder - divider * num

        return text


    def combined_q(self):
        """Calculate the combined Q factor.

        The combined Q is defined as::

            Q_total^2 = Q_NOE^2 + Q_RDC^2,

        and the NOE Q factor as::

            Q^2 = U / sum(NOE_i^2),

        where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.

        For each configuration, the files 'Q_total_<config>' and 'Q_total_<config>_sorted' are written to the results directory.

        @raise RelaxError:  If the sorted NOE violation or RDC Q factor file of the first configuration cannot be found.
        """

        # Checks (the format arguments are bracketed so that the full path ends up inside the quotes of the message).
        noe_check = self.results_dir + sep + "NOE_viol_" + self.configs[0] + "_sorted"
        if not access(noe_check, F_OK):
            raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % noe_check)
        rdc_check = self.results_dir + sep + "Q_factors_" + self.configs[0] + "_sorted"
        if not access(rdc_check, F_OK):
            raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % rdc_check)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("Creating the combined Q factor file for configuration '%s'." % config)

            # Read the NOE and RDC results files (the unsorted versions, so that the lines match up).
            noe_lines = self._read_lines(self.results_dir + sep + "NOE_viol_" + config)
            rdc_lines = self._read_lines(self.results_dir + sep + "Q_factors_" + config)

            # The combined Q factor files.
            out = open(self.results_dir + sep + "Q_total_%s" % config, 'w')
            out_sorted = open(self.results_dir + sep + "Q_total_%s_sorted" % config, 'w')
            try:
                # Loop over the data (skipping the header line).
                data = []
                for j in range(1, len(noe_lines)):
                    # Split the lines.
                    noe_cols = noe_lines[j].split()
                    ens = int(noe_cols[0])
                    noe_viol = float(noe_cols[1])
                    q_rdc = float(rdc_lines[j].split()[1])

                    # The NOE Q factor.
                    q_noe = sqrt(noe_viol / self.noe_norm)

                    # Combined Q.
                    q = sqrt(q_noe**2 + q_rdc**2)

                    # Write out the unsorted list.
                    out.write("%-20i%20.15f\n" % (ens, q))

                    # Store the values.
                    data.append([q, ens])

                # Sort the combined Q.
                data.sort()

                # Write the data.
                for q, ens in data:
                    out_sorted.write("%-20i%20.15f\n" % (ens, q))

            # Close the files.
            finally:
                out.close()
                out_sorted.close()


    def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
        """Create the distribution data structure.

        The range from lower to upper is divided into inc equally sized bins.  Values outside of this range are reported and not placed into any bin, however they still count towards the total used to normalise the frequencies.

        @param values:  The values to bin.
        @type values:   list of float
        @keyword lower: The lower limit of the distribution.
        @type lower:    float
        @keyword upper: The upper limit of the distribution.
        @type upper:    float
        @keyword inc:   The number of bins (this must be supplied).
        @type inc:      int
        @return:        The distribution as a list of [bin lower edge, frequency] pairs.
        @rtype:         list of list of float
        """

        # The bin width.
        bin_width = (upper - lower) / float(inc)

        # Init the dist object.
        dist = []
        for i in range(inc):
            dist.append([bin_width * i + lower, 0])

        # Loop over the values.
        for val in values:
            # The bin.
            index = int((val - lower) / bin_width)

            # Outside of the limits (int() truncates towards zero, so values just below the lower limit must be caught explicitly).
            if val < lower or index >= inc:
                print("Outside of the limits: '%s'" % val)
                continue

            # Increment the count.
            dist[index][1] = dist[index][1] + 1

        # Convert the counts to frequencies (an empty data set gives all zero frequencies).
        total = float(len(values))
        total_pr = 0.0
        for i in range(inc):
            if total:
                dist[i][1] = dist[i][1] / total
            else:
                dist[i][1] = 0.0
            total_pr = total_pr + dist[i][1]

        print("Total Pr: %s" % total_pr)

        # Return the dist.
        return dist


    def grace_plots(self):
        """Generate grace plots of the results.

        Each set of plots is only created if the corresponding sorted results file of the first configuration exists in the results directory.
        """

        # The number of configs.
        n = len(self.configs)

        # The colours for the different configs.
        defaults = [4, 2]    # Blue and red.
        colours = []
        for i in range(n):
            # Default colours.
            if i < len(defaults):
                colours.append(defaults[i])

            # Otherwise black!
            else:
                colours.append(0)

        # Subtitle for all graphs.
        subtitle = '%s ensembles of %s' % (self._format_ensemble_count(self.num_ens), self.num_models)

        # The marker files showing which analyses have been performed.
        have_noe = access(self.results_dir + sep + "NOE_viol_" + self.configs[0] + "_sorted", F_OK)
        have_rdc = access(self.results_dir + sep + "Q_factors_" + self.configs[0] + "_sorted", F_OK)

        # NOE violations.
        if have_noe:
            # Print out.
            print("Generating NOE violation Grace plots.")

            # Open the output files.
            grace_curve = open(self.results_dir + sep + "NOE_viol_curve.agr", 'w')
            grace_dist = open(self.results_dir + sep + "NOE_viol_dist.agr", 'w')
            try:
                # Loop over the configurations.
                data = []
                dist = []
                for i in range(n):
                    # Read the results file.
                    lines = self._read_lines(self.results_dir + sep + "NOE_viol_" + self.configs[i] + "_sorted")

                    # Add a new graph set.
                    data.append([])

                    # Loop over the ensembles and extract the NOE violation (skipping the header line).
                    noe_viols = []
                    for j in range(1, len(lines)):
                        # Extract the violation.
                        viol = float(lines[j].split()[1])
                        noe_viols.append(viol)

                        # Add to the data structure.
                        data[i].append([j, viol])

                    # Calculate the distribution.
                    dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))

                # Headers.
                write_xy_header(format='grace', file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0] * n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
                write_xy_header(format='grace', file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1] * n], symbol_sizes=[[0.5] * n], linestyle=[[3] * n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])

                # Write the data.
                write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
                write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')

            # Close the files.
            finally:
                grace_curve.close()
                grace_dist.close()

        # RDC Q factors.
        if have_rdc:
            # Print out.
            print("Generating RDC Q factor Grace plots.")

            # Open the Grace output files.
            grace_curve = open(self.results_dir + sep + "RDC_%s_curve.agr" % self.rdc_name, 'w')
            grace_dist = open(self.results_dir + sep + "RDC_%s_dist.agr" % self.rdc_name, 'w')
            try:
                # Loop over the configurations.
                data = []
                dist = []
                for i in range(n):
                    # Read the results file.
                    lines = self._read_lines(self.results_dir + sep + "Q_factors_" + self.configs[i] + "_sorted")

                    # Add a new graph set.
                    data.append([])

                    # Loop over the Q factors (skipping the header line).
                    values = []
                    for j in range(1, len(lines)):
                        # Extract the Q factor.
                        value = float(lines[j].split()[1])
                        values.append(value)

                        # Add to the data structure.
                        data[i].append([j, value])

                    # Calculate the distribution.
                    dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))

                # Headers.
                write_xy_header(format='grace', file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0] * n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
                write_xy_header(format='grace', file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1] * n], symbol_sizes=[[0.5] * n], linestyle=[[3] * n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])

                # Write the data.
                write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
                write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')

            # Close the files.
            finally:
                grace_curve.close()
                grace_dist.close()

        # NOE-RDC correlation plots.
        if have_noe and have_rdc:
            # Print out.
            print("Generating NOE-RDC correlation Grace plots.")

            # Open the Grace output files.
            grace_file = open(self.results_dir + sep + "correlation_plot.agr", 'w')
            grace_file_scaled = open(self.results_dir + sep + "correlation_plot_scaled.agr", 'w')
            try:
                # Grace data.
                data = []
                data_scaled = []
                for i in range(n):
                    # Read the NOE and RDC results files (the unsorted versions, so that the lines match up).
                    noe_lines = self._read_lines(self.results_dir + sep + "NOE_viol_" + self.configs[i])
                    rdc_lines = self._read_lines(self.results_dir + sep + "Q_factors_" + self.configs[i])

                    # Add a new graph set.
                    data.append([])
                    data_scaled.append([])

                    # Loop over the data (skipping the header line).
                    for j in range(1, len(noe_lines)):
                        # Split the lines.
                        noe_viol = float(noe_lines[j].split()[1])
                        q_factor = float(rdc_lines[j].split()[1])

                        # Add the xy pair.
                        data[i].append([noe_viol, q_factor])
                        data_scaled[i].append([sqrt(noe_viol / self.noe_norm), q_factor])

                # Write the data.
                write_xy_header(format='grace', file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9] * n], symbol_sizes=[[0.24] * n], linetype=[[0] * n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
                write_xy_header(format='grace', file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9] * n], symbol_sizes=[[0.24] * n], linetype=[[0] * n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
                write_xy_data(format='grace', data=[data], file=grace_file, graph_type='xy')
                write_xy_data(format='grace', data=[data_scaled], file=grace_file_scaled, graph_type='xy')

            # Close the files (these were previously left open).
            finally:
                grace_file.close()
                grace_file_scaled.close()


    def noe_viol(self):
        """NOE violation calculations.

        For each configuration, the summed NOE violation of every ensemble is written to the files 'NOE_viol_<config>' and 'NOE_viol_<config>_sorted', and the relax results of each ensemble are saved into the 'NOE_results' directory.
        """

        # Redirect STDOUT to a log file (run() restores the original stream).
        if self.log:
            sys.stdout = open(self.results_dir + sep + "logs" + sep + "NOE_viol.log", 'w')

        # Create a directory for the save files.
        save_dir = self.results_dir + sep + "NOE_results"
        mkdir_nofail(dir=save_dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n" * 10 + "# Set up for config " + config + " #" + "\n")

            # Open the results file.
            out = open(self.results_dir + sep + "NOE_viol_" + config, 'w')
            out_sorted = open(self.results_dir + sep + "NOE_viol_" + config + "_sorted", 'w')
            try:
                out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
                out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))

                # Create the data pipe.
                self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")

                # Read the first structure.
                self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models + 1)))

                # Load all protons as the sequence.
                self.interpreter.structure.load_spins("@H*", ave_pos=False)

                # Create the pseudo-atoms (none if the pseudo argument was not supplied).
                for pseudo in self.pseudo or []:
                    self.interpreter.spin.create_pseudo(spin_name=pseudo[0], members=pseudo[1], averaging="linear")
                self.interpreter.sequence.display()

                # Read the NOE list.
                self.interpreter.noe.read_restraints(file=self.noe_file)

                # Set up the N-state model.
                self.interpreter.n_state_model.select_model(model="fixed")

                # Print out.
                print("\n" * 2 + "# Set up complete #" + "\n" * 10)

                # Loop over each ensemble.
                noe_viol = []
                for ens in range(self.num_ens):
                    # Print out the ensemble to both the log and screen.
                    if self.log:
                        sys.stdout.write(config + repr(ens) + "\n")
                    sys.stderr.write(config + repr(ens) + "\n")

                    # Delete the old structures.
                    self.interpreter.structure.delete()

                    # Read the ensemble.
                    self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models + 1)))

                    # Get the atomic positions.
                    self.interpreter.structure.get_pos(ave_pos=False)

                    # Calculate the average NOE potential.
                    self.interpreter.minimise.calculate()

                    # Sum the violations.
                    cdp.sum_viol = 0.0
                    for i in range(len(cdp.ave_dist)):
                        if cdp.quad_pot[i][2]:
                            cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]

                    # Write out the NOE violation.
                    noe_viol.append([cdp.sum_viol, ens])
                    out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))

                    # Save the state.
                    self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

                # Sort the NOE violations.
                noe_viol.sort()

                # Write the data.
                for viol, ens in noe_viol:
                    out_sorted.write("%-20i%20.15f\n" % (ens, viol))

            # Close the results files (these were previously left open).
            finally:
                out.close()
                out_sorted.close()


    def rdc_analysis(self):
        """Perform the RDC part of the analysis.

        For each configuration, the RDC Q factor of every superimposed ensemble is written to the files 'Q_factors_<config>' and 'Q_factors_<config>_sorted', and the relax results of each ensemble are saved into the 'RDC_<rdc_name>_results' directory.
        """

        # Redirect STDOUT to a log file (run() restores the original stream).
        if self.log:
            sys.stdout = open(self.results_dir + sep + "logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')

        # The dipolar constant (left at zero when per-pair bond lengths come from a file).
        d = 0.0
        if self.bond_length is not None:
            d = 3.0 / (2.0 * pi) * dipolar_constant(periodic_table.gyromagnetic_ratio('13C'), periodic_table.gyromagnetic_ratio('1H'), self.bond_length)

        # Create a directory for the save files.
        save_dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
        mkdir_nofail(dir=save_dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n" * 10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir + sep + "Q_factors_" + config, 'w')
            out_sorted = open(self.results_dir + sep + "Q_factors_" + config + "_sorted", 'w')
            try:
                out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
                out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))

                # Create the data pipe.
                self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")

                # Read the first structure.
                self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models + 1)))

                # Load all spins as the sequence.
                self.interpreter.structure.load_spins(ave_pos=False)

                # Create the pseudo-atoms (none if the pseudo argument was not supplied).
                for pseudo in self.pseudo or []:
                    self.interpreter.spin.create_pseudo(spin_name=pseudo[0], members=pseudo[1], averaging="linear")
                self.interpreter.sequence.display()

                # Read the RDC data.
                self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)

                # Define the magnetic dipole-dipole relaxation interaction.
                if self.bond_length is not None:
                    self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                    self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
                else:
                    self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)

                # Set the nuclear isotope.
                self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
                self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
                self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')

                # Set up the model.
                self.interpreter.n_state_model.select_model(model="fixed")

                # Print out.
                print("\n" * 2 + "# Set up complete #" + "\n" * 10)

                # Loop over each ensemble.
                q_factors = []
                for ens in range(self.num_ens):
                    # Print out the ensemble to both the log and screen.
                    if self.log:
                        sys.stdout.write(config + repr(ens) + "\n")
                    sys.stderr.write(config + repr(ens) + "\n")

                    # Delete the old structures.
                    self.interpreter.structure.delete()

                    # Read the ensemble.
                    self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models + 1)))

                    # Get the positional information, then load the CH vectors.
                    self.interpreter.structure.get_pos(ave_pos=False)
                    if self.bond_length is not None:
                        self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                    else:
                        self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
                    self.interpreter.interatom.unit_vectors(ave=False)

                    # Minimisation.
                    self.interpreter.minimise.execute("simplex", constraints=False)

                    # Store and write out the Q factors.
                    # NOTE(review):  The same value is written to both the 'pales' and 'standard' columns - confirm if the second column should be a different Q factor.
                    q_factors.append([cdp.q_rdc_norm_squared_sum, ens])
                    out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc_norm_squared_sum, cdp.q_rdc_norm_squared_sum))

                    # Calculate the alignment tensor in Hz, and store it for reference.
                    cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
                    cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D

                    # Save the state.
                    self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

                # Sort the Q factors.
                q_factors.sort()

                # Write the data.
                for q_factor, ens in q_factors:
                    out_sorted.write("%-20i%20.15f\n" % (ens, q_factor))

            # Close the results files (these were previously left open).
            finally:
                out.close()
                out_sorted.close()


    def sample(self):
        """Generate the ensembles by random sampling of the snapshots.

        For each configuration, num_ens ensemble PDB files are written to the 'ensembles' directory, each being the concatenation of num_models randomly chosen snapshot files.
        """

        # Create the directory for the ensembles, if needed.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles")

        # Loop over the configurations.
        for conf_index in range(len(self.configs)):
            config = self.configs[conf_index]

            # The snapshot directory (a single directory name, such as the default, is shared by all configurations rather than being indexed by character).
            if isinstance(self.snapshot_dir, (list, tuple)):
                snapshot_dir = self.snapshot_dir[conf_index]
            else:
                snapshot_dir = self.snapshot_dir

            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # Random sampling (with replacement, both limits inclusive).
                rand = []
                for j in range(self.num_models):
                    rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))

                # Print out.
                print("Generating ensemble %s%s from structures %s." % (config, ens, rand))

                # The file name.
                file_name = "ensembles" + sep + config + repr(ens) + ".pdb"

                # Open the output file.
                out = open(self.results_dir + sep + file_name, 'w')
                try:
                    # Header.
                    out.write("REM Structures: " + repr(rand) + "\n")

                    # Concatenation the files.
                    for j in range(self.num_models):
                        # The random file.
                        rand_name = snapshot_dir + sep + config + repr(rand[j]) + ".pdb"

                        # Append the file, closing the snapshot once read.
                        snapshot = open(rand_name)
                        try:
                            out.write(snapshot.read())
                        finally:
                            snapshot.close()

                # Close the file.
                finally:
                    out.close()


    def superimpose(self):
        """Superimpose the ensembles using fit to first followed by fit to mean in Molmol.

        The superimposed ensembles are written to the 'ensembles_superimposed' directory within the results directory.  Ensembles for which the output file already exists are skipped, so that a failed run can be restarted.
        """

        # Create the output directory within the results directory, as this is where Molmol is told to write to.
        mkdir_nofail(self.results_dir + sep + "ensembles_superimposed")

        # The configurations, falling back to the historical hard coded R and S when none were given.
        configs = self.configs or ["R", "S"]

        # Logging turned on (run() restores the original STDOUT).
        log = None
        if self.log:
            log = open(self.results_dir + sep + "logs" + sep + "superimpose_molmol.stderr", 'w')
            sys.stdout = open(self.results_dir + sep + "logs" + sep + "superimpose.log", 'w')

        try:
            # Loop over the configurations.
            for config in configs:
                # Loop over each ensemble.
                for ens in range(self.num_ens):
                    # The file names.
                    file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
                    file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"

                    # Print out.
                    sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
                    if self.log:
                        log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))

                    # Failure handling (if a failure occurred and this is rerun, skip all existing files).
                    if access(self.results_dir + sep + file_out, F_OK):
                        continue

                    # The Molmol script:  initialise, read the PDB, fit to first then to the mean, and write the result.
                    commands = "".join([
                        "InitAll yes\n",
                        "ReadPdb " + self.results_dir + sep + file_in + "\n",
                        "Fit to_first 'selected'\n",
                        "Fit to_mean 'selected'\n",
                        "WritePdb " + self.results_dir + sep + file_out + "\n",
                    ])

                    # Open the Molmol pipe in text mode, so that strings can be sent under Python 3.
                    pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False, universal_newlines=True)

                    # Send the script and collect STDOUT and STDERR together, avoiding a deadlock when one of the pipes fills up.
                    stdout_text, stderr_text = pipe.communicate(commands)
                    sys.stdout.write(stdout_text)
                    if self.log:
                        log.write(stderr_text)

                    # Open the superimposed file in relax.
                    self.interpreter.reset()
                    self.interpreter.pipe.create('out', 'N-state')
                    self.interpreter.structure.read_pdb(file_out, dir=self.results_dir)

                    # Fix the MOLMOL proton naming, moving the first character of the name to the end.
                    for model in cdp.structure.structural_data:
                        # Alias.
                        mol = model.mol[0]

                        # Loop over all atoms.
                        for i in range(len(mol.atom_name)):
                            # A proton.
                            if search('H', mol.atom_name[i]):
                                mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]

                    # Replace the superimposed file.
                    self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir + sep + "ensembles_superimposed", force=True)

        # Close the Molmol STDERR log.
        finally:
            if log is not None:
                log.close()