class Stereochem_analysis:
    """Class for performing the relative stereochemistry analysis."""

    def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir=['snapshots'], snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
        """Set up for the stereochemistry analysis.

        @keyword stage:             Stage of analysis (see the module docstring above for the options).  
        @type stage:                int
        @keyword results_dir:       The optional directory to place all results files into.
        @type results_dir:          None or str
        @keyword num_ens:           Number of ensembles.
        @type num_ens:              int
        @keyword num_models:        Ensemble size.
        @type num_models:           int
        @keyword configs:           All the configurations.
        @type configs:              list of str
        @keyword snapshot_dir:      The snapshot directories, one per configuration.
        @type snapshot_dir:         list of str
        @keyword snapshot_min:      The number of the first snapshot, one per configuration.
        @type snapshot_min:         list of int
        @keyword snapshot_max:      The number of the last snapshot, one per configuration.
        @type snapshot_max:         list of int
        @keyword pseudo:            The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
        @type pseudo:               list of list of str and list of str
        @keyword noe_file:          The name of the NOE restraint file.
        @type noe_file:             str
        @keyword noe_norm:          The NOE normalisation factor (equal to the sum of all NOEs squared).
        @type noe_norm:             float
        @keyword rdc_name:          The label for this RDC data set.
        @type rdc_name:             str
        @keyword rdc_file:          The name of the RDC file.
        @type rdc_file:             str
        @keyword rdc_spin_id1_col:  The spin ID column of the first spin in the RDC file.
        @type rdc_spin_id1_col:     None or int
        @keyword rdc_spin_id2_col:  The spin ID column of the second spin in the RDC file.
        @type rdc_spin_id2_col:     None or int
        @keyword rdc_data_col:      The data column of the RDC file.
        @type rdc_data_col:         int
        @keyword rdc_error_col:     The error column of the RDC file.
        @type rdc_error_col:        int
        @keyword bond_length:       The bond length value in meters.  This overrides the bond_length_file argument.
        @type bond_length:          float or None
        @keyword bond_length_file:  The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
        @type bond_length_file:     str or None
        @keyword log:               Log file output flag (only for certain stages).
        @type log:                  bool
        @keyword bucket_num:        Number of buckets for the distribution plots.
        @type bucket_num:           int
        @keyword lower_lim_noe:     Lower limit for the NOE violation distribution plots.
        @type lower_lim_noe:        float
        @keyword upper_lim_noe:     Upper limit for the NOE violation distribution plots.
        @type upper_lim_noe:        float
        @keyword lower_lim_rdc:     Lower limit for the RDC Q-factor distribution plots.
        @type lower_lim_rdc:        float
        @keyword upper_lim_rdc:     Upper limit for the RDC Q-factor distribution plots.
        @type upper_lim_rdc:        float
        """

        # Execution lock.
        status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')

        # Set up the analysis status object.
        status.init_auto_analysis('stereochem', type='stereochem')
        status.current_analysis = 'auto stereochem analysis'

        # Store all the args.
        self.stage = stage
        self.results_dir = results_dir
        self.num_ens = num_ens
        self.num_models = num_models
        self.configs = configs
        self.snapshot_dir = snapshot_dir
        self.snapshot_min = snapshot_min
        self.snapshot_max = snapshot_max
        self.pseudo = pseudo
        self.noe_file = noe_file
        self.noe_norm = noe_norm
        self.rdc_name = rdc_name
        self.rdc_file = rdc_file
        self.rdc_spin_id1_col = rdc_spin_id1_col
        self.rdc_spin_id2_col = rdc_spin_id2_col
        self.rdc_data_col = rdc_data_col
        self.rdc_error_col = rdc_error_col
        self.bond_length = bond_length
        self.bond_length_file = bond_length_file
        self.log = log
        self.bucket_num = bucket_num
        self.lower_lim_noe = lower_lim_noe
        self.upper_lim_noe = upper_lim_noe
        self.lower_lim_rdc = lower_lim_rdc
        self.upper_lim_rdc = upper_lim_rdc

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Create the results directory.
        if self.results_dir:
            mkdir_nofail(self.results_dir)

        # Or use the current working directory.
        else:
            self.results_dir = getcwd()

        # Create a directory for log files.
        if self.log:
            mkdir_nofail(self.results_dir + sep + "logs")

        # Finish and unlock execution.
        status.auto_analysis['stereochem'].fin = True
        status.current_analysis = None
        status.exec_lock.release()


    def run(self):
        """Execute the given stage of the analysis."""

        # Store the original STDOUT.
        self.stdout_orig = sys.stdout

        # Sampling of snapshots.
        if self.stage == 1:
            self.sample()

        # NOE violation analysis.
        elif self.stage == 2:
            self.noe_viol()

        # Ensemble superimposition.
        elif self.stage == 3:
            self.superimpose()

        # RDC Q-factor analysis.
        elif self.stage == 4:
            self.rdc_analysis()

        # Grace plot creation.
        elif self.stage == 5:
            self.grace_plots()

        # Final combined Q ordering.
        elif self.stage == 6:
            self.combined_q()

        # Unknown stage.
        else:
            raise RelaxError("The stage number %s is unknown." % self.stage)

        # Restore STDOUT.
        sys.stdout = self.stdout_orig
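
        # The stages are designed to be executed in order, one instantiation per
        # stage, e.g. (a sketch only, with the remaining keywords elided):
        #
        #     for stage in range(1, 7):
        #         Stereochem_analysis(stage=stage, ...).run()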


    def combined_q(self):
        """Calculate the combined Q-factor.

        The combined Q is defined as::

            Q_total^2 = Q_NOE^2 + Q_RDC^2,

        and the NOE Q-factor as::

            Q^2 = U / sum(NOE_i^2),

        where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
        """

        # Checks (building the paths first, as the original '%' formatting bound only to self.results_dir and garbled the messages).
        noe_path = self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"
        rdc_path = self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"
        if not access(noe_path, F_OK):
            raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % noe_path)
        if not access(rdc_path, F_OK):
            raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % rdc_path)

        # Loop over the configurations.
        for i in range(len(self.configs)):
            # Print out.
            print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])

            # Open the NOE results file and read the data.
            file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
            noe_lines = file.readlines()
            file.close()

            # Open the RDC results file and read the data.
            file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
            rdc_lines = file.readlines()
            file.close()

            # The combined Q-factor file.
            out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
            out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')

            # Loop over the data (skipping the header line).
            data = []
            for j in range(1, len(noe_lines)):
                # Split the lines.
                ens = int(noe_lines[j].split()[0])
                noe_viol = float(noe_lines[j].split()[1])
                q_rdc = float(rdc_lines[j].split()[1])

                # The NOE Q-factor.
                q_noe = sqrt(noe_viol/self.noe_norm)

                # Combined Q.
                q = sqrt(q_noe**2 + q_rdc**2)

                # Write out the unsorted list.
                out.write("%-20i%20.15f\n" % (ens, q))

                # Store the values.
                data.append([q, ens])

            # Sort the combined Q.
            data.sort()

            # Write the sorted data (avoiding reuse of the outer loop variable 'i').
            for j in range(len(data)):
                out_sorted.write("%-20i%20.15f\n" % (data[j][1], data[j][0]))

            # Close the files.
            out.close()
            out_sorted.close()


    def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
        """Create the distribution data structure."""

        # The bin width.
        bin_width = (upper - lower)/float(inc)

        # Init the dist object.
        dist = []
        for i in range(inc):
            dist.append([bin_width*i+lower, 0])

        # Loop over the values.
        for val in values:
            # The bin index ('bin' would shadow the Python built-in).
            bin_index = int((val - lower)/bin_width)

            # Outside of the limits.
            if bin_index < 0 or bin_index >= inc:
                print("Outside of the limits: '%s'" % val)
                continue

            # Increment the count.
            dist[bin_index][1] = dist[bin_index][1] + 1

        # Convert the counts to frequencies.
        total_pr = 0.0
        for i in range(inc):
            dist[i][1] = dist[i][1] / float(len(values))
            total_pr = total_pr + dist[i][1]

        print("Total Pr: %s" % total_pr)

        # Return the dist.
        return dist


    def grace_plots(self):
        """Generate grace plots of the results."""

        # The number of configs.
        n = len(self.configs)

        # The colours for the different configs.
        defaults = [4, 2]    # Blue and red.
        colours = []
        for i in range(n):
            # Default colours.
            if i < len(defaults):
                colours.append(defaults[i])

            # Otherwise black!
            else:
                colours.append(0)

        # The ensemble number text, formatted with comma thousands separators
        # (e.g. format(10000, ',d') gives '10,000').  The previous hand-rolled
        # logic failed to zero-pad non-leading groups, rendering 1050 as '1,50'.
        ens_text = format(self.num_ens, ',d')

        # Subtitle for all graphs.
        subtitle = '%s ensembles of %s models' % (ens_text, self.num_models)

        # NOE violations.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE violation Grace plots.")

            # Open the output files.
            grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
            grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')

            # Loop over the configurations.
            data = []
            dist = []
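
            # Note on layout: write_xy_data() below is passed [data], i.e. a single
            # graph holding one set per configuration, each set being a list of
            # [x, y] points.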
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the ensembles and extract the NOE violation.
                noe_viols = []
                for j in range(1, len(lines)):
                    # Extract the violation.
                    viol = float(lines[j].split()[1])
                    noe_viols.append(viol)

                    # Add to the data structure.
                    data[i].append([j, viol])

                # Calculate the NOE violation distribution.
                dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))

            # Headers.
            write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # RDC Q-factors.
        if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating RDC Q-factor Grace plots.")

            # Open the Grace output files.
            grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
            grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')

            # Loop over the configurations.
            data = []
            dist = []
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the Q-factors.
                values = []
                for j in range(1, len(lines)):
                    # Extract the Q-factor.
                    value = float(lines[j].split()[1])
                    values.append(value)

                    # Add to the data structure.
                    data[i].append([j, value])

                # Calculate the Q-factor distribution.
                dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))

            # Headers.
            write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # NOE-RDC correlation plots.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE-RDC correlation Grace plots.")

            # Open the Grace output files.
            grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
            grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')

            # Grace data.
            data = []
            data_scaled = []
            for i in range(len(self.configs)):
                # Open the NOE results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
                noe_lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])
                data_scaled.append([])

                # Open the RDC results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
                rdc_lines = file.readlines()
                file.close()

                # Loop over the data.
                for j in range(1, len(noe_lines)):
                    # Split the lines.
                    noe_viol = float(noe_lines[j].split()[1])
                    q_factor = float(rdc_lines[j].split()[1])
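
                    # For the scaled plot, the violation is converted to the NOE
                    # Q-factor, Q = sqrt(U / noe_norm), matching combined_q().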

                    # Add the xy pair.
                    data[i].append([noe_viol, q_factor])
                    data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])

            # Write the data.
            write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_data([data], file=grace_file, graph_type='xy')
            write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')


    def noe_viol(self):
        """NOE violation calculations."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')

        # Create a directory for the save files ('save_dir', to avoid shadowing the dir() built-in).
        save_dir = self.results_dir + sep + "NOE_results"
        mkdir_nofail(dir=save_dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results file.
            out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
            out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))

            # Create the data pipe.
            self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all protons as the sequence.
            self.interpreter.structure.load_spins("@H*", ave_pos=False)

            # Create the pseudo-atoms, if any were supplied.
            if self.pseudo:
                for i in range(len(self.pseudo)):
                    self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the NOE list.
            self.interpreter.noe.read_restraints(file=self.noe_file)

            # Set up the N-state model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            noe_viol = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the atomic positions.
                self.interpreter.structure.get_pos(ave_pos=False)

                # Calculate the average NOE potential.
                self.interpreter.calc()

                # Sum the violations.
                cdp.sum_viol = 0.0
                for i in range(len(cdp.ave_dist)):
                    if cdp.quad_pot[i][2]:
                        cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]

                # Write out the NOE violation.
                noe_viol.append([cdp.sum_viol, ens])
                out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

            # Sort the NOE violations.
            noe_viol.sort()

            # Write the data.
            for i in range(len(noe_viol)):
                out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))


    def rdc_analysis(self):
        """Perform the RDC part of the analysis."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')

        # The dipolar constant.
        d = 0.0
        if self.bond_length is not None:
            d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
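
        # (This assumes dipolar_constant() returns the dipolar coupling constant
        # in rad.s^-1, so that d can scale the unitless alignment tensor into Hz
        # in the align_tensor_Hz calculation within the ensemble loop below.)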

        # Create a directory for the save files.
        save_dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
        mkdir_nofail(dir=save_dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
            out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))

            # Create the data pipe.
            self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all spins as the sequence.
            self.interpreter.structure.load_spins(ave_pos=False)

            # Create the pseudo-atoms, if any were supplied.
            if self.pseudo:
                for i in range(len(self.pseudo)):
                    self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the RDC data.
            self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)

            # Define the magnetic dipole-dipole relaxation interaction.
            if self.bond_length is not None:
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
            else:
                self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)

            # Set the nuclear isotope.
            self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')

            # Set up the model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            q_factors = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the positional information, then load the CH vectors.
                self.interpreter.structure.get_pos(ave_pos=False)
                if self.bond_length is not None:
                    self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                else:
                    self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
                self.interpreter.interatom.unit_vectors(ave=False)

                # Minimisation.
                #grid_search(inc=4)
                self.interpreter.minimise("simplex", constraints=False)

                # Store and write out the Q-factors.
                q_factors.append([cdp.q_rdc, ens])
                out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))

                # Calculate the alignment tensor in Hz, and store it for reference.
                cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
                cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=save_dir, force=True)

            # Sort the Q-factors.
            q_factors.sort()

            # Write the data.
            for i in range(len(q_factors)):
                out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))


    def sample(self):
        """Generate the ensembles by random sampling of the snapshots."""

        # Create the directory for the ensembles, if needed.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles")

        # Loop over the configurations.
        for conf_index in range(len(self.configs)):
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # Random sampling.
                rand = []
                for j in range(self.num_models):
                    rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))

                # Print out.
                print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))

                # The file name.
                file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"

                # Open the output file.
                out = open(self.results_dir+sep+file_name, 'w')

                # Header.
                out.write("REM Structures: " + repr(rand) + "\n")

                # Concatenate the snapshot files.
                for j in range(self.num_models):
                    # The random file.
                    rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"

                    # Append the file.
                    out.write(open(rand_name).read())

                # Close the file.
                out.close()


    def superimpose(self):
        """Superimpose the ensembles using fit to first in Molmol."""

        # Create the output directory.
        mkdir_nofail("ensembles_superimposed")

        # Logging turned on.
        if self.log:
            log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')

        # Loop over the configurations.
        for config in self.configs:
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # The file names.
                file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
                file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"

                # Print out.
                sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
                if self.log:
                    log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))

                # Failure handling (if a failure occurred and this is rerun, skip all existing files).
                if access(self.results_dir+sep+file_out, F_OK):
                    continue

                # Open the Molmol pipe (universal_newlines=True so that the string-based stdin/stdout I/O below also works on Python 3).
                pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False, universal_newlines=True)

                # Init all.
                pipe.stdin.write("InitAll yes\n")

                # Read the PDB.
                pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")

                # Fit to the first model, then to the mean structure.
                pipe.stdin.write("Fit to_first 'selected'\n")
                pipe.stdin.write("Fit to_mean 'selected'\n")

                # Write the result.
                pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")

                # End Molmol.
                pipe.stdin.close()

                # Get STDOUT and STDERR.
                sys.stdout.write(pipe.stdout.read())
                if self.log:
                    log.write(pipe.stderr.read())

                # Close the pipe.
                pipe.stdout.close()
                pipe.stderr.close()

                # Open the superimposed file in relax.
                self.interpreter.reset()
                self.interpreter.pipe.create('out', 'N-state')
                self.interpreter.structure.read_pdb(file_out, dir=self.results_dir)

                # Fix the non-standard Molmol proton naming (the leading digit is moved to the end, e.g. '1HB' becomes 'HB1').
                for model in cdp.structure.structural_data:
                    # Alias.
                    mol = model.mol[0]

                    # Loop over all atoms.
                    for i in range(len(mol.atom_name)):
                        # A proton.
                        if search('H', mol.atom_name[i]):
                            mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]

                # Replace the superimposed file.
                self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)


class dAuvergne_protocol:
    """The model-free auto-analysis."""

    # Some class variables.
    opt_func_tol = 1e-25
    opt_max_iterations = int(1e7)

    def __init__(self, pipe_name=None, pipe_bundle=None, results_dir=None, write_results_dir=None, diff_model=None, mf_models=['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9'], local_tm_models=['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9'], grid_inc=11, diff_tensor_grid_inc={'sphere': 11, 'prolate': 11, 'oblate': 11, 'ellipsoid': 6}, min_algor='newton', mc_sim_num=500, max_iter=None, user_fns=None, conv_loop=True):
        """Perform the full model-free analysis protocol of d'Auvergne and Gooley, 2008b.

        @keyword pipe_name:             The name of the data pipe containing the sequence info.  This data pipe should have all values set including the CSA value, the bond length, the heteronucleus name and proton name.  It should also have all relaxation data loaded.
        @type pipe_name:                str
        @keyword pipe_bundle:           The data pipe bundle to associate all spawned data pipes with.
        @type pipe_bundle:              str
        @keyword results_dir:           The directory where optimisation results will be read from.  Results will also be saved to this directory if the write_results_dir argument is not given.
        @type results_dir:              str
        @keyword write_results_dir:     The directory where optimisation results will be saved in.  If None, it will default to the value of the results_dir argument.  This is mainly used for debugging.
        @type write_results_dir:        str or None
        @keyword diff_model:            The global diffusion model to optimise.  This can be one of 'local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', or 'final'.  If all or a subset of these are supplied as a list, then these will be automatically looped over and calculated.
        @type diff_model:               str or list of str
        @keyword mf_models:             The model-free models.
        @type mf_models:                list of str
        @keyword local_tm_models:       The model-free models for the local tm analysis.
        @type local_tm_models:          list of str
        @keyword grid_inc:              The grid search size (the number of increments per dimension).
        @type grid_inc:                 int
        @keyword diff_tensor_grid_inc:  A dictionary of grid search sizes for the optimisation of the sphere, prolate spheroid, oblate spheroid, and ellipsoid diffusion tensors, keyed by tensor type.
        @type diff_tensor_grid_inc:     dict of int
        @keyword min_algor:             The minimisation algorithm (in most cases this should not be changed).
        @type min_algor:                str
        @keyword mc_sim_num:            The number of Monte Carlo simulations to be used for error analysis at the end of the analysis.
        @type mc_sim_num:               int
        @keyword max_iter:              The maximum number of iterations for the global iteration.  If set to None, the algorithm will iterate until convergence.
        @type max_iter:                 int or None
        @keyword user_fns:              A dictionary of replacement user functions.  These will overwrite the standard user functions.  The key should be the name of the user function or user function class and the value should be the function or class instance.
        @type user_fns:                 dict
        @keyword conv_loop:             Automatic looping over all rounds until convergence.
        @type conv_loop:                bool
        """

        # Execution lock.
        status.exec_lock.acquire(pipe_bundle, mode='auto-analysis')

        # Store the args.
        self.pipe_name = pipe_name
        self.pipe_bundle = pipe_bundle
        self.mf_models = mf_models
        self.local_tm_models = local_tm_models
        self.grid_inc = grid_inc
        self.diff_tensor_grid_inc = diff_tensor_grid_inc
        self.min_algor = min_algor
        self.mc_sim_num = mc_sim_num
        self.max_iter = max_iter
        self.conv_loop = conv_loop

        # The model-free data pipe names.
        self.mf_model_pipes = []
        for i in range(len(self.mf_models)):
            self.mf_model_pipes.append(self.name_pipe(self.mf_models[i]))
        self.local_tm_model_pipes = []
        for i in range(len(self.local_tm_models)):
            self.local_tm_model_pipes.append(self.name_pipe(self.local_tm_models[i]))

        # The diffusion models.
        if isinstance(diff_model, list):
            self.diff_model_list = diff_model
        else:
            self.diff_model_list = [diff_model]

        # Project directory (i.e. the directory containing the model-free model results and the newly generated files).
        if results_dir:
            self.results_dir = results_dir + sep
        else:
            self.results_dir = getcwd() + sep
        if write_results_dir:
            self.write_results_dir = write_results_dir + sep
        else:
            self.write_results_dir = self.results_dir

        # Data checks.
        self.check_vars()

        # Set the data pipe to the current data pipe.
        if self.pipe_name != cdp_name():
            switch(self.pipe_name)

        # Some info for the status.
        self.status_setup()

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Replacement user functions.
        if user_fns:
            for name in user_fns:
                setattr(self.interpreter, name, user_fns[name])

        # Execute the protocol.
        try:
            # Loop over the models.
            for self.diff_model in self.diff_model_list:
                # Wait a little while between diffusion models.
                sleep(1)

                # Set the global model name.
                status.auto_analysis[self.pipe_bundle].diff_model = self.diff_model

                # Initialise the convergence data structures.
                self.conv_data = Container()
                self.conv_data.chi2 = []
                self.conv_data.models = []
                self.conv_data.diff_vals = []
                if self.diff_model == 'sphere':
                    self.conv_data.diff_params = ['tm']
                elif self.diff_model == 'oblate' or self.diff_model == 'prolate':
                    self.conv_data.diff_params = ['tm', 'Da', 'theta', 'phi']
                elif self.diff_model == 'ellipsoid':
                    self.conv_data.diff_params = ['tm', 'Da', 'Dr', 'alpha', 'beta', 'gamma']
                self.conv_data.spin_ids = []
                self.conv_data.mf_params = []
                self.conv_data.mf_vals = []

                # Execute the analysis for each diffusion model.
                self.execute()

        # Clean up.
        finally:
            # Finish and unlock execution.
            status.auto_analysis[self.pipe_bundle].fin = True
            status.current_analysis = None
            status.exec_lock.release()


    def check_vars(self):
        """Check that the user has set the variables correctly."""

        # The pipe bundle.
        if not isinstance(self.pipe_bundle, str):
            raise RelaxError("The pipe bundle name '%s' is invalid." % self.pipe_bundle)

        # The diff model.
        valid_models = ['local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', 'final']
        for i in range(len(self.diff_model_list)):
            if self.diff_model_list[i] not in valid_models:
                raise RelaxError("The diff_model value '%s' is incorrectly set.  It must be one of %s." % (self.diff_model_list[i], valid_models))

        # Model-free models.
        mf_models = ['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9']
        local_tm_models = ['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9']
        if not isinstance(self.mf_models, list):
            raise RelaxError("The self.mf_models user variable must be a list.")
        if not isinstance(self.local_tm_models, list):
            raise RelaxError("The self.local_tm_models user variable must be a list.")
        for i in range(len(self.mf_models)):
            if self.mf_models[i] not in mf_models:
                raise RelaxError("The self.mf_models user variable '%s' is incorrectly set.  It must be one of %s." % (self.mf_models, mf_models))
        for i in range(len(self.local_tm_models)):
            if self.local_tm_models[i] not in local_tm_models:
                raise RelaxError("The self.local_tm_models user variable '%s' is incorrectly set.  It must be one of %s." % (self.local_tm_models, local_tm_models))

        # Sequence data.
        if not exists_mol_res_spin_data():
            raise RelaxNoSequenceError(self.pipe_name)

        # Relaxation data.
        if not hasattr(cdp, 'ri_ids') or len(cdp.ri_ids) == 0:
            raise RelaxError("No relaxation data can be found in the data pipe '%s'." % self.pipe_name)

        # Insufficient data.
        if len(cdp.ri_ids) <= 3:
            raise RelaxError("Insufficient relaxation data, 4 or more data sets are essential for the execution of this script.")

        # Spin vars.
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Test if the isotope type has been set.
            if not hasattr(spin, 'isotope') or spin.isotope is None:
                raise RelaxNoValueError("nuclear isotope type", spin_id=spin_id)

            # Skip spins with no relaxation data.
            if not hasattr(spin, 'ri_data') or spin.ri_data is None:
                continue

            # Test if the CSA value has been set.
            if not hasattr(spin, 'csa') or spin.csa is None:
                raise RelaxNoValueError("CSA", spin_id=spin_id)

        # Interatomic vars.
        for interatom in interatomic_loop():
            # Get the corresponding spins.
            spin1 = return_spin(interatom.spin_id1)
            spin2 = return_spin(interatom.spin_id2)

            # Skip deselected spins.
            if not spin1.select or not spin2.select:
                continue

            # Test if the interatomic distance has been set.
            if not hasattr(interatom, 'r') or interatom.r is None:
                raise RelaxNoValueError("interatomic distance", spin_id=interatom.spin_id1, spin_id2=interatom.spin_id2)

        # Min vars.
        if not isinstance(self.grid_inc, int):
            raise RelaxError("The grid_inc user variable '%s' is incorrectly set.  It should be an integer." % self.grid_inc)
        if not isinstance(self.diff_tensor_grid_inc, dict):
            raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  It should be a dictionary." % self.diff_tensor_grid_inc)
        for tensor in ['sphere', 'prolate', 'oblate', 'ellipsoid']:
            if not tensor in self.diff_tensor_grid_inc:
                raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  It should contain the '%s' key." % (self.diff_tensor_grid_inc, tensor))
            if not isinstance(self.diff_tensor_grid_inc[tensor], int):
                raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  The value corresponding to the key '%s' should be an integer." % (self.diff_tensor_grid_inc, tensor))
        if not isinstance(self.min_algor, str):
            raise RelaxError("The min_algor user variable '%s' is incorrectly set.  It should be a string." % self.min_algor)
        if not isinstance(self.mc_sim_num, int):
            raise RelaxError("The mc_sim_num user variable '%s' is incorrectly set.  It should be an integer." % self.mc_sim_num)

        # Looping.
        if not isinstance(self.conv_loop, bool):
            raise RelaxError("The conv_loop user variable '%s' is incorrectly set.  It should be one of the booleans True or False." % self.conv_loop)


    def convergence(self):
        """Test for the convergence of the global model."""

        # Print out.
        print("\n\n\n")
        print("#####################")
        print("# Convergence tests #")
        print("#####################\n")

        # Maximum number of iterations reached.
        if self.max_iter and self.round > self.max_iter:
            print("Maximum number of global iterations reached.  Terminating the protocol before convergence has been reached.")
            return True

        # Store the data of the current data pipe.
        self.conv_data.chi2.append(cdp.chi2)

        # Create a string representation of the model-free models of the current data pipe.
        curr_models = ''
        for spin in spin_loop():
            if hasattr(spin, 'model'):
                if not spin.model == 'None':
                    curr_models = curr_models + spin.model
        self.conv_data.models.append(curr_models)

        # Store the diffusion tensor parameters.
        self.conv_data.diff_vals.append([])
        for param in self.conv_data.diff_params:
            # Get the parameter values.
            self.conv_data.diff_vals[-1].append(getattr(cdp.diff_tensor, param))

        # Store the model-free parameters.
        self.conv_data.mf_vals.append([])
        self.conv_data.mf_params.append([])
        self.conv_data.spin_ids.append([])
        for spin, spin_id in spin_loop(return_id=True):
            # Skip spin systems with no 'params' object.
            if not hasattr(spin, 'params'):
                continue

            # Add the spin ID, parameters, and empty value list.
            self.conv_data.spin_ids[-1].append(spin_id)
            self.conv_data.mf_params[-1].append([])
            self.conv_data.mf_vals[-1].append([])

            # Loop over the parameters.
            for j in range(len(spin.params)):
                # Get the parameters and values.
                self.conv_data.mf_params[-1][-1].append(spin.params[j])
                self.conv_data.mf_vals[-1][-1].append(getattr(spin, spin.params[j].lower()))

        # No need for tests.
        if self.round == 1:
            print("First round of optimisation, skipping the convergence tests.\n\n\n")
            return False

        # Loop over the iterations.
        converged = False
        for i in range(self.start_round, self.round - 1):
            # Print out.
            print("\n\n\n# Comparing the current iteration to iteration %i.\n" % (i+1))

            # Index.
            index = i - self.start_round

            # Chi-squared test.
            print("Chi-squared test:")
            print("    chi2 (iter %i):  %s" % (i+1, self.conv_data.chi2[index]))
            print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.chi2[index]))
            print("    chi2 (iter %i):  %s" % (self.round, self.conv_data.chi2[-1]))
            print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.chi2[-1]))
            print("    chi2 (difference):  %s" % (self.conv_data.chi2[index] - self.conv_data.chi2[-1]))
            if self.conv_data.chi2[index] == self.conv_data.chi2[-1]:
                print("    The chi-squared value has converged.\n")
            else:
                print("    The chi-squared value has not converged.\n")
                continue

            # Identical model-free model test.
            print("Identical model-free models test:")
            if self.conv_data.models[index] == self.conv_data.models[-1]:
                print("    The model-free models have converged.\n")
            else:
                print("    The model-free models have not converged.\n")
                continue

            # Identical diffusion tensor parameter value test.
            print("Identical diffusion tensor parameter test:")
            params_converged = True
            for k in range(len(self.conv_data.diff_params)):
                # Test if not identical.
                if self.conv_data.diff_vals[index][k] != self.conv_data.diff_vals[-1][k]:
                    print("    Parameter:   %s" % param)
                    print("    Value (iter %i):  %s" % (i+1, self.conv_data.diff_vals[index][k]))
                    print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.diff_vals[index][k]))
                    print("    Value (iter %i):  %s" % (self.round, self.conv_data.diff_vals[-1][k]))
                    print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.diff_vals[-1][k]))
                    print("    The diffusion parameters have not converged.\n")
                    params_converged = False
                    break
            if not params_converged:
                continue
            print("    The diffusion tensor parameters have converged.\n")

            # Identical model-free parameter value test.
            print("\nIdentical model-free parameter test:")
            if len(self.conv_data.spin_ids[index]) != len(self.conv_data.spin_ids[-1]):
                print("    Different number of spins.")
                continue
            for j in range(len(self.conv_data.spin_ids[-1])):
                # Loop over the parameters.
                for k in range(len(self.conv_data.mf_params[-1][j])):
                    # Test if not identical.
                    if self.conv_data.mf_vals[index][j][k] != self.conv_data.mf_vals[-1][j][k]:
                        print("    Spin ID:     %s" % self.conv_data.spin_ids[-1][j])
                        print("    Parameter:   %s" % self.conv_data.mf_params[-1][j][k])
                        print("    Value (iter %i): %s" % (i+1, self.conv_data.mf_vals[index][j][k]))
                        print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.mf_vals[index][j][k]))
                        print("    Value (iter %i): %s" % (self.round, self.conv_data.mf_vals[-1][j][k]))
                        print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.mf_vals[-1][j][k]))
                        print("    The model-free parameters have not converged.\n")
                        params_converged = False
                        break

                # Propagate the break out of the spin loop.
                if not params_converged:
                    break
            if not params_converged:
                continue
            print("    The model-free parameters have converged.\n")

            # Convergence.
            converged = True
            break


        # Final printout.
        ##################

        print("\nConvergence:")
        if converged:
            # Update the status.
            status.auto_analysis[self.pipe_bundle].convergence = True

            # Print out.
            print("    [ Yes ]")

            # Return the termination condition.
            return True
        else:
            # Print out.
            print("    [ No ]")

            # Return False to not terminate.
            return False


    def determine_rnd(self, model=None):
        """Determine the number of the next round of optimisation.

        @keyword model: The name of the global diffusion model, used as the results subdirectory name.
        @type model:    str
        @return:        The number of the next round of optimisation.
        @rtype:         int
        """

        # Get a list of all files in the model directory.  If the directory does not exist, the analysis starts from scratch at round 0.
        try:
            dir_list = listdir(self.results_dir+sep+model)
        except OSError:
            return 0

        # Set the round to 'init' or 0 if there is no directory called 'init'.
        if 'init' not in dir_list:
            return 0

        # Create a list of all directories which begin with 'round_'.
        rnd_dirs = []
        for file_name in dir_list:
            if search('^round_', file_name):
                rnd_dirs.append(file_name)

        # Create a sorted list of integer round numbers.
        numbers = []
        for dir_name in rnd_dirs:
            try:
                numbers.append(int(dir_name[6:]))
            except ValueError:
                pass
        numbers.sort()

        # No directories beginning with 'round_' exist, set the round to 1.
        if not len(numbers):
            return 1

        # The highest number.
        max_round = numbers[-1]

        # Check that the opt/results file exists for the round (working backwards).
        for i in range(max_round, -1, -1):
            # Assume the round is complete.
            complete_round = i

            # The file root.
            file_root = self.results_dir + sep + model + sep + "round_%i" % i + sep + 'opt' + sep + 'results'

            # Stop looping when the opt/results file is found.
            if access(file_root + '.bz2', F_OK):
                break
            if access(file_root + '.gz', F_OK):
                break
            if access(file_root, F_OK):
                break

        # No round, so assume the initial state.
        if complete_round == 0:
            return 0

        # Determine the number for the next round (add 1 to the highest completed round).
        return complete_round + 1
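
        # Directory layout sketch (hypothetical paths):
        #
        #     results_dir/sphere/init/
        #     results_dir/sphere/round_1/opt/results.bz2
        #     results_dir/sphere/round_2/opt/results.bz2    <- highest completed round
        #
        # With this layout, determine_rnd(model='sphere') returns 3.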


    def execute(self):
        """Execute the protocol."""

        # MI - Local tm.
        ################

        if self.diff_model == 'local_tm':
            # Base directory to place files into.
            self.base_dir = self.results_dir+'local_tm'+sep

            # Sequential optimisation of all model-free models (function must be modified to suit).
            self.multi_model(local_tm=True)

            # Model selection.
            self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic')


        # Diffusion models MII to MV.
        #############################

        elif self.diff_model == 'sphere' or self.diff_model == 'prolate' or self.diff_model == 'oblate' or self.diff_model == 'ellipsoid':
            # No local_tm directory!
            dir_list = listdir(self.results_dir)
            if 'local_tm' not in dir_list:
                raise RelaxError("The local_tm model must be optimised first.")

            # The initial round of optimisation - not zero if calculations were interrupted.
            self.start_round = self.determine_rnd(model=self.diff_model)

            # Loop until convergence if conv_loop is set, otherwise just loop once.
            # This looping could be made much cleaner by removing the dependence on the determine_rnd() function.
            while True:
                # Determine which round of optimisation to do (init, round_1, round_2, etc).
                self.round = self.determine_rnd(model=self.diff_model)
                status.auto_analysis[self.pipe_bundle].round = self.round

                # Initial round of optimisation for diffusion models MII to MV.
                if self.round == 0:
                    # Base directory to place files into.
                    self.base_dir = self.results_dir+self.diff_model+sep+'init'+sep

                    # Run name.
                    name = self.name_pipe(self.diff_model)

                    # Create the data pipe (deleting the old one if it exists).
                    if has_pipe(name):
                        self.interpreter.pipe.delete(name)
                    self.interpreter.pipe.create(name, 'mf', bundle=self.pipe_bundle)

                    # Load the local tm diffusion model MI results.
                    self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic')

                    # Remove the tm parameter.
                    self.interpreter.model_free.remove_tm()
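
                    # (The spheroid parameter tuples below are (tm, Da, theta, phi)
                    # and the ellipsoid tuple (tm, Da, Dr, alpha, beta, gamma),
                    # matching the diff_params lists set up in __init__.)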

                    # Add an arbitrary diffusion tensor which will be optimised.
                    if self.diff_model == 'sphere':
                        self.interpreter.diffusion_tensor.init(10e-9, fixed=False)
                        inc = self.diff_tensor_grid_inc['sphere']
                    elif self.diff_model == 'prolate':
                        self.interpreter.diffusion_tensor.init((10e-9, 0, 0, 0), spheroid_type='prolate', fixed=False)
                        inc = self.diff_tensor_grid_inc['prolate']
                    elif self.diff_model == 'oblate':
                        self.interpreter.diffusion_tensor.init((10e-9, 0, 0, 0), spheroid_type='oblate', fixed=False)
                        inc = self.diff_tensor_grid_inc['oblate']
                    elif self.diff_model == 'ellipsoid':
                        self.interpreter.diffusion_tensor.init((10e-09, 0, 0, 0, 0, 0), fixed=False)
                        inc = self.diff_tensor_grid_inc['ellipsoid']

                    # Minimise just the diffusion tensor.
                    self.interpreter.fix('all_spins')
                    self.interpreter.grid_search(inc=inc)
                    self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)

                    # Write the results.
                    self.interpreter.results.write(file='results', dir=self.base_dir, force=True)


                # Normal round of optimisation for diffusion models MII to MV.
                else:
                    # Base directory to place files into.
                    self.base_dir = self.results_dir+self.diff_model+sep+'round_'+repr(self.round)+sep

                    # Load the optimised diffusion tensor from the initial round or the previous round.
                    self.load_tensor()

                    # Sequential optimisation of all model-free models (function must be modified to suit).
                    self.multi_model()

                    # Model selection.
                    self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic')

                    # Final optimisation of all diffusion and model-free parameters.
                    self.interpreter.fix('all', fixed=False)

                    # Minimise all parameters.
                    self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)

                    # Write the results.
                    dir = self.base_dir + 'opt'
                    self.interpreter.results.write(file='results', dir=dir, force=True)

                    # Test for convergence.
                    converged = self.convergence()

                    # Break out of the infinite while loop if automatic looping is not activated or if convergence has occurred.
                    if converged or not self.conv_loop:
                        break

            # Unset the status.
            status.auto_analysis[self.pipe_bundle].round = None


        # Final run.
        ############

        elif self.diff_model == 'final':
            # Diffusion model selection.
            ############################

            # The contents of the results directory.
            dir_list = listdir(self.results_dir)

            # Check that the minimal set of global diffusion models required for the protocol has been optimised.
            min_models = ['local_tm', 'sphere']
            for model in min_models:
                if model not in dir_list:
                    raise RelaxError("The minimum set of global diffusion models required for the protocol have not been optimised, the '%s' model results cannot be found." % model)

            # Build a list of all global diffusion models optimised.
            all_models = ['local_tm', 'sphere', 'prolate', 'oblate', 'ellipsoid']
            self.opt_models = []
            self.pipes = []
            for model in all_models:
                if model in dir_list:
                    self.opt_models.append(model)
                    self.pipes.append(self.name_pipe(model))

            # Remove all temporary pipes used in this auto-analysis.
            for name in pipe_names(bundle=self.pipe_bundle):
                if name in self.pipes + self.mf_model_pipes + self.local_tm_model_pipes + [self.name_pipe('aic'), self.name_pipe('previous')]:
                    self.interpreter.pipe.delete(name)

            # Create the local_tm data pipe.
            self.interpreter.pipe.create(self.name_pipe('local_tm'), 'mf', bundle=self.pipe_bundle)

            # Load the local tm diffusion model MI results.
            self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic')

            # Loop over models MII to MV.
            for model in ['sphere', 'prolate', 'oblate', 'ellipsoid']:
                # Skip missing models.
                if model not in self.opt_models:
                    continue

                # Determine which was the last round of optimisation for each of the models.
                self.round = self.determine_rnd(model=model) - 1

                # If no directories beginning with 'round_' exist, the script has not been properly utilised!
                if self.round < 1:
                    # Construct the name of the diffusion tensor.
                    name = model
                    if model == 'prolate' or model == 'oblate':
                        name = name + ' spheroid'

                    # Throw an error to prevent misuse of the script.
                    raise RelaxError("Multiple rounds of optimisation of the " + name + " (between 8 to 15) are required for the proper execution of this script.")

                # Create the data pipe.
                self.interpreter.pipe.create(self.name_pipe(model), 'mf', bundle=self.pipe_bundle)

                # Load the diffusion model results.
                self.interpreter.results.read(file='results', dir=self.results_dir+model+sep+'round_'+repr(self.round)+sep+'opt')

            # Model selection between MI to MV.
            self.model_selection(modsel_pipe=self.name_pipe('final'), write_flag=False)


            # Monte Carlo simulations.
            ##########################

            # Fix the diffusion tensor, if it exists.
            if hasattr(get_pipe(self.name_pipe('final')), 'diff_tensor'):
                self.interpreter.fix('diff')

            # Simulations.
            self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
            self.interpreter.monte_carlo.create_data()
            self.interpreter.monte_carlo.initial_values()
            self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)
            self.interpreter.eliminate()
            self.interpreter.monte_carlo.error_analysis()


            # Write the final results.
            ##########################

            # Create results files and plots of the data.
            self.write_results()


        # Unknown script behaviour.
        ###########################

        else:
            raise RelaxError("Unknown diffusion model, change the value of 'self.diff_model'")


    def load_tensor(self):
        """Function for loading the optimised diffusion tensor."""

        # Create the data pipe for the previous data (deleting the old data pipe first if necessary).
        if has_pipe(self.name_pipe('previous')):
            self.interpreter.pipe.delete(self.name_pipe('previous'))
        self.interpreter.pipe.create(self.name_pipe('previous'), 'mf', bundle=self.pipe_bundle)

        # Load the optimised diffusion tensor from the initial round.
        if self.round == 1:
            self.interpreter.results.read('results', self.results_dir+self.diff_model+sep+'init')

        # Load the optimised diffusion tensor from the previous round.
        else:
            self.interpreter.results.read('results', self.results_dir+self.diff_model+sep+'round_'+repr(self.round-1)+sep+'opt')


    def model_selection(self, modsel_pipe=None, dir=None, write_flag=True):
        """Model selection function."""

        # Model selection (delete the model selection pipe if it already exists).
        if has_pipe(modsel_pipe):
            self.interpreter.pipe.delete(modsel_pipe)
        self.interpreter.model_selection(method='AIC', modsel_pipe=modsel_pipe, bundle=self.pipe_bundle, pipes=self.pipes)

        # Write the results.
        if write_flag:
            self.interpreter.results.write(file='results', dir=dir, force=True)


    def multi_model(self, local_tm=False):
        """Function for optimisation of all model-free models."""

        # Set the data pipe names (also the names of preset model-free models).
        if local_tm:
            models = self.local_tm_models
        else:
            models = self.mf_models
        self.pipes = []
        for i in range(len(models)):
            self.pipes.append(self.name_pipe(models[i]))

        # Loop over the data pipes.
        for i in range(len(models)):
            # Place the model name into the status container.
            status.auto_analysis[self.pipe_bundle].current_model = models[i]

            # Create the data pipe (by copying).
            if has_pipe(self.pipes[i]):
                self.interpreter.pipe.delete(self.pipes[i])
            self.interpreter.pipe.copy(self.pipe_name, self.pipes[i], bundle_to=self.pipe_bundle)
            self.interpreter.pipe.switch(self.pipes[i])

            # Copy the diffusion tensor from the 'opt' data pipe and prevent it from being minimised.
            if not local_tm:
                self.interpreter.diffusion_tensor.copy(self.name_pipe('previous'))
                self.interpreter.fix('diff')

            # Select the model-free model.
            self.interpreter.model_free.select_model(model=models[i])

            # Minimise.
            self.interpreter.grid_search(inc=self.grid_inc)
            self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)

            # Model elimination.
            self.interpreter.eliminate()

            # Write the results.
            dir = self.base_dir + models[i]
            self.interpreter.results.write(file='results', dir=dir, force=True)

        # Unset the status.
        status.auto_analysis[self.pipe_bundle].current_model = None


    def name_pipe(self, prefix):
        """Generate a unique name for the data pipe.

        @param prefix:  The prefix of the data pipe name.
        @type prefix:   str
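        @return:        The unique data pipe name, built as "<prefix> - <pipe bundle>".  As a sketch, with a hypothetical bundle named 'mf (2024)', name_pipe('sphere') would return 'sphere - mf (2024)'.
        @rtype:         str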
        """

        # The unique pipe name.
        name = "%s - %s" % (prefix, self.pipe_bundle)

        # Return the name.
        return name


    def status_setup(self):
        """Initialise the status object."""

        # Initialise the status object for this auto-analysis.
        status.init_auto_analysis(self.pipe_bundle, type='dauvergne_protocol')
        status.current_analysis = self.pipe_bundle

        # The global diffusion model.
        status.auto_analysis[self.pipe_bundle].diff_model = None

        # The round of optimisation, i.e. the global iteration.
        status.auto_analysis[self.pipe_bundle].round = None

        # The list of model-free local tm models for optimisation.
        status.auto_analysis[self.pipe_bundle].local_tm_models = self.local_tm_models

        # The list of model-free models for optimisation.
        status.auto_analysis[self.pipe_bundle].mf_models = self.mf_models

        # The current model-free model.
        status.auto_analysis[self.pipe_bundle].current_model = None

        # The maximum number of iterations of the global model.
        status.auto_analysis[self.pipe_bundle].max_iter = self.max_iter

        # The convergence of the global model.
        status.auto_analysis[self.pipe_bundle].convergence = False


    def write_results(self):
        """Create Grace plots of the final model-free results."""

        # Save the results file.
        dir = self.write_results_dir + 'final'
        self.interpreter.results.write(file='results', dir=dir, force=True)

        # The Grace plots.
        dir = self.write_results_dir + 'final' + sep + 'grace'
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2',  file='s2.agr',        dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2f', file='s2f.agr',       dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2s', file='s2s.agr',       dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='te',  file='te.agr',        dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='tf',  file='tf.agr',        dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='ts',  file='ts.agr',        dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='res_num', y_data_type='rex', file='rex.agr',       dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='s2',      y_data_type='te',  file='s2_vs_te.agr',  dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='s2',      y_data_type='rex', file='s2_vs_rex.agr', dir=dir, force=True)
        self.interpreter.grace.write(x_data_type='te',      y_data_type='rex', file='te_vs_rex.agr', dir=dir, force=True)

        # Write the values to text files.
        dir = self.write_results_dir + 'final'
        self.interpreter.value.write(param='s2',       file='s2.txt',       dir=dir, force=True)
        self.interpreter.value.write(param='s2f',      file='s2f.txt',      dir=dir, force=True)
        self.interpreter.value.write(param='s2s',      file='s2s.txt',      dir=dir, force=True)
        self.interpreter.value.write(param='te',       file='te.txt',       dir=dir, force=True)
        self.interpreter.value.write(param='tf',       file='tf.txt',       dir=dir, force=True)
        self.interpreter.value.write(param='ts',       file='ts.txt',       dir=dir, force=True)
        self.interpreter.value.write(param='rex',      file='rex.txt',      dir=dir, force=True)
        self.interpreter.value.write(param='local_tm', file='local_tm.txt', dir=dir, force=True)
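
        # Field strength scaled Rex values.  The stored Rex parameter is field strength independent, so it is scaled by (2*pi*frq)^2 to obtain rad.s^-1 units at each spectrometer frequency (as noted in the file comments below).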
        frqs = spectrometer.get_frequencies()
        for i in range(len(frqs)):
            comment = "This is the Rex value with units rad.s^-1 scaled to a magnetic field strength of %s MHz." % (frqs[i]/1e6)
            self.interpreter.value.write(param='rex', file='rex_%s.txt'%int(frqs[i]/1e6), dir=dir, scaling=(2.0*pi*frqs[i])**2, comment=comment, force=True)

        # Create the PyMOL macros.
        dir = self.write_results_dir + 'final' + sep + 'pymol'
        self.interpreter.pymol.macro_write(data_type='s2',        dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='s2f',       dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='s2s',       dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='amp_fast',  dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='amp_slow',  dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='te',        dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='tf',        dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='ts',        dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='time_fast', dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='time_slow', dir=dir, force=True)
        self.interpreter.pymol.macro_write(data_type='rex',       dir=dir, force=True)

        # Create the Molmol macros.
        dir = self.write_results_dir + 'final' + sep + 'molmol'
        self.interpreter.molmol.macro_write(data_type='s2',        dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='s2f',       dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='s2s',       dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='amp_fast',  dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='amp_slow',  dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='te',        dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='tf',        dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='ts',        dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='time_fast', dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='time_slow', dir=dir, force=True)
        self.interpreter.molmol.macro_write(data_type='rex',       dir=dir, force=True)

        # Create a PDB representation of the diffusion tensor, but only if structural data is present and a diffusion tensor exists (i.e. the local tm global model has not been selected).
        if hasattr(cdp, 'structure') and hasattr(cdp, 'diff_tensor'):
            dir = self.write_results_dir + 'final'
            self.interpreter.structure.create_diff_tensor_pdb(file="tensor.pdb", dir=dir, force=True)
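

# A minimal usage sketch for the class below (the pipe names, bundle name and
# results directory are hypothetical examples, not values from this module):
#
#     Frame_order_analysis(data_pipe_full='full', data_pipe_subset='subset',
#         pipe_bundle='frame order', results_dir='frame_order_results')
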
class Frame_order_analysis:
    """The frame order auto-analysis protocol."""

    def __init__(self, data_pipe_full=None, data_pipe_subset=None, pipe_bundle=None, results_dir=None, grid_inc=11, grid_inc_rigid=21, min_algor='simplex', num_int_pts_grid=50, num_int_pts_subset=[20, 100], func_tol_subset=[1e-2, 1e-2], num_int_pts_full=[100, 1000, 200000], func_tol_full=[1e-2, 1e-3, 1e-4], mc_sim_num=500, mc_int_pts=1000, mc_func_tol=1e-3, models=['rigid', 'free rotor', 'rotor', 'iso cone, free rotor', 'iso cone, torsionless', 'iso cone', 'pseudo-ellipse, torsionless', 'pseudo-ellipse']):
        """Perform the full frame order analysis.

        @param data_pipe_full:          The name of the data pipe containing all of the RDC and PCS data.
        @type data_pipe_full:           str
        @param data_pipe_subset:        The name of the data pipe containing all of the RDC data but only a small subset of ~5 PCS points.
        @type data_pipe_subset:         str
        @keyword pipe_bundle:           The data pipe bundle to associate all spawned data pipes with.
        @type pipe_bundle:              str
        @keyword results_dir:           The directory where files are saved in.
        @type results_dir:              str
        @keyword grid_inc:              The number of grid increments to use in the grid search of certain models.
        @type grid_inc:                 int
        @keyword grid_inc_rigid:        The number of grid increments to use in the grid search of the initial rigid model.
        @type grid_inc_rigid:           int
        @keyword min_algor:             The minimisation algorithm (in most cases this should not be changed).
        @type min_algor:                str
        @keyword num_int_pts_grid:      The number of Sobol' points for the PCS numerical integration in the grid searches.
        @type num_int_pts_grid:         int
        @keyword num_int_pts_subset:    The list of the number of Sobol' points for the PCS numerical integration to use iteratively in the optimisations after the grid search (for the PCS data subset).
        @type num_int_pts_subset:       list of int
        @keyword func_tol_subset:       The minimisation function tolerance cutoff to terminate optimisation (for the PCS data subset, see the minimise user function).
        @type func_tol_subset:          list of float
        @keyword num_int_pts_full:      The list of the number of Sobol' points for the PCS numerical integration to use iteratively in the optimisations after the grid search (for all PCS and RDC data).
        @type num_int_pts_full:         list of int
        @keyword func_tol_full:         The minimisation function tolerance cutoff to terminate optimisation (for all PCS and RDC data, see the minimise user function).
        @type func_tol_full:            list of float
        @keyword mc_sim_num:            The number of Monte Carlo simulations to be used for error analysis at the end of the analysis.
        @type mc_sim_num:               int
        @keyword mc_int_pts:            The number of Sobol' points for the PCS numerical integration during Monte Carlo simulations.
        @type mc_int_pts:               int
        @keyword mc_func_tol:           The minimisation function tolerance cutoff to terminate optimisation during Monte Carlo simulations.
        @type mc_func_tol:              float
        @keyword models:                The frame order models to use in the analysis.  The 'rigid' model must be included as this is essential for the analysis.
        @type models:                   list of str
        """

        # Execution lock.
        status.exec_lock.acquire(pipe_bundle, mode='auto-analysis')

        # Initial printout.
        title(file=sys.stdout, text="Frame order auto-analysis", prespace=7)

        # Store the args.
        self.data_pipe_full = data_pipe_full
        self.data_pipe_subset = data_pipe_subset
        self.pipe_bundle = pipe_bundle
        self.grid_inc = grid_inc
        self.grid_inc_rigid = grid_inc_rigid
        self.min_algor = min_algor
        self.num_int_pts_grid = num_int_pts_grid
        self.num_int_pts_subset = num_int_pts_subset
        self.func_tol_subset = func_tol_subset
        self.num_int_pts_full = num_int_pts_full
        self.func_tol_full = func_tol_full
        self.mc_sim_num = mc_sim_num
        self.mc_int_pts = mc_int_pts
        self.mc_func_tol = mc_func_tol
        self.models = models

        # A dictionary and list of the data pipe names.
        self.pipe_name_dict = {}
        self.pipe_name_list = []

        # Project directory (i.e. directory containing the frame order results and the newly generated files)
        if results_dir:
            self.results_dir = results_dir + sep
        else:
            self.results_dir = getcwd() + sep

        # Data checks.
        self.check_vars()

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Execute the full protocol.
        try:
            # The nested model optimisation protocol.
            self.nested_models()

            # Perform model selection and error analysis only if the final results do not already exist.
            if not self.read_results(model='final', pipe_name='final'):
                # Model selection.
                self.interpreter.model_selection(method='AIC', modsel_pipe='final', pipes=self.pipe_name_list)

                # The number of integration points.
                self.interpreter.frame_order.num_int_pts(num=self.mc_int_pts)

                # Monte Carlo simulations.
                self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
                self.interpreter.monte_carlo.create_data()
                self.interpreter.monte_carlo.initial_values()
                self.interpreter.minimise(self.min_algor, func_tol=self.mc_func_tol, constraints=False)
                self.interpreter.eliminate()
                self.interpreter.monte_carlo.error_analysis()

                # Finish.
                self.interpreter.results.write(file='results', dir=self.results_dir+'final', force=True)

            # Visualisation of the final results.
            self.visualisation(model='final')

        # Clean up.
        finally:
            # Finish and unlock execution.
            status.exec_lock.release()

        # Save the final program state.
        self.interpreter.state.save('final_state', dir=self.results_dir, force=True)


    def check_vars(self):
        """Check that the user has set the variables correctly."""

        # The pipe bundle.
        if not isinstance(self.pipe_bundle, str):
            raise RelaxError("The pipe bundle name '%s' is invalid." % self.pipe_bundle)

        # Minimisation variables.
        if not isinstance(self.grid_inc, int):
            raise RelaxError("The grid_inc user variable '%s' is incorrectly set.  It should be an integer." % self.grid_inc)
        if not isinstance(self.grid_inc_rigid, int):
            raise RelaxError("The grid_inc_rigid user variable '%s' is incorrectly set.  It should be an integer." % self.grid_inc)
        if not isinstance(self.min_algor, str):
            raise RelaxError("The min_algor user variable '%s' is incorrectly set.  It should be a string." % self.min_algor)
        if not isinstance(self.num_int_pts_grid, int):
            raise RelaxError("The num_int_pts_grid user variable '%s' is incorrectly set.  It should be an integer." % self.mc_sim_num)
        if not isinstance(self.mc_sim_num, int):
            raise RelaxError("The mc_sim_num user variable '%s' is incorrectly set.  It should be an integer." % self.mc_sim_num)
        if not isinstance(self.mc_int_pts, int):
            raise RelaxError("The mc_int_pts user variable '%s' is incorrectly set.  It should be an integer." % self.mc_int_pts)
        if not isinstance(self.mc_func_tol, float):
            raise RelaxError("The mc_func_tol user variable '%s' is incorrectly set.  It should be a floating point number." % self.mc_func_tol)

        # Zooming minimisation (PCS subset).
        if len(self.num_int_pts_subset) != len(self.func_tol_subset):
            raise RelaxError("The num_int_pts_subset and func_tol_subset user variables of '%s' and '%s' respectively must be of the same length." % (self.num_int_pts_subset, self.func_tol_subset))
        for i in range(len(self.num_int_pts_subset)):
            if not isinstance(self.num_int_pts_subset[i], int):
                raise RelaxError("The num_int_pts_subset user variable '%s' must be a list of integers." % self.num_int_pts_subset)
            if not isinstance(self.func_tol_subset[i], float):
                raise RelaxError("The func_tol_subset user variable '%s' must be a list of floats." % self.func_tol_subset)

        # Zooming minimisation (all RDC and PCS data).
        if len(self.num_int_pts_full) != len(self.func_tol_full):
            raise RelaxError("The num_int_pts_full and func_tol_full user variables of '%s' and '%s' respectively must be of the same length." % (self.num_int_pts_full, self.func_tol_full))
        for i in range(len(self.num_int_pts_full)):
            if not isinstance(self.num_int_pts_full[i], int):
                raise RelaxError("The num_int_pts_full user variable '%s' must be a list of integers." % self.num_int_pts_full)
            if not isinstance(self.func_tol_full[i], float):
                raise RelaxError("The func_tol_full user variable '%s' must be a list of floats." % self.func_tol_full)


    def custom_grid_incs(self, model):
        """Set up a customised grid search increment number for each model.

        @param model:   The frame order model.
        @type model:    str
        @return:        The list of increment values.
        @rtype:         list of int and None
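
        As a sketch of the expected output, a 'rotor' model with a fixed pivot and no average position translation would give [None, None, None, grid_inc, grid_inc, grid_inc], i.e. a grid search over only the last three parameters.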
        """

        # Initialise the structure.
        incs = []
        if hasattr(cdp, 'pivot_fixed') and not cdp.pivot_fixed:
            incs += [None, None, None]
        if hasattr(cdp, 'ave_pos_translation') and cdp.ave_pos_translation:
            incs += [None, None, None]

        # The rotor model.
        if model == 'rotor':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc]

        # The free rotor model.
        if model == 'free rotor':
            incs += [self.grid_inc, self.grid_inc, self.grid_inc, self.grid_inc]

        # The torsionless isotropic cone model.
        if model == 'iso cone, torsionless':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc]

        # The free rotor isotropic cone model.
        if model == 'iso cone, free rotor':
            incs += [None, None, None, None, self.grid_inc]

        # The isotropic cone model.
        if model == 'iso cone':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc, None]

        # The torsionless pseudo-elliptic cone model.
        if model == 'pseudo-ellipse, torsionless':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc, self.grid_inc, None]

        # The free rotor pseudo-elliptic cone model.
        if model == 'pseudo-ellipse, free rotor':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc, self.grid_inc, None]

        # The pseudo-elliptic cone model.
        if model == 'pseudo-ellipse':
            incs += [None, None, None, self.grid_inc, self.grid_inc, self.grid_inc, self.grid_inc, None, None]

        # Return the increment list.
        return incs


    def nested_params(self, model):
        """Copy the parameters from the simpler nested models for faster optimisation.

        @param model:   The frame order model.
        @type model:    str
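
        As a sketch, optimising the 'iso cone' model copies the average domain position from the 'rigid' data pipe and both the cone axis and the maximum torsion angle from the 'rotor' data pipe.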
        """

        # The average position from the rigid model (the empty list here means that this applies to all models).
        if model not in []:
            # Get the rigid data pipe.
            rigid_pipe = get_pipe(self.pipe_name_dict['rigid'])

            # Copy the average position parameters from the rigid model.
            if hasattr(rigid_pipe, 'ave_pos_x'):
                cdp.ave_pos_x = rigid_pipe.ave_pos_x
            if hasattr(rigid_pipe, 'ave_pos_y'):
                cdp.ave_pos_y = rigid_pipe.ave_pos_y
            if hasattr(rigid_pipe, 'ave_pos_z'):
                cdp.ave_pos_z = rigid_pipe.ave_pos_z
            if model not in ['free rotor', 'iso cone, free rotor']:
                cdp.ave_pos_alpha = rigid_pipe.ave_pos_alpha
            cdp.ave_pos_beta = rigid_pipe.ave_pos_beta
            cdp.ave_pos_gamma = rigid_pipe.ave_pos_gamma

        # The cone axis from the rotor model.
        if model in ['iso cone']:
            # Get the rotor data pipe.
            rotor_pipe = get_pipe(self.pipe_name_dict['rotor'])

            # Copy the cone axis.
            cdp.axis_theta = rotor_pipe.axis_theta
            cdp.axis_phi = rotor_pipe.axis_phi

        # The cone axis from the free rotor model.
        if model in ['iso cone, free rotor']:
            # Get the free rotor data pipe.
            free_rotor_pipe = get_pipe(self.pipe_name_dict['free rotor'])

            # Copy the cone axis.
            cdp.axis_theta = free_rotor_pipe.axis_theta
            cdp.axis_phi = free_rotor_pipe.axis_phi

        # The torsion from the rotor model.
        if model in ['iso cone', 'pseudo-ellipse']:
            # Get the rotor data pipe.
            rotor_pipe = get_pipe(self.pipe_name_dict['rotor'])

            # Copy the torsion angle.
            cdp.cone_sigma_max = rotor_pipe.cone_sigma_max

        # The cone angles from the torsionless isotropic cone model.
        if model in ['pseudo-ellipse, torsionless', 'pseudo-ellipse, free rotor', 'pseudo-ellipse']:
            # Get the torsionless isotropic cone data pipe.
            pipe = get_pipe(self.pipe_name_dict['iso cone, torsionless'])

            # Copy the cone opening angles.
            cdp.cone_theta_x = pipe.cone_theta
            cdp.cone_theta_y = pipe.cone_theta


    def nested_models(self):
        """Protocol for the nested optimisation of the frame order models."""

        # First optimise the rigid model using all data.
        self.optimise_rigid()

        # Iteratively optimise the frame order models.
        for model in self.models:
            # Skip the already optimised rigid model.
            if model == 'rigid':
                continue

            # The model title.
            title = model[0].upper() + model[1:]

            # Printout.
            section(file=sys.stdout, text="%s frame order model"%title, prespace=5)

            # The data pipe name.
            self.pipe_name_dict[model] = '%s - %s' % (title, self.pipe_bundle)
            self.pipe_name_list.append(self.pipe_name_dict[model])

            # If the results file already exists, read its contents instead of re-optimising.
            if self.read_results(model=model, pipe_name=self.pipe_name_dict[model]):
                # Re-perform model elimination just in case.
                self.interpreter.eliminate()

                # The PDB representation of the model and visualisation script (in case this was not completed correctly).
                self.visualisation(model=model)

                # Skip to the next model.
                continue

            # Create the data pipe using the PCS data subset, and switch to it.
            self.interpreter.pipe.copy(self.data_pipe_subset, self.pipe_name_dict[model], bundle_to=self.pipe_bundle)
            self.interpreter.pipe.switch(self.pipe_name_dict[model])

            # Select the Frame Order model.
            self.interpreter.frame_order.select_model(model=model)

            # Copy nested parameters.
            self.nested_params(model)

            # The optimisation settings.
            self.interpreter.frame_order.num_int_pts(num=self.num_int_pts_grid)
            self.interpreter.frame_order.quad_int(flag=False)

            # Grid search.
            incs = self.custom_grid_incs(model)
            self.interpreter.grid_search(inc=incs, constraints=False)

            # Minimise (for the PCS data subset and full RDC set).
            for i in range(len(self.num_int_pts_subset)):
                self.interpreter.frame_order.num_int_pts(num=self.num_int_pts_subset[i])
                self.interpreter.minimise(self.min_algor, func_tol=self.func_tol_subset[i], constraints=False)

            # Copy the PCS data.
            self.interpreter.pcs.copy(pipe_from=self.data_pipe_full, pipe_to=self.pipe_name_dict[model])

            # Minimise (for the full data set).
            for i in range(len(self.num_int_pts_full)):
                self.interpreter.frame_order.num_int_pts(num=self.num_int_pts_full[i])
                self.interpreter.minimise(self.min_algor, func_tol=self.func_tol_full[i], constraints=False)

            # Results printout.
            self.print_results()

            # Model elimination.
            self.interpreter.eliminate()

            # Save the results.
            self.interpreter.results.write(dir=self.results_dir+model, force=True)

            # The PDB representation of the model and visualisation script.
            self.visualisation(model=model)


    def optimise_rigid(self):
        """Optimise the rigid frame order model.

        The Sobol' integration is not used here, so the algorithm is different from that of the other frame order models.
        """

        # The model.
        model = 'rigid'
        title = model[0].upper() + model[1:]

        # Print out.
        section(file=sys.stdout, text="%s frame order model"%title, prespace=5)

        # The data pipe name.
        self.pipe_name_dict[model] = '%s - %s' % (title, self.pipe_bundle)
        self.pipe_name_list.append(self.pipe_name_dict[model])

        # If the results file already exists, read its contents instead of re-optimising.
        if self.read_results(model=model, pipe_name=self.pipe_name_dict[model]):
            # The PDB representation of the model (in case this was not completed correctly).
            self.interpreter.frame_order.pdb_model(dir=self.results_dir+model, force=True)

            # Nothing more to do.
            return

        # Create the data pipe using the full data set, and switch to it.
        self.interpreter.pipe.copy(self.data_pipe_full, self.pipe_name_dict[model], bundle_to=self.pipe_bundle)
        self.interpreter.pipe.switch(self.pipe_name_dict[model])

        # Select the Frame Order model.
        self.interpreter.frame_order.select_model(model=model)

        # Split grid search if translation is active.
        if cdp.ave_pos_translation:
            # Printout.
            print("\n\nTranslation active - splitting the grid search and iterating.")

            # Loop twice.
            for i in range(2):
                # First optimise the rotation.
                self.interpreter.grid_search(inc=[None, None, None, self.grid_inc_rigid, self.grid_inc_rigid, self.grid_inc_rigid], constraints=False)

                # Then the translation.
                self.interpreter.grid_search(inc=[self.grid_inc_rigid, self.grid_inc_rigid, self.grid_inc_rigid, None, None, None], constraints=False)

        # Standard grid search.
        else:
            self.interpreter.grid_search(inc=self.grid_inc_rigid, constraints=False)

        # Minimise.
        self.interpreter.minimise(self.min_algor, constraints=False)

        # Results printout.
        self.print_results()

        # Save the results.
        self.interpreter.results.write(dir=self.results_dir+model, force=True)

        # The PDB representation of the model.
        self.interpreter.frame_order.pdb_model(dir=self.results_dir+model, force=True)


    def print_results(self):
        """Print out the optimisation results for the current data pipe."""

        # Header.
        sys.stdout.write("\nFinal optimisation results:\n")

        # Formatting string.
        format_float = "    %-20s %20.15f\n"
        format_vect = "    %-20s %20s\n"

        # Average position.
        if hasattr(cdp, 'ave_pos_x') or hasattr(cdp, 'ave_pos_alpha') or hasattr(cdp, 'ave_pos_beta') or hasattr(cdp, 'ave_pos_gamma'):
            sys.stdout.write("\nAverage moving domain position:\n")
        if hasattr(cdp, 'ave_pos_x'):
            sys.stdout.write(format_float % ('x:', cdp.ave_pos_x))
        if hasattr(cdp, 'ave_pos_y'):
            sys.stdout.write(format_float % ('y:', cdp.ave_pos_y))
        if hasattr(cdp, 'ave_pos_z'):
            sys.stdout.write(format_float % ('z:', cdp.ave_pos_z))
        if hasattr(cdp, 'ave_pos_alpha'):
            sys.stdout.write(format_float % ('alpha:', cdp.ave_pos_alpha))
        if hasattr(cdp, 'ave_pos_beta'):
            sys.stdout.write(format_float % ('beta:', cdp.ave_pos_beta))
        if hasattr(cdp, 'ave_pos_gamma'):
            sys.stdout.write(format_float % ('gamma:', cdp.ave_pos_gamma))

        # Frame order eigenframe.
        if hasattr(cdp, 'eigen_alpha') or hasattr(cdp, 'eigen_beta') or hasattr(cdp, 'eigen_gamma') or hasattr(cdp, 'axis_theta') or hasattr(cdp, 'axis_phi'):
            sys.stdout.write("\nFrame order eigenframe:\n")
        if hasattr(cdp, 'eigen_alpha'):
            sys.stdout.write(format_float % ('eigen alpha:', cdp.eigen_alpha))
        if hasattr(cdp, 'eigen_beta'):
            sys.stdout.write(format_float % ('eigen beta:', cdp.eigen_beta))
        if hasattr(cdp, 'eigen_gamma'):
            sys.stdout.write(format_float % ('eigen gamma:', cdp.eigen_gamma))

        # The cone axis.
        if hasattr(cdp, 'axis_theta'):
            # The angles.
            sys.stdout.write(format_float % ('axis theta:', cdp.axis_theta))
            sys.stdout.write(format_float % ('axis phi:', cdp.axis_phi))

            # The axis.
            axis = zeros(3, float64)
            spherical_to_cartesian([1.0, cdp.axis_theta, cdp.axis_phi], axis)
            sys.stdout.write(format_vect % ('axis:', axis))

        # Frame ordering.
        if hasattr(cdp, 'cone_theta_x') or hasattr(cdp, 'cone_theta_y') or hasattr(cdp, 'cone_theta') or hasattr(cdp, 'cone_s1') or hasattr(cdp, 'cone_sigma_max'):
            sys.stdout.write("\nFrame ordering:\n")
        if hasattr(cdp, 'cone_theta_x'):
            sys.stdout.write(format_float % ('cone theta_x:', cdp.cone_theta_x))
        if hasattr(cdp, 'cone_theta_y'):
            sys.stdout.write(format_float % ('cone theta_y:', cdp.cone_theta_y))
        if hasattr(cdp, 'cone_theta'):
            sys.stdout.write(format_float % ('cone theta:', cdp.cone_theta))
        if hasattr(cdp, 'cone_s1'):
            sys.stdout.write(format_float % ('cone s1:', cdp.cone_s1))
        if hasattr(cdp, 'cone_sigma_max'):
            sys.stdout.write(format_float % ('sigma_max:', cdp.cone_sigma_max))

        # Minimisation statistics.
        if hasattr(cdp, 'chi2'):
            sys.stdout.write("\nMinimisation statistics:\n")
            sys.stdout.write(format_float % ('chi2:', cdp.chi2))

        # Final spacing.
        sys.stdout.write("\n")


    def read_results(self, model=None, pipe_name=None):
        """Attempt to read old results files.

        @keyword model:     The frame order model.
        @type model:        str
        @keyword pipe_name: The name of the data pipe to use for this model.
        @type pipe_name:    str
        @return:            True if the file exists and has been read, False otherwise.
        @rtype:             bool
        """

        # The file name.
        path = self.results_dir + model + sep + 'results.bz2'

        # The file does not exist.
        if not access(path, F_OK):
            return False

        # Create an empty data pipe.
        self.interpreter.pipe.create(pipe_name=pipe_name, pipe_type='frame order')

        # Read the results file.
        self.interpreter.results.read(path)

        # Results printout.
        self.print_results()

        # Success.
        return True


    def visualisation(self, model=None):
        """Create visual representations of the frame order results for the given model.

        This includes a PDB representation of the motions (the 'cone.pdb' file located in each model directory) together with a relax script for displaying the average domain positions together with the cone/motion representation in PyMOL (the 'pymol_display.py' file, also created in the model directory).

        @keyword model:     The frame order model to visualise.  This should match the model of the current data pipe, unless the special value of 'final' is used to indicate the visualisation of the final results.
        @type model:        str
        """

        # Sanity check.
        if model != 'final' and model != cdp.model:
            raise RelaxError("The model '%s' does not match the model '%s' of the current data pipe." % (model, cdp.model))

        # The PDB representation of the model.
        self.interpreter.frame_order.pdb_model(dir=self.results_dir+model, force=True)

        # Create the visualisation script.
        subsection(file=sys.stdout, text="Creating a PyMOL visualisation script.")
        script = open_write_file(file_name='pymol_display.py', dir=self.results_dir+model, force=True)

        # Add a comment for the user.
        script.write("# relax script for displaying the frame order results of this '%s' model in PyMOL.\n\n" % model)

        # The script contents.
        script.write("# PyMOL visualisation.\n")
        script.write("pymol.view()\n")
        script.write("pymol.command('show spheres')\n")
        script.write("pymol.frame_order(file='frame_order.pdb', dist_file='frame_order_distribution.pdb')\n")

        # Close the file.
        script.close()
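

# A minimal usage sketch for the class below (hypothetical names; it assumes a
# data pipe containing all the dispersion data has already been created):
#
#     Relax_disp(pipe_name='base pipe', pipe_bundle='relax_disp',
#         results_dir='disp_results', models=[MODEL_R2EFF, MODEL_CR72])
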
class Relax_disp:
    """The relaxation dispersion auto-analysis."""

    # Some class variables.
    opt_func_tol = 1e-25
    opt_max_iterations = int(1e7)

    def __init__(self, pipe_name=None, pipe_bundle=None, results_dir=None, models=[MODEL_R2EFF], grid_inc=11, mc_sim_num=500, modsel='AIC', pre_run_dir=None, insignificance=0.0, numeric_only=False, mc_sim_all_models=False, eliminate=True):
        """Perform a full relaxation dispersion analysis for the given list of models.

        @keyword pipe_name:         The name of the data pipe containing all of the data for the analysis.
        @type pipe_name:            str
        @keyword pipe_bundle:       The data pipe bundle to associate all spawned data pipes with.
        @type pipe_bundle:          str
        @keyword results_dir:       The directory where results files are saved.
        @type results_dir:          str
        @keyword models:            The list of relaxation dispersion models to optimise.
        @type models:               list of str
        @keyword grid_inc:          Number of grid search increments.
        @type grid_inc:             int
        @keyword mc_sim_num:        The number of Monte Carlo simulations to be used for error analysis at the end of the analysis.
        @type mc_sim_num:           int
        @keyword modsel:            The model selection technique to use in the analysis to determine which model is the best for each spin cluster.  This can currently be one of 'AIC', 'AICc', and 'BIC'.
        @type modsel:               str
        @keyword pre_run_dir:       The optional directory containing the dispersion auto-analysis results from a previous run.  The optimised parameters from these previous results will be used as the starting point for optimisation rather than performing a grid search.  This is essential for when large spin clusters are specified, as a grid search becomes prohibitively expensive with clusters of three or more spins.  At some point a RelaxError will occur because the grid search is impossibly large.  For the cluster specific parameters, i.e. the populations of the states and the exchange parameters, an average value will be used as the starting point.  For all other parameters, the R20 values for each spin and magnetic field, as well as the parameters related to the chemical shift difference dw, the optimised values of the previous run will be directly copied.
        @type pre_run_dir:          None or str
        @keyword insignificance:    The R2eff/R1rho value in rad/s by which to judge insignificance.  If the maximum difference between two points on all dispersion curves for a spin is less than this value, that spin will be deselected.  This does not affect the 'No Rex' model.  Set this value to 0.0 to use all data.  The value will be passed on to the relax_disp.insignificance user function.
        @type insignificance:       float
        @keyword numeric_only:      The class of models to use in the model selection.  The default of False allows all dispersion models to be used in the analysis (no exchange, the analytic models and the numeric models).  The value of True will activate a pure numeric solution - the analytic models will be optimised, as they are very useful for replacing the grid search for the numeric models, but the final model selection will not include them.
        @type numeric_only:         bool
        @keyword mc_sim_all_models: A flag which if True will cause Monte Carlo simulations to be performed for each individual model.  Otherwise Monte Carlo simulations will be reserved for the final model.
        @type mc_sim_all_models:    bool
        @keyword eliminate:         A flag which if True will enable the elimination of failed models and failed Monte Carlo simulations through the eliminate user function.
        @type eliminate:            bool
        """

        # Printout.
        title(file=sys.stdout, text="Relaxation dispersion auto-analysis", prespace=4)

        # Execution lock.
        status.exec_lock.acquire(pipe_bundle, mode='auto-analysis')

        # Set up the analysis status object.
        status.init_auto_analysis(pipe_bundle, type='relax_disp')
        status.current_analysis = pipe_bundle

        # Store the args.
        self.pipe_name = pipe_name
        self.pipe_bundle = pipe_bundle
        self.results_dir = results_dir
        self.models = models
        self.grid_inc = grid_inc
        self.mc_sim_num = mc_sim_num
        self.modsel = modsel
        self.pre_run_dir = pre_run_dir
        self.insignificance = insignificance
        self.numeric_only = numeric_only
        self.mc_sim_all_models = mc_sim_all_models
        self.eliminate = eliminate

        # No results directory, so default to the current directory.
        if not self.results_dir:
            self.results_dir = getcwd()

        # Data checks.
        self.check_vars()

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Execute.
        self.run()

        # Finish and unlock execution.
        status.auto_analysis[self.pipe_bundle].fin = True
        status.current_analysis = None
        status.exec_lock.release()


    def is_model_for_selection(self, model=None):
        """Determine if the model should be used for model selection.

        @keyword model: The model to check.
        @type model:    str
        @return:        True if the model should be included in the model selection list, False if not.
        @rtype:         bool
        """

        # Skip the 'R2eff' base model.
        if model == 'R2eff':
            return False

        # Do not use the analytic models.
        if self.numeric_only and model in MODEL_LIST_ANALYTIC:
            return False

        # All models allowed.
        return True


    def check_vars(self):
        """Check that the user has set the variables correctly."""

        # Printout.
        section(file=sys.stdout, text="Variable checking", prespace=2)

        # The pipe name.
        if not has_pipe(self.pipe_name):
            raise RelaxNoPipeError(self.pipe_name)

        # Check the model selection.
        allowed = ['AIC', 'AICc', 'BIC']
        if self.modsel not in allowed:
            raise RelaxError("The model selection technique '%s' is not in the allowed list of %s." % (self.modsel, allowed))

        # Some warning for the user if the pure numeric solution is selected.
        if self.numeric_only:
            # Loop over all models.
            for model in self.models:
                # Skip the models used for nesting.
                if model in [MODEL_CR72, MODEL_MMQ_CR72, MODEL_MP05]:
                    continue

                # Warnings for all other analytic models.
                if model in MODEL_LIST_ANALYTIC:
                    warn(RelaxWarning("The analytic model '%s' will be optimised but will not be used in any way in this numeric model only auto-analysis." % model))

        # Printout.
        print("The dispersion auto-analysis variables are OK.")


    def error_analysis(self):
        """Perform an error analysis of the peak intensities for each field strength separately."""

        # Printout.
        section(file=sys.stdout, text="Error analysis", prespace=2)

        # Check if intensity errors have already been calculated by the user.
        precalc = True
        for spin in spin_loop(skip_desel=True):
            # No intensity error data structure.
            if not hasattr(spin, 'intensity_err'):
                precalc = False
                break

            # Determine if a spectrum ID is missing from the list.
            for id in cdp.spectrum_ids:
                if id not in spin.intensity_err:
                    precalc = False
                    break

        # Skip.
        if precalc:
            print("Skipping the error analysis as it has already been performed.")
            return

        # Loop over the spectrometer frequencies.
        for frq in loop_frq():
            # Generate a list of spectrum IDs matching the frequency.
            ids = []
            for id in cdp.spectrum_ids:
                # Check that the spectrometer frequency matches.
                match_frq = True
                if frq is not None and cdp.spectrometer_frq[id] != frq:
                    match_frq = False

                # Add the ID.
                if match_frq:
                    ids.append(id)

            # Run the error analysis on the subset.
            self.interpreter.spectrum.error_analysis(subset=ids)


    def nesting(self, model=None):
        """Support for model nesting.

        If model nesting is detected, the optimised parameters from the simpler model will be used for the more complex model.  The method will then signal if the nesting condition is met for the model, allowing the grid search to be skipped.


        @keyword model: The model to be optimised.
        @type model:    str
        @return:        True if the model is the more complex model in a nested pair and the parameters of the simpler model have been copied.  False otherwise.
        @rtype:         bool
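
        As a sketch, if both MODEL_CR72 and MODEL_CR72_FULL are present in self.models, then when optimising MODEL_CR72_FULL the optimised CR72 parameters are copied across (the single r2 value being copied into both r2a and r2b) and True is returned, allowing the caller to skip the grid search.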
        """

        # Printout. 
        subsection(file=sys.stdout, text="Nesting and model equivalence checks", prespace=1)

        # The simpler model.
        nested_pipe = None
        if model == MODEL_LM63_3SITE and MODEL_LM63 in self.models:
            nested_pipe = MODEL_LM63
        if model == MODEL_CR72_FULL and MODEL_CR72 in self.models:
            nested_pipe = MODEL_CR72
        if model == MODEL_MMQ_CR72 and MODEL_CR72 in self.models:
            nested_pipe = MODEL_CR72
        if model == MODEL_NS_CPMG_2SITE_3D_FULL and MODEL_NS_CPMG_2SITE_3D in self.models:
            nested_pipe = MODEL_NS_CPMG_2SITE_3D
        if model == MODEL_NS_CPMG_2SITE_STAR_FULL and MODEL_NS_CPMG_2SITE_STAR in self.models:
            nested_pipe = MODEL_NS_CPMG_2SITE_STAR
        if model == MODEL_NS_MMQ_3SITE_LINEAR and MODEL_NS_MMQ_2SITE in self.models:
            nested_pipe = MODEL_NS_MMQ_2SITE
        if model == MODEL_NS_MMQ_3SITE:
            if MODEL_NS_MMQ_3SITE_LINEAR in self.models:
                nested_pipe = MODEL_NS_MMQ_3SITE_LINEAR
            elif MODEL_NS_MMQ_2SITE in self.models:
                nested_pipe = MODEL_NS_MMQ_2SITE
        if model == MODEL_NS_R1RHO_3SITE_LINEAR and MODEL_NS_R1RHO_2SITE in self.models:
            nested_pipe = MODEL_NS_R1RHO_2SITE
        if model == MODEL_NS_R1RHO_3SITE:
            if MODEL_NS_R1RHO_3SITE_LINEAR in self.models:
                nested_pipe = MODEL_NS_R1RHO_3SITE_LINEAR
            elif MODEL_NS_R1RHO_2SITE in self.models:
                nested_pipe = MODEL_NS_R1RHO_2SITE


        # Using the analytic solution.
        analytic = False
        if model in [MODEL_NS_CPMG_2SITE_3D, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_CPMG_2SITE_STAR] and MODEL_CR72 in self.models:
            nested_pipe = MODEL_CR72
            analytic = True
        elif model == MODEL_NS_MMQ_2SITE and MODEL_MMQ_CR72 in self.models:
            nested_pipe = MODEL_MMQ_CR72
            analytic = True
        if model == MODEL_NS_R1RHO_2SITE and MODEL_MP05 in self.models:
            nested_pipe = MODEL_MP05
            analytic = True

        # No nesting.
        if not nested_pipe:
            print("No model nesting or model equivalence detected.")
            return False

        # Printout.
        if analytic:
            print("Model equivalence detected, copying the optimised parameters from the analytic '%s' model rather than performing a grid search." % nested_pipe)
        else:
            print("Model nesting detected, copying the optimised parameters from the '%s' model rather than performing a grid search." % nested_pipe)

        # Loop over the spins to copy the parameters.
        for spin, spin_id in spin_loop(return_id=True, skip_desel=True):
            # Get the nested spin.
            nested_spin = return_spin(spin_id=spin_id, pipe=nested_pipe)

            # The R20 parameters.
            if hasattr(nested_spin, 'r2'):
                if model in [MODEL_CR72_FULL, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR_FULL]:
                    setattr(spin, 'r2a', deepcopy(nested_spin.r2))
                    setattr(spin, 'r2b', deepcopy(nested_spin.r2))
                else:
                    setattr(spin, 'r2', deepcopy(nested_spin.r2))

            # The LM63 3-site model parameters.
            if model == MODEL_LM63_3SITE:
                setattr(spin, 'phi_ex_B', deepcopy(nested_spin.phi_ex))
                setattr(spin, 'phi_ex_C', deepcopy(nested_spin.phi_ex))
                setattr(spin, 'kB', deepcopy(nested_spin.kex))
                setattr(spin, 'kC', deepcopy(nested_spin.kex))

            # All other spin parameters.
            for param in spin.params:
                if param in ['r2', 'r2a', 'r2b']:
                    continue

                # The parameter does not exist.
                if not hasattr(nested_spin, param):
                    continue

                # Skip the LM63 3-site model parameters
                if model == MODEL_LM63_3SITE and param in ['phi_ex', 'kex']:
                    continue

                # Copy the parameter.
                setattr(spin, param, deepcopy(getattr(nested_spin, param)))

        # Nesting.
        return True


    def optimise(self, model=None):
        """Optimise the model, taking model nesting into account.

        @keyword model: The model to be optimised.
        @type model:    str
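
        The sequence of operations is:  deselection of insignificant spins, parameter initialisation (either from a pre-run directory or by model nesting), a grid search when no starting parameters are available, simplex minimisation, optional model elimination, and optional Monte Carlo simulations for error analysis.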
        """

        # Printout. 
        section(file=sys.stdout, text="Optimisation", prespace=2)

        # Deselect insignificant spins.
        if model not in ['R2eff', 'No Rex']:
            self.interpreter.relax_disp.insignificance(level=self.insignificance)

        # Use pre-run results as the optimisation starting point.
        if self.pre_run_dir:
            self.pre_run_parameters(model=model)

        # Otherwise use the normal nesting check and grid search if not nested.
        else:
            # Nested model simplification.
            nested = self.nesting(model=model)

            # Grid search.
            if not nested:
                self.interpreter.grid_search(inc=self.grid_inc)

        # Minimise.
        self.interpreter.minimise('simplex', func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations, constraints=True)

        # Model elimination.
        if self.eliminate:
            self.interpreter.eliminate()

        # Monte Carlo simulations.
        if self.mc_sim_all_models or len(self.models) < 2 or model == 'R2eff':
            self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
            self.interpreter.monte_carlo.create_data()
            self.interpreter.monte_carlo.initial_values()
            self.interpreter.minimise('simplex', func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations, constraints=True)
            if self.eliminate:
                self.interpreter.eliminate()
            self.interpreter.monte_carlo.error_analysis()


    def pre_run_parameters(self, model=None):
        """Copy parameters from an earlier analysis.

        @keyword model: The model to be optimised.
        @type model:    str
        """

        # Printout.
        subsection(file=sys.stdout, text="Pre-run parameters", prespace=1)

        # Create a temporary data pipe for the previous run.
        self.interpreter.pipe.create(pipe_name='pre', pipe_type='relax_disp')

        # Load the previous results.
        path = self.pre_run_dir + sep + model
        self.interpreter.results.read(file='results', dir=path)

        # Copy the parameters.
        self.interpreter.relax_disp.parameter_copy(pipe_from='pre', pipe_to=model)

        # Finally, switch back to the original data pipe and delete the temporary one.
        self.interpreter.pipe.switch(pipe_name=model)
        self.interpreter.pipe.delete(pipe_name='pre')


    def run(self):
        """Execute the auto-analysis."""

        # Peak intensity error analysis.
        if MODEL_R2EFF in self.models:
            self.error_analysis()

        # Loop over the models.
        self.model_pipes = []
        for model in self.models:
            # Printout.
            subtitle(file=sys.stdout, text="The '%s' model" % model, prespace=3)

            # The results directory path.
            path = self.results_dir+sep+model

            # The name of the data pipe for the model.
            model_pipe = model
            if self.is_model_for_selection(model):
                self.model_pipes.append(model_pipe)

            # Check that results do not already exist - i.e. a previous run was interrupted.
            path1 = path + sep + 'results'
            path2 = path1 + '.bz2'
            path3 = path1 + '.gz'
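            # For example (hypothetical paths), with results_dir='disp' and the 'CR72' model the candidates are 'disp/CR72/results', 'disp/CR72/results.bz2' and 'disp/CR72/results.gz'.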
            if access(path1, F_OK) or access(path2, F_OK) or access(path3, F_OK):
                # Printout.
                print("Detected the presence of results files for the '%s' model - loading these instead of performing optimisation for a second time." % model)

                # Create a data pipe and switch to it.
                self.interpreter.pipe.create(pipe_name=model_pipe, pipe_type='relax_disp', bundle=self.pipe_bundle)
                self.interpreter.pipe.switch(model_pipe)

                # Load the results.
                self.interpreter.results.read(file='results', dir=path)

                # Jump to the next model.
                continue

            # Create the data pipe by copying the base pipe, then switching to it.
            self.interpreter.pipe.copy(pipe_from=self.pipe_name, pipe_to=model_pipe, bundle_to=self.pipe_bundle)
            self.interpreter.pipe.switch(model_pipe)

            # Select the model.
            self.interpreter.relax_disp.select_model(model)

            # Copy the R2eff values from the R2eff model data pipe.
            if model != MODEL_R2EFF and MODEL_R2EFF in self.models:
                self.interpreter.value.copy(pipe_from=MODEL_R2EFF, pipe_to=model, param='r2eff')

            # Calculate the R2eff values for the fixed relaxation time period data types.
            if model == MODEL_R2EFF and not has_exponential_exp_type():
                self.interpreter.calc()

            # Optimise the model.
            else:
                self.optimise(model=model)

            # Write out the results.
            self.write_results(path=path, model=model)

        # The final model selection data pipe.
        if len(self.models) >= 2:
            # Printout.
            section(file=sys.stdout, text="Final results", prespace=2)

            # Perform model selection.
            self.interpreter.model_selection(method=self.modsel, modsel_pipe='final', bundle=self.pipe_bundle, pipes=self.model_pipes)

            # Final Monte Carlo simulations only.
            if not self.mc_sim_all_models:
                self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
                self.interpreter.monte_carlo.create_data()
                self.interpreter.monte_carlo.initial_values()
                self.interpreter.minimise('simplex', func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations, constraints=True)
                if self.eliminate:
                    self.interpreter.eliminate()
                self.interpreter.monte_carlo.error_analysis()

            # Writing out the final results.
            self.write_results(path=self.results_dir+sep+'final')

        # No model selection.
        else:
            warn(RelaxWarning("Model selection in the dispersion auto-analysis has been skipped as only %s models have been optimised." % len(self.model_pipes)))

        # Finally save the program state.
        self.interpreter.state.save(state='final_state', dir=self.results_dir, force=True)


    def write_results(self, path=None, model=None):
        """Create a set of results, text and Grace files for the current data pipe.

        @keyword path:  The directory to place the files into.
        @type path:     str
        @keyword model: The model analysed, or None for the final model selection results.
        @type model:    None or str
        """

        # Printout.
        section(file=sys.stdout, text="Results writing", prespace=2)

        # Exponential curves.
        if model == 'R2eff' and has_exponential_exp_type():
            self.interpreter.relax_disp.plot_exp_curves(file='intensities.agr', dir=path, force=True)    # Average peak intensities.
            self.interpreter.relax_disp.plot_exp_curves(file='intensities_norm.agr', dir=path, force=True, norm=True)    # Average peak intensities (normalised).

        # Dispersion curves.
        self.interpreter.relax_disp.plot_disp_curves(dir=path, force=True)
        self.interpreter.relax_disp.write_disp_curves(dir=path, force=True)
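
        # For example, for a 2-site model such as CR72 the value and Grace writing below produces pA.out, dw.out and kex.out, together with the matching pA.agr, dw.agr and kex.agr plots.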

        # The selected models for the final run.
        if model is None:
            self.interpreter.value.write(param='model', file='model.out', dir=path, force=True)

        # The R2eff parameter.
        if model == 'R2eff':
            self.interpreter.value.write(param='r2eff', file='r2eff.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='r2eff', file='r2eff.agr', dir=path, force=True)

        # The I0 parameter.
        if model == 'R2eff' and has_exponential_exp_type():
            self.interpreter.value.write(param='i0', file='i0.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='i0', file='i0.agr', dir=path, force=True)

        ## The R20 parameter.
        #if has_cpmg_exp_type() and model in [None, MODEL_LM63, MODEL_CR72, MODEL_IT99, MODEL_M61, MODEL_DPL94, MODEL_M61B, MODEL_MMQ_CR72, MODEL_NS_CPMG_2SITE_3D, MODEL_NS_CPMG_2SITE_STAR, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_MMQ_2SITE, MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR]:
        #    self.interpreter.value.write(param='r2', file='r20.out', dir=path, force=True)
        #    self.interpreter.grace.write(x_data_type='res_num', y_data_type='r2', file='r20.agr', dir=path, force=True)

        ## The R20A and R20B parameters.
        #if has_cpmg_exp_type() and model in [None, MODEL_CR72_FULL, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR_FULL]:
        #    self.interpreter.value.write(param='r2a', file='r20a.out', dir=path, force=True)
        #    self.interpreter.value.write(param='r2b', file='r20b.out', dir=path, force=True)
        #    self.interpreter.grace.write(x_data_type='res_num', y_data_type='r2a', file='r20a.agr', dir=path, force=True)
        #    self.interpreter.grace.write(x_data_type='res_num', y_data_type='r2b', file='r20b.agr', dir=path, force=True)

        ## The R1rho parameter.
        #if has_r1rho_exp_type() and model in [None] + MODEL_LIST_R1RHO:
        #    self.interpreter.value.write(param='r2', file='r1rho0.out', dir=path, force=True)
        #    self.interpreter.grace.write(x_data_type='res_num', y_data_type='r2', file='r1rho0.agr', dir=path, force=True)

        # The pA, pB, and pC parameters.
        if model in [None, MODEL_CR72, MODEL_CR72_FULL, MODEL_IT99, MODEL_M61B, MODEL_MMQ_CR72, MODEL_NS_CPMG_2SITE_3D, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR, MODEL_NS_CPMG_2SITE_STAR_FULL, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_MMQ_2SITE, MODEL_NS_R1RHO_2SITE, MODEL_NS_R1RHO_3SITE, MODEL_NS_R1RHO_3SITE_LINEAR, MODEL_TP02, MODEL_TAP03, MODEL_MP05, MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR]:
            self.interpreter.value.write(param='pA', file='pA.out', dir=path, force=True)
            self.interpreter.value.write(param='pB', file='pB.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='pA', file='pA.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='pB', file='pB.agr', dir=path, force=True)
        if model in [MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR, MODEL_NS_R1RHO_3SITE, MODEL_NS_R1RHO_3SITE_LINEAR]:
            self.interpreter.value.write(param='pC', file='pC.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='pC', file='pC.agr', dir=path, force=True)

        # The Phi_ex parameter.
        if model in [None, MODEL_LM63, MODEL_M61, MODEL_DPL94]:
            self.interpreter.value.write(param='phi_ex', file='phi_ex.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='phi_ex', file='phi_ex.agr', dir=path, force=True)

        # The Phi_ex_B and Phi_ex_C parameters.
        if model in [None, MODEL_LM63_3SITE]:
            self.interpreter.value.write(param='phi_ex_B', file='phi_ex_B.out', dir=path, force=True)
            self.interpreter.value.write(param='phi_ex_C', file='phi_ex_C.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='phi_ex_B', file='phi_ex_B.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='phi_ex_C', file='phi_ex_C.agr', dir=path, force=True)

        # The dw parameter.
        if model in [None, MODEL_CR72, MODEL_CR72_FULL, MODEL_IT99, MODEL_M61B, MODEL_MMQ_CR72, MODEL_NS_CPMG_2SITE_3D, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR, MODEL_NS_CPMG_2SITE_STAR_FULL, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_MMQ_2SITE, MODEL_NS_R1RHO_2SITE, MODEL_TP02, MODEL_TAP03, MODEL_MP05, MODEL_TSMFK01]:
            self.interpreter.value.write(param='dw', file='dw.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dw', file='dw.agr', dir=path, force=True)
        if model in [MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR, MODEL_NS_R1RHO_3SITE, MODEL_NS_R1RHO_3SITE_LINEAR]:
            self.interpreter.value.write(param='dw_AB', file='dw_AB.out', dir=path, force=True)
            self.interpreter.value.write(param='dw_BC', file='dw_BC.out', dir=path, force=True)
            self.interpreter.value.write(param='dw_AC', file='dw_AC.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dw_AB', file='dw_AB.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dw_BC', file='dw_BC.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dw_AC', file='dw_AC.agr', dir=path, force=True)

        # The dwH parameter.
        if model in [None, MODEL_MMQ_CR72, MODEL_NS_MMQ_2SITE]:
            self.interpreter.value.write(param='dwH', file='dwH.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dwH', file='dwH.agr', dir=path, force=True)
        if model in [MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR]:
            self.interpreter.value.write(param='dwH_AB', file='dwH_AB.out', dir=path, force=True)
            self.interpreter.value.write(param='dwH_BC', file='dwH_BC.out', dir=path, force=True)
            self.interpreter.value.write(param='dwH_AC', file='dwH_AC.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dwH_AB', file='dwH_AB.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dwH_BC', file='dwH_BC.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='dwH_AC', file='dwH_AC.agr', dir=path, force=True)

        # The k_AB, kex and tex parameters.
        if model in [None, MODEL_LM63, MODEL_CR72, MODEL_CR72_FULL, MODEL_IT99, MODEL_M61, MODEL_DPL94, MODEL_M61B, MODEL_MMQ_CR72, MODEL_NS_CPMG_2SITE_3D, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR, MODEL_NS_CPMG_2SITE_STAR_FULL, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_MMQ_2SITE, MODEL_NS_R1RHO_2SITE, MODEL_TP02, MODEL_TAP03, MODEL_MP05]:
            self.interpreter.value.write(param='k_AB', file='k_AB.out', dir=path, force=True)
            self.interpreter.value.write(param='kex', file='kex.out', dir=path, force=True)
            self.interpreter.value.write(param='tex', file='tex.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='k_AB', file='k_AB.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kex', file='kex.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='tex', file='tex.agr', dir=path, force=True)
        if model in [MODEL_NS_MMQ_3SITE, MODEL_NS_MMQ_3SITE_LINEAR, MODEL_NS_R1RHO_3SITE, MODEL_NS_R1RHO_3SITE_LINEAR]:
            self.interpreter.value.write(param='kex_AB', file='kex_AB.out', dir=path, force=True)
            self.interpreter.value.write(param='kex_BC', file='kex_BC.out', dir=path, force=True)
            self.interpreter.value.write(param='kex_AC', file='kex_AC.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kex_AB', file='kex_AB.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kex_BC', file='kex_BC.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kex_AC', file='kex_AC.agr', dir=path, force=True)

        # The k_AB parameter.
        if model in [None, MODEL_TSMFK01]:
            self.interpreter.value.write(param='k_AB', file='k_AB.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='k_AB', file='k_AB.agr', dir=path, force=True)

        # The kB and kC parameters.
        if model in [None, MODEL_LM63_3SITE]:
            self.interpreter.value.write(param='kB', file='kB.out', dir=path, force=True)
            self.interpreter.value.write(param='kC', file='kC.out', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kB', file='kB.agr', dir=path, force=True)
            self.interpreter.grace.write(x_data_type='res_num', y_data_type='kC', file='kC.agr', dir=path, force=True)

        # Minimisation statistics.
        if not (model == 'R2eff' and has_fixed_time_exp_type()):
            self.interpreter.grace.write(y_data_type='chi2', file='chi2.agr', dir=path, force=True)

        # Finally save the results.  This is last to allow the continuation of an interrupted analysis while ensuring that all results files have been created.
        self.interpreter.results.write(file='results', dir=path, force=True)


class Relax_fit:
    def __init__(self, pipe_name=None, pipe_bundle=None, file_root='rx', results_dir=None, grid_inc=11, mc_sim_num=500, view_plots=True):
        """Perform relaxation curve fitting.

        To use this auto-analysis, a data pipe with all the required data needs to be set up.  This data pipe should contain the following:

            - All the spins loaded.
            - Unresolved spins deselected.
            - All the peak intensities loaded and relaxation delay times set.
            - Either the baseplane noise RMSD values should be set or replicated spectra loaded.
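
        As a minimal sketch, such a pipe could be set up with user function calls along the following lines, where the file names, spectrum IDs, relaxation delay times, and RMSD value are all hypothetical:

            pipe.create(pipe_name='R1 analysis', pipe_type='relax_fit')
            sequence.read(file='seq', res_num_col=1)
            spectrum.read_intensities(file='T1_0.010.list', spectrum_id='t_0.010')
            relax_fit.relax_time(time=0.010, spectrum_id='t_0.010')
            spectrum.baseplane_rmsd(error=3000.0, spectrum_id='t_0.010')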

        @keyword pipe_name:     The name of the data pipe containing all of the data for the analysis.
        @type pipe_name:        str
        @keyword pipe_bundle:   The data pipe bundle to associate all spawned data pipes with.
        @type pipe_bundle:      str
        @keyword file_root:     File root of the output files.
        @type file_root:        str
        @keyword results_dir:   The directory where results files are saved.
        @type results_dir:      str
        @keyword grid_inc:      Number of grid search increments.
        @type grid_inc:         int
        @keyword mc_sim_num:    The number of Monte Carlo simulations to be used for error analysis at the end of the analysis.
        @type mc_sim_num:       int
        @keyword view_plots:    Flag to automatically view grace plots after calculation.
        @type view_plots:       bool
        """

        # Execution lock.
        status.exec_lock.acquire(pipe_bundle, mode='auto-analysis')

        # Set up the analysis status object.
        status.init_auto_analysis(pipe_bundle, type='relax_fit')
        status.current_analysis = pipe_bundle

        # Store the args.
        self.pipe_name = pipe_name
        self.pipe_bundle = pipe_bundle
        self.file_root = file_root
        self.results_dir = results_dir
        if self.results_dir:
            self.grace_dir = results_dir + sep + 'grace'
        else:
            self.grace_dir = 'grace'
        self.mc_sim_num = mc_sim_num
        self.grid_inc = grid_inc
        self.view_plots = view_plots

        # Data checks.
        self.check_vars()

        # Set the data pipe to the current data pipe.
        if self.pipe_name != cdp_name():
            switch(self.pipe_name)

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Execute.
        self.run()

        # Finish and unlock execution.
        status.auto_analysis[self.pipe_bundle].fin = True
        status.current_analysis = None
        status.exec_lock.release()


    def run(self):
        """Set up and run the curve-fitting."""

        # Peak intensity error analysis.
        self.interpreter.spectrum.error_analysis()

        # Set the relaxation curve type.
        self.interpreter.relax_fit.select_model('exp')

        # Grid search.
        self.interpreter.grid_search(inc=self.grid_inc)

        # Minimise.
        self.interpreter.minimise('simplex', scaling=False, constraints=False)

        # Monte Carlo simulations.
        self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
        self.interpreter.monte_carlo.create_data()
        self.interpreter.monte_carlo.initial_values()
        self.interpreter.minimise('simplex', scaling=False, constraints=False)
        self.interpreter.monte_carlo.error_analysis()

        # Save the relaxation rates.
        self.interpreter.value.write(param='rx', file=self.file_root+'.out', dir=self.results_dir, force=True)

        # Save the results.
        self.interpreter.results.write(file='results', dir=self.results_dir, force=True)

        # Create Grace plots of the data.
        self.interpreter.grace.write(y_data_type='chi2', file='chi2.agr', dir=self.grace_dir, force=True)    # Minimised chi-squared value.
        self.interpreter.grace.write(y_data_type='i0', file='i0.agr', dir=self.grace_dir, force=True)    # Initial peak intensity.
        self.interpreter.grace.write(y_data_type='rx', file=self.file_root+'.agr', dir=self.grace_dir, force=True)    # Relaxation rate.
        self.interpreter.grace.write(x_data_type='relax_times', y_data_type='intensities', file='intensities.agr', dir=self.grace_dir, force=True)    # Average peak intensities.
        self.interpreter.grace.write(x_data_type='relax_times', y_data_type='intensities', norm=True, file='intensities_norm.agr', dir=self.grace_dir, force=True)    # Average peak intensities (normalised).

        # Display the Grace plots if selected.
        if self.view_plots:
            self.interpreter.grace.view(file='chi2.agr', dir=self.grace_dir)
            self.interpreter.grace.view(file='i0.agr', dir=self.grace_dir)
            self.interpreter.grace.view(file=self.file_root+'.agr', dir=self.grace_dir)
            self.interpreter.grace.view(file='intensities.agr', dir=self.grace_dir)
            self.interpreter.grace.view(file='intensities_norm.agr', dir=self.grace_dir)

        # Save the program state.
        self.interpreter.state.save(state=self.file_root+'.save', dir=self.results_dir, force=True)


    def check_vars(self):
        """Check that the user has set the variables correctly."""

        # The pipe name.
        if not has_pipe(self.pipe_name):
            raise RelaxNoPipeError(self.pipe_name)