Example #1
def run(data_path, grid_search_path, ensemble_output_path, score_output_path,
        number_of_partitions, number_of_iterations, best_proportion,
        used_proportion):

    # Read partitioned input data
    data = read_partitioned_data(data_path, number_of_iterations,
                                 number_of_partitions)

    # Read true values from the partitioned data set
    true_values = get_true_values(data)

    # Read the grid search results as input data
    results = read_data(grid_search_path)

    # Construct the ensemble based on the results of the grid search and the
    # proportion parameters passed to this script
    ensemble = construct_ensemble(results, best_proportion, used_proportion)

    # Retrieve the classification results from the ensemble based on a
    # majority vote
    predicted_values = ensemble_vote(ensemble)

    # Score the classification results of the ensemble against the true values
    result = Result()
    result.add_values(true_values, predicted_values)
    result.calculate()

    # Output the ensemble into the specified file
    write_data(ensemble_output_path, ensemble)

    # Output the ensemble score into the specified file
    write_data(score_output_path, result)
Example #2
def write(align_id=None, file=None, dir=None, bc=False, force=False):
    """Write the RDC data corresponding to the alignment ID to file.

    @keyword align_id:  The alignment tensor ID string.
    @type align_id:     str
    @keyword file:      The file name or object to write to.
    @type file:         str or file object
    @keyword dir:       The name of the directory to place the file into (defaults to the current directory).
    @type dir:          str
    @keyword bc:        The back-calculation flag which if True will cause the back-calculated rather than measured data to be written.
    @type bc:           bool
    @keyword force:     A flag which if True will cause any pre-existing file to be overwritten.
    @type force:        bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True, rdc_id=align_id, rdc=True)

    # Open the file for writing.
    file = open_write_file(file, dir, force)

    # Loop over the interatomic data containers and collect the data.
    data = []
    for interatom in interatomic_loop():
        # Skip deselected containers.
        if not interatom.select:
            continue

        # Skip containers with no RDCs.
        if not bc and (not hasattr(interatom, 'rdc') or align_id not in interatom.rdc.keys()):
            continue
        elif bc and (not hasattr(interatom, 'rdc_bc') or align_id not in interatom.rdc_bc.keys()):
            continue

        # Append the spin data.
        data.append([])
        data[-1].append(interatom.spin_id1)
        data[-1].append(interatom.spin_id2)

        # Handle the missing rdc_data_types variable.
        data_type = None
        if hasattr(interatom, 'rdc_data_types'):
            data_type = interatom.rdc_data_types[align_id]

        # The value.
        if bc:
            data[-1].append(repr(convert(interatom.rdc_bc[align_id], data_type, align_id)))
        else:
            data[-1].append(repr(convert(interatom.rdc[align_id], data_type, align_id)))

        # The error.
        if hasattr(interatom, 'rdc_err') and align_id in interatom.rdc_err.keys():
            data[-1].append(repr(convert(interatom.rdc_err[align_id], data_type, align_id)))
        else:
            data[-1].append(repr(None))

    # Write out.
    write_data(out=file, headings=["Spin_ID1", "Spin_ID2", "RDCs", "RDC_error"], data=data)
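A minimal usage sketch for this function, assuming a relax data pipe with RDC data is already set up (the alignment ID and file names below are hypothetical):

# Hypothetical call: write the measured RDCs for the 'Dy' alignment to results/rdc_dy.txt.
write(align_id='Dy', file='rdc_dy.txt', dir='results', bc=False, force=True)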
Example #3

def run(input_path, output_path):

    # Read the results as input data
    results = read_data(input_path)

    # Retrieve the best result
    best_result = sorted(results, key=lambda k: k.average_f1(),
                         reverse=True)[0]

    # Output the score into the specified file
    write_data(output_path, best_result)
Example #4
    def create_par_chi2(self, file_prefix, par_chi2_vals):
        """Function for creating file with parameters and the chi2 value."""

        # Print out.
        print("\nCreating the file with parameters and the chi2 value.")

        # Open the file.
        par_file = open_write_file(file_name=file_prefix + '.par',
                                   dir=self.dir,
                                   force=True)

        # Copy the nested list to sort it.
        par_chi2_vals_sort = deepcopy(par_chi2_vals)

        # Sort by the chi2 value (column index 4).
        par_chi2_vals_sort.sort(key=lambda values: values[4])

        # Collect the data structure, which is a list of list of strings.
        data = []
        for i, line in enumerate(par_chi2_vals):
            line_sort = par_chi2_vals_sort[i]

            # Convert values to strings.
            line_str = ["%3.5f" % j for j in line]
            line_sort_str = ["%3.5f" % j for j in line_sort]

            # Convert the index from float to int.
            line_str[0] = "%i" % line[0]
            line_sort_str[0] = "%i" % line_sort[0]

            # Merge the two lists and append to data.
            data_list = line_str + line_sort_str
            data.append(data_list)

        # Make the headings.
        headings = ['i'] + self.params + ['chi2']
        headings += headings

        # Append "_sort" to the second half of the headings.
        half = len(headings) // 2
        for index in range(half, len(headings)):
            headings[index] = headings[index] + "_sort"

        # Write the parameters and chi2 values to file.
        write_data(out=par_file, headings=headings, data=data)

        # Close the file.
        par_file.close()
Example #5
def show_apod_rmsd_to_file(file_name=None, dir=None, path_to_command='showApod', outdir=None, force=False):
    """Extract the showApod 'Noise Std Dev' value and write it to a file with the same filename and the extension '.rmsd'.

    @keyword file_name:         The filename of the NMRPipe Fourier transformed file.
    @type file_name:            str
    @keyword dir:               The directory where the file is located.
    @type dir:                  str
    @keyword path_to_command:   If showApod is not in the PATH, the absolute path can be specified, e.g. /path/to/showApod.
    @type path_to_command:      str
    @keyword outdir:            The directory to write the file to.  If None, the file is written to the same directory as the input file.
    @type outdir:               str
    @keyword force:             A flag which if True will cause any pre-existing file to be overwritten.
    @type force:                bool
    @return:                    The full path of the file containing the 'Noise Std Dev' value.
    @rtype:                     str
    """

    # Call extract function.
    apod_rmsd = show_apod_rmsd(file_name=file_name, dir=dir, path_to_command=path_to_command)

    # Get the filename stripped of extension details.
    file_name_root = file_root(file_name)

    # Define extension.
    extension = ".rmsd"

    # Define file name for writing.
    file_name_out = file_name_root + extension

    # Define folder to write to.
    if outdir is None:
        write_outdir = dir
    else:
        write_outdir = outdir

    # Open the file for writing.
    wfile, wfile_path = open_write_file(file_name=file_name_out, dir=write_outdir, force=force, verbosity=1, return_path=True)

    # Write to file.
    out_write_data = [['%s'%apod_rmsd]]

    # Write data
    write_data(out=wfile, headings=None, data=out_write_data, sep=None)

    # Close file.
    wfile.close()

    # Return path to file.
    return wfile_path
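A minimal usage sketch, assuming showApod from NMRPipe is installed and that these file and directory names are hypothetical:

# Hypothetical call: extract the noise estimate of spectra/test.ft2 and write it to rmsd/test.rmsd.
rmsd_file_path = show_apod_rmsd_to_file(file_name='test.ft2', dir='spectra', outdir='rmsd', force=True)
print(rmsd_file_path)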
Example #6
def set_dist(spin_id1=None, spin_id2=None, ave_dist=None, unit='meter'):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:      The spin identifier string of the first spin of the pair.
    @type spin_id1:         str
    @keyword spin_id2:      The spin identifier string of the second spin of the pair.
    @type spin_id2:         str
    @keyword ave_dist:      The r^-3 averaged interatomic distance.
    @type ave_dist:         float
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Unit conversion.
    if unit == 'Angstrom':
        ave_dist = ave_dist * 1e-10

    # Generate the selection objects.
    sel_obj1 = Selection(spin_id1)
    sel_obj2 = Selection(spin_id2)

    # Loop over the interatomic containers.
    data = []
    for interatom in interatomic_loop():
        # Get the spin info.
        mol_name1, res_num1, res_name1, spin1 = return_spin(spin_hash=interatom._spin_hash1, full_info=True)
        mol_name2, res_num2, res_name2, spin2 = return_spin(spin_hash=interatom._spin_hash2, full_info=True)

        # Skip the container if neither ordering of the two spins matches the selections.
        match_forward = sel_obj1.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj2.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)
        match_reverse = sel_obj2.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj1.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)
        if not match_forward and not match_reverse:
            continue

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be set.")

    # Print out.
    print("The following averaged distances have been set:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
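A minimal usage sketch, assuming spins with these hypothetical IDs exist in the current data pipe (1.02 Angstrom is a typical backbone N-H bond length):

# Hypothetical call: set the r^-3 averaged N-H distance for all matching spin pairs.
set_dist(spin_id1='@N', spin_id2='@H', ave_dist=1.02, unit='Angstrom')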
Example #7
def set_dist(spin_id1=None, spin_id2=None, ave_dist=None, unit='meter'):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:      The spin identifier string of the first spin of the pair.
    @type spin_id1:         str
    @keyword spin_id2:      The spin identifier string of the second spin of the pair.
    @type spin_id2:         str
    @keyword ave_dist:      The r^-3 averaged interatomic distance.
    @type ave_dist:         float
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Unit conversion.
    if unit == 'Angstrom':
        ave_dist = ave_dist * 1e-10

    # Generate the selection objects.
    sel_obj1 = Selection(spin_id1)
    sel_obj2 = Selection(spin_id2)

    # Loop over the interatomic containers.
    data = []
    for interatom in interatomic_loop():
        # Get the spin info.
        mol_name1, res_num1, res_name1, spin1 = return_spin(interatom.spin_id1, full_info=True)
        mol_name2, res_num2, res_name2, spin2 = return_spin(interatom.spin_id2, full_info=True)

        # Skip the container if neither ordering of the two spins matches the selections.
        match_forward = sel_obj1.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj2.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)
        match_reverse = sel_obj2.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj1.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)
        if not match_forward and not match_reverse:
            continue

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be set.")

    # Print out.
    print("The following averaged distances have been set:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
Example #8
    def create_par_chi2(self, file_prefix, par_chi2_vals):
        """Function for creating file with parameters and the chi2 value."""

        # Print out.
        print("\nCreating the file with parameters and the chi2 value.")

        # Open the file.
        par_file = open_write_file(file_name=file_prefix+'.par', dir=self.dir, force=True)

        # Copy the nested list to sort it.
        par_chi2_vals_sort = deepcopy(par_chi2_vals)

        # Sort by the chi2 value (column index 4).
        par_chi2_vals_sort.sort(key=lambda values: values[4])

        # Collect the data structure, which is a list of list of strings.
        data = []
        for i, line in enumerate(par_chi2_vals):
            line_sort = par_chi2_vals_sort[i]

            # Convert values to strings.
            line_str = ["%3.5f"%j for j in line]
            line_sort_str = ["%3.5f"%j for j in line_sort]

            # Convert the index from float to int.
            line_str[0] = "%i" % line[0]
            line_sort_str[0] = "%i" % line_sort[0]

            # Merge the two lists and append to data.
            data_list = line_str + line_sort_str
            data.append(data_list)

        # Make the headings.
        headings = ['i'] + self.params + ['chi2']
        headings += headings

        # Append "_sort" to the second half of the headings.
        half = len(headings) // 2
        for index in range(half, len(headings)):
            headings[index] = headings[index] + "_sort"

        # Write the parameters and chi2 values to file.
        write_data(out=par_file, headings=headings, data=data)

        # Close the file.
        par_file.close()
Example #9
def run(input_path, output_path, number_of_partitions, number_of_iterations,
        number_of_trials):

    # Read partitioned input data
    data = read_partitioned_data(input_path, number_of_iterations,
                                 number_of_partitions)

    # Define classification models and their corresponding parameters
    models = {
        'svm': {
            'C': (int, (5, 15)),
            'decision_function_shape': (tuple, ('ovo', 'ovr', None))
        },
        'random_forest': {
            'max_features': (int, (5, 15)),
            'class_weight': (tuple, ('balanced', 'balanced_subsample'))
        }
    }

    # Initialise the grid search result list
    results = []

    # If the number of trials is set, use grid search.
    if number_of_trials:

        # Iterate through each classification model defined.
        for algorithm, parameter_model in models.items():

            # Perform grid search and append the results to the complete result
            # list
            results += grid_search(data, algorithm, parameter_model,
                                   number_of_trials)

    # If the number of trials is not set, use regular classification.
    else:

        # Iterate through each algorithm defined.
        for algorithm in models.keys():

            # Perform classification and append the results to the complete
            # result list
            results.append(classify(data,
                                    classifier_from_algorithm[algorithm]))

    # Output the grid search results into the specified file
    write_data(output_path, results)
Example #10
def write(file=None, dir=None, force=False):
    """Write the J coupling data to file.

    @keyword file:      The file name or object to write to.
    @type file:         str or file object
    @keyword dir:       The name of the directory to place the file into (defaults to the current directory).
    @type dir:          str
    @keyword force:     A flag which if True will cause any pre-existing file to be overwritten.
    @type force:        bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True, j=True)

    # Open the file for writing.
    file = open_write_file(file, dir, force)

    # Loop over the interatomic data containers and collect the data.
    data = []
    for interatom in interatomic_loop():
        # Skip deselected containers.
        if not interatom.select:
            continue

        # Skip containers with no J coupling.
        if not hasattr(interatom, 'j_coupling'):
            continue

        # Append the spin data.
        data.append([])
        data[-1].append(interatom.spin_id1)
        data[-1].append(interatom.spin_id2)

        # The value.
        data[-1].append(repr(interatom.j_coupling))

        # The error.
        if hasattr(interatom, 'j_coupling_err'):
            data[-1].append(repr(interatom.j_coupling_err))
        else:
            data[-1].append(repr(None))

    # Write out.
    write_data(out=file, headings=["Spin_ID1", "Spin_ID2", "J coupling", "J coupling error"], data=data)
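A minimal usage sketch, assuming a data pipe with J coupling data is set up (the file name is hypothetical):

# Hypothetical call: write the J couplings to results/j_couplings.txt, overwriting any existing file.
write(file='j_couplings.txt', dir='results', force=True)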
Example #11
def aic():
    """Calculate and store Akaike's Information Criterion (AIC) for each model."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # Calculate the AIC value.
        aic = chi2 + 2.0 * k

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the statistics.
        container.chi2 = chi2
        container.num_params = k
        container.aic = aic

        # Statistics printout.
        data = [["Chi-squared value:", "%20f" % chi2],
                ["Number of parameters (k):", "%20i" % k],
                ["Akaike's Information Criterion (AIC):", "%20f" % aic]]
        write_data(out=sys.stdout, data=data)
Example #12
def display(sort=False, rev=False):
    """Print the details of all the data pipes."""

    # Acquire the pipe lock, and make sure it is finally released.
    status.pipe_lock.acquire(sys._getframe().f_code.co_name)
    try:
        # Loop over the data pipes.
        pipe_names = []
        for pipe_name_i in ds:
            pipe_names.append(pipe_name_i)

        if sort:
            pipe_names = sort_filenames(filenames=pipe_names, rev=rev)

        data = []
        for pipe_name in pipe_names:
            # The current data pipe.
            current = ''
            if pipe_name == cdp_name():
                current = '*'

            # Store the data for the print out.
            data.append([repr(pipe_name), get_type(pipe_name), repr(get_bundle(pipe_name)), current])

    # Release the lock.
    finally:
        status.pipe_lock.release(sys._getframe().f_code.co_name)

    # Print out.
    write_data(out=sys.stdout, headings=["Data pipe name", "Data pipe type", "Bundle", "Current"], data=data)

    # Return the data.
    return data
Example #13
def aic():
    """Calculate and store Akaike's Information Criterion (AIC) for each model."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # Calculate the AIC value.
        aic = chi2 + 2.0*k

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the statistics.
        container.chi2 = chi2
        container.num_params = k
        container.aic = aic

        # Statistics printout.
        data = [
            ["Chi-squared value:", "%20f" % chi2],
            ["Number of parameters (k):", "%20i" % k],
            ["Akaike's Information Criterion (AIC):", "%20f" % aic]
        ]
        write_data(out=sys.stdout, data=data)
Example #14

def run(input_path, output_path, classes):

    # Read the input data set from the specified input path
    input_data = read_data(input_path)

    # Change the list of classes to a set
    classes = set(classes)

    # Construct the output data set, filtering to only have the selected classes
    output_data = {
        'subjects': input_data['subjects'],
        'areas': input_data['areas'],
        'image_category': [],
        'neural_responses': []
    }
    for i in range(len(input_data['image_category'])):
        if input_data['image_category'][i] in classes:
            for field in ['image_category', 'neural_responses']:
                output_data[field].append(input_data[field][i])

    # Write the output data set to the specified output path
    write_data(output_path, output_data)
Example #15
def display():
    """Print the details of all the data pipes."""

    # Acquire the pipe lock, and make sure it is finally released.
    status.pipe_lock.acquire(sys._getframe().f_code.co_name)
    try:
        # Loop over the data pipes.
        data = []
        for pipe_name in ds:
            # The current data pipe.
            current = ''
            if pipe_name == cdp_name():
                current = '*'

            # Store the data for the print out.
            data.append([repr(pipe_name), get_type(pipe_name), repr(get_bundle(pipe_name)), current])

    # Release the lock.
    finally:
        status.pipe_lock.release(sys._getframe().f_code.co_name)

    # Print out.
    write_data(out=sys.stdout, headings=["Data pipe name", "Data pipe type", "Bundle", "Current"], data=data)
Example #16
def model_statistics():
    """Calculate and store the model statistics."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the values.
        container.chi2 = chi2
        container.num_params = k
        container.num_data_points = n

        # Statistics printout.
        data = [
            ['Chi-squared value:', "%20f" % chi2],
            ['Number of parameters (k):', "%20i" % k],
            ['Number of data points (n):', "%20i" % n]
        ]
        write_data(out=sys.stdout, data=data)
Example #17
def model_statistics():
    """Calculate and store the model statistics."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the values.
        container.chi2 = chi2
        container.num_params = k
        container.num_data_points = n

        # Statistics printout.
        data = [['Chi-squared value:', "%20f" % chi2],
                ['Number of parameters (k):', "%20i" % k],
                ['Number of data points (n):', "%20i" % n]]
        write_data(out=sys.stdout, data=data)
Example #18

def run(raw_input_path, output_path_recall, output_path_precision,
        output_path_f1, time_windows, frequency_bands):

    # Convert time windows to integers
    time_windows = [int(time_window) for time_window in time_windows]

    # Initialise the integrated score data dictionaries
    integrated_recall = {}
    integrated_precision = {}
    integrated_f1 = {}
    for time_window in time_windows:
        integrated_recall[time_window] = {}
        integrated_precision[time_window] = {}
        integrated_f1[time_window] = {}
        for frequency_band in frequency_bands:
            integrated_recall[time_window][frequency_band] = None
            integrated_precision[time_window][frequency_band] = None
            integrated_f1[time_window][frequency_band] = None

    # Read the scores from the input files into the integrated data dictionaries

    # Iterate through each time window and frequency band pair
    for time_window in time_windows:
        for frequency_band in frequency_bands:

            # Construct the input file path
            input_path = raw_input_path.replace('TIMEWINDOW', str(time_window))\
                .replace('FREQUENCYBAND', frequency_band)

            # Read the input file
            input_data = read_data(input_path)

            # Add the scores from the input data into the integrated data
            # dictionaries
            integrated_recall[time_window][frequency_band] = input_data.average_recall()
            integrated_precision[time_window][frequency_band] = input_data.average_precision()
            integrated_f1[time_window][frequency_band] = input_data.average_f1()

    # Output the integrated scores into the specified files
    write_data(output_path_recall, integrated_recall)
    write_data(output_path_precision, integrated_precision)
    write_data(output_path_f1, integrated_f1)
Example #19
def run(input_path, output_path, cv_amount, use_even_distribution):

    # Read the data set
    data = read_data(input_path)

    # Find the number of images in the data set
    number_of_images = len(data['image_category'])

    # Find all image classes in the data set
    classes = sorted(set(data['image_category']))

    # Initialise the list of partitioned indices
    partitioned_indices = [[] for i in range(cv_amount)]

    # If even distribution is set to be used, partition data within each class
    # separately and merge the resulting partitions into the partitioned
    # indices list, so the image class distribution in each partition would be
    # roughly the same.
    if use_even_distribution:

        # Construct a list of image indices corresponding to each image class
        indices = {}
        for image_class in classes:
            indices[image_class] = []
        for i in range(number_of_images):
            indices[data['image_category'][i]].append(i)

        # Randomly split each of these lists into k nearly equal parts, and
        # merge them by partitions
        for image_class in classes:

            # Partition the indices list for the current image class into k
            # nearly equal parts
            partitions_list = partition_list(indices[image_class], cv_amount)

            # Shuffle the partition list to ensure that cumulative partitions
            # after merging by partitions are roughly of equal size
            shuffle(partitions_list)

            # Merge the partitioned indices list for the current image class
            # into the general partitioned indices list by partitions
            for i in range(cv_amount):
                partitioned_indices[i] += partitions_list[i]

    # If even distribution is not set to be used, partition data randomly.
    else:

        # Partition the indices list into k nearly equal parts
        partitioned_indices = partition_list(range(number_of_images),
                                             cv_amount)

    # Sort all of the partitions
    for partition in partitioned_indices:
        partition.sort()

    # Partition data
    partitions = []
    for i in range(cv_amount):
        partitions.append({
            'subjects': data['subjects'],
            'areas': data['areas'],
            'image_category': [data['image_category'][j] for j in partitioned_indices[i]],
            'neural_responses': [data['neural_responses'][j] for j in partitioned_indices[i]]
        })

    # Save partitioned data
    for i in range(cv_amount):
        write_data(add_suffix_to_path(output_path, '-', i + 1), partitions[i])
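The partition_list() helper used above is not shown in this example. A minimal sketch of what it is assumed to do, i.e. randomly split a sequence into k nearly equal parts (hypothetical implementation; the real helper may differ):

from random import shuffle

def partition_list(items, cv_amount):
    # Copy and shuffle the items so the parts are randomised.
    items = list(items)
    shuffle(items)
    # Slice round-robin so the part sizes differ by at most one.
    return [items[i::cv_amount] for i in range(cv_amount)]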
Example #20

        #print(k_stat, n_stat, chi2, "point is %s=%3.3f, %s=%3.3f"% (params[0], values[0], params[1], values[1]))

        # Progress incrementation and printout.
        percent = percent + percent_inc
        print("%-10s%8.3f%-8s%-8g" % ("Progress:", percent, "%,  " + repr(values) + ",  f(x): ", chi2))

        # Append to data.
        data.append(["%3.3f" % values[0], "%3.3f" % values[1], "%3.3f" % chi2])

        # Save all values of chi2, to help find a reasonable level for the Innermost, Inner, Middle and Outer isosurfaces.
        all_chi.append(chi2)

        # Increment the value of the second parameter.
        values[1] = values[1] + step_size[1]

    # Increment the value of the first parameter.
    values[0] = values[0] + step_size[0]

print("\nMin cluster point %s=%3.3f, %s=%3.3f, with chi2=%3.3f" %
      (params[0], pcm[0], params[1], pcm[1], pre_chi2))

# Open file
file_name = '1_create_surface_data_S65_dw_r2a_FT128.txt'
surface_file = open_write_file(file_name=file_name, dir=None, force=True)
write_data(out=surface_file, headings=headings, data=data)

# Close file
surface_file.close()
Example #21
def read(file=None, dir=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
    """Read the chemical shift data from a peak list file.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword verbose:       A flag which if True will cause all chemical shift data loaded to be printed out.
    @type verbose:          bool
    """

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Check the file name.
    if file is None:
        raise RelaxError("The file name must be supplied.")

    # Read the peak list data.
    peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Loop over the assignments.
    data = []
    data_flag = False
    for assign in peak_list:
        # Loop over the dimensions of the peak list.
        for i in range(peak_list.dimensionality):
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[i], spin_name=assign.spin_names[i])

            # Get the spin container.
            spin = return_spin(spin_id)
            if not spin:
                warn(RelaxNoSpinWarning(spin_id))
                continue

            # Skip deselected spins.
            if not spin.select:
                continue

            # Store the shift.
            spin.chemical_shift = assign.shifts[i]

            # Switch the flag.
            data_flag = True

            # Append the data for printing out.
            data.append([spin_id, repr(spin.chemical_shift)])

    # No data.
    if not data_flag:
        raise RelaxError("No chemical shifts could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following chemical shifts have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Chemical shift"], data=data)
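A minimal usage sketch, assuming a data pipe with sequence data is set up (the peak list file name is hypothetical):

# Hypothetical call: load chemical shifts from a peak list in the data directory.
read(file='shifts.list', dir='data', verbose=True)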
Example #22
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Set up the magnetic dipole-dipole interaction.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist is not None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

            # Unit conversion (skipped when no distance is present, to avoid multiplying None).
            if unit == 'Angstrom':
                ave_dist = ave_dist * 1e-10

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # No container found, so create it.
        if interatom is None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
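A minimal usage sketch, assuming a data pipe with sequence data is set up and a hypothetical three-column file of spin ID pairs and distances:

# Hypothetical call: read spin IDs from columns 1 and 2 and distances in Angstrom from column 3.
read_dist(file='distances.txt', dir='data', unit='Angstrom', spin_id1_col=1, spin_id2_col=2, data_col=3)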
Example #23
def pack_data(ri_id, ri_type, frq, values, errors, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, spin_id=None, gen_seq=False, verbose=True):
    """Pack the relaxation data into the data pipe and spin containers.

    The values, errors, and spin_ids arguments must be lists of equal length or None.  Each element i corresponds to a unique spin.

    @param ri_id:           The relaxation data ID string.
    @type ri_id:            str
    @param ri_type:         The relaxation data type, i.e. 'R1', 'R2', or 'NOE'.
    @type ri_type:          str
    @param frq:             The spectrometer proton frequency in Hz.
    @type frq:              float
    @keyword values:        The relaxation data for each spin.
    @type values:           None or list of float or float array
    @keyword errors:        The relaxation data errors for each spin.
    @type errors:           None or list of float or float array
    @keyword spin_ids:      The list of spin ID strings.  If the other spin identifiers are given, i.e. mol_names, res_nums, res_names, spin_nums, and/or spin_names, then this argument is not necessary.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_nums:         None or list of str
    @keyword res_names:     The list of residue names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_nums:        None or list of str
    @keyword spin_names:    The list of spin names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_names:       None or list of str
    @keyword gen_seq:       A flag which if True will cause the molecule, residue, and spin sequence data to be generated.
    @type gen_seq:          bool
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # The number of spins.
    N = len(values)

    # Test the data.
    if errors is not None and len(errors) != N:
        raise RelaxError("The length of the errors arg (%s) does not match that of the value arg (%s)." % (len(errors), N))
    if spin_ids and len(spin_ids) != N:
        raise RelaxError("The length of the spin ID strings arg (%s) does not match that of the value arg (%s)." % (len(spin_ids), N))
    if mol_names and len(mol_names) != N:
        raise RelaxError("The length of the molecule names arg (%s) does not match that of the value arg (%s)." % (len(mol_names), N))
    if res_nums and len(res_nums) != N:
        raise RelaxError("The length of the residue numbers arg (%s) does not match that of the value arg (%s)." % (len(res_nums), N))
    if res_names and len(res_names) != N:
        raise RelaxError("The length of the residue names arg (%s) does not match that of the value arg (%s)." % (len(res_names), N))
    if spin_nums and len(spin_nums) != N:
        raise RelaxError("The length of the spin numbers arg (%s) does not match that of the value arg (%s)." % (len(spin_nums), N))
    if spin_names and len(spin_names) != N:
        raise RelaxError("The length of the spin names arg (%s) does not match that of the value arg (%s)." % (len(spin_names), N))

    # Generate some empty lists.
    if not mol_names:
        mol_names = [None] * N
    if not res_nums:
        res_nums = [None] * N
    if not res_names:
        res_names = [None] * N
    if not spin_nums:
        spin_nums = [None] * N
    if not spin_names:
        spin_names = [None] * N
    if errors is None:
        errors = [None] * N

    # Generate the spin IDs.
    if not spin_ids:
        spin_ids = []
        for i in range(N):
            spin_ids.append(generate_spin_id_unique(spin_num=spin_nums[i], spin_name=spin_names[i], res_num=res_nums[i], res_name=res_names[i], mol_name=mol_names[i]))

    # Initialise the global data for the current pipe if necessary.
    if not hasattr(cdp, 'ri_type'):
        cdp.ri_type = {}
    if not hasattr(cdp, 'ri_ids'):
        cdp.ri_ids = []

    # Set the spectrometer frequency.
    set_frequency(id=ri_id, frq=frq)

    # Update the global data.
    cdp.ri_ids.append(ri_id)
    cdp.ri_type[ri_id] = ri_type

    # The selection object.
    select_obj = None
    if spin_id:
        select_obj = Selection(spin_id)

    # Loop over the spin data.
    data = []
    for i in range(N):
        # Get the corresponding spin container.
        match_mol_names, match_res_nums, match_res_names, spins = return_spin_from_selection(spin_ids[i], full_info=True, multi=True)
        if spins in [None, []]:
            raise RelaxNoSpinError(spin_ids[i])

        # Remove non-matching spins.
        if select_obj:
            new_spins = []
            new_mol_names = []
            new_res_nums = []
            new_res_names = []
            new_ids = []
            for j in range(len(spins)):
                if select_obj.contains_spin(spin_num=spins[j].num, spin_name=spins[j].name, res_num=match_res_nums[j], res_name=match_res_names[j], mol=match_mol_names[j]):
                    new_spins.append(spins[j])
                    new_mol_names.append(match_mol_names[j])
                    new_res_nums.append(match_res_nums[j])
                    new_res_names.append(match_res_names[j])
                    new_ids.append(generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_num=spins[j].num, spin_name=spins[j].name))
            new_id = new_ids[0]

        # Aliases for normal operation.
        else:
            new_spins = spins
            new_mol_names = match_mol_names
            new_res_nums = match_res_nums
            new_res_names = match_res_names
            new_id = spin_ids[i]
            new_ids = None

        # Check that only a single spin is present.
        if len(new_spins) > 1:
            raise RelaxMultiSpinIDError(spin_ids[i], new_ids)
        if len(new_spins) == 0:
            raise RelaxNoSpinError(spin_ids[i])

        # Loop over the spins.
        for j in range(len(new_spins)):
            # No match to the selection.
            if select_obj and not select_obj.contains_spin(spin_num=new_spins[j].num, spin_name=new_spins[j].name, res_num=new_res_nums[j], res_name=new_res_names[j], mol=new_mol_names[j]):
                continue

            # Initialise the spin data if necessary.
            if not hasattr(new_spins[j], 'ri_data') or new_spins[j].ri_data is None:
                new_spins[j].ri_data = {}
            if not hasattr(new_spins[j], 'ri_data_err') or new_spins[j].ri_data_err is None:
                new_spins[j].ri_data_err = {}

            # Update all data structures.
            new_spins[j].ri_data[ri_id] = values[i]
            new_spins[j].ri_data_err[ri_id] = errors[i]

            # Append the data for printing out.
            data.append([new_id, repr(values[i]), repr(errors[i])])

    # Print out.
    if verbose:
        print("\nThe following %s MHz %s relaxation data with the ID '%s' has been loaded into the relax data store:\n" % (frq/1e6, ri_type, ri_id))
        write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Error"], data=data)
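A minimal usage sketch, assuming two spins with these hypothetical IDs exist in the current data pipe (the rates and errors are invented for illustration):

# Hypothetical call: pack two R1 rates measured at a 600 MHz proton frequency.
pack_data(ri_id='R1_600', ri_type='R1', frq=600.0*1e6, values=[1.52, 1.48], errors=[0.05, 0.04], spin_ids=[':1@N', ':2@N'])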
Example #24
def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
    """Model selection function.

    @keyword method:        The model selection method.  This can currently be one of:
                                - 'AIC', Akaike's Information Criteria.
                                - 'AICc', Small sample size corrected AIC.
                                - 'BIC', Bayesian or Schwarz Information Criteria.
                                - 'CV', Single-item-out cross-validation.
                            None of the other model selection techniques are currently supported.
    @type method:           str
    @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected data pipe.
    @type modsel_pipe:      str
    @keyword bundle:        The optional data pipe bundle to associate the newly created pipe with.
    @type bundle:           str or None
    @keyword pipes:         A list of the data pipes to use in the model selection.
    @type pipes:            list of str
    """

    # Test if the pipe already exists.
    if has_pipe(modsel_pipe):
        raise RelaxPipeError(modsel_pipe)

    # Use all pipes.
    if pipes is None:
        # Get all data pipe names from the relax data store.
        pipes = pipe_names()

    # Select the model selection technique.
    if method == 'AIC':
        print("AIC model selection.")
        formula = aic
    elif method == 'AICc':
        print("AICc model selection.")
        formula = aicc
    elif method == 'BIC':
        print("BIC model selection.")
        formula = bic
    elif method == 'CV':
        print("CV model selection.")
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
    else:
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")

    # No pipes.
    if len(pipes) == 0:
        raise RelaxError("No data pipes are available for use in model selection.")

    # Initialise.
    function_type = {}
    model_loop = {}
    model_type = {}
    duplicate_data = {}
    model_statistics = {}
    skip_function = {}
    modsel_pipe_exists = False

    # Cross validation setup.
    if isinstance(pipes[0], list):
        # No pipes.
        if len(pipes[0]) == 0:
            raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")

        # Loop over the data pipes.
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                # Specific functions.
                model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j]))
                model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j]))
                duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j]))
                model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j]))
                skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j]))

        # The model loop should be the same for all data pipes!
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
                    raise RelaxError("The models for each data pipes should be the same.")
        model_loop = model_loop[pipes[0][0]]

        # The model description.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global vs. local models.
        global_flag = False
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_type[pipes[i][j]]() == 'global':
                    global_flag = True

    # All other model selection setup.
    else:
        # Loop over the data pipes.
        for i in range(len(pipes)):
            # Specific functions.
            model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i]))
            model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i]))
            duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i]))
            model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i]))
            skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i]))

        model_loop = model_loop[pipes[0]]

        # The model description.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global vs. local models.
        global_flag = False
        for j in range(len(pipes)):
            if model_type[pipes[j]]() == 'global':
                global_flag = True


    # Loop over the base models.
    for model_info in model_loop():
        # Print out.
        print("\n")
        desc = model_desc(model_info)
        if desc:
            print(desc)

        # Initial model.
        best_model = None
        best_crit = 1e300
        data = []

        # Loop over the pipes.
        for j in range(len(pipes)):
            # Single-item-out cross validation.
            if method == 'CV':
                # Sum of chi-squared values.
                sum_crit = 0.0

                # Loop over the validation samples and sum the chi-squared values.
                for m in range(len(pipes[j])):
                    # Alias the data pipe name (a separate loop index is used, since k is overwritten by the model statistics unpacking below).
                    pipe = pipes[j][m]

                    # Switch to this pipe.
                    switch(pipe)

                    # Skip function.
                    if skip_function[pipe](model_info):
                        continue

                    # Get the model statistics.
                    k, n, chi2 = model_statistics[pipe](model_info)

                    # Missing data sets.
                    if k is None or n is None or chi2 is None:
                        continue

                    # Chi2 sum.
                    sum_crit = sum_crit + chi2

                # Cross-validation criterion (average chi-squared value).
                crit = sum_crit / float(len(pipes[j]))

            # Other model selection methods.
            else:
                # Reassign the pipe.
                pipe = pipes[j]

                # Switch to this pipe.
                switch(pipe)

                # Skip function.
                if skip_function[pipe](model_info):
                    continue

                # Get the model statistics.
                k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)

                # Missing data sets.
                if k is None or n is None or chi2 is None:
                    continue

                # Calculate the criterion value.
                crit = formula(chi2, float(k), float(n))

                # Store the values for a later printout.
                data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit])

            # Select model.
            if crit < best_crit:
                best_model = pipe
                best_crit = crit

        # Write out the table.
        write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data)

        # Duplicate the data from the 'best_model' to the model selection data pipe.
        if best_model is not None:
            # Print out of selected model.
            print("The model from the data pipe " + repr(best_model) + " has been selected.")

            # Switch to the selected data pipe.
            switch(best_model)

            # Duplicate.
            duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)

            # Model selection pipe now exists.
            modsel_pipe_exists = True

        # No model selected.
        else:
            # Print out of selected model.
            print("No model has been selected.")

    # Switch to the model selection pipe.
    if modsel_pipe_exists:
        switch(modsel_pipe)

    # Bundle the data pipe.
    if bundle:
        pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
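The formula callable used above maps the per-pipe statistics (chi2, k, n) to a single selection criterion. As an illustration only (the fragment does not show which criterion is in use), a minimal sketch of the standard frequentist information criteria:

from math import log

def aic(chi2, k, n):
    # Akaike's Information Criterion.
    return chi2 + 2.0 * k

def aicc(chi2, k, n):
    # Small-sample corrected AIC (assumes n > k + 1).
    return chi2 + 2.0 * k + 2.0 * k * (k + 1.0) / (n - k - 1.0)

def bic(chi2, k, n):
    # Bayesian (Schwarz) Information Criterion.
    return chi2 + k * log(n)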
Example #27
0
def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    @keyword verbose:       A flag which if True will print additional information out.
    @type verbose:          bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible print.
    if verbose:
        print("\nThe following signal to noise ratios has been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError("Intensity error analysis has not been performed.  Please see spectrum.error_analysis().")

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the IDs.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the sn_ratio.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the sn_ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under sorted ids.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        if verbose:
            section(file=sys.stdout, text="Signal to noise ratio for spin ID '%s'"%spin_id, prespace=1)
            write_data(out=sys.stdout, headings=["Spectrum ID", "Signal", "Noise", "S/N"], data=data_i)
Example #28
0
def pack_data(ri_id,
              ri_type,
              frq,
              values,
              errors,
              spin_ids=None,
              mol_names=None,
              res_nums=None,
              res_names=None,
              spin_nums=None,
              spin_names=None,
              spin_id=None,
              gen_seq=False,
              verbose=True):
    """Pack the relaxation data into the data pipe and spin containers.

    The values, errors, and spin_ids arguments must be lists of equal length or None.  Each element i corresponds to a unique spin.

    @param ri_id:           The relaxation data ID string.
    @type ri_id:            str
    @param ri_type:         The relaxation data type, i.e. 'R1', 'R2', or 'NOE'.
    @type ri_type:          str
    @param frq:             The spectrometer proton frequency in Hz.
    @type frq:              float
    @keyword values:        The relaxation data for each spin.
    @type values:           None or list of float or float array
    @keyword errors:        The relaxation data errors for each spin.
    @type errors:           None or list of float or float array
    @keyword spin_ids:      The list of spin ID strings.  If the other spin identifiers are given, i.e. mol_names, res_nums, res_names, spin_nums, and/or spin_names, then this argument is not necessary.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_nums:         None or list of str
    @keyword res_names:     The list of residue names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_nums:        None or list of str
    @keyword spin_names:    The list of spin names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_names:       None or list of str
    @keyword gen_seq:       A flag which if True will cause the molecule, residue, and spin sequence data to be generated.
    @type gen_seq:          bool
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # The number of spins.
    N = len(values)

    # Test the data.
    if errors != None and len(errors) != N:
        raise RelaxError(
            "The length of the errors arg (%s) does not match that of the value arg (%s)."
            % (len(errors), N))
    if spin_ids and len(spin_ids) != N:
        raise RelaxError(
            "The length of the spin ID strings arg (%s) does not match that of the value arg (%s)."
            % (len(spin_ids), N))
    if mol_names and len(mol_names) != N:
        raise RelaxError(
            "The length of the molecule names arg (%s) does not match that of the value arg (%s)."
            % (len(mol_names), N))
    if res_nums and len(res_nums) != N:
        raise RelaxError(
            "The length of the residue numbers arg (%s) does not match that of the value arg (%s)."
            % (len(res_nums), N))
    if res_names and len(res_names) != N:
        raise RelaxError(
            "The length of the residue names arg (%s) does not match that of the value arg (%s)."
            % (len(res_names), N))
    if spin_nums and len(spin_nums) != N:
        raise RelaxError(
            "The length of the spin numbers arg (%s) does not match that of the value arg (%s)."
            % (len(spin_nums), N))
    if spin_names and len(spin_names) != N:
        raise RelaxError(
            "The length of the spin names arg (%s) does not match that of the value arg (%s)."
            % (len(spin_names), N))

    # Generate some empty lists.
    if not mol_names:
        mol_names = [None] * N
    if not res_nums:
        res_nums = [None] * N
    if not res_names:
        res_names = [None] * N
    if not spin_nums:
        spin_nums = [None] * N
    if not spin_names:
        spin_names = [None] * N
    if errors == None:
        errors = [None] * N

    # Generate the spin IDs.
    if not spin_ids:
        spin_ids = []
        for i in range(N):
            spin_ids.append(
                generate_spin_id_unique(spin_num=spin_nums[i],
                                        spin_name=spin_names[i],
                                        res_num=res_nums[i],
                                        res_name=res_names[i],
                                        mol_name=mol_names[i]))

    # Initialise the global data for the current pipe if necessary.
    if not hasattr(cdp, 'ri_type'):
        cdp.ri_type = {}
    if not hasattr(cdp, 'ri_ids'):
        cdp.ri_ids = []

    # Set the spectrometer frequency.
    set_frequency(id=ri_id, frq=frq)

    # Update the global data.
    cdp.ri_ids.append(ri_id)
    cdp.ri_type[ri_id] = ri_type

    # The selection object.
    select_obj = None
    if spin_id:
        select_obj = Selection(spin_id)

    # Loop over the spin data.
    data = []
    for i in range(N):
        # A selection union.
        select_id = spin_ids[i]
        if spin_id != None:
            select_id = "%s&%s" % (select_id, spin_id)

        # Get the corresponding spin container.
        match_mol_names, match_res_nums, match_res_names, spins = return_spin_from_selection(
            selection=select_id, full_info=True, multi=True)

        # No spin.
        if len(spins) == 0:
            continue

        # Check that multiple spins are not present.
        if len(spins) > 1:
            # Generate the list of spin IDs.
            new_ids = []
            for j in range(len(spins)):
                new_ids.append(
                    generate_spin_id_unique(mol_name=match_mol_names[j],
                                            res_num=match_res_nums[j],
                                            res_name=match_res_names[j],
                                            spin_num=spins[j].num,
                                            spin_name=spins[j].name))

            # Raise the error.
            raise RelaxMultiSpinIDError(spin_ids[i], new_ids)

        # Loop over the spins.
        for j in range(len(spins)):
            # No match to the selection.
            if select_obj and not select_obj.contains_spin(
                    spin_num=spins[j].num,
                    spin_name=spins[j].name,
                    res_num=match_res_nums[j],
                    res_name=match_res_names[j],
                    mol=match_mol_names[j]):
                continue

            # Initialise the spin data if necessary.
            if not hasattr(spins[j], 'ri_data') or spins[j].ri_data == None:
                spins[j].ri_data = {}
            if not hasattr(spins[j], 'ri_data_err') or spins[j].ri_data_err == None:
                spins[j].ri_data_err = {}

            # Update all data structures.
            spins[j].ri_data[ri_id] = values[i]
            spins[j].ri_data_err[ri_id] = errors[i]

            # Append the data for printing out.
            data.append([spin_ids[i], repr(values[i]), repr(errors[i])])

    # Print out.
    if verbose:
        print(
            "\nThe following %s MHz %s relaxation data with the ID '%s' has been loaded into the relax data store:\n"
            % (frq / 1e6, ri_type, ri_id))
        write_data(out=sys.stdout,
                   headings=["Spin_ID", "Value", "Error"],
                   data=data)
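A hedged usage sketch for pack_data(), assuming a data pipe with matching spins already exists; all IDs and numeric values are invented for illustration:

# Load hypothetical 600 MHz R1 data for three backbone amides.
pack_data('R1_600', 'R1', 600.0e6,
          values=[1.8, 2.0, 1.9],
          errors=[0.05, 0.04, 0.06],
          res_nums=[1, 2, 3],
          spin_names=['N', 'N', 'N'])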
Example #29
0
def define(spin_id1=None, spin_id2=None, pipe=None, direct_bond=False, spin_selection=False, verbose=True):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:          The spin identifier string of the first spin of the pair.
    @type spin_id1:             str
    @keyword spin_id2:          The spin identifier string of the second spin of the pair.
    @type spin_id2:             str
    @param pipe:                The data pipe to operate on.  Defaults to the current data pipe.
    @type pipe:                 str
    @keyword direct_bond:       A flag specifying if the two spins are directly bonded.
    @type direct_bond:          bool
    @keyword spin_selection:    Define the interatomic data container selection based on the spin selection.  If either spin is deselected, the interatomic container will also be deselected.  Otherwise the container will be selected.
    @type spin_selection:       bool
    @keyword verbose:           A flag which if True will result in printouts of the created interatomic data containers.
    @type verbose:              bool
    """

    # The data pipe.
    if pipe == None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Initialise the spin ID pairs list.
    ids = []
    spin_selections = []

    # Use the structural data to find connected atoms.
    if hasattr(dp, 'structure'):
        # The selection objects.
        selection1 = dp.structure.selection(atom_id=spin_id1)
        selection2 = dp.structure.selection(atom_id=spin_id2)

        # Loop over the atoms of the first spin selection.
        for mol_name1, res_num1, res_name1, atom_num1, atom_name1, mol_index1, atom_index1 in dp.structure.atom_loop(selection=selection1, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
            # Generate the first spin ID.
            id1 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name1, res_num=res_num1, res_name=res_name1, spin_num=atom_num1, spin_name=atom_name1)

            # Does the spin exist?
            spin1 = return_spin(id1)
            if not spin1:
                continue

            # Loop over the atoms of the second spin selection.
            for mol_name2, res_num2, res_name2, atom_num2, atom_name2, mol_index2, atom_index2 in dp.structure.atom_loop(selection=selection2, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # Skip non-bonded atom pairs.
                    if not dp.structure.are_bonded_index(mol_index1=mol_index1, atom_index1=atom_index1, mol_index2=mol_index2, atom_index2=atom_index2):
                        continue

                # Generate the second spin ID.
                id2 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name2, res_num=res_num2, res_name=res_name2, spin_num=atom_num2, spin_name=atom_name2)

                # Does the spin exist?
                spin2 = return_spin(id2)
                if not spin2:
                    continue

                # Store the IDs for the printout.
                ids.append([id1, id2])
                spin_selections.append([spin1.select, spin2.select])

    # No structural data present or the spin IDs are not in the structural data, so use spin loops and some basic rules.
    if ids == []:
        for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True):
            for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # No element info.
                    if not hasattr(spin1, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id1)
                    if not hasattr(spin2, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id2)

                    # Backbone NH and CH pairs.
                    pair = False
                    if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'):
                        pair = True
                    elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'):
                        pair = True

                    # Skip pairs spanning different residues.
                    if pair and res_num1 != None and res_num1 != res_num2:
                        continue
                    elif pair and res_num1 == None and res_name1 != res_name2:
                        continue

                # Store the IDs for the printout.
                ids.append([id1, id2])
                spin_selections.append([spin1.select, spin2.select])

    # No matches, so fail!
    if not len(ids):
        # Find the problem.
        count1 = 0
        count2 = 0
        for spin in spin_loop(spin_id1):
            count1 += 1
        for spin in spin_loop(spin_id2):
            count2 += 1

        # Report the problem.
        if count1 == 0 and count2 == 0:
            raise RelaxError("Neither spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2))
        elif count1 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id1)
        elif count2 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id2)
        else:
            raise RelaxError("Unknown error.")

    # Define the interaction.
    for i in range(len(ids)):
        # Unpack.
        id1, id2 = ids[i]

        # Get the interatomic data object, if it exists.
        interatom = return_interatom(id1, id2, pipe=pipe)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=id1, spin_id2=id2, pipe=pipe)

        # Check that this has not already been set up.
        if interatom.dipole_pair:
            raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2))

        # Set a flag indicating that a dipole-dipole interaction is present.
        interatom.dipole_pair = True

        # Set the selection.
        if spin_selection:
            interatom.select = False
            if spin_selections[i][0] and spin_selections[i][1]:
                interatom.select = True

    # Printout.
    if verbose:
        # Conversion.
        for i in range(len(ids)):
            ids[i][0] = repr(ids[i][0])
            ids[i][1] = repr(ids[i][1])

        # The printout.
        print("Interatomic interactions are now defined for the following spins:\n")
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
Example #30
0
def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file(s) containing the peak intensities.
    @type file:             str or list of str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str or list of str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int or list of int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Multiple ID flags.
    flag_multi = False
    flag_multi_file = False
    flag_multi_col = False
    if isinstance(spectrum_id, list) or spectrum_id == 'auto':
        flag_multi = True
    if isinstance(file, list):
        flag_multi_file = True
    if isinstance(int_col, list) or spectrum_id == 'auto':
        flag_multi_col = True

    # List argument checks.
    if flag_multi:
        # Too many lists.
        if flag_multi_file and flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.")

        # Not enough lists.
        if not flag_multi_file and not flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.")

        # List lengths for multiple files.
        if flag_multi_file and len(spectrum_id) != len(file):
            raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id))

        # List lengths for multiple intensity columns.
        if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
            raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col))

    # More list argument checks (when only one spectrum ID is supplied).
    else:
        # Multiple files.
        if flag_multi_file:
            raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.")

        # Multiple intensity columns.
        if flag_multi_col:
            raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.")

    # Intensity column checks.
    if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
        raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")

    # Check the intensity measure.
    if int_method not in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Convert the file argument to a list if necessary.
    if not isinstance(file, list):
        file = [file]

    # Loop over all files.
    for file_index in range(len(file)):
        # Read the peak list data.
        peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

        # Automatic spectrum IDs.
        if spectrum_id == 'auto':
            spectrum_id = peak_list[0].intensity_name

        # Loop over the assignments.
        data = []
        data_flag = False
        for assign in peak_list:
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])

            # Convert the intensity data to a list if needed.
            intensity = assign.intensity
            if not isinstance(intensity, list):
                intensity = [intensity]

            # Loop over the intensity data.
            for int_index in range(len(intensity)):
                # Sanity check.
                if intensity[int_index] == 0.0:
                    warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))

                # Get the spin container.
                spin = return_spin(spin_id)
                if not spin:
                    warn(RelaxNoSpinWarning(spin_id))
                    continue

                # Skip deselected spins.
                if not spin.select:
                    continue

                # Initialise.
                if not hasattr(spin, 'peak_intensity'):
                    spin.peak_intensity = {}

                # Intensity scaling.
                if ncproc != None:
                    intensity[int_index] = intensity[int_index] / float(2**ncproc)

                # Add the data.
                if flag_multi_file:
                    id = spectrum_id[file_index]
                elif flag_multi_col:
                    id = spectrum_id[int_index]
                else:
                    id = spectrum_id
                spin.peak_intensity[id] = intensity[int_index]

                # Switch the flag.
                data_flag = True

                # Append the data for printing out.
                data.append([spin_id, repr(intensity[int_index])])

        # Add the spectrum id (and ncproc) to the relax data store.
        spectrum_ids = spectrum_id
        if isinstance(spectrum_id, str):
            spectrum_ids = [spectrum_id]
        if ncproc != None and not hasattr(cdp, 'ncproc'):
            cdp.ncproc = {}
        for i in range(len(spectrum_ids)):
            add_spectrum_id(spectrum_ids[i])
            if ncproc != None:
                cdp.ncproc[spectrum_ids[i]] = ncproc

        # No data.
        if not data_flag:
            # Delete all the data.
            delete(spectrum_id)

            # Raise the error.
            raise RelaxError("No data could be loaded from the peak list")

        # Printout.
        if verbose:
            print("\nThe following intensities have been loaded into the relax data store:\n")
            write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
        print('')
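Two hedged usage sketches for this read() function, with invented file names; the second form pairs a list of spectrum IDs with a list of intensity columns of equal length, as checked above:

# A single peak list containing peak heights.
read(file='peaks_ref.list', dir='data', spectrum_id='ref', int_method='height')

# One file holding several intensity columns, one per spectrum ID.
read(file='series.ser', dir='data', spectrum_id=['t0', 't1', 't2'],
     int_col=[3, 4, 5], int_method='height')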
Example #31
0
def read(file=None, dir=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword verbose:       A flag which if True will cause all chemical shift data loaded to be printed out.
    @type verbose:          bool
    """

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Read the peak list data.
    peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Loop over the assignments.
    data = []
    data_flag = False
    for assign in peak_list:
        # Loop over the dimensions of the peak list.
        for i in range(peak_list.dimensionality):
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[i], spin_name=assign.spin_names[i])

            # Get the spin container.
            spin = return_spin(spin_id=spin_id)
            if not spin:
                warn(RelaxNoSpinWarning(spin_id))
                continue

            # Skip deselected spins.
            if not spin.select:
                continue

            # Store the shift.
            spin.chemical_shift = assign.shifts[i]

            # Switch the flag.
            data_flag = True

            # Append the data for printing out.
            data.append([spin_id, repr(spin.chemical_shift)])

    # No data.
    if not data_flag:
        raise RelaxError("No chemical shifts could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following chemical shifts have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Chemical shift"], data=data)
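A hedged usage sketch for this chemical shift variant of read(), with an invented file name; shifts are stored for every assigned dimension of each peak:

# Load chemical shifts from a peak list into the current data pipe.
read(file='shifts.list', dir='data', verbose=True)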
Example #32
0
def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be inter-dispersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length at the second dimension of the error arg.
    @type error_name:       str or list of str
    @keyword float_format:  A float formatting string to use for the data and error whenever a float is found.
    @type float_format:     str
    """

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if isinstance(data[0], list):
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists.
        if isinstance(error[0], list):
            # Error and error_name don't match.
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list.
        else:
            # Error and error_name don't match.
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # Number of spins check.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names']
    N = None
    first_arg = None
    first_arg_name = None
    for i in range(len(args)):
        if isinstance(args[i], list):
            # First list match.
            if N == None:
                N = len(args[i])
                first_arg = args[i]
                first_arg_name = arg_names[i]

            # Length check.
            if len(args[i]) != N:
                raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, arg_names[i], len(first_arg), len(args[i])))

    # Nothing?!?
    if N == None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error)))

    # The spin arguments.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name']


    # Init.
    headings = []
    file_data = []

    # Headers - the spin ID info.
    for i in range(len(args)):
        if args[i]:
            headings.append(arg_names[i])

    # Headers - the data.
    if data:
        # List of lists.
        if isinstance(data[0], list):
            # Loop over the list.
            for i in range(len(data[0])):
                # The data.
                headings.append(data_name[i])

                # The error.
                if error:
                    headings.append(error_name[i])

        # Simple list.
        else:
            # The data.
            headings.append(data_name)

            # The error.
            if error:
                headings.append(error_name)

    # Headers - only errors.
    elif error:
        # List of lists.
        if isinstance(error[0], list):
            for i in range(len(error[0])):
                headings.append(error_name[i])

        # Simple list.
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Spin specific data.
    for spin_index in range(N):
        # Append a new data row.
        file_data.append([])

        # The spin ID info.
        for i in range(len(args)):
            if args[i]:
                value = args[i][spin_index]
                if not isinstance(value, str):
                    value = repr(value)
                file_data[-1].append(value)

        # The data.
        if data:
            # List of lists.
            if isinstance(data[0], list):
                # Loop over the list.
                for i in range(len(data[0])):
                    # The data.
                    if is_float(data[spin_index][i]):
                        file_data[-1].append(float_format % data[spin_index][i])
                    else:
                        file_data[-1].append(repr(data[spin_index][i]))

                    # The error.
                    if error:
                        if is_float(error[spin_index][i]):
                            file_data[-1].append(float_format % error[spin_index][i])
                        else:
                            file_data[-1].append(repr(error[spin_index][i]))

            # Simple list.
            else:
                # The data.
                if is_float(data[spin_index]):
                    file_data[-1].append(float_format % data[spin_index])
                else:
                    file_data[-1].append(repr(data[spin_index]))

                # The error.
                if error:
                    if is_float(error[spin_index]):
                        file_data[-1].append(float_format % error[spin_index])
                    else:
                        file_data[-1].append(repr(error[spin_index]))

        # Only errors.
        elif error:
            # List of lists.
            if isinstance(error[0], list):
                for i in range(len(error[0])):
                    file_data[-1].append(repr(error[spin_index][i]))

            # Simple list.
            else:
                file_data[-1].append(repr(error[spin_index]))

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Open the file for writing.
    file = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data.
    write_data(out=file, headings=headings, data=file_data, sep=sep)
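A hedged usage sketch for write_spin_data(); since the file argument may be an already opened file object (see the docstring above), the table can be sent straight to standard output. The spin IDs and values are invented:

import sys

# Write hypothetical R1 values and errors for two spins to stdout.
write_spin_data(sys.stdout,
                spin_ids=[':1@N', ':2@N'],
                data=[1.8, 2.0], data_name='R1',
                error=[0.05, 0.04], error_name='R1_err')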
Example #33
0
def copy(pipe_from=None, pipe_to=None, align_id=None, back_calc=True):
    """Copy the PCS data from one data pipe to another.

    @keyword pipe_from: The data pipe to copy the PCS data from.  This defaults to the current data pipe.
    @type pipe_from:    str
    @keyword pipe_to:   The data pipe to copy the PCS data to.  This defaults to the current data pipe.
    @type pipe_to:      str
    @keyword align_id:  The alignment ID string.
    @type align_id:     str
    @keyword back_calc: A flag which if True will cause any back-calculated PCSs present to also be copied with the real values and errors.
    @type back_calc:    bool
    """

    # Defaults.
    if pipe_from == None and pipe_to == None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from == None:
        pipe_from = pipes.cdp_name()
    elif pipe_to == None:
        pipe_to = pipes.cdp_name()

    # Check the pipe setup.
    check_pipe_setup(pipe=pipe_from, pcs_id=align_id, sequence=True, pcs=True)
    check_pipe_setup(pipe=pipe_to, sequence=True)

    # Get the data pipes.
    dp_from = pipes.get_pipe(pipe_from)
    dp_to = pipes.get_pipe(pipe_to)

    # The IDs.
    if align_id == None:
        align_ids = dp_from.align_ids
    else:
        align_ids = [align_id]

    # Init target pipe global structures.
    if not hasattr(dp_to, 'align_ids'):
        dp_to.align_ids = []
    if not hasattr(dp_to, 'pcs_ids'):
        dp_to.pcs_ids = []

    # Loop over the align IDs.
    for align_id in align_ids:
        # Printout.
        print("\nCoping PCSs for the alignment ID '%s'." % align_id)

        # Copy the global data.
        if align_id not in dp_to.align_ids:
            dp_to.align_ids.append(align_id)
        if align_id in dp_from.pcs_ids and align_id not in dp_to.pcs_ids:
            dp_to.pcs_ids.append(align_id)

        # Spin loop.
        data = []
        for spin_from, spin_id in spin_loop(return_id=True, skip_desel=True, pipe=pipe_from):
            # Find the matching spin container in the target data pipe.
            spin_to = return_spin(spin_id, pipe=pipe_to)

            # No matching spin container.
            if spin_to == None:
                warn(RelaxWarning("The spin container for the spin '%s' cannot be found in the target data pipe." % spin_id))
                continue

            # No data or errors.
            if (not hasattr(spin_from, 'pcs') or not align_id in spin_from.pcs) and (not hasattr(spin_from, 'pcs_err') or not align_id in spin_from.pcs_err):
                continue

            # Initialise the spin data if necessary.
            if hasattr(spin_from, 'pcs') and not hasattr(spin_to, 'pcs'):
                spin_to.pcs = {}
            if back_calc and hasattr(spin_from, 'pcs_bc') and not hasattr(spin_to, 'pcs_bc'):
                spin_to.pcs_bc = {}
            if hasattr(spin_from, 'pcs_err') and not hasattr(spin_to, 'pcs_err'):
                spin_to.pcs_err = {}

            # Copy the value and error from pipe_from.
            value = None
            error = None
            value_bc = None
            if hasattr(spin_from, 'pcs'):
                value = spin_from.pcs[align_id]
                spin_to.pcs[align_id] = value
            if back_calc and hasattr(spin_from, 'pcs_bc'):
                value_bc = spin_from.pcs_bc[align_id]
                spin_to.pcs_bc[align_id] = value_bc
            if hasattr(spin_from, 'pcs_err'):
                error = spin_from.pcs_err[align_id]
                spin_to.pcs_err[align_id] = error

            # Append the data for printout.
            data.append([spin_id])
            if is_float(value):
                data[-1].append("%20.15f" % value)
            else:
                data[-1].append("%20s" % value)
            if back_calc:
                if is_float(value_bc):
                    data[-1].append("%20.15f" % value_bc)
                else:
                    data[-1].append("%20s" % value_bc)
            if is_float(error):
                data[-1].append("%20.15f" % error)
            else:
                data[-1].append("%20s" % error)

        # Printout.
        print("The following PCSs have been copied:\n")
        if back_calc:
            write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Back-calculated", "Error"], data=data)
        else:
            write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Error"], data=data)
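A hedged usage sketch for copy(), assuming two pipes with matching sequence data; the pipe and alignment names are invented:

# Copy the 'Dy' alignment PCS data from the measured pipe into the fitting pipe.
copy(pipe_from='measured', pipe_to='fitted', align_id='Dy')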
Example #35
0
        # Save all values of chi2, to help find a reasonable level for the innermost, inner, middle and outer isosurfaces.
        all_chi.append(chi2)

        # Increment the value of the second parameter.
        values[1] = values[1] + step_size[1]

        counter += 1

    # Increment the value of the first parameter.
    values[0] = values[0] + step_size[0]

print("\nMin cluster point %s=%3.3f, %s=%3.3f, with chi2=%3.3f" % (params[0], pcm[0], params[1], pcm[1], pre_chi2))

# Open file
file_name = '3_simulate_graphs_S65_dw_r2a_FT128.txt'
surface_file = open_write_file(file_name=file_name, dir=None, force=True)
write_data(out=surface_file, headings=headings, data=data)

# Close file
surface_file.close()

# Check spins.
display_spin()

# Now de-select spins from cluster.
for spin_id in cur_spin_ids:
    deselect.spin(spin_id=spin_id)

relax_disp.plot_disp_curves(dir='grace', y_axis='r2_eff', x_axis='disp', num_points=1000, extend_hz=500.0, extend_ppm=500.0, interpolate='disp', force=True)
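The truncated loops above walk a two-parameter grid, appending one chi-squared value per grid point. A self-contained sketch of that pattern with a stand-in chi-squared function and invented grid settings:

# Hypothetical two-parameter chi2 surface scan.
def chi2_func(p0, p1):
    return (p0 - 1.0)**2 + (p1 - 2.0)**2    # stand-in for the real target function

lower = [0.0, 0.0]          # lower grid bounds
step_size = [0.1, 0.1]      # grid increments
grid = [21, 41]             # points per dimension

data = []
all_chi = []
values = [lower[0], lower[1]]
for i in range(grid[0]):
    values[1] = lower[1]
    for j in range(grid[1]):
        c = chi2_func(values[0], values[1])
        data.append(["%.3f" % values[0], "%.3f" % values[1], "%.3f" % c])
        all_chi.append(c)
        values[1] += step_size[1]
    values[0] += step_size[0]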
Example #36
0
def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sep=None, neg_g_corr=False, absolute=False):
    """Read the RDC data from file.

    @keyword align_id:      The alignment tensor ID string.
    @type align_id:         str
    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword data_type:     A string which if set to 'D' means that the splitting in the aligned sample was assumed to be J + D, or if set to '2D' that the splitting was taken as J + 2D.  If set to 'T', the data will be marked as being T = J + D values.
    @type data_type:        str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the RDC data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the RDC errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword neg_g_corr:    A flag which is used to correct for the negative gyromagnetic ratio of 15N.  If True, a sign inversion will be applied to all RDC values to be loaded.
    @type neg_g_corr:       bool
    @keyword absolute:      A flag which if True indicates that the RDCs to load are signless.  All RDCs will then be converted to positive values.
    @type absolute:         bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Check the data types.
    rdc_types = ['D', '2D', 'T']
    if data_type not in rdc_types:
        raise RelaxError("The RDC data type '%s' must be one of %s." % (data_type, rdc_types))

    # Spin specific data.
    #####################

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the RDC data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The RDC value of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id1)
        spin2 = return_spin(spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Test the error value (a value of 0.0 will cause the interatomic container to be deselected).
        if error == 0.0:
            interatom.select = False
            warn(RelaxWarning("An error value of zero has been encountered, deselecting the interatomic container between spin '%s' and '%s'." % (spin_id1, spin_id2)))
            continue

        # Store the data type as global data (needed for the conversion of the RDC data).
        if not hasattr(interatom, 'rdc_data_types'):
            interatom.rdc_data_types = {}
        if not align_id in interatom.rdc_data_types:
            interatom.rdc_data_types[align_id] = data_type

        # Convert and add the data.
        if data_col:
            # Data conversion.
            value = convert(value, data_type, align_id, to_intern=True)

            # Correction for the negative gyromagnetic ratio of 15N.
            if neg_g_corr and value != None:
                value = -value

            # Absolute values.
            if absolute:
                # Force the value to be positive.
                value = abs(value)

            # Initialise.
            if not hasattr(interatom, 'rdc'):
                interatom.rdc = {}

            # Add the value.
            interatom.rdc[align_id] = value

            # Store the absolute value flag.
            if not hasattr(interatom, 'absolute_rdc'):
                interatom.absolute_rdc = {}
            interatom.absolute_rdc[align_id] = absolute

        # Convert and add the error.
        if error_col:
            # Data conversion.
            error = convert(error, data_type, align_id, to_intern=True)

            # Initialise.
            if not hasattr(interatom, 'rdc_err'):
                interatom.rdc_err = {}

            # Append the error.
            interatom.rdc_err[align_id] = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No RDC data could be extracted.")

    # Print out.
    print("The following RDCs have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)

    # Initialise some global structures.
    if not hasattr(cdp, 'align_ids'):
        cdp.align_ids = []
    if not hasattr(cdp, 'rdc_ids'):
        cdp.rdc_ids = []

    # Add the RDC id string.
    if align_id not in cdp.align_ids:
        cdp.align_ids.append(align_id)
    if align_id not in cdp.rdc_ids:
        cdp.rdc_ids.append(align_id)
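# A minimal sketch of the 1-based column convention used by read() above.  The
# line content below is hypothetical; only the col-1 indexing is taken from
# the function.
line = ['@N', '@H', '10.5', '0.2']
spin_id1_col, spin_id2_col, data_col, error_col = 1, 2, 3, 4
spin_id1 = line[spin_id1_col - 1]     # '@N'
spin_id2 = line[spin_id2_col - 1]     # '@H'
value = float(line[data_col - 1])     # 10.5 Hz
error = float(line[error_col - 1])    # 0.2 Hz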
Example #37
def grid_setup(lower=None,
               upper=None,
               inc=None,
               verbosity=1,
               skip_preset=True):
    """Determine the per-model grid bounds, allowing for the zooming grid search.

    @keyword lower:         The user supplied lower bounds of the grid search which must be equal to the number of parameters in the model.
    @type lower:            list of numbers
    @keyword upper:         The user supplied upper bounds of the grid search which must be equal to the number of parameters in the model.
    @type upper:            list of numbers
    @keyword inc:           The user supplied grid search increments.
    @type inc:              int or list of int
    @keyword verbosity:     The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:        int
    @keyword skip_preset:   This argument, when True, allows any parameter which already has a value set to be skipped in the grid search.
    @type skip_preset:      bool
    @return:                The per-model grid upper and lower bounds.  The first dimension of each structure corresponds to the model, the second the model parameters.
    @rtype:                 tuple of lists of lists of float, lists of lists of float, list of lists of int
    """

    # The specific analysis API object and parameter object.
    api = return_api()
    param_object = return_parameter_object()

    # Initialise.
    model_lower = []
    model_upper = []
    model_inc = []

    # Loop over the models.
    for model_info in api.model_loop():
        # Get the parameter names and current values.
        names = api.get_param_names(model_info)
        values = api.get_param_values(model_info)

        # No parameters for this model.
        if names == None or len(names) == 0:
            model_lower.append([])
            model_upper.append([])
            model_inc.append([])
            continue

        # The parameter number.
        n = len(names)

        # Make sure that the length of the parameter array is > 0.
        if n == 0:
            raise RelaxError(
                "Cannot run a grid search on a model with zero parameters.")

        # Check that the user supplied bound lengths are ok.
        if lower != None and len(lower) != n:
            raise RelaxLenError('lower bounds', n)
        if upper != None and len(upper) != n:
            raise RelaxLenError('upper bounds', n)

        # Check the user supplied increments.
        if isinstance(inc, list) and len(inc) != n:
            raise RelaxLenError('increment', n)
        if isinstance(inc, list):
            for i in range(n):
                if not (isinstance(inc[i], int) or inc[i] == None):
                    raise RelaxIntListIntError('increment', inc)
        elif not isinstance(inc, int):
            raise RelaxIntListIntError('increment', inc)

        # Convert to the model increment list.
        if isinstance(inc, int):
            model_inc.append([inc] * n)
        else:
            model_inc.append(inc)

        # Print out the model title.
        api.print_model_title(prefix="Grid search setup:  ",
                              model_info=model_info)

        # The grid zoom level.
        zoom = 0
        if hasattr(cdp, 'grid_zoom_level'):
            zoom = cdp.grid_zoom_level
        zoom_factor = 1.0 / 2.0**zoom
        if zoom > 0:
            print(
                "Zooming grid level of %s, scaling the grid size by a factor of %s.\n"
                % (zoom, zoom_factor))

        # Append empty lists for the bounds to be built up.
        model_lower.append([])
        model_upper.append([])

        # Loop over the parameters.
        data = []
        for i in range(n):
            # A comment for user feedback.
            comment = 'Default bounds'
            if lower != None and upper != None:
                comment = 'User supplied lower and upper bound'
            elif lower != None:
                comment = 'User supplied lower bound'
            elif upper != None:
                comment = 'User supplied upper bound'

            # Alias the number of increments for this parameter.
            incs = model_inc[-1][i]

            # Error checking for increment values of None.
            if incs == None and values[i] in [None, {}, []]:
                raise RelaxError(
                    "The parameter '%s' has no preset value, therefore a grid increment of None is not valid."
                    % names[i])

            # The lower bound for this parameter.
            if lower != None:
                lower_i = lower[i]
            else:
                lower_i = param_object.grid_lower(names[i],
                                                  incs=incs,
                                                  model_info=model_info)

            # The upper bound for this parameter.
            if upper != None:
                upper_i = upper[i]
            else:
                upper_i = param_object.grid_upper(names[i],
                                                  incs=incs,
                                                  model_info=model_info)

            # The skipping logic.
            skip = False
            if skip_preset:
                # Override the flag if the zoom is on.
                if zoom:
                    skip = False

                # No preset value.
                elif values[i] in [None, {}, []]:
                    skip = False

                # The preset value is a NaN value due to numpy conversions of None.
                elif isNaN(values[i]):
                    skip = False

                # Ok, now the parameter can be skipped.
                else:
                    skip = True

            # Override the skip flag if the incs value is None.
            if incs == None:
                skip = True

            # Skip preset values.
            if skip:
                lower_i = values[i]
                upper_i = values[i]
                model_inc[-1][i] = incs = 1
                comment = 'Preset value'

            # Zooming grid.
            elif zoom:
                # The full size and scaled size.
                size = upper_i - lower_i
                zoom_size = size * zoom_factor
                half_size = zoom_size / 2.0
                comment = 'Zoom grid width of %s %s' % (
                    zoom_size, param_object.units(names[i]))

                # The new size around the current value.
                lower_zoom = values[i] - half_size
                upper_zoom = values[i] + half_size

                # Outside of the original lower bound, so shift the grid to fit.
                if zoom > 0 and lower_zoom < lower_i:
                    # The amount to shift by.
                    shift = lower_i - lower_zoom

                    # Set the new bounds.
                    upper_i = upper_zoom + shift

                # Outside of the original upper bound, so shift the grid to fit.
                elif zoom > 0 and upper_zoom > upper_i:
                    # The amount to shift by.
                    shift = upper_i - upper_zoom

                    # Set the new bounds.
                    lower_i = lower_zoom + shift

                # Inside the original bounds.
                else:
                    lower_i = lower_zoom
                    upper_i = upper_zoom

            # Add to the data list for printing out.
            data.append([
                names[i],
                "%15s" % lower_i,
                "%15s" % upper_i,
                "%15s" % incs, comment
            ])

            # Scale the bounds.
            scaling = param_object.scaling(names[i], model_info=model_info)
            lower_i /= scaling
            upper_i /= scaling

            # Append.
            model_lower[-1].append(lower_i)
            model_upper[-1].append(upper_i)

        # Printout.
        if verbosity:
            write_data(out=sys.stdout,
                       headings=[
                           "Parameter", "Lower bound", "Upper bound",
                           "Increments", "Comment"
                       ],
                       data=data)
            sys.stdout.write('\n')

    # Return the bounds.
    return model_lower, model_upper, model_inc
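# A minimal sketch of the zooming grid arithmetic above for a single parameter
# (the numbers are hypothetical).  Each zoom level halves the grid width, and
# a zoomed grid falling outside the original bounds is shifted back inside.
zoom = 2
zoom_factor = 1.0 / 2.0**zoom                                    # 0.25.
lower_i, upper_i, value = 0.0, 8.0, 0.5
half_size = (upper_i - lower_i) * zoom_factor / 2.0              # 1.0.
lower_zoom, upper_zoom = value - half_size, value + half_size    # (-0.5, 1.5).
if lower_zoom < lower_i:
    shift = lower_i - lower_zoom                                 # 0.5.
    upper_i = upper_zoom + shift                                 # Grid becomes [0.0, 2.0].
elif upper_zoom > upper_i:
    shift = upper_i - upper_zoom
    lower_i = lower_zoom + shift
else:
    lower_i, upper_i = lower_zoom, upper_zoom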
Example #38
def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
    """Model selection function.

    @keyword method:        The model selection method.  This can currently be one of:
                                - 'AIC', Akaike's Information Criteria.
                                - 'AICc', Small sample size corrected AIC.
                                - 'BIC', Bayesian or Schwarz Information Criteria.
                                - 'CV', Single-item-out cross-validation.
                            Note that 'CV' is accepted but not currently implemented, and none of the other model selection techniques are supported.
    @type method:           str
    @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected data pipe.
    @type modsel_pipe:      str
    @keyword bundle:        The optional data pipe bundle to associate the newly created pipe with.
    @type bundle:           str or None
    @keyword pipes:         A list of the data pipes to use in the model selection.
    @type pipes:            list of str
    """

    # Test if the pipe already exists.
    if has_pipe(modsel_pipe):
        raise RelaxPipeError(modsel_pipe)

    # Use all pipes.
    if pipes == None:
        # Get all data pipe names from the relax data store.
        pipes = pipe_names()

    # Select the model selection technique.
    if method == 'AIC':
        print("AIC model selection.")
        formula = aic
    elif method == 'AICc':
        print("AICc model selection.")
        formula = aicc
    elif method == 'BIC':
        print("BIC model selection.")
        formula = bic
    elif method == 'CV':
        print("CV model selection.")
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
    else:
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")

    # No pipes.
    if len(pipes) == 0:
        raise RelaxError("No data pipes are available for use in model selection.")

    # Initialise.
    function_type = {}
    model_loop = {}
    model_type = {}
    duplicate_data = {}
    model_statistics = {}
    skip_function = {}
    modsel_pipe_exists = False

    # Cross validation setup.
    if isinstance(pipes[0], list):
        # No pipes.
        if len(pipes[0]) == 0:
            raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")

        # Loop over the data pipes.
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                # The specific analysis API object.
                api = return_api(pipe_name=pipes[i][j])

                # Store the specific functions.
                model_loop[pipes[i][j]] = api.model_loop
                model_type[pipes[i][j]] = api.model_type
                duplicate_data[pipes[i][j]] = api.duplicate_data
                model_statistics[pipes[i][j]] = api.model_statistics
                skip_function[pipes[i][j]] = api.skip_function

        # The model loop should be the same for all data pipes!
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
                    raise RelaxError("The models for each data pipes should be the same.")

        # Alias some functions from the specific API of the first data pipe.
        api = return_api(pipe_name=pipes[0][0])
        model_loop = api.model_loop
        model_desc = api.model_desc

        # Global vs. local models.
        global_flag = False
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_type[pipes[i][j]]() == 'global':
                    global_flag = True

    # All other model selection setup.
    else:
        # Loop over the data pipes.
        for i in range(len(pipes)):
            # The specific analysis API object for this data pipe.
            api = return_api(pipe_name=pipes[i])

            # Store the specific functions.
            model_loop[pipes[i]] = api.model_loop
            model_type[pipes[i]] = api.model_type
            duplicate_data[pipes[i]] = api.duplicate_data
            model_statistics[pipes[i]] = api.model_statistics
            skip_function[pipes[i]] = api.skip_function

        # Alias some functions from the specific API of the first data pipe.
        api = return_api(pipe_name=pipes[0])
        model_loop = api.model_loop
        model_desc = api.model_desc

        # Global vs. local models.
        global_flag = False
        for j in range(len(pipes)):
            if model_type[pipes[j]]() == 'global':
                global_flag = True


    # Loop over the base models.
    for model_info in model_loop():
        # Print out.
        print("\n")
        desc = model_desc(model_info)
        if desc:
            print(desc)

        # Initial model.
        best_model = None
        best_crit = 1e300
        data = []

        # Loop over the pipes.
        for j in range(len(pipes)):
            # Single-item-out cross validation.
            if method == 'CV':
                # Sum of chi-squared values.
                sum_crit = 0.0

                # Loop over the validation samples and sum the chi-squared values.
                for m in range(len(pipes[j])):
                    # Alias the data pipe name (a separate index is used, as k is reassigned by the model statistics call below).
                    pipe = pipes[j][m]

                    # Switch to this pipe.
                    switch(pipe)

                    # Skip function.
                    if skip_function[pipe](model_info):
                        continue

                    # Get the model statistics.
                    k, n, chi2 = model_statistics[pipe](model_info)

                    # Missing data sets.
                    if k == None or n == None or chi2 == None:
                        continue

                    # Chi2 sum.
                    sum_crit = sum_crit + chi2

                # Cross-validation criterion (average chi-squared value).
                crit = sum_crit / float(len(pipes[j]))

            # Other model selection methods.
            else:
                # Reassign the pipe.
                pipe = pipes[j]

                # Switch to this pipe.
                switch(pipe)

                # Skip function.
                if skip_function[pipe](model_info):
                    continue

                # Get the model statistics.
                k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)

                # Missing data sets.
                if k == None or n == None or chi2 == None:
                    continue

                # Calculate the criterion value.
                crit = formula(chi2, float(k), float(n))

                # Store the values for a later printout.
                data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit])

            # Select model.
            if crit < best_crit:
                best_model = pipe
                best_crit = crit

        # Write out the table.
        write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data)

        # Duplicate the data from the 'best_model' to the model selection data pipe.
        if best_model != None:
            # Print out of selected model.
            print("The model from the data pipe " + repr(best_model) + " has been selected.")

            # Switch to the selected data pipe.
            switch(best_model)

            # Duplicate.
            duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)

            # Model selection pipe now exists.
            modsel_pipe_exists = True

        # No model selected.
        else:
            # Print out of selected model.
            print("No model has been selected.")

    # Switch to the model selection pipe.
    if modsel_pipe_exists:
        switch(modsel_pipe)

    # Bundle the data pipe.
    if bundle:
        pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)

    # Update all of the required metadata structures.
    mol_res_spin.metadata_update()
    interatomic.metadata_update()
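# The information criteria selected between above, written out as a short
# reference sketch.  These are the standard textbook definitions with k the
# number of parameters, n the number of data points and chi2 the chi-squared
# value; the relax functions aic(), aicc() and bic() are assumed to match.
from math import log

def aic_sketch(chi2, k, n):
    return chi2 + 2.0*k

def aicc_sketch(chi2, k, n):
    return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)

def bic_sketch(chi2, k, n):
    return chi2 + k*log(n)

# The pipe with the lowest criterion value wins, e.g. aic_sketch(10.0, 2, 50) -> 14.0.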
Example #39
def grid_setup(lower=None, upper=None, inc=None, verbosity=1, skip_preset=True):
    """Determine the per-model grid bounds, allowing for the zooming grid search.

    @keyword lower:         The user supplied lower bounds of the grid search which must be equal to the number of parameters in the model.
    @type lower:            list of numbers
    @keyword upper:         The user supplied upper bounds of the grid search which must be equal to the number of parameters in the model.
    @type upper:            list of numbers
    @keyword inc:           The user supplied grid search increments.
    @type inc:              int or list of int
    @keyword verbosity:     The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:        int
    @keyword skip_preset:   This argument, when True, allows any parameter which already has a value set to be skipped in the grid search.
    @type skip_preset:      bool
    @return:                The per-model grid upper and lower bounds.  The first dimension of each structure corresponds to the model, the second the model parameters.
    @rtype:                 tuple of lists of lists of float, lists of lists of float, list of lists of int
    """

    # The specific analysis API object and parameter object.
    api = return_api()
    param_object = return_parameter_object()

    # Initialise.
    model_lower = []
    model_upper = []
    model_inc = []

    # Loop over the models.
    for model_info in api.model_loop():
        # Get the parameter names and current values.
        names = api.get_param_names(model_info)
        values = api.get_param_values(model_info)

        # No parameters for this model.
        if names == None or len(names) == 0:
            model_lower.append([])
            model_upper.append([])
            model_inc.append([])
            continue

        # The parameter number.
        n = len(names)

        # Make sure that the length of the parameter array is > 0.
        if n == 0:
            raise RelaxError("Cannot run a grid search on a model with zero parameters.")

        # Check that the user supplied bound lengths are ok.
        if lower != None and len(lower) != n:
            raise RelaxLenError('lower bounds', n)
        if upper != None and len(upper) != n:
            raise RelaxLenError('upper bounds', n)

        # Check the user supplied increments.
        if isinstance(inc, list) and len(inc) != n:
            raise RelaxLenError('increment', n)
        if isinstance(inc, list):
            for i in range(n):
                if not (isinstance(inc[i], int) or inc[i] == None):
                    raise RelaxIntListIntError('increment', inc)
        elif not isinstance(inc, int):
            raise RelaxIntListIntError('increment', inc)

        # Convert to the model increment list.
        if isinstance(inc, int):
            model_inc.append([inc]*n)
        else:
            model_inc.append(inc)

        # Print out the model title.
        api.print_model_title(prefix="Grid search setup:  ", model_info=model_info)

        # The grid zoom level.
        zoom = 0
        if hasattr(cdp, 'grid_zoom_level'):
            zoom = cdp.grid_zoom_level
        zoom_factor = 1.0 / 2.0**zoom
        if zoom > 0:
            print("Zooming grid level of %s, scaling the grid size by a factor of %s.\n" % (zoom, zoom_factor))

        # Append empty lists for the bounds to be built up.
        model_lower.append([])
        model_upper.append([])

        # Loop over the parameters.
        data = []
        for i in range(n):
            # A comment for user feedback.
            comment = 'Default bounds'
            if lower != None and upper != None:
                comment = 'User supplied lower and upper bound'
            elif lower != None:
                comment = 'User supplied lower bound'
            elif upper != None:
                comment = 'User supplied upper bound'

            # Alias the number of increments for this parameter.
            incs = model_inc[-1][i]

            # Error checking for increment values of None.
            if incs == None and values[i] in [None, {}, []]:
                raise RelaxError("The parameter '%s' has no preset value, therefore a grid increment of None is not valid." % names[i])

            # The lower bound for this parameter.
            if lower != None:
                lower_i = lower[i]
            else:
                lower_i = param_object.grid_lower(names[i], incs=incs, model_info=model_info)

            # The upper bound for this parameter.
            if upper != None:
                upper_i = upper[i]
            else:
                upper_i = param_object.grid_upper(names[i], incs=incs, model_info=model_info)

            # The skipping logic.
            skip = False
            if skip_preset:
                # Override the flag if the zoom is on.
                if zoom:
                    skip = False

                # No preset value.
                elif values[i] in [None, {}, []]:
                    skip = False

                # The preset value is a NaN value due to numpy conversions of None.
                elif isNaN(values[i]):
                    skip = False

                # Ok, now the parameter can be skipped.
                else:
                    skip = True

            # Override the skip flag if the incs value is None.
            if incs == None:
                skip = True

            # Skip preset values.
            if skip:
                lower_i = values[i]
                upper_i = values[i]
                model_inc[-1][i] = incs = 1
                comment = 'Preset value'

            # Zooming grid.
            elif zoom:
                # The full size and scaled size.
                size = upper_i - lower_i
                zoom_size = size * zoom_factor
                half_size = zoom_size / 2.0
                comment = 'Zoom grid width of %s %s' % (zoom_size, param_object.units(names[i]))

                # The new size around the current value.
                lower_zoom = values[i] - half_size
                upper_zoom = values[i] + half_size

                # Outside of the original lower bound, so shift the grid to fit.
                if zoom > 0 and lower_zoom < lower_i:
                    # The amount to shift by.
                    shift = lower_i - lower_zoom

                    # Set the new bounds.
                    upper_i = upper_zoom + shift

                # Outside of the original upper bound, so shift the grid to fit.
                elif zoom > 0 and upper_zoom > upper_i:
                    # The amount to shift by.
                    shift = upper_i - upper_zoom

                    # Set the new bounds.
                    lower_i = lower_zoom + shift

                # Inside the original bounds.
                else:
                    lower_i = lower_zoom
                    upper_i = upper_zoom

            # Add to the data list for printing out.
            data.append([names[i], "%15s" % lower_i, "%15s" % upper_i, "%15s" % incs, comment])

            # Scale the bounds.
            scaling = param_object.scaling(names[i], model_info=model_info)
            lower_i /= scaling
            upper_i /= scaling

            # Append.
            model_lower[-1].append(lower_i)
            model_upper[-1].append(upper_i)

        # Printout.
        if verbosity:
            write_data(out=sys.stdout, headings=["Parameter", "Lower bound", "Upper bound", "Increments", "Comment"], data=data)
            sys.stdout.write('\n')

    # Return the bounds.
    return model_lower, model_upper, model_inc
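# A minimal sketch of the increment conversion above: a single int inc is
# expanded to one value per model parameter, while a list is used as supplied
# (the n and inc values here are hypothetical).
n = 3
inc = 21
model_inc = []
if isinstance(inc, int):
    model_inc.append([inc]*n)    # [[21, 21, 21]].
else:
    model_inc.append(inc)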
Example #40
def read(file=None,
         dir=None,
         spectrum_id=None,
         dim=1,
         int_col=None,
         int_method=None,
         spin_id_col=None,
         mol_name_col=None,
         res_num_col=None,
         res_name_col=None,
         spin_num_col=None,
         spin_name_col=None,
         sep=None,
         spin_id=None,
         ncproc=None,
         verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file(s) containing the peak intensities.
    @type file:             str or list of str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str or list of str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int or list of int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError(
            "The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra."
            % (int_method, cdp.int_method))

    # Multiple ID flags.
    flag_multi = False
    flag_multi_file = False
    flag_multi_col = False
    if isinstance(spectrum_id, list) or spectrum_id == 'auto':
        flag_multi = True
    if isinstance(file, list):
        flag_multi_file = True
    if isinstance(int_col, list) or spectrum_id == 'auto':
        flag_multi_col = True

    # List argument checks.
    if flag_multi:
        # Too many lists.
        if flag_multi_file and flag_multi_col:
            raise RelaxError(
                "If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists."
            )

        # Not enough lists.
        if not flag_multi_file and not flag_multi_col:
            raise RelaxError(
                "If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length."
            )

        # List lengths for multiple files.
        if flag_multi_file and len(spectrum_id) != len(file):
            raise RelaxError(
                "The file list %s and spectrum ID list %s do not have the same number of elements."
                % (file, spectrum_id))

        # List lengths for multiple intensity columns.
        if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
            raise RelaxError(
                "The spectrum ID list %s and intensity column list %s do not have the same number of elements."
                % (spectrum_id, int_col))

    # More list argument checks (when only one spectrum ID is supplied).
    else:
        # Multiple files.
        if flag_multi_file:
            raise RelaxError(
                "If multiple files are supplied, then multiple spectrum IDs must also be supplied."
            )

        # Multiple intensity columns.
        if flag_multi_col:
            raise RelaxError(
                "If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied."
            )

    # Intensity column checks.
    if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
        raise RelaxError(
            "If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length."
        )

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError(
            "The intensity measure '%s' is not one of 'height', 'point sum', 'other'."
            % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Convert the file argument to a list if necessary.
    if not isinstance(file, list):
        file = [file]

    # Loop over all files.
    for file_index in range(len(file)):
        # Read the peak list data.
        peak_list = read_peak_list(file=file[file_index],
                                   dir=dir,
                                   int_col=int_col,
                                   spin_id_col=spin_id_col,
                                   mol_name_col=mol_name_col,
                                   res_num_col=res_num_col,
                                   res_name_col=res_name_col,
                                   spin_num_col=spin_num_col,
                                   spin_name_col=spin_name_col,
                                   sep=sep,
                                   spin_id=spin_id)

        # Automatic spectrum IDs.
        if spectrum_id == 'auto':
            spectrum_id = peak_list[0].intensity_name

        # Loop over the assignments.
        data = []
        data_flag = False
        for assign in peak_list:
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim - 1], spin_name=assign.spin_names[dim - 1])

            # Convert the intensity data to a list if needed.
            intensity = assign.intensity
            if not isinstance(intensity, list):
                intensity = [intensity]

            # Loop over the intensity data.
            for int_index in range(len(intensity)):
                # Sanity check.
                if intensity[int_index] == 0.0:
                    warn(
                        RelaxWarning(
                            "A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on."
                            % spin_id))

                # Get the spin container.
                spin = return_spin(spin_id=spin_id)
                if not spin:
                    warn(RelaxNoSpinWarning(spin_id))
                    continue

                # Skip deselected spins.
                if not spin.select:
                    continue

                # Initialise.
                if not hasattr(spin, 'peak_intensity'):
                    spin.peak_intensity = {}

                # Intensity scaling.
                if ncproc != None:
                    intensity[int_index] = intensity[int_index] / float(2**ncproc)

                # Add the data.
                if flag_multi_file:
                    id = spectrum_id[file_index]
                elif flag_multi_col:
                    id = spectrum_id[int_index]
                else:
                    id = spectrum_id
                spin.peak_intensity[id] = intensity[int_index]

                # Switch the flag.
                data_flag = True

                # Append the data for printing out.
                data.append([spin_id, repr(intensity[int_index])])

        # Add the spectrum id (and ncproc) to the relax data store.
        spectrum_ids = spectrum_id
        if isinstance(spectrum_id, str):
            spectrum_ids = [spectrum_id]
        if ncproc != None and not hasattr(cdp, 'ncproc'):
            cdp.ncproc = {}
        for i in range(len(spectrum_ids)):
            add_spectrum_id(spectrum_ids[i])
            if ncproc != None:
                cdp.ncproc[spectrum_ids[i]] = ncproc

        # No data.
        if not data_flag:
            # Delete all the data.
            delete(spectrum_id)

            # Raise the error.
            raise RelaxError("No data could be loaded from the peak list")

        # Printout.
        if verbose:
            print(
                "\nThe following intensities have been loaded into the relax data store:\n"
            )
            write_data(out=sys.stdout,
                       headings=["Spin_ID", "Intensity"],
                       data=data)
        print('')
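# A minimal sketch of the Bruker ncproc intensity scaling applied above (the
# numbers are hypothetical): stored intensities are divided by 2**ncproc.
raw_intensity = 524288.0
ncproc = 6
scaled = raw_intensity / float(2**ncproc)    # 8192.0.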
Example #41
def define(spin_id1=None, spin_id2=None, pipe=None, direct_bond=False, verbose=True):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:      The spin identifier string of the first spin of the pair.
    @type spin_id1:         str
    @keyword spin_id2:      The spin identifier string of the second spin of the pair.
    @type spin_id2:         str
    @keyword pipe:          The data pipe to operate on.  Defaults to the current data pipe.
    @type pipe:             str
    @keyword direct_bond:   A flag specifying if the two spins are directly bonded.
    @type direct_bond:      bool
    @keyword verbose:       A flag which if True will result in printouts of the created interatomic data containers.
    @type verbose:          bool
    """

    # The data pipe.
    if pipe == None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Loop over both spin selections.
    ids = []
    for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True):
        for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True):
            # Directly bonded atoms.
            if direct_bond:
                # Different molecules.
                if mol_name1 != mol_name2:
                    continue

                # From structural info.
                if hasattr(dp, 'structure') and dp.structure.get_molecule(mol_name1, model=1):
                    if not dp.structure.are_bonded(atom_id1=id1, atom_id2=id2):
                        continue

                # From the residue info.
                else:
                    # No element info.
                    if not hasattr(spin1, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id1)
                    if not hasattr(spin2, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id2)

                    # Backbone NH and CH pairs.
                    pair = False
                    if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'):
                        pair = True
                    elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'):
                        pair = True

                    # The spins are not in the same residue, so skip.
                    if pair and res_num1 != None and res_num1 != res_num2:
                        continue
                    elif pair and res_num1 == None and res_name1 != res_name2:
                        continue

            # Get the interatomic data object, if it exists.
            interatom = return_interatom(id1, id2, pipe=pipe)

            # Create the container if needed.
            if interatom == None:
                interatom = create_interatom(spin_id1=id1, spin_id2=id2, pipe=pipe)

            # Check that this has not already been set up.
            if interatom.dipole_pair:
                raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2))

            # Set a flag indicating that a dipole-dipole interaction is present.
            interatom.dipole_pair = True

            # Store the IDs for the printout.
            ids.append([repr(id1), repr(id2)])

    # No matches, so fail!
    if not len(ids):
        # Find the problem.
        count1 = 0
        count2 = 0
        for spin in spin_loop(spin_id1):
            count1 += 1
        for spin in spin_loop(spin_id2):
            count2 += 1

        # Report the problem.
        if count1 == 0 and count2 == 0:
            raise RelaxError("Neither spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2))
        elif count1 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id1)
        elif count2 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id2)
        else:
            raise RelaxError("Unknown error.")

    # Print out.
    if verbose:
        print("Interatomic interactions are now defined for the following spins:\n")
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
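# A minimal sketch of the backbone pair rule used above when no structural
# data is present (the element values are hypothetical stand-ins for
# spin.element).
element1, element2 = 'N', 'H'
pair = (element1 == 'N' and element2 == 'H') or (element2 == 'N' and element1 == 'H') or (element1 == 'C' and element2 == 'H') or (element2 == 'C' and element1 == 'H')
# pair is True, so a directly bonded N-H interatomic container would be
# created, provided both spins belong to the same residue.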
Example #42
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id1)
        spin2 = return_spin(spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J coupling have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
Example #43
def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    @keyword verbose:       A flag which if True will print additional information out.
    @type verbose:          bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible print.
    if verbose:
        print("\nThe following signal to noise ratios has been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError(
                "Intensity error analysis has not been performed.  Please see spectrum.error_analysis()."
            )

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the ID.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the sn_ratio.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the sn_ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under sorted ids.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        if verbose:
            section(file=sys.stdout,
                    text="Signal to noise ratio for spin ID '%s'" % spin_id,
                    prespace=1)
            write_data(out=sys.stdout,
                       headings=["Spectrum ID", "Signal", "Noise", "S/N"],
                       data=data_i)
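# The signal to noise definition used above, as a two-line sketch (the
# intensity and error values are hypothetical).
pint, pint_err = 150000.0, 1200.0
sn_ratio = pint / pint_err    # 125.0.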
Example #44
def define_dipole_pair(spin_id1=None, spin_id2=None, spin1=None, spin2=None, pipe=None, direct_bond=False, spin_selection=False, verbose=True):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:          The spin identifier string of the first spin of the pair.
    @type spin_id1:             str
    @keyword spin_id2:          The spin identifier string of the second spin of the pair.
    @type spin_id2:             str
    @keyword spin1:             An optional single spin container for the first atom.  This is for speeding up the interatomic data container creation, if the spin containers are already available in the calling function.
    @type spin1:                SpinContainer instance
    @keyword spin2:             An optional single spin container for the second atom.  This is for speeding up the interatomic data container creation, if the spin containers are already available in the calling function.
    @type spin2:                SpinContainer instance
    @keyword pipe:              The data pipe to operate on.  Defaults to the current data pipe.
    @type pipe:                 str
    @keyword direct_bond:       A flag specifying if the two spins are directly bonded.
    @type direct_bond:          bool
    @keyword spin_selection:    Define the interatomic data container selection based on the spin selection.  If either spin is deselected, the interatomic container will also be deselected.  Otherwise the container will be selected.
    @type spin_selection:       bool
    @keyword verbose:           A flag which if True will result in printouts of the created interatomic data containers.
    @type verbose:              bool
    """

    # The data pipe.
    if pipe == None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Initialise data structures for storing spin data.
    ids = []
    spins = []
    spin_selections = []

    # Pre-supplied spins.
    if spin1 and spin2:
        # Store the IDs for the printout.
        ids.append([spin_id1, spin_id2])

        # Store the spin data.
        spins.append([spin1, spin2])
        spin_selections.append([spin1.select, spin2.select])

    # Use the structural data to find connected atoms.
    elif hasattr(dp, 'structure'):
        # The selection objects from the structural data of the given pipe.
        selection1 = dp.structure.selection(atom_id=spin_id1)
        selection2 = dp.structure.selection(atom_id=spin_id2)

        # Loop over the atoms of the first spin selection.
        for mol_name1, res_num1, res_name1, atom_num1, atom_name1, mol_index1, atom_index1 in dp.structure.atom_loop(selection=selection1, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
            # Generate the first spin ID.
            id1 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name1, res_num=res_num1, res_name=res_name1, spin_num=atom_num1, spin_name=atom_name1)

            # Does the spin exist?
            spin1 = return_spin(spin_id=id1)
            if not spin1:
                continue

            # Loop over the atoms of the second spin selection.
            for mol_name2, res_num2, res_name2, atom_num2, atom_name2, mol_index2, atom_index2 in dp.structure.atom_loop(selection=selection2, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # Skip non-bonded atom pairs.
                    if not dp.structure.are_bonded_index(mol_index1=mol_index1, atom_index1=atom_index1, mol_index2=mol_index2, atom_index2=atom_index2):
                        continue

                # Generate the second spin ID.
                id2 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name2, res_num=res_num2, res_name=res_name2, spin_num=atom_num2, spin_name=atom_name2)

                # Does the spin exist?
                spin2 = return_spin(spin_id=id2)
                if not spin2:
                    continue

                # Store the IDs for the printout.
                ids.append([id1, id2])

                # Store the spin data.
                spins.append([spin1, spin2])
                spin_selections.append([spin1.select, spin2.select])

    # No structural data present or the spin IDs are not in the structural data, so use spin loops and some basic rules.
    if ids == []:
        for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True):
            for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # No element info.
                    if not hasattr(spin1, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id1)
                    if not hasattr(spin2, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id2)

                    # Backbone NH and CH pairs.
                    pair = False
                    if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'):
                        pair = True
                    elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'):
                        pair = True

                    # The pair must lie within a single residue, so skip spins from different residues.
                    if pair and res_num1 != None and res_num1 != res_num2:
                        continue
                    elif pair and res_num1 == None and res_name1 != res_name2:
                        continue

                # Store the IDs for the printout.
                ids.append([id1, id2])

                # Store the spin data.
                spins.append([spin1, spin2])
                spin_selections.append([spin1.select, spin2.select])

    # No matches, so fail!
    if not len(ids):
        # Find the problem.
        count1 = 0
        count2 = 0
        for spin in spin_loop(spin_id1):
            count1 += 1
        for spin in spin_loop(spin_id2):
            count2 += 1

        # Report the problem.
        if count1 == 0 and count2 == 0:
            raise RelaxError("Neither spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2))
        elif count1 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id1)
        elif count2 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id2)
        else:
            raise RelaxError("Unknown error.")

    # Define the interaction.
    for i in range(len(ids)):
        # Unpack.
        id1, id2 = ids[i]
        spin1, spin2 = spins[i]

        # Get the interatomic data object, if it exists.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash, pipe=pipe)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=id1, spin_id2=id2, spin1=spin1, spin2=spin2, pipe=pipe)

        # Check that this has not already been set up.
        if interatom.dipole_pair:
            raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2))

        # Set a flag indicating that a dipole-dipole interaction is present.
        interatom.dipole_pair = True

        # Set the selection.
        if spin_selection:
            interatom.select = False
            if spin_selections[i][0] and spin_selections[i][1]:
                interatom.select = True

    # Printout.
    if verbose:
        # Convert the IDs to their string representations for the printout.
        for i in range(len(ids)):
            ids[i][0] = repr(ids[i][0])
            ids[i][1] = repr(ids[i][1])

        # The printout.
        print("Interatomic interactions are now defined for the following spins:\n")
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
Example #45
def copy(pipe_from=None, pipe_to=None, spin_id1=None, spin_id2=None, verbose=True):
    """Copy the interatomic data from one data pipe to another.

    @keyword pipe_from:         The data pipe to copy the interatomic data from.  This defaults to the current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the interatomic data to.  This defaults to the current data pipe.
    @type pipe_to:              str
    @keyword spin_id1:          The spin ID string of the first atom.
    @type spin_id1:             str
    @keyword spin_id2:          The spin ID string of the second atom.
    @type spin_id2:             str
    @keyword verbose:           A flag which if True will cause info about each spin pair to be printed out.
    @type verbose:              bool
    """

    # Defaults.
    if pipe_from == None and pipe_to == None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from == None:
        pipe_from = pipes.cdp_name()
    elif pipe_to == None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    check_pipe(pipe_from)
    check_pipe(pipe_to)

    # Check that the spin IDs exist.
    if spin_id1:
        if count_spins(selection=spin_id1, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_from)
        if count_spins(selection=spin_id1, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_to)
    if spin_id2:
        if count_spins(selection=spin_id2, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_from)
        if count_spins(selection=spin_id2, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_to)

    # Check for the sequence data in the target pipe if no spin IDs are given.
    if not spin_id1 and not spin_id2:
        for spin, spin_id in spin_loop(pipe=pipe_from, return_id=True):
            if not return_spin(spin_id=spin_id, pipe=pipe_to):
                raise RelaxNoSpinError(spin_id, pipe_to)

    # Test if pipe_from contains interatomic data (skipping the rest of the function if it is missing).
    if not exists_data(pipe_from):
        return

    # Loop over the interatomic data of the pipe_from data pipe.
    ids = []
    for interatom in interatomic_loop(selection1=spin_id1, selection2=spin_id2, pipe=pipe_from):
        # Create a new container.
        new_interatom = create_interatom(spin_id1=interatom.spin_id1, spin_id2=interatom.spin_id2, pipe=pipe_to)

        # Duplicate all the objects of the container.
        for name in dir(interatom):
            # Skip special objects.
            if search('^_', name):
                continue

            # Skip the spin IDs.
            if name in ['spin_id1', 'spin_id2']:
                continue

            # Skip class methods.
            if name in interatom.__class__.__dict__:
                continue

            # Duplicate all other objects.
            obj = deepcopy(getattr(interatom, name))
            setattr(new_interatom, name, obj)

        # Store the IDs for the printout.
        ids.append([repr(interatom.spin_id1), repr(interatom.spin_id2)])

        # Reconfigure the spin hashes.
        hash_update(interatom=new_interatom, pipe=pipe_to)

    # Print out.
    if verbose:
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
Example #46
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("Either the data column or the error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J coupling have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
Example #47
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Set up the magnetic dipole-dipole interaction.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist != None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

        # Unit conversion (skipped if no distance value is present).
        if unit == 'Angstrom' and ave_dist != None:
            ave_dist = ave_dist * 1e-10

        # Get the two spins.
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)

        # Skip the line if either spin is missing from the current data pipe.
        if not spin1 or not spin2:
            warn(RelaxWarning("The spin pair '%s' and '%s' cannot both be found in the current data pipe, skipping the data %s." % (spin_id1, spin_id2, line)))
            continue

        # Get the interatomic data container.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # No container found, so create it.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
Example #48
def get_pos(spin_id=None, str_id=None, ave_pos=False):
    """Load the spins from the structural object into the relax data store.

    @keyword spin_id:           The molecule, residue, and spin identifier string.
    @type spin_id:              str
    @keyword str_id:            The structure identifier.  This can be the file name, model number, or structure number.
    @type str_id:               int or str
    @keyword ave_pos:           A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
    @type ave_pos:              bool
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if the structure exists.
    if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules():
        raise RelaxNoPdbError

    # Loop over all atoms of the spin_id selection.
    data = []
    for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
        # Remove the '+' regular expression character from the mol, res, and spin names!
        if mol_name and search(r'\+', mol_name):
            mol_name = mol_name.replace('+', '')
        if res_name and search(r'\+', res_name):
            res_name = res_name.replace('+', '')
        if atom_name and search(r'\+', atom_name):
            atom_name = atom_name.replace('+', '')

        # The spin identification string.
        id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name)

        # Get the spin container.
        spin_cont = return_spin(id)

        # Skip the spin if it doesn't exist.
        if spin_cont == None:
            continue

        # Add the position vector to the spin container.
        spin_cont.pos = pos

        # Store the data for a printout at the end.
        data.append([id, repr(pos)])

    # No positions found.
    if not len(data):
        raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id)

    # Update pseudo-atoms.
    for spin in spin_loop():
        if hasattr(spin, 'members'):
            # Get the spin positions.
            positions = []
            for atom in spin.members:
                # Get the spin container.
                subspin = return_spin(atom)

                # Test that the spin exists.
                if subspin == None:
                    raise RelaxNoSpinError(atom)

                # Test the position.
                if not hasattr(subspin, 'pos') or subspin.pos is None or not len(subspin.pos):
                    raise RelaxError("Positional information is not available for the atom '%s'." % atom)

                # Alias the position.
                pos = subspin.pos

                # Convert to a list of lists if not already.
                multi_model = True
                if type(pos[0]) in [float, float64]:
                    multi_model = False
                    pos = [pos]

                # Store the position.
                positions.append([])
                for i in range(len(pos)):
                    positions[-1].append(pos[i].tolist())

            # The averaging.
            if spin.averaging == 'linear':
                # Average pos.
                ave = linear_ave(positions)

                # Convert to the correct structure.
                if multi_model:
                    spin.pos = ave
                else:
                    spin.pos = ave[0]

    # Print out.
    write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)
Example #49
def copy(pipe_from=None, pipe_to=None, spin_id1=None, spin_id2=None, verbose=True):
    """Copy the interatomic data from one data pipe to another.

    @keyword pipe_from:         The data pipe to copy the interatomic data from.  This defaults to the current data pipe.
    @type pipe_from:            str
    @keyword pipe_to:           The data pipe to copy the interatomic data to.  This defaults to the current data pipe.
    @type pipe_to:              str
    @keyword spin_id1:          The spin ID string of the first atom.
    @type spin_id1:             str
    @keyword spin_id2:          The spin ID string of the second atom.
    @type spin_id2:             str
    @keyword verbose:           A flag which if True will cause info about each spin pair to be printed out.
    @type verbose:              bool
    """

    # Defaults.
    if pipe_from == None and pipe_to == None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from == None:
        pipe_from = pipes.cdp_name()
    elif pipe_to == None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # Check that the spin IDs exist.
    if spin_id1:
        if count_spins(selection=spin_id1, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_from)
        if count_spins(selection=spin_id1, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_to)
    if spin_id2:
        if count_spins(selection=spin_id2, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_from)
        if count_spins(selection=spin_id2, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_to)

    # Check for the sequence data in the target pipe if no spin IDs are given.
    if not spin_id1 and not spin_id2:
        for spin, spin_id in spin_loop(pipe=pipe_from, return_id=True):
            if not return_spin(spin_id, pipe=pipe_to):
                raise RelaxNoSpinError(spin_id, pipe_to)

    # Test if pipe_from contains interatomic data (skipping the rest of the function if it is missing).
    if not exists_data(pipe_from):
        return

    # Loop over the interatomic data of the pipe_from data pipe.
    ids = []
    for interatom in interatomic_loop(selection1=spin_id1, selection2=spin_id2, pipe=pipe_from):
        # Create a new container.
        new_interatom = create_interatom(spin_id1=interatom.spin_id1, spin_id2=interatom.spin_id2, pipe=pipe_to)

        # Duplicate all the objects of the container.
        for name in dir(interatom):
            # Skip special objects.
            if search('^_', name):
                continue

            # Skip the spin IDs.
            if name in ['spin_id1', 'spin_id2']:
                continue

            # Skip class methods.
            if name in list(interatom.__class__.__dict__.keys()):
                continue

            # Duplicate all other objects.
            obj = deepcopy(getattr(interatom, name))
            setattr(new_interatom, name, obj)

        # Store the IDs for the printout.
        ids.append([repr(interatom.spin_id1), repr(interatom.spin_id2)])

    # Print out.
    if verbose:
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)