def run(data_path, grid_search_path, ensemble_output_path, score_output_path, number_of_partitions, number_of_iterations, best_proportion, used_proportion):
    # Read partitioned input data
    data = read_partitioned_data(data_path, number_of_iterations, number_of_partitions)

    # Read true values from the partitioned data set
    true_values = get_true_values(data)

    # Read the grid search results as input data
    results = read_data(grid_search_path)

    # Construct the ensemble based on the results of the grid search and the
    # proportion parameters passed to this script
    ensemble = construct_ensemble(results, best_proportion, used_proportion)

    # Retrieve the classification results from the ensemble based on a
    # popularity vote
    predicted_values = ensemble_vote(ensemble)

    # Score the classification results of the ensemble against the true values
    result = Result()
    result.add_values(true_values, predicted_values)
    result.calculate()

    # Output the ensemble into the specified file
    write_data(ensemble_output_path, ensemble)

    # Output the ensemble score into the specified file
    write_data(score_output_path, result)
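
# A minimal sketch of the "popularity vote" performed by ensemble_vote()
# above, assuming each ensemble member contributes one predicted label per
# sample. The real ensemble objects and the ensemble_vote() signature are not
# shown in this file, so the member_predictions structure here is hypothetical.
from collections import Counter

def majority_vote(member_predictions):
    """Return the most common label per sample across ensemble members.

    member_predictions is a list of equal-length label lists, one per member.
    """
    votes_per_sample = zip(*member_predictions)
    return [Counter(votes).most_common(1)[0][0] for votes in votes_per_sample]

# Example: three members voting on four samples.
# majority_vote([['cat', 'dog', 'cat', 'dog'],
#                ['cat', 'cat', 'cat', 'dog'],
#                ['dog', 'dog', 'cat', 'dog']])  ->  ['cat', 'dog', 'cat', 'dog']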
def write(align_id=None, file=None, dir=None, bc=False, force=False):
    """Display the RDC data corresponding to the alignment ID.

    @keyword align_id:  The alignment tensor ID string.
    @type align_id:     str
    @keyword file:      The file name or object to write to.
    @type file:         str or file object
    @keyword dir:       The name of the directory to place the file into (defaults to the current directory).
    @type dir:          str
    @keyword bc:        The back-calculation flag which if True will cause the back-calculated rather than measured data to be written.
    @type bc:           bool
    @keyword force:     A flag which if True will cause any pre-existing file to be overwritten.
    @type force:        bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True, rdc_id=align_id, rdc=True)

    # Open the file for writing.
    file = open_write_file(file, dir, force)

    # Loop over the interatomic data containers and collect the data.
    data = []
    for interatom in interatomic_loop():
        # Skip deselected containers.
        if not interatom.select:
            continue

        # Skip containers with no RDCs.
        if not bc and (not hasattr(interatom, 'rdc') or align_id not in interatom.rdc.keys()):
            continue
        elif bc and (not hasattr(interatom, 'rdc_bc') or align_id not in interatom.rdc_bc.keys()):
            continue

        # Append the spin data.
        data.append([])
        data[-1].append(interatom.spin_id1)
        data[-1].append(interatom.spin_id2)

        # Handle the missing rdc_data_types variable.
        data_type = None
        if hasattr(interatom, 'rdc_data_types'):
            data_type = interatom.rdc_data_types[align_id]

        # The value.
        if bc:
            data[-1].append(repr(convert(interatom.rdc_bc[align_id], data_type, align_id)))
        else:
            data[-1].append(repr(convert(interatom.rdc[align_id], data_type, align_id)))

        # The error.
        if hasattr(interatom, 'rdc_err') and align_id in interatom.rdc_err.keys():
            data[-1].append(repr(convert(interatom.rdc_err[align_id], data_type, align_id)))
        else:
            data[-1].append(repr(None))

    # Write out.
    write_data(out=file, headings=["Spin_ID1", "Spin_ID2", "RDCs", "RDC_error"], data=data)
def run(input_path, output_path):
    # Read the results as input data
    results = read_data(input_path)

    # Retrieve the best result
    best_result = sorted(results, key=lambda k: k.average_f1(), reverse=True)[0]

    # Output the score into the specified file
    write_data(output_path, best_result)
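
# Sorting the whole result list just to take its head is O(n log n); max()
# with the same key is an equivalent O(n) one-liner. A sketch, assuming the
# result objects expose the same average_f1() method used above:
def best_by_f1(results):
    return max(results, key=lambda result: result.average_f1())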
def create_par_chi2(self, file_prefix, par_chi2_vals):
    """Function for creating file with parameters and the chi2 value."""

    # Print out.
    print("\nCreating the file with parameters and the chi2 value.")

    # Open the file.
    par_file = open_write_file(file_name=file_prefix + '.par', dir=self.dir, force=True)

    # Copy the nested list to sort it.
    par_chi2_vals_sort = deepcopy(par_chi2_vals)

    # Then sort the value.
    par_chi2_vals_sort.sort(key=lambda values: values[4])

    # Collect the data structure, which is a list of list of strings.
    data = []
    for i, line in enumerate(par_chi2_vals):
        line_sort = par_chi2_vals_sort[i]

        # Convert values to strings.
        line_str = ["%3.5f" % j for j in line]
        line_sort_str = ["%3.5f" % j for j in line_sort]

        # Convert the index from float to index.
        line_str[0] = "%i" % line[0]
        line_sort_str[0] = "%i" % line_sort[0]

        # Merge the two lists and append to data.
        data_list = line_str + line_sort_str
        data.append(data_list)

    # Make the headings.
    headings = ['i'] + self.params + ['chi2']
    headings += headings

    # Add "_sort" to headings.
    headings[5] = headings[5] + "_sort"
    headings[6] = headings[6] + "_sort"
    headings[7] = headings[7] + "_sort"
    headings[8] = headings[8] + "_sort"
    headings[9] = headings[9] + "_sort"

    # Write the parameters and chi2 values to file.
    write_data(out=par_file, headings=headings, data=data)

    # Close the file.
    par_file.close()
def show_apod_rmsd_to_file(file_name=None, dir=None, path_to_command='showApod', outdir=None, force=False):
    """Extract showApod 'Noise Std Dev' from showApod, and write to file with same filename and ending '.rmsd'.

    @keyword file_name:         The filename of the NMRPipe fourier transformed file.
    @type file_name:            str
    @keyword dir:               The directory where the file is located.
    @type dir:                  str
    @keyword path_to_command:   If showApod not in PATH, then specify absolute path as: /path/to/showApod
    @type path_to_command:      str
    @keyword outdir:            The directory where to write the file.  If None, then write in same directory.
    @type outdir:               str
    @keyword force:             Boolean argument which if True causes the file to be overwritten if it already exists.
    @type force:                bool
    @return:                    The path to the file containing the 'Noise Std Dev' from showApod, written with the same file root and the ending '.rmsd'.
    @rtype:                     str
    """

    # Call extract function.
    apod_rmsd = show_apod_rmsd(file_name=file_name, dir=dir, path_to_command=path_to_command)

    # Get the filename stripped of extension details.
    file_name_root = file_root(file_name)

    # Define extension.
    extension = ".rmsd"

    # Define file name for writing.
    file_name_out = file_name_root + extension

    # Define folder to write to.
    if outdir == None:
        write_outdir = dir
    else:
        write_outdir = outdir

    # Open file for writing.
    wfile, wfile_path = open_write_file(file_name=file_name_out, dir=write_outdir, force=force, verbosity=1, return_path=True)

    # Write to file.
    out_write_data = [['%s' % apod_rmsd]]

    # Write data.
    write_data(out=wfile, headings=None, data=out_write_data, sep=None)

    # Close file.
    wfile.close()

    # Return path to file.
    return wfile_path
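
# A rough sketch of what an extraction step like show_apod_rmsd() might do:
# run the NMRPipe showApod program on the spectrum and scan its report for
# the noise line. The exact wording of showApod's output is an assumption
# here; treat this only as an illustration of the parsing approach.
import subprocess

def extract_noise_std_dev(spectrum_path, path_to_command='showApod'):
    output = subprocess.check_output([path_to_command, spectrum_path], universal_newlines=True)
    for line in output.splitlines():
        # Hypothetical matching; the real report line may be formatted differently.
        if 'Noise Std Dev' in line:
            return float(line.split(':')[-1].strip())
    raise ValueError("No 'Noise Std Dev' line found in the showApod report.")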
def set_dist(spin_id1=None, spin_id2=None, ave_dist=None, unit='meter'):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:  The spin identifier string of the first spin of the pair.
    @type spin_id1:     str
    @keyword spin_id2:  The spin identifier string of the second spin of the pair.
    @type spin_id2:     str
    @keyword ave_dist:  The r^-3 averaged interatomic distance.
    @type ave_dist:     float
    @keyword unit:      The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:         str
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Unit conversion.
    if unit == 'Angstrom':
        ave_dist = ave_dist * 1e-10

    # Generate the selection objects.
    sel_obj1 = Selection(spin_id1)
    sel_obj2 = Selection(spin_id2)

    # Loop over the interatomic containers.
    data = []
    for interatom in interatomic_loop():
        # Get the spin info.
        mol_name1, res_num1, res_name1, spin1 = return_spin(spin_hash=interatom._spin_hash1, full_info=True)
        mol_name2, res_num2, res_name2, spin2 = return_spin(spin_hash=interatom._spin_hash2, full_info=True)

        # No match, either way.
        if not (sel_obj1.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj2.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)) and not (sel_obj2.contains_spin(spin_num=spin1.num, spin_name=spin1.name, res_num=res_num1, res_name=res_name1, mol=mol_name1) and sel_obj1.contains_spin(spin_num=spin2.num, spin_name=spin2.name, res_num=res_num2, res_name=res_name2, mol=mol_name2)):
            continue

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be set.")

    # Print out.
    print("The following averaged distances have been set:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
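
# Example call, assuming backbone amide N and H spins are already loaded: a
# 1.02 Angstrom N-H distance is converted internally to 1.02e-10 meters
# before being stored on each matching interatomic container. The spin ID
# strings are illustrative.
# set_dist(spin_id1='@N', spin_id2='@H', ave_dist=1.02, unit='Angstrom')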
def run(input_path, output_path, number_of_partitions, number_of_iterations, number_of_trials):
    # Read partitioned input data
    data = read_partitioned_data(input_path, number_of_iterations, number_of_partitions)

    # Define classification models and their corresponding parameters
    models = {
        'svm': {
            'C': (int, (5, 15)),
            'decision_function_shape': (tuple, ('ovo', 'ovr', None))
        },
        'random_forest': {
            'max_features': (int, (5, 15)),
            'class_weight': (tuple, ('balanced', 'balanced_subsample'))
        }
    }

    # Initialise the grid search result list
    results = []

    # If the number of trials is set, use grid search.
    if number_of_trials:
        # Iterate through each classification model defined.
        for algorithm, parameter_model in models.items():
            # Perform grid search and append the results to the complete
            # result list
            results += grid_search(data, algorithm, parameter_model, number_of_trials)

    # If the number of trials is not set, use regular classification.
    else:
        # Iterate through each algorithm defined.
        for algorithm in models.keys():
            # Perform classification and append the results to the complete
            # result list
            results.append(classify(data, classifier_from_algorithm[algorithm]))

    # Output the grid search results into the specified file
    write_data(output_path, results)
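
# The parameter model above encodes each hyperparameter as a (type, spec)
# pair: an int with an inclusive (low, high) range, or a tuple of discrete
# choices. A minimal sketch of how one random trial could be drawn from such
# a model; the real grid_search() sampling logic is not shown in this file,
# so this is an assumption about the encoding's intent.
import random

def sample_trial(parameter_model):
    trial = {}
    for name, (kind, spec) in parameter_model.items():
        if kind is int:
            trial[name] = random.randint(spec[0], spec[1])   # inclusive range
        else:
            trial[name] = random.choice(spec)                # discrete choice
    return trial

# Example: sample_trial({'C': (int, (5, 15)), 'kernel': (tuple, ('rbf', 'linear'))})
# might return {'C': 7, 'kernel': 'rbf'}.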
def write(file=None, dir=None, force=False):
    """Write the J coupling data to file.

    @keyword file:      The file name or object to write to.
    @type file:         str or file object
    @keyword dir:       The name of the directory to place the file into (defaults to the current directory).
    @type dir:          str
    @keyword force:     A flag which if True will cause any pre-existing file to be overwritten.
    @type force:        bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True, j=True)

    # Open the file for writing.
    file = open_write_file(file, dir, force)

    # Loop over the interatomic data containers and collect the data.
    data = []
    for interatom in interatomic_loop():
        # Skip deselected containers.
        if not interatom.select:
            continue

        # Skip containers with no J coupling.
        if not hasattr(interatom, 'j_coupling'):
            continue

        # Append the spin data.
        data.append([])
        data[-1].append(interatom.spin_id1)
        data[-1].append(interatom.spin_id2)

        # The value.
        data[-1].append(repr(interatom.j_coupling))

        # The error.
        if hasattr(interatom, 'j_coupling_err'):
            data[-1].append(repr(interatom.j_coupling_err))
        else:
            data[-1].append(repr(None))

    # Write out.
    write_data(out=file, headings=["Spin_ID1", "Spin_ID2", "J_coupling", "J_coupling_error"], data=data)
def aic():
    """Calculate and store Akaike's Information Criterion (AIC) for each model."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # Calculate the AIC value.
        aic = chi2 + 2.0*k

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the statistics.
        container.chi2 = chi2
        container.num_params = k
        container.aic = aic

        # Statistics printout.
        data = [
            ["Chi-squared value:", "%20f" % chi2],
            ["Number of parameters (k):", "%20i" % k],
            ["Akaike's Information Criterion (AIC):", "%20f" % aic]
        ]
        write_data(out=sys.stdout, data=data)
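
# The select() function further below dispatches on aic, aicc, and bic
# formula functions. For chi-squared based fitting these criteria take the
# standard forms sketched here (k = number of parameters, n = number of data
# points); the _formula suffix avoids clashing with the aic() function above,
# and whether the surrounding library defines them exactly like this is an
# assumption.
from math import log

def aic_formula(chi2, k, n):
    return chi2 + 2.0*k

def aicc_formula(chi2, k, n):
    # Small-sample-size corrected AIC.
    return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)

def bic_formula(chi2, k, n):
    return chi2 + k*log(n)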
def display(sort=False, rev=False):
    """Print the details of all the data pipes."""

    # Acquire the pipe lock, and make sure it is finally released.
    status.pipe_lock.acquire(sys._getframe().f_code.co_name)
    try:
        # Loop over the data pipes.
        pipe_names = []
        for pipe_name_i in ds:
            pipe_names.append(pipe_name_i)

        if sort:
            pipe_names = sort_filenames(filenames=pipe_names, rev=rev)

        data = []
        for pipe_name in pipe_names:
            # The current data pipe.
            current = ''
            if pipe_name == cdp_name():
                current = '*'

            # Store the data for the print out.
            data.append([repr(pipe_name), get_type(pipe_name), repr(get_bundle(pipe_name)), current])

    # Release the lock.
    finally:
        status.pipe_lock.release(sys._getframe().f_code.co_name)

    # Print out.
    write_data(out=sys.stdout, headings=["Data pipe name", "Data pipe type", "Bundle", "Current"], data=data)

    # Return data
    return data
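
# sort_filenames() above is not defined in this file; a natural
# (alphanumeric) sort such as the sketch below is presumably what it
# provides, so that 'pipe10' sorts after 'pipe2'. The helper name and
# behaviour here are assumptions.
import re

def natural_sort(filenames, rev=False):
    def key(name):
        # Split into digit and non-digit runs so numeric parts compare as ints.
        return [int(part) if part.isdigit() else part.lower()
                for part in re.split(r'(\d+)', name)]
    return sorted(filenames, key=key, reverse=rev)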
def run(input_path, output_path, classes):
    # Read the input data set from the specified input path
    input_data = read_data(input_path)

    # Change the list of classes to a set
    classes = set(classes)

    # Construct the output data set, filtering to only have the selected classes
    output_data = {
        'subjects': input_data['subjects'],
        'areas': input_data['areas'],
        'image_category': [],
        'neural_responses': []
    }
    for i in range(len(input_data['image_category'])):
        if input_data['image_category'][i] in classes:
            for field in ['image_category', 'neural_responses']:
                output_data[field].append(input_data[field][i])

    # Write the output data set to the specified output path
    write_data(output_path, output_data)
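
# A toy illustration of the filtering above, with a hypothetical two-class
# data set reduced to the 'face' class only:
# input_data = {'subjects': ['s1'], 'areas': ['V1'],
#               'image_category': ['face', 'house', 'face'],
#               'neural_responses': [[0.1], [0.5], [0.3]]}
# With classes=['face'], indices 0 and 2 are kept, producing
# image_category == ['face', 'face'] and neural_responses == [[0.1], [0.3]].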
def model_statistics():
    """Calculate and store the model statistics."""

    # Checks.
    check_pipe()

    # The specific analysis API object.
    api = return_api()

    # Calculate the chi2.
    print("Calculating the chi-squared value for the current parameter values.")
    api.calculate()

    # Loop over the base models.
    print("\nStoring the model statistics.")
    for model_info in api.model_loop():
        # Title printout.
        api.print_model_title(model_info=model_info)

        # Get the model statistics.
        k, n, chi2 = api.model_statistics(model_info=model_info)

        # The model container.
        container = api.get_model_container(model_info=model_info)

        # Store the values.
        container.chi2 = chi2
        container.num_params = k
        container.num_data_points = n

        # Statistics printout.
        data = [
            ['Chi-squared value:', "%20f" % chi2],
            ['Number of parameters (k):', "%20i" % k],
            ['Number of data points (n):', "%20i" % n]
        ]
        write_data(out=sys.stdout, data=data)
def run(raw_input_path, output_path_recall, output_path_precision, output_path_f1, time_windows, frequency_bands):
    # Convert time windows to integers
    time_windows = [int(time_window) for time_window in time_windows]

    # Initialise the integrated score data dictionaries
    integrated_recall = {}
    integrated_precision = {}
    integrated_f1 = {}
    for time_window in time_windows:
        integrated_recall[time_window] = {}
        integrated_precision[time_window] = {}
        integrated_f1[time_window] = {}
        for frequency_band in frequency_bands:
            integrated_recall[time_window][frequency_band] = None
            integrated_precision[time_window][frequency_band] = None
            integrated_f1[time_window][frequency_band] = None

    # Read the scores from the input files into the integrated data dictionaries
    # Iterate through each time window and frequency band pair
    for time_window in time_windows:
        for frequency_band in frequency_bands:
            # Construct the input file path
            input_path = raw_input_path.replace('TIMEWINDOW', str(time_window))\
                                       .replace('FREQUENCYBAND', frequency_band)

            # Read the input file
            input_data = read_data(input_path)

            # Add the average recall, precision and F1-score received from the
            # data into the integrated data dictionaries
            integrated_recall[time_window][frequency_band] = input_data.average_recall()
            integrated_precision[time_window][frequency_band] = input_data.average_precision()
            integrated_f1[time_window][frequency_band] = input_data.average_f1()

    # Output the integrated scores into the specified files
    write_data(output_path_recall, integrated_recall)
    write_data(output_path_precision, integrated_precision)
    write_data(output_path_f1, integrated_f1)
def run(input_path, output_path, cv_amount, use_even_distribution):
    # Read the data set
    data = read_data(input_path)

    # Find the number of images in the data set
    number_of_images = len(data['image_category'])

    # Find all image classes in the data set
    classes = sorted(set(data['image_category']))

    # Initialise the list of partitioned indices
    partitioned_indices = [[] for i in range(cv_amount)]

    # If even distribution is set to be used, partition data within each class
    # separately and merge the resulting partitions into the partitioned
    # indices list, so the image class distribution in each partition would be
    # roughly the same.
    if use_even_distribution:
        # Construct a list of image indices corresponding to each image class
        indices = {}
        for image_class in classes:
            indices[image_class] = []
        for i in range(number_of_images):
            indices[data['image_category'][i]].append(i)

        # Randomly split each of these lists into k nearly equal parts, and
        # merge them by partitions
        for image_class in classes:
            # Partition the indices list for the current image class into k
            # nearly equal parts
            partitions_list = partition_list(indices[image_class], cv_amount)

            # Shuffle the partition list to ensure that cumulative partitions
            # after merging by partitions are roughly of equal size
            shuffle(partitions_list)

            # Merge the partitioned indices list for the current image class
            # into the general partitioned indices list by partitions
            for i in range(cv_amount):
                partitioned_indices[i] += partitions_list[i]

    # If even distribution is not set to be used, partition data randomly.
    else:
        # Partition the indices list into k nearly equal parts
        partitioned_indices = partition_list(range(number_of_images), cv_amount)

    # Sort all of the partitions
    for partition in partitioned_indices:
        partition.sort()

    # Partition data
    partitions = []
    for i in range(cv_amount):
        partitions.append({
            'subjects': data['subjects'],
            'areas': data['areas'],
            'image_category': [data['image_category'][j] for j in partitioned_indices[i]],
            'neural_responses': [data['neural_responses'][j] for j in partitioned_indices[i]]
        })

    # Save partitioned data
    for i in range(cv_amount):
        write_data(add_suffix_to_path(output_path, '-', i + 1), partitions[i])
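
# partition_list() is not defined in this file; below is a sketch of
# splitting a sequence into k nearly equal random parts, which is the
# behaviour the comments above describe. The name and exact semantics are
# assumptions.
from random import shuffle

def partition_list(items, k):
    items = list(items)
    shuffle(items)
    # Distribute round-robin so part sizes differ by at most one.
    return [items[i::k] for i in range(k)]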
        #print(k_stat, n_stat, chi2, "point is %s=%3.3f, %s=%3.3f" % (params[0], values[0], params[1], values[1]))

        # Progress incrementation and printout.
        percent = percent + percent_inc
        print("%-10s%8.3f%-8s%-8g" % ("Progress:", percent, "%, " + repr(values) + ", f(x): ", chi2))

        # Append to data.
        data.append(["%3.3f" % values[0], "%3.3f" % values[1], "%3.3f" % chi2])

        # Save all values of chi2, to help find a reasonable level for the Innermost, Inner, Middle and Outer isosurfaces.
        all_chi.append(chi2)

        # Increment the value of the second parameter.
        values[1] = values[1] + step_size[1]

    # Increment the value of the first parameter.
    values[0] = values[0] + step_size[0]

print("\nMin cluster point %s=%3.3f, %s=%3.3f, with chi2=%3.3f" % (params[0], pcm[0], params[1], pcm[1], pre_chi2))

# Open file
file_name = '1_create_surface_data_S65_dw_r2a_FT128.txt'
surface_file = open_write_file(file_name=file_name, dir=None, force=True)
write_data(out=surface_file, headings=headings, data=data)

# Close file
surface_file.close()
def read(file=None, dir=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for a NMRPipe seriesTab formatted file, the IDs are auto generated in the form of Z_Ai.
    @type spin_id:          None or str
    @keyword verbose:       A flag which if True will cause all chemical shift data loaded to be printed out.
    @type verbose:          bool
    """

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Read the peak list data.
    peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Loop over the assignments.
    data = []
    data_flag = False
    for assign in peak_list:
        # Loop over the dimensions of the peak list.
        for i in range(peak_list.dimensionality):
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[i], spin_name=assign.spin_names[i])

            # Get the spin container.
            spin = return_spin(spin_id)
            if not spin:
                warn(RelaxNoSpinWarning(spin_id))
                continue

            # Skip deselected spins.
            if not spin.select:
                continue

            # Store the shift.
            spin.chemical_shift = assign.shifts[i]

            # Switch the flag.
            data_flag = True

            # Append the data for printing out.
            data.append([spin_id, repr(spin.chemical_shift)])

    # No data.
    if not data_flag:
        raise RelaxError("No chemical shifts could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following chemical shifts have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Chemical shift"], data=data)
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Set up the magnetic dipole-dipole interaction.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist != None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

        # Unit conversion.
        if unit == 'Angstrom':
            ave_dist = ave_dist * 1e-10

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # No container found, so create it.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
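
# Example usage, assuming a whitespace-separated file 'distances.txt' of the
# form (spin ID 1, spin ID 2, averaged distance in Angstrom); the file name
# and spin IDs are illustrative:
#
#   :1@N   :1@H   1.02
#   :2@N   :2@H   1.02
#
# read_dist(file='distances.txt', unit='Angstrom', spin_id1_col=1, spin_id2_col=2, data_col=3)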
def pack_data(ri_id, ri_type, frq, values, errors, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, spin_id=None, gen_seq=False, verbose=True):
    """Pack the relaxation data into the data pipe and spin containers.

    The values, errors, and spin_ids arguments must be lists of equal length or None.  Each element i corresponds to a unique spin.

    @param ri_id:           The relaxation data ID string.
    @type ri_id:            str
    @param ri_type:         The relaxation data type, ie 'R1', 'R2', or 'NOE'.
    @type ri_type:          str
    @param frq:             The spectrometer proton frequency in Hz.
    @type frq:              float
    @keyword values:        The relaxation data for each spin.
    @type values:           None or list of float or float array
    @keyword errors:        The relaxation data errors for each spin.
    @type errors:           None or list of float or float array
    @keyword spin_ids:      The list of spin ID strings.  If the other spin identifiers are given, i.e. mol_names, res_nums, res_names, spin_nums, and/or spin_names, then this argument is not necessary.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_nums:         None or list of str
    @keyword res_names:     The list of residue names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_nums:        None or list of str
    @keyword spin_names:    The list of spin names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_names:       None or list of str
    @keyword gen_seq:       A flag which if True will cause the molecule, residue, and spin sequence data to be generated.
    @type gen_seq:          bool
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # The number of spins.
    N = len(values)

    # Test the data.
    if errors != None and len(errors) != N:
        raise RelaxError("The length of the errors arg (%s) does not match that of the value arg (%s)." % (len(errors), N))
    if spin_ids and len(spin_ids) != N:
        raise RelaxError("The length of the spin ID strings arg (%s) does not match that of the value arg (%s)." % (len(spin_ids), N))
    if mol_names and len(mol_names) != N:
        raise RelaxError("The length of the molecule names arg (%s) does not match that of the value arg (%s)." % (len(mol_names), N))
    if res_nums and len(res_nums) != N:
        raise RelaxError("The length of the residue numbers arg (%s) does not match that of the value arg (%s)." % (len(res_nums), N))
    if res_names and len(res_names) != N:
        raise RelaxError("The length of the residue names arg (%s) does not match that of the value arg (%s)." % (len(res_names), N))
    if spin_nums and len(spin_nums) != N:
        raise RelaxError("The length of the spin numbers arg (%s) does not match that of the value arg (%s)." % (len(spin_nums), N))
    if spin_names and len(spin_names) != N:
        raise RelaxError("The length of the spin names arg (%s) does not match that of the value arg (%s)." % (len(spin_names), N))

    # Generate some empty lists.
    if not mol_names:
        mol_names = [None] * N
    if not res_nums:
        res_nums = [None] * N
    if not res_names:
        res_names = [None] * N
    if not spin_nums:
        spin_nums = [None] * N
    if not spin_names:
        spin_names = [None] * N
    if errors == None:
        errors = [None] * N

    # Generate the spin IDs.
    if not spin_ids:
        spin_ids = []
        for i in range(N):
            spin_ids.append(generate_spin_id_unique(spin_num=spin_nums[i], spin_name=spin_names[i], res_num=res_nums[i], res_name=res_names[i], mol_name=mol_names[i]))

    # Initialise the global data for the current pipe if necessary.
    if not hasattr(cdp, 'ri_type'):
        cdp.ri_type = {}
    if not hasattr(cdp, 'ri_ids'):
        cdp.ri_ids = []

    # Set the spectrometer frequency.
    set_frequency(id=ri_id, frq=frq)

    # Update the global data.
    cdp.ri_ids.append(ri_id)
    cdp.ri_type[ri_id] = ri_type

    # The selection object.
    select_obj = None
    if spin_id:
        select_obj = Selection(spin_id)

    # Loop over the spin data.
    data = []
    for i in range(N):
        # Get the corresponding spin container.
        match_mol_names, match_res_nums, match_res_names, spins = return_spin_from_selection(spin_ids[i], full_info=True, multi=True)
        if spins in [None, []]:
            raise RelaxNoSpinError(spin_ids[i])

        # Remove non-matching spins.
        if select_obj:
            new_spins = []
            new_mol_names = []
            new_res_nums = []
            new_res_names = []
            new_ids = []
            for j in range(len(spins)):
                if select_obj.contains_spin(spin_num=spins[j].num, spin_name=spins[j].name, res_num=match_res_nums[j], res_name=match_res_names[j], mol=match_mol_names[j]):
                    new_spins.append(spins[j])
                    new_mol_names.append(match_mol_names[j])
                    new_res_nums.append(match_res_nums[j])
                    new_res_names.append(match_res_names[j])
                    new_ids.append(generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_num=spins[j].num, spin_name=spins[j].name))

            # Guard against all spins being filtered out, so the length checks below can report the problem.
            new_id = None
            if new_ids:
                new_id = new_ids[0]

        # Aliases for normal operation.
        else:
            new_spins = spins
            new_mol_names = match_mol_names
            new_res_nums = match_res_nums
            new_res_names = match_res_names
            new_id = spin_ids[i]
            new_ids = None

        # Check that only a single spin is present.
        if len(new_spins) > 1:
            raise RelaxMultiSpinIDError(spin_ids[i], new_ids)
        if len(new_spins) == 0:
            raise RelaxNoSpinError(spin_ids[i])

        # Loop over the spins.
        for j in range(len(new_spins)):
            # No match to the selection.
            if select_obj and not select_obj.contains_spin(spin_num=new_spins[j].num, spin_name=new_spins[j].name, res_num=new_res_nums[j], res_name=new_res_names[j], mol=new_mol_names[j]):
                continue

            # Initialise the spin data if necessary.
            if not hasattr(new_spins[j], 'ri_data') or new_spins[j].ri_data == None:
                new_spins[j].ri_data = {}
            if not hasattr(new_spins[j], 'ri_data_err') or new_spins[j].ri_data_err == None:
                new_spins[j].ri_data_err = {}

            # Update all data structures.
            new_spins[j].ri_data[ri_id] = values[i]
            new_spins[j].ri_data_err[ri_id] = errors[i]

            # Append the data for printing out.
            data.append([new_id, repr(values[i]), repr(errors[i])])

    # Print out.
    if verbose:
        print("\nThe following %s MHz %s relaxation data with the ID '%s' has been loaded into the relax data store:\n" % (frq/1e6, ri_type, ri_id))
        write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Error"], data=data)
def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
    """Model selection function.

    @keyword method:        The model selection method.  This can currently be one of:
                                - 'AIC', Akaike's Information Criteria.
                                - 'AICc', Small sample size corrected AIC.
                                - 'BIC', Bayesian or Schwarz Information Criteria.
                                - 'CV', Single-item-out cross-validation.
                            None of the other model selection techniques are currently supported.
    @type method:           str
    @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected data pipe.
    @type modsel_pipe:      str
    @keyword bundle:        The optional data pipe bundle to associate the newly created pipe with.
    @type bundle:           str or None
    @keyword pipes:         A list of the data pipes to use in the model selection.
    @type pipes:            list of str
    """

    # Test if the pipe already exists.
    if has_pipe(modsel_pipe):
        raise RelaxPipeError(modsel_pipe)

    # Use all pipes.
    if pipes == None:
        # Get all data pipe names from the relax data store.
        pipes = pipe_names()

    # Select the model selection technique.
    if method == 'AIC':
        print("AIC model selection.")
        formula = aic
    elif method == 'AICc':
        print("AICc model selection.")
        formula = aicc
    elif method == 'BIC':
        print("BIC model selection.")
        formula = bic
    elif method == 'CV':
        print("CV model selection.")
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
    else:
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")

    # No pipes.
    if len(pipes) == 0:
        raise RelaxError("No data pipes are available for use in model selection.")

    # Initialise.
    function_type = {}
    model_loop = {}
    model_type = {}
    duplicate_data = {}
    model_statistics = {}
    skip_function = {}
    modsel_pipe_exists = False

    # Cross validation setup.
    if isinstance(pipes[0], list):
        # No pipes.
        if len(pipes[0]) == 0:
            raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")

        # Loop over the data pipes.
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                # Specific functions.
                model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j]))
                model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j]))
                duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j]))
                model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j]))
                skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j]))

        # The model loop should be the same for all data pipes!
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
                    raise RelaxError("The models for each data pipe should be the same.")
        model_loop = model_loop[pipes[0][0]]

        # The model description.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global vs. local models.
        global_flag = False
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_type[pipes[i][j]]() == 'global':
                    global_flag = True

    # All other model selection setup.
    else:
        # Loop over the data pipes.
        for i in range(len(pipes)):
            # Specific functions.
            model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i]))
            model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i]))
            duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i]))
            model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i]))
            skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i]))
        model_loop = model_loop[pipes[0]]

        # The model description.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global vs. local models.
        global_flag = False
        for j in range(len(pipes)):
            if model_type[pipes[j]]() == 'global':
                global_flag = True

    # Loop over the base models.
    for model_info in model_loop():
        # Print out.
        print("\n")
        desc = model_desc(model_info)
        if desc:
            print(desc)

        # Initial model.
        best_model = None
        best_crit = 1e300
        data = []

        # Loop over the pipes.
        for j in range(len(pipes)):
            # Single-item-out cross validation.
            if method == 'CV':
                # Sum of chi-squared values.
                sum_crit = 0.0

                # Loop over the validation samples and sum the chi-squared values.
                for k in range(len(pipes[j])):
                    # Alias the data pipe name.
                    pipe = pipes[j][k]

                    # Switch to this pipe.
                    switch(pipe)

                    # Skip function.
                    if skip_function[pipe](model_info):
                        continue

                    # Get the model statistics.
                    k, n, chi2 = model_statistics[pipe](model_info)

                    # Missing data sets.
                    if k == None or n == None or chi2 == None:
                        continue

                    # Chi2 sum.
                    sum_crit = sum_crit + chi2

                # Cross-validation criterion (average chi-squared value).
                crit = sum_crit / float(len(pipes[j]))

            # Other model selection methods.
            else:
                # Reassign the pipe.
                pipe = pipes[j]

                # Switch to this pipe.
                switch(pipe)

                # Skip function.
                if skip_function[pipe](model_info):
                    continue

                # Get the model statistics.
                k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)

                # Missing data sets.
                if k == None or n == None or chi2 == None:
                    continue

                # Calculate the criterion value.
                crit = formula(chi2, float(k), float(n))

                # Store the values for a later printout.
                data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit])

            # Select model.
            if crit < best_crit:
                best_model = pipe
                best_crit = crit

        # Write out the table.
        write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data)

        # Duplicate the data from the 'best_model' to the model selection data pipe.
        if best_model != None:
            # Print out of selected model.
            print("The model from the data pipe " + repr(best_model) + " has been selected.")

            # Switch to the selected data pipe.
            switch(best_model)

            # Duplicate.
            duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)

            # Model selection pipe now exists.
            modsel_pipe_exists = True

        # No model selected.
        else:
            # Print out of selected model.
            print("No model has been selected.")

    # Switch to the model selection pipe.
    if modsel_pipe_exists:
        switch(modsel_pipe)

    # Bundle the data pipe.
    if bundle:
        pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible print.
    if verbose:
        print("\nThe following signal to noise ratios have been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError("Intensity error analysis has not been performed.  Please see spectrum.error_analysis().")

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the IDs.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the sn_ratio.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the sn_ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under sorted IDs.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        if verbose:
            section(file=sys.stdout, text="Signal to noise ratio for spin ID '%s'" % spin_id, prespace=1)
            write_data(out=sys.stdout, headings=["Spectrum ID", "Signal", "Noise", "S/N"], data=data_i)
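
# The per-spin ratio above is simply signal over noise. A self-contained
# sketch of the same calculation for plain dictionaries of intensities and
# errors (the names here are illustrative, not the library API):
def signal_to_noise(intensities, errors):
    """Return {spectrum_id: intensity/error} for matching keys."""
    return {sid: float(intensities[sid]) / float(errors[sid]) for sid in intensities}

# Example: signal_to_noise({'spec1': 1.2e6}, {'spec1': 4.0e4}) -> {'spec1': 30.0}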
def pack_data(ri_id, ri_type, frq, values, errors, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, spin_id=None, gen_seq=False, verbose=True):
    """Pack the relaxation data into the data pipe and spin containers.

    The values, errors, and spin_ids arguments must be lists of equal length or None.  Each element i corresponds to a unique spin.

    @param ri_id:           The relaxation data ID string.
    @type ri_id:            str
    @param ri_type:         The relaxation data type, ie 'R1', 'R2', or 'NOE'.
    @type ri_type:          str
    @param frq:             The spectrometer proton frequency in Hz.
    @type frq:              float
    @keyword values:        The relaxation data for each spin.
    @type values:           None or list of float or float array
    @keyword errors:        The relaxation data errors for each spin.
    @type errors:           None or list of float or float array
    @keyword spin_ids:      The list of spin ID strings.  If the other spin identifiers are given, i.e. mol_names, res_nums, res_names, spin_nums, and/or spin_names, then this argument is not necessary.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_nums:         None or list of str
    @keyword res_names:     The list of residue names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_nums:        None or list of str
    @keyword spin_names:    The list of spin names used for creating the spin IDs (if not given) or for generating the sequence data.
    @type spin_names:       None or list of str
    @keyword gen_seq:       A flag which if True will cause the molecule, residue, and spin sequence data to be generated.
    @type gen_seq:          bool
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # The number of spins.
    N = len(values)

    # Test the data.
    if errors != None and len(errors) != N:
        raise RelaxError("The length of the errors arg (%s) does not match that of the value arg (%s)." % (len(errors), N))
    if spin_ids and len(spin_ids) != N:
        raise RelaxError("The length of the spin ID strings arg (%s) does not match that of the value arg (%s)." % (len(spin_ids), N))
    if mol_names and len(mol_names) != N:
        raise RelaxError("The length of the molecule names arg (%s) does not match that of the value arg (%s)." % (len(mol_names), N))
    if res_nums and len(res_nums) != N:
        raise RelaxError("The length of the residue numbers arg (%s) does not match that of the value arg (%s)." % (len(res_nums), N))
    if res_names and len(res_names) != N:
        raise RelaxError("The length of the residue names arg (%s) does not match that of the value arg (%s)." % (len(res_names), N))
    if spin_nums and len(spin_nums) != N:
        raise RelaxError("The length of the spin numbers arg (%s) does not match that of the value arg (%s)." % (len(spin_nums), N))
    if spin_names and len(spin_names) != N:
        raise RelaxError("The length of the spin names arg (%s) does not match that of the value arg (%s)." % (len(spin_names), N))

    # Generate some empty lists.
    if not mol_names:
        mol_names = [None] * N
    if not res_nums:
        res_nums = [None] * N
    if not res_names:
        res_names = [None] * N
    if not spin_nums:
        spin_nums = [None] * N
    if not spin_names:
        spin_names = [None] * N
    if errors == None:
        errors = [None] * N

    # Generate the spin IDs.
    if not spin_ids:
        spin_ids = []
        for i in range(N):
            spin_ids.append(generate_spin_id_unique(spin_num=spin_nums[i], spin_name=spin_names[i], res_num=res_nums[i], res_name=res_names[i], mol_name=mol_names[i]))

    # Initialise the global data for the current pipe if necessary.
    if not hasattr(cdp, 'ri_type'):
        cdp.ri_type = {}
    if not hasattr(cdp, 'ri_ids'):
        cdp.ri_ids = []

    # Set the spectrometer frequency.
    set_frequency(id=ri_id, frq=frq)

    # Update the global data.
    cdp.ri_ids.append(ri_id)
    cdp.ri_type[ri_id] = ri_type

    # The selection object.
    select_obj = None
    if spin_id:
        select_obj = Selection(spin_id)

    # Loop over the spin data.
    data = []
    for i in range(N):
        # A selection union.
        select_id = spin_ids[i]
        if spin_id != None:
            select_id = "%s&%s" % (select_id, spin_id)

        # Get the corresponding spin containers.
        match_mol_names, match_res_nums, match_res_names, spins = return_spin_from_selection(selection=select_id, full_info=True, multi=True)

        # No spin.
        if len(spins) == 0:
            continue

        # Check that multiple spins are not present.
        if len(spins) > 1:
            # Generate the list of spin IDs.
            new_ids = []
            for j in range(len(spins)):
                new_ids.append(generate_spin_id_unique(mol_name=match_mol_names[j], res_num=match_res_nums[j], res_name=match_res_names[j], spin_num=spins[j].num, spin_name=spins[j].name))

            # Raise the error.
            raise RelaxMultiSpinIDError(spin_ids[i], new_ids)

        # Check that at least one spin is present.
        if len(spins) == 0:
            raise RelaxNoSpinError(spin_ids[i])

        # Loop over the spins.
        for j in range(len(spins)):
            # No match to the selection.
            if select_obj and not select_obj.contains_spin(spin_num=spins[j].num, spin_name=spins[j].name, res_num=res_nums[j], res_name=res_names[j], mol=mol_names[j]):
                continue

            # Initialise the spin data if necessary.
            if not hasattr(spins[j], 'ri_data') or spins[j].ri_data == None:
                spins[j].ri_data = {}
            if not hasattr(spins[j], 'ri_data_err') or spins[j].ri_data_err == None:
                spins[j].ri_data_err = {}

            # Update all data structures.
            spins[j].ri_data[ri_id] = values[i]
            spins[j].ri_data_err[ri_id] = errors[i]

            # Append the data for printing out.
            data.append([spin_ids[i], repr(values[i]), repr(errors[i])])

    # Print out.
    if verbose:
        print("\nThe following %s MHz %s relaxation data with the ID '%s' has been loaded into the relax data store:\n" % (frq/1e6, ri_type, ri_id))
        write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Error"], data=data)
def define(spin_id1=None, spin_id2=None, pipe=None, direct_bond=False, spin_selection=False, verbose=True):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:          The spin identifier string of the first spin of the pair.
    @type spin_id1:             str
    @keyword spin_id2:          The spin identifier string of the second spin of the pair.
    @type spin_id2:             str
    @param pipe:                The data pipe to operate on.  Defaults to the current data pipe.
    @type pipe:                 str
    @keyword direct_bond:       A flag specifying if the two spins are directly bonded.
    @type direct_bond:          bool
    @keyword spin_selection:    Define the interatomic data container selection based on the spin selection.  If either spin is deselected, the interatomic container will also be deselected.  Otherwise the container will be selected.
    @type spin_selection:       bool
    @keyword verbose:           A flag which if True will result in printouts of the created interatomic data containers.
    @type verbose:              bool
    """

    # The data pipe.
    if pipe == None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Initialise the spin ID pairs list.
    ids = []
    spin_selections = []

    # Use the structural data to find connected atoms.
    if hasattr(dp, 'structure'):
        # The selection objects.
        selection1 = cdp.structure.selection(atom_id=spin_id1)
        selection2 = cdp.structure.selection(atom_id=spin_id2)

        # Loop over the atoms of the first spin selection.
        for mol_name1, res_num1, res_name1, atom_num1, atom_name1, mol_index1, atom_index1 in dp.structure.atom_loop(selection=selection1, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
            # Generate the first spin ID.
            id1 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name1, res_num=res_num1, res_name=res_name1, spin_num=atom_num1, spin_name=atom_name1)

            # Does the spin exist?
            spin1 = return_spin(id1)
            if not spin1:
                continue

            # Loop over the atoms of the second spin selection.
            for mol_name2, res_num2, res_name2, atom_num2, atom_name2, mol_index2, atom_index2 in dp.structure.atom_loop(selection=selection2, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # Skip non-bonded atom pairs.
                    if not dp.structure.are_bonded_index(mol_index1=mol_index1, atom_index1=atom_index1, mol_index2=mol_index2, atom_index2=atom_index2):
                        continue

                # Generate the second spin ID.
                id2 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name2, res_num=res_num2, res_name=res_name2, spin_num=atom_num2, spin_name=atom_name2)

                # Does the spin exist?
                spin2 = return_spin(id2)
                if not spin2:
                    continue

                # Store the IDs for the printout.
                ids.append([id1, id2])
                spin_selections.append([spin1.select, spin2.select])

    # No structural data present or the spin IDs are not in the structural data, so use spin loops and some basic rules.
    if ids == []:
        for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True):
            for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # No element info.
                    if not hasattr(spin1, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id1)
                    if not hasattr(spin2, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id2)

                    # Backbone NH and CH pairs.
                    pair = False
                    if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'):
                        pair = True
                    elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'):
                        pair = True

                    # Not in the same residue, so skip.
                    if pair and res_num1 != None and res_num1 != res_num2:
                        continue
                    elif pair and res_num1 == None and res_name1 != res_name2:
                        continue

                # Store the IDs for the printout.
                ids.append([id1, id2])
                spin_selections.append([spin1.select, spin2.select])

    # No matches, so fail!
    if not len(ids):
        # Find the problem.
        count1 = 0
        count2 = 0
        for spin in spin_loop(spin_id1):
            count1 += 1
        for spin in spin_loop(spin_id2):
            count2 += 1

        # Report the problem.
        if count1 == 0 and count2 == 0:
            raise RelaxError("Neither of the spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2))
        elif count1 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id1)
        elif count2 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id2)
        else:
            raise RelaxError("Unknown error.")

    # Define the interaction.
    for i in range(len(ids)):
        # Unpack.
        id1, id2 = ids[i]

        # Get the interatomic data object, if it exists.
        interatom = return_interatom(id1, id2, pipe=pipe)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=id1, spin_id2=id2, pipe=pipe)

        # Check that this has not already been set up.
        if interatom.dipole_pair:
            raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2))

        # Set a flag indicating that a dipole-dipole interaction is present.
        interatom.dipole_pair = True

        # Set the selection.
        if spin_selection:
            interatom.select = False
            if spin_selections[i][0] and spin_selections[i][1]:
                interatom.select = True

    # Printout.
    if verbose:
        # Conversion.
        for i in range(len(ids)):
            ids[i][0] = repr(ids[i][0])
            ids[i][1] = repr(ids[i][1])

        # The printout.
        print("Interatomic interactions are now defined for the following spins:\n")
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True): """Read the peak intensity data. @keyword file: The name of the file(s) containing the peak intensities. @type file: str or list of str @keyword dir: The directory where the file is located. @type dir: str @keyword spectrum_id: The spectrum identification string. @type spectrum_id: str or list of str @keyword dim: The dimension of the peak list to associate the data with. @type dim: int @keyword int_col: The column containing the peak intensity data (used by the generic intensity file format). @type int_col: int or list of int @keyword int_method: The integration method, one of 'height', 'point sum' or 'other'. @type int_method: str @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. @type spin_id_col: int or None @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type mol_name_col: int or None @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type res_name_col: int or None @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type res_num_col: int or None @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type spin_name_col: int or None @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type spin_num_col: int or None @keyword sep: The column separator which, if None, defaults to whitespace. @type sep: str or None @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai. @type spin_id: None or str @keyword ncproc: The Bruker ncproc binary intensity scaling factor. @type ncproc: int or None @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out. @type verbose: bool """ # Data checks. check_pipe() check_mol_res_spin_data() # Check the file name. if file == None: raise RelaxError("The file name must be supplied.") # Test that the intensity measures are identical. if hasattr(cdp, 'int_method') and cdp.int_method != int_method: raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method)) # Multiple ID flags. flag_multi = False flag_multi_file = False flag_multi_col = False if isinstance(spectrum_id, list) or spectrum_id == 'auto': flag_multi = True if isinstance(file, list): flag_multi_file = True if isinstance(int_col, list) or spectrum_id == 'auto': flag_multi_col = True # List argument checks. if flag_multi: # Too many lists. 
if flag_multi_file and flag_multi_col: raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.") # Not enough lists. if not flag_multi_file and not flag_multi_col: raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.") # List lengths for multiple files. if flag_multi_file and len(spectrum_id) != len(file): raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id)) # List lengths for multiple intensity columns. if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col): raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col)) # More list argument checks (when only one spectrum ID is supplied). else: # Multiple files. if flag_multi_file: raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.") # Multiple intensity columns. if flag_multi_col: raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.") # Intensity column checks. if spectrum_id != 'auto' and not flag_multi and flag_multi_col: raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.") # Check the intensity measure. if not int_method in ['height', 'point sum', 'other']: raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method) # Set the peak intensity measure. cdp.int_method = int_method # Convert the file argument to a list if necessary. if not isinstance(file, list): file = [file] # Loop over all files. for file_index in range(len(file)): # Read the peak list data. peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) # Automatic spectrum IDs. if spectrum_id == 'auto': spectrum_id = peak_list[0].intensity_name # Loop over the assignments. data = [] data_flag = False for assign in peak_list: # Generate the spin_id. spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1]) # Convert the intensity data to a list if needed. intensity = assign.intensity if not isinstance(intensity, list): intensity = [intensity] # Loop over the intensity data. for int_index in range(len(intensity)): # Sanity check. if intensity[int_index] == 0.0: warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id)) # Get the spin container. spin = return_spin(spin_id) if not spin: warn(RelaxNoSpinWarning(spin_id)) continue # Skip deselected spins. if not spin.select: continue # Initialise. if not hasattr(spin, 'peak_intensity'): spin.peak_intensity = {} # Intensity scaling. if ncproc != None: intensity[int_index] = intensity[int_index] / float(2**ncproc) # Add the data. if flag_multi_file: id = spectrum_id[file_index] elif flag_multi_col: id = spectrum_id[int_index] else: id = spectrum_id spin.peak_intensity[id] = intensity[int_index] # Switch the flag. data_flag = True # Append the data for printing out. data.append([spin_id, repr(intensity[int_index])]) # Add the spectrum id (and ncproc) to the relax data store. 
spectrum_ids = spectrum_id if isinstance(spectrum_id, str): spectrum_ids = [spectrum_id] if ncproc != None and not hasattr(cdp, 'ncproc'): cdp.ncproc = {} for i in range(len(spectrum_ids)): add_spectrum_id(spectrum_ids[i]) if ncproc != None: cdp.ncproc[spectrum_ids[i]] = ncproc # No data. if not data_flag: # Delete all the data. delete(spectrum_id) # Raise the error. raise RelaxError("No data could be loaded from the peak list") # Printout. if verbose: print("\nThe following intensities have been loaded into the relax data store:\n") write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data) print('')
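# Usage sketch (not from the source): loading a single hypothetical peak list
# as the reference spectrum. The file and spectrum ID names are invented;
# int_method must be one of 'height', 'point sum' or 'other', as checked above.
read(file='ref.list', spectrum_id='ref', dim=1, int_method='height')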
def read(file=None, dir=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True): """Read the peak intensity data. @keyword file: The name of the file containing the peak intensities. @type file: str @keyword dir: The directory where the file is located. @type dir: str @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. @type spin_id_col: int or None @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type mol_name_col: int or None @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type res_name_col: int or None @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type res_num_col: int or None @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type spin_name_col: int or None @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None. @type spin_num_col: int or None @keyword sep: The column separator which, if None, defaults to whitespace. @type sep: str or None @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai. @type spin_id: None or str @keyword verbose: A flag which if True will cause all chemical shift data loaded to be printed out. @type verbose: bool """ # Test if the current data pipe exists. check_pipe() # Test if sequence data is loaded. if not exists_mol_res_spin_data(): raise RelaxNoSequenceError # Check the file name. if file == None: raise RelaxError("The file name must be supplied.") # Read the peak list data. peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) # Loop over the assignments. data = [] data_flag = False for assign in peak_list: # Loop over the dimensions of the peak list. for i in range(peak_list.dimensionality): # Generate the spin_id. spin_id = generate_spin_id_unique(res_num=assign.res_nums[i], spin_name=assign.spin_names[i]) # Get the spin container. spin = return_spin(spin_id=spin_id) if not spin: warn(RelaxNoSpinWarning(spin_id)) continue # Skip deselected spins. if not spin.select: continue # Store the shift. spin.chemical_shift = assign.shifts[i] # Switch the flag. data_flag = True # Append the data for printing out. data.append([spin_id, repr(spin.chemical_shift)]) # No data. if not data_flag: raise RelaxError("No chemical shifts could be loaded from the peak list") # Print out. if verbose: print("\nThe following chemical shifts have been loaded into the relax data store:\n") write_data(out=sys.stdout, headings=["Spin_ID", "Chemical shift"], data=data)
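# Usage sketch (not from the source): a minimal call to the chemical shift
# read() function above. The file name, directory and the spin ID
# restriction are invented for illustration.
read(file='shifts.list', dir='data', spin_id=':2-50')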
def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"): """Function for writing the spin specific data to file. Description =========== This function writes a columnar formatted file where each line corresponds to a spin system. Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number. @param file: The name of the file to write the data to (or alternatively an already opened file object). @type file: str or file object @keyword dir: The directory to place the file into (defaults to the current directory if None and the file argument is not a file object). @type dir: str or None @keyword sep: The column separator which, if None, defaults to whitespace. @type sep: str or None @keyword spin_ids: The list of spin ID strings. @type spin_ids: None or list of str @keyword mol_names: The list of molecule names. @type mol_names: None or list of str @keyword res_nums: The list of residue numbers. @type res_nums: None or list of int @keyword res_names: The list of residue names. @type res_names: None or list of str @keyword spin_nums: The list of spin numbers. @type spin_nums: None or list of int @keyword spin_names: The list of spin names. @type spin_names: None or list of str @keyword force: A flag which if True will cause an existing file to be overwritten. @type force: bool @keyword data: A list of the data to write out. The first dimension corresponds to the spins. A second dimension can also be given if multiple data sets across multiple columns are desired. @type data: list or list of lists @keyword data_name: A name corresponding to the data argument. If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg. @type data_name: str or list of str @keyword error: A list of the errors to write out. The first dimension corresponds to the spins. A second dimension can also be given if multiple data sets across multiple columns are desired. These will be interspersed between the data columns, if the data is given. If the data arg is not None, then this must have the same dimensions as that object. @type error: list or list of lists @keyword error_name: A name corresponding to the error argument. If the error argument is a list of lists, then this must also be a list with the same length as the second dimension of the error arg. @type error_name: str or list of str @keyword float_format: A float formatting string to use for the data and error whenever a float is found. @type float_format: str """ # Data argument tests. if data: # Data is a list of lists. if isinstance(data[0], list): # Data and data_name don't match. if not isinstance(data_name, list): raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name) # Error doesn't match. if error and (len(data) != len(error) or len(data[0]) != len(error[0])): raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error)) # Data is a simple list. else: # Data and data_name don't match. if not isinstance(data_name, str): raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name) # Error doesn't match.
if error and len(data) != len(error): raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error)) # Error argument tests. if error: # Error is a list of lists. if isinstance(error[0], list): # Error and error_name don't match. if not isinstance(error_name, list): raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name) # Error is a simple list. else: # Error and error_name don't match. if not isinstance(error_name, str): raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name) # Number of spins check. args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names] arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names'] N = None first_arg = None first_arg_name = None for i in range(len(args)): if isinstance(args[i], list): # First list match. if N == None: N = len(args[i]) first_arg = args[i] first_arg_name = arg_names[i] # Length check. if len(args[i]) != N: raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, arg_names[i], len(first_arg), len(args[i]))) # Nothing?!? if N == None: raise RelaxError("No spin ID data is present.") # Data and error length check. if data and len(data) != N: raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data))) if error and len(error) != N: raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error))) # The spin arguments. args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names] arg_names = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name'] # Init. headings = [] file_data = [] # Headers - the spin ID info. for i in range(len(args)): if args[i]: headings.append(arg_names[i]) # Headers - the data. if data: # List of lists. if isinstance(data[0], list): # Loop over the list. for i in range(len(data[0])): # The data. headings.append(data_name[i]) # The error. if error: headings.append(error_name[i]) # Simple list. else: # The data. headings.append(data_name) # The error. if error: headings.append(error_name) # Headers - only errors. elif error: # List of lists. if isinstance(error[0], list): for i in range(len(error[0])): headings.append(error_name[i]) # Simple list. else: headings.append(error_name) # No headings. if headings == []: headings = None # Spin specific data. for spin_index in range(N): # Append a new data row. file_data.append([]) # The spin ID info. for i in range(len(args)): if args[i]: value = args[i][spin_index] if not isinstance(value, str): value = repr(value) file_data[-1].append(value) # The data. if data: # List of lists. if isinstance(data[0], list): # Loop over the list. for i in range(len(data[0])): # The data. if is_float(data[spin_index][i]): file_data[-1].append(float_format % data[spin_index][i]) else: file_data[-1].append(repr(data[spin_index][i])) # The error. if error: if is_float(error[spin_index][i]): file_data[-1].append(float_format % error[spin_index][i]) else: file_data[-1].append(repr(error[spin_index][i])) # Simple list. else: # The data. if is_float(data[spin_index]): file_data[-1].append(float_format % data[spin_index]) else: file_data[-1].append(repr(data[spin_index])) # The error. 
if error: if is_float(error[spin_index]): file_data[-1].append(float_format % error[spin_index]) else: file_data[-1].append(repr(error[spin_index])) # Only errors. elif error: # List of lists. if isinstance(error[0], list): for i in range(len(error[0])): file_data[-1].append(repr(error[spin_index][i])) # Simple list. else: file_data[-1].append(repr(error[spin_index])) # No data to write, so do nothing! if file_data == [] or file_data == [[]]: return # Open the file for writing. file = open_write_file(file_name=file, dir=dir, force=force) # Write out the file data. write_data(out=file, headings=headings, data=file_data, sep=sep)
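# Usage sketch (not from the source): writing one invented data column with
# errors for two spins. With a simple data list, data_name and error_name
# must be plain strings, as enforced by the checks above.
write_spin_data(file='noe.txt', spin_ids=[':1@N', ':2@N'], data=[0.82, 0.91], data_name='NOE', error=[0.02, 0.03], error_name='NOE_error', force=True)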
def copy(pipe_from=None, pipe_to=None, align_id=None, back_calc=True): """Copy the PCS data from one data pipe to another. @keyword pipe_from: The data pipe to copy the PCS data from. This defaults to the current data pipe. @type pipe_from: str @keyword pipe_to: The data pipe to copy the PCS data to. This defaults to the current data pipe. @type pipe_to: str @keyword align_id: The alignment ID string. @type align_id: str @keyword back_calc: A flag which if True will cause any back-calculated PCSs present to also be copied with the real values and errors. @type back_calc: bool """ # Defaults. if pipe_from == None and pipe_to == None: raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.") elif pipe_from == None: pipe_from = pipes.cdp_name() elif pipe_to == None: pipe_to = pipes.cdp_name() # Check the pipe setup. check_pipe_setup(pipe=pipe_from, pcs_id=align_id, sequence=True, pcs=True) check_pipe_setup(pipe=pipe_to, sequence=True) # Get the data pipes. dp_from = pipes.get_pipe(pipe_from) dp_to = pipes.get_pipe(pipe_to) # The IDs. if align_id == None: align_ids = dp_from.align_ids else: align_ids = [align_id] # Init target pipe global structures. if not hasattr(dp_to, 'align_ids'): dp_to.align_ids = [] if not hasattr(dp_to, 'pcs_ids'): dp_to.pcs_ids = [] # Loop over the align IDs. for align_id in align_ids: # Printout. print("\nCopying PCSs for the alignment ID '%s'." % align_id) # Copy the global data. if align_id in dp_from.align_ids and align_id not in dp_to.align_ids: dp_to.align_ids.append(align_id) if align_id in dp_from.pcs_ids and align_id not in dp_to.pcs_ids: dp_to.pcs_ids.append(align_id) # Spin loop. data = [] for spin_from, spin_id in spin_loop(return_id=True, skip_desel=True, pipe=pipe_from): # Find the matching spin container in the target data pipe. spin_to = return_spin(spin_id, pipe=pipe_to) # No matching spin container. if spin_to == None: warn(RelaxWarning("The spin container for the spin '%s' cannot be found in the target data pipe." % spin_id)) continue # No data or errors. if (not hasattr(spin_from, 'pcs') or not align_id in spin_from.pcs) and (not hasattr(spin_from, 'pcs_err') or not align_id in spin_from.pcs_err): continue # Initialise the spin data if necessary. if hasattr(spin_from, 'pcs') and not hasattr(spin_to, 'pcs'): spin_to.pcs = {} if back_calc and hasattr(spin_from, 'pcs_bc') and not hasattr(spin_to, 'pcs_bc'): spin_to.pcs_bc = {} if hasattr(spin_from, 'pcs_err') and not hasattr(spin_to, 'pcs_err'): spin_to.pcs_err = {} # Copy the value and error from pipe_from. value = None error = None value_bc = None if hasattr(spin_from, 'pcs'): value = spin_from.pcs[align_id] spin_to.pcs[align_id] = value if back_calc and hasattr(spin_from, 'pcs_bc'): value_bc = spin_from.pcs_bc[align_id] spin_to.pcs_bc[align_id] = value_bc if hasattr(spin_from, 'pcs_err'): error = spin_from.pcs_err[align_id] spin_to.pcs_err[align_id] = error # Append the data for printout. data.append([spin_id]) if is_float(value): data[-1].append("%20.15f" % value) else: data[-1].append("%20s" % value) if back_calc: if is_float(value_bc): data[-1].append("%20.15f" % value_bc) else: data[-1].append("%20s" % value_bc) if is_float(error): data[-1].append("%20.15f" % error) else: data[-1].append("%20s" % error) # Printout. print("The following PCSs have been copied:\n") if back_calc: write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Back-calculated", "Error"], data=data) else: write_data(out=sys.stdout, headings=["Spin_ID", "Value", "Error"], data=data)
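# Usage sketch (not from the source): copying the PCS data for one alignment
# from a measurement pipe into a fitting pipe. The pipe names and the
# alignment ID 'Dy' are invented for illustration.
copy(pipe_from='measured', pipe_to='fitted', align_id='Dy', back_calc=True)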
# Save all chi2 values, to help find a reasonable level for the innermost, inner, middle and outer isosurfaces. all_chi.append(chi2) # Increment the value of the second parameter. values[1] = values[1] + step_size[1] counter += 1 # Increment the value of the first parameter. values[0] = values[0] + step_size[0] print("\nMin cluster point %s=%3.3f, %s=%3.3f, with chi2=%3.3f" % (params[0], pcm[0], params[1], pcm[1], pre_chi2)) # Open the file. file_name = '3_simulate_graphs_S65_dw_r2a_FT128.txt' surface_file = open_write_file(file_name=file_name, dir=None, force=True) write_data(out=surface_file, headings=headings, data=data) # Close the file. surface_file.close() # Check spins. display_spin() # Now de-select spins from cluster. for spin_id in cur_spin_ids: deselect.spin(spin_id=spin_id) relax_disp.plot_disp_curves(dir='grace', y_axis='r2_eff', x_axis='disp', num_points=1000, extend_hz=500.0, extend_ppm=500.0, interpolate='disp', force=True)
def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sep=None, neg_g_corr=False, absolute=False): """Read the RDC data from file. @keyword align_id: The alignment tensor ID string. @type align_id: str @keyword file: The name of the file to open. @type file: str @keyword dir: The directory containing the file (defaults to the current directory if None). @type dir: str or None @keyword file_data: An alternative to opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. @type file_data: list of lists @keyword data_type: A string which is set to 'D' means that the splitting in the aligned sample was assumed to be J + D, or if set to '2D' then the splitting was taken as J + 2D. If set to 'T', then the data will be marked as being J+D values. @keyword spin_id1_col: The column containing the spin ID strings of the first spin. @type spin_id1_col: int @keyword spin_id2_col: The column containing the spin ID strings of the second spin. @type spin_id2_col: int @keyword data_col: The column containing the RDC data in Hz. @type data_col: int or None @keyword error_col: The column containing the RDC errors. @type error_col: int or None @keyword sep: The column separator which, if None, defaults to whitespace. @type sep: str or None @keyword neg_g_corr: A flag which is used to correct for the negative gyromagnetic ratio of 15N. If True, a sign inversion will be applied to all RDC values to be loaded. @type neg_g_corr: bool @keyword absolute: A flag which if True indicates that the RDCs to load are signless. All RDCs will then be converted to positive values. @type absolute: bool """ # Check the pipe setup. check_pipe_setup(sequence=True) # Either the data or error column must be supplied. if data_col == None and error_col == None: raise RelaxError("One of either the data or error column must be supplied.") # Check the data types. rdc_types = ['D', '2D', 'T'] if data_type not in rdc_types: raise RelaxError("The RDC data type '%s' must be one of %s." % (data_type, rdc_types)) # Spin specific data. ##################### # Extract the data from the file, and remove comments and blank lines. file_data = extract_data(file, dir, sep=sep) file_data = strip(file_data, comments=True) # Loop over the RDC data. data = [] for line in file_data: # Invalid columns. if spin_id1_col > len(line): warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line)) continue if spin_id2_col > len(line): warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line)) continue if data_col and data_col > len(line): warn(RelaxWarning("The data %s is invalid, no data column can be found." % line)) continue if error_col and error_col > len(line): warn(RelaxWarning("The data %s is invalid, no error column can be found." % line)) continue # Unpack. spin_id1 = line[spin_id1_col-1] spin_id2 = line[spin_id2_col-1] value = None if data_col: value = line[data_col-1] error = None if error_col: error = line[error_col-1] # Convert the spin IDs. if spin_id1[0] in ["\"", "\'"]: spin_id1 = eval(spin_id1) if spin_id2[0] in ["\"", "\'"]: spin_id2 = eval(spin_id2) # Convert and check the value. if value == 'None': value = None if value != None: try: value = float(value) except ValueError: warn(RelaxWarning("The RDC value of the line %s is invalid." % line)) continue # Convert and check the error. 
if error == 'None': error = None if error != None: try: error = float(error) except ValueError: warn(RelaxWarning("The error value of the line %s is invalid." % line)) continue # Get the spins. spin1 = return_spin(spin_id1) spin2 = return_spin(spin_id2) # Check the spin IDs. if not spin1: warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line))) continue if not spin2: warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line))) continue # Get the interatomic data container. interatom = return_interatom(spin_id1, spin_id2) # Create the container if needed. if interatom == None: interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2) # Test the error value (a value of 0.0 will cause the interatomic container to be deselected). if error == 0.0: interatom.select = False warn(RelaxWarning("An error value of zero has been encountered, deselecting the interatomic container between spin '%s' and '%s'." % (spin_id1, spin_id2))) continue # Store the data type as global data (need for the conversion of RDC data). if not hasattr(interatom, 'rdc_data_types'): interatom.rdc_data_types = {} if not align_id in interatom.rdc_data_types: interatom.rdc_data_types[align_id] = data_type # Convert and add the data. if data_col: # Data conversion. value = convert(value, data_type, align_id, to_intern=True) # Correction for the negative gyromagnetic ratio of 15N. if neg_g_corr and value != None: value = -value # Absolute values. if absolute: # Force the value to be positive. value = abs(value) # Initialise. if not hasattr(interatom, 'rdc'): interatom.rdc = {} # Add the value. interatom.rdc[align_id] = value # Store the absolute value flag. if not hasattr(interatom, 'absolute_rdc'): interatom.absolute_rdc = {} interatom.absolute_rdc[align_id] = absolute # Convert and add the error. if error_col: # Data conversion. error = convert(error, data_type, align_id, to_intern=True) # Initialise. if not hasattr(interatom, 'rdc_err'): interatom.rdc_err = {} # Append the error. interatom.rdc_err[align_id] = error # Append the data for printout. data.append([spin_id1, spin_id2]) if is_float(value): data[-1].append("%20.15f" % value) else: data[-1].append("%20s" % value) if is_float(error): data[-1].append("%20.15f" % error) else: data[-1].append("%20s" % error) # No data, so fail hard! if not len(data): raise RelaxError("No RDC data could be extracted.") # Print out. print("The following RDCs have been loaded into the relax data store:\n") write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data) # Initialise some global structures. if not hasattr(cdp, 'align_ids'): cdp.align_ids = [] if not hasattr(cdp, 'rdc_ids'): cdp.rdc_ids = [] # Add the RDC id string. if align_id not in cdp.align_ids: cdp.align_ids.append(align_id) if align_id not in cdp.rdc_ids: cdp.rdc_ids.append(align_id)
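# Usage sketch (not from the source): reading RDCs for a hypothetical 'Dy'
# alignment from an invented four-column file of spin ID pairs, values and
# errors, with the splitting taken as J + D.
read(align_id='Dy', file='rdc_dy.txt', spin_id1_col=1, spin_id2_col=2, data_col=3, error_col=4, data_type='D')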
def select(method=None, modsel_pipe=None, bundle=None, pipes=None): """Model selection function. @keyword method: The model selection method. This can currently be one of: - 'AIC', Akaike's Information Criteria. - 'AICc', Small sample size corrected AIC. - 'BIC', Bayesian or Schwarz Information Criteria. - 'CV', Single-item-out cross-validation. None of the other model selection techniques are currently supported. @type method: str @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. @type modsel_pipe: str @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. @type bundle: str or None @keyword pipes: A list of the data pipes to use in the model selection. @type pipes: list of str """ # Test if the pipe already exists. if has_pipe(modsel_pipe): raise RelaxPipeError(modsel_pipe) # Use all pipes. if pipes == None: # Get all data pipe names from the relax data store. pipes = pipe_names() # Select the model selection technique. if method == 'AIC': print("AIC model selection.") formula = aic elif method == 'AICc': print("AICc model selection.") formula = aicc elif method == 'BIC': print("BIC model selection.") formula = bic elif method == 'CV': print("CV model selection.") else: raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") # No pipes. if len(pipes) == 0: raise RelaxError("No data pipes are available for use in model selection.") # Initialise. function_type = {} model_loop = {} model_type = {} duplicate_data = {} model_statistics = {} skip_function = {} modsel_pipe_exists = False # Cross validation setup. if isinstance(pipes[0], list): # No pipes. if len(pipes[0]) == 0: raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") # Loop over the data pipes. for i in range(len(pipes)): for j in range(len(pipes[i])): # The specific analysis API object. api = return_api(pipe_name=pipes[i][j]) # Store the specific functions. model_loop[pipes[i][j]] = api.model_loop model_type[pipes[i][j]] = api.model_type duplicate_data[pipes[i][j]] = api.duplicate_data model_statistics[pipes[i][j]] = api.model_statistics skip_function[pipes[i][j]] = api.skip_function # The model loop should be the same for all data pipes! for i in range(len(pipes)): for j in range(len(pipes[i])): if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: raise RelaxError("The models for each data pipe should be the same.") # Alias some functions from the specific API of the first data pipe. api = return_api(pipe_name=pipes[0][0]) model_loop = api.model_loop model_desc = api.model_desc # Global vs. local models. global_flag = False for i in range(len(pipes)): for j in range(len(pipes[i])): if model_type[pipes[i][j]]() == 'global': global_flag = True # All other model selection setup. else: # Loop over the data pipes. for i in range(len(pipes)): # The specific analysis API object. api = return_api(pipe_name=pipes[i]) # Store the specific functions. model_loop[pipes[i]] = api.model_loop model_type[pipes[i]] = api.model_type duplicate_data[pipes[i]] = api.duplicate_data model_statistics[pipes[i]] = api.model_statistics skip_function[pipes[i]] = api.skip_function # Alias some functions from the specific API of the first data pipe. api = return_api(pipe_name=pipes[0]) model_loop = api.model_loop model_desc = api.model_desc # Global vs. local models.
global_flag = False for j in range(len(pipes)): if model_type[pipes[j]]() == 'global': global_flag = True # Loop over the base models. for model_info in model_loop(): # Print out. print("\n") desc = model_desc(model_info) if desc: print(desc) # Initial model. best_model = None best_crit = 1e300 data = [] # Loop over the pipes. for j in range(len(pipes)): # Single-item-out cross validation. if method == 'CV': # Sum of chi-squared values. sum_crit = 0.0 # Loop over the validation samples and sum the chi-squared values. for k in range(len(pipes[j])): # Alias the data pipe name. pipe = pipes[j][k] # Switch to this pipe. switch(pipe) # Skip function. if skip_function[pipe](model_info): continue # Get the model statistics. k, n, chi2 = model_statistics[pipe](model_info) # Missing data sets. if k == None or n == None or chi2 == None: continue # Chi2 sum. sum_crit = sum_crit + chi2 # Cross-validation criterion (average chi-squared value). crit = sum_crit / float(len(pipes[j])) # Other model selection methods. else: # Reassign the pipe. pipe = pipes[j] # Switch to this pipe. switch(pipe) # Skip function. if skip_function[pipe](model_info): continue # Get the model statistics. k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) # Missing data sets. if k == None or n == None or chi2 == None: continue # Calculate the criterion value. crit = formula(chi2, float(k), float(n)) # Store the values for a later printout. data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit]) # Select model. if crit < best_crit: best_model = pipe best_crit = crit # Write out the table. write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data) # Duplicate the data from the 'best_model' to the model selection data pipe. if best_model != None: # Print out of selected model. print("The model from the data pipe " + repr(best_model) + " has been selected.") # Switch to the selected data pipe. switch(best_model) # Duplicate. duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) # Model selection pipe now exists. modsel_pipe_exists = True # No model selected. else: # Print out of selected model. print("No model has been selected.") # Switch to the model selection pipe. if modsel_pipe_exists: switch(modsel_pipe) # Bundle the data pipe. if bundle: pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe) # Update all of the required metadata structures. mol_res_spin.metadata_update() interatomic.metadata_update()
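# Usage sketch (not from the source): hypothetical AICc model selection
# across three invented data pipes, with the winning pipe duplicated into a
# new 'final' pipe.
select(method='AICc', modsel_pipe='final', pipes=['m1', 'm2', 'm3'])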
def grid_setup(lower=None, upper=None, inc=None, verbosity=1, skip_preset=True): """Determine the per-model grid bounds, allowing for the zooming grid search. @keyword lower: The user supplied lower bounds of the grid search which must be equal to the number of parameters in the model. @type lower: list of numbers @keyword upper: The user supplied upper bounds of the grid search which must be equal to the number of parameters in the model. @type upper: list of numbers @keyword inc: The user supplied grid search increments. @type inc: int or list of int @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity. @type verbosity: int @keyword skip_preset: This argument, when True, allows any parameter which already has a value set to be skipped in the grid search. @type skip_preset: bool @return: The per-model grid upper and lower bounds. The first dimension of each structure corresponds to the model, the second the model parameters. @rtype: tuple of lists of lists of float, lists of lists of float, list of lists of int """ # The specific analysis API object and parameter object. api = return_api() param_object = return_parameter_object() # Initialise. model_lower = [] model_upper = [] model_inc = [] # Loop over the models. for model_info in api.model_loop(): # Get the parameter names and current values. names = api.get_param_names(model_info) values = api.get_param_values(model_info) # No parameters for this model. if names == None or len(names) == 0: model_lower.append([]) model_upper.append([]) model_inc.append([]) continue # The parameter number. n = len(names) # Make sure that the length of the parameter array is > 0. if n == 0: raise RelaxError("Cannot run a grid search on a model with zero parameters.") # Check that the user supplied bound lengths are ok. if lower != None and len(lower) != n: raise RelaxLenError('lower bounds', n) if upper != None and len(upper) != n: raise RelaxLenError('upper bounds', n) # Check the user supplied increments. if isinstance(inc, list) and len(inc) != n: raise RelaxLenError('increment', n) if isinstance(inc, list): for i in range(n): if not (isinstance(inc[i], int) or inc[i] == None): raise RelaxIntListIntError('increment', inc) elif not isinstance(inc, int): raise RelaxIntListIntError('increment', inc) # Convert to the model increment list. if isinstance(inc, int): model_inc.append([inc]*n) else: model_inc.append(inc) # Print out the model title. api.print_model_title(prefix="Grid search setup: ", model_info=model_info) # The grid zoom level. zoom = 0 if hasattr(cdp, 'grid_zoom_level'): zoom = cdp.grid_zoom_level zoom_factor = 1.0 / 2.0**zoom if zoom > 0: print("Zooming grid level of %s, scaling the grid size by a factor of %s.\n" % (zoom, zoom_factor)) # Append empty lists for the bounds to be built up. model_lower.append([]) model_upper.append([]) # Loop over the parameters. data = [] for i in range(n): # A comment for user feedback. comment = 'Default bounds' if lower != None and upper != None: comment = 'User supplied lower and upper bound' elif lower != None: comment = 'User supplied lower bound' elif upper != None: comment = 'User supplied upper bound' # Alias the number of increments for this parameter. incs = model_inc[-1][i] # Error checking for increment values of None. if incs == None and values[i] in [None, {}, []]: raise RelaxError("The parameter '%s' has no preset value, therefore a grid increment of None is not valid." % names[i]) # The lower bound for this parameter. 
if lower != None: lower_i = lower[i] else: lower_i = param_object.grid_lower(names[i], incs=incs, model_info=model_info) # The upper bound for this parameter. if upper != None: upper_i = upper[i] else: upper_i = param_object.grid_upper(names[i], incs=incs, model_info=model_info) # The skipping logic. skip = False if skip_preset: # Override the flag if the zoom is on. if zoom: skip = False # No preset value. elif values[i] in [None, {}, []]: skip = False # The preset value is a NaN value due to numpy conversions of None. elif isNaN(values[i]): skip = False # Ok, now the parameter can be skipped. else: skip = True # Override the skip flag if the incs value is None. if incs == None: skip = True # Skip preset values. if skip: lower_i = values[i] upper_i = values[i] model_inc[-1][i] = incs = 1 comment = 'Preset value' # Zooming grid. elif zoom: # The full size and scaled size. size = upper_i - lower_i zoom_size = size * zoom_factor half_size = zoom_size / 2.0 comment = 'Zoom grid width of %s %s' % (zoom_size, param_object.units(names[i])) # The new size around the current value. lower_zoom = values[i] - half_size upper_zoom = values[i] + half_size # Outside of the original lower bound, so shift the grid to fit. if zoom > 0 and lower_zoom < lower_i: # The amount to shift by. shift = lower_i - lower_zoom # Set the new bounds. upper_i = upper_zoom + shift # Outside of the original upper bound, so shift the grid to fit. elif zoom > 0 and upper_zoom > upper_i: # The amount to shift by. shift = upper_i - upper_zoom # Set the new bounds. lower_i = lower_zoom + shift # Inside the original bounds. else: lower_i = lower_zoom upper_i = upper_zoom # Add to the data list for printing out. data.append([names[i], "%15s" % lower_i, "%15s" % upper_i, "%15s" % incs, comment]) # Scale the bounds. scaling = param_object.scaling(names[i], model_info=model_info) lower_i /= scaling upper_i /= scaling # Append. model_lower[-1].append(lower_i) model_upper[-1].append(upper_i) # Printout. if verbosity: write_data(out=sys.stdout, headings=["Parameter", "Lower bound", "Upper bound", "Increments", "Comment"], data=data) sys.stdout.write('\n') # Return the bounds. return model_lower, model_upper, model_inc
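# Usage sketch (not from the source): building the per-model grid bounds with
# 11 increments for every parameter. The three return values are lists over
# the models, then over each model's parameters, as documented above.
model_lower, model_upper, model_inc = grid_setup(inc=11, verbosity=1)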
def define(spin_id1=None, spin_id2=None, pipe=None, direct_bond=False, verbose=True): """Set up the magnetic dipole-dipole interaction. @keyword spin_id1: The spin identifier string of the first spin of the pair. @type spin_id1: str @keyword spin_id2: The spin identifier string of the second spin of the pair. @type spin_id2: str @param pipe: The data pipe to operate on. Defaults to the current data pipe. @type pipe: str @keyword direct_bond: A flag specifying if the two spins are directly bonded. @type direct_bond: bool @keyword verbose: A flag which if True will result in printouts of the created interatomic data containers. @type verbose: bool """ # The data pipe. if pipe == None: pipe = pipes.cdp_name() # Get the data pipe. dp = pipes.get_pipe(pipe) # Loop over both spin selections. ids = [] for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True): for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True): # Directly bonded atoms. if direct_bond: # Different molecules. if mol_name1 != mol_name2: continue # From structural info. if hasattr(dp, 'structure') and dp.structure.get_molecule(mol_name1, model=1): if not dp.structure.are_bonded(atom_id1=id1, atom_id2=id2): continue # From the residue info. else: # No element info. if not hasattr(spin1, 'element'): raise RelaxError("The spin '%s' does not have the element type set." % id1) if not hasattr(spin2, 'element'): raise RelaxError("The spin '%s' does not have the element type set." % id2) # Backbone NH and CH pairs. pair = False if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'): pair = True elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'): pair = True # Not the same residue, so skip. if pair and res_num1 != None and res_num1 != res_num2: continue elif pair and res_num1 == None and res_name1 != res_name2: continue # Get the interatomic data object, if it exists. interatom = return_interatom(id1, id2, pipe=pipe) # Create the container if needed. if interatom == None: interatom = create_interatom(spin_id1=id1, spin_id2=id2, pipe=pipe) # Check that this has not already been set up. if interatom.dipole_pair: raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2)) # Set a flag indicating that a dipole-dipole interaction is present. interatom.dipole_pair = True # Store the IDs for the printout. ids.append([repr(id1), repr(id2)]) # No matches, so fail! if not len(ids): # Find the problem. count1 = 0 count2 = 0 for spin in spin_loop(spin_id1): count1 += 1 for spin in spin_loop(spin_id2): count2 += 1 # Report the problem. if count1 == 0 and count2 == 0: raise RelaxError("Neither spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2)) elif count1 == 0: raise RelaxError("The spin ID '%s' matches no spins." % spin_id1) elif count2 == 0: raise RelaxError("The spin ID '%s' matches no spins." % spin_id2) else: raise RelaxError("Unknown error.") # Print out. if verbose: print("Interatomic interactions are now defined for the following spins:\n") write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack the line (the columns are 1-based).
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Strip any quoting from the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id1)
        spin2 = return_spin(spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for the printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J couplings have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
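
# Illustrative call (hypothetical file layout, assuming sequence data has
# already been loaded into the current data pipe):
#
#     read(file='j_coupling.txt', spin_id1_col=1, spin_id2_col=2,
#          data_col=3, error_col=4, sign_col=5)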
def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible printout.
    if verbose:
        print("\nThe following signal to noise ratios have been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError("Intensity error analysis has not been performed.  Please see spectrum.error_analysis().")

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the IDs.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the signal to noise ratio.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under the sorted IDs.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        if verbose:
            section(file=sys.stdout, text="Signal to noise ratio for spin ID '%s'" % spin_id, prespace=1)
            write_data(out=sys.stdout, headings=["Spectrum ID", "Signal", "Noise", "S/N"], data=data_i)
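
# Minimal sketch (an assumption, not relax code) of the quantity computed
# above: the signal to noise ratio is simply the peak intensity divided by its
# error, per spectrum ID.
def _sn_ratios(intensities, errors):
    """Map each spectrum ID to intensity/error, for two matching dicts."""
    return dict((id, float(intensities[id]) / float(errors[id])) for id in intensities)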
def define_dipole_pair(spin_id1=None, spin_id2=None, spin1=None, spin2=None, pipe=None, direct_bond=False, spin_selection=False, verbose=True):
    """Set up the magnetic dipole-dipole interaction.

    @keyword spin_id1:          The spin identifier string of the first spin of the pair.
    @type spin_id1:             str
    @keyword spin_id2:          The spin identifier string of the second spin of the pair.
    @type spin_id2:             str
    @keyword spin1:             An optional single spin container for the first atom.  This is for speeding up the interatomic data container creation, if the spin containers are already available in the calling function.
    @type spin1:                SpinContainer instance
    @keyword spin2:             An optional single spin container for the second atom.  This is for speeding up the interatomic data container creation, if the spin containers are already available in the calling function.
    @type spin2:                SpinContainer instance
    @keyword pipe:              The data pipe to operate on.  Defaults to the current data pipe.
    @type pipe:                 str
    @keyword direct_bond:       A flag specifying if the two spins are directly bonded.
    @type direct_bond:          bool
    @keyword spin_selection:    Define the interatomic data container selection based on the spin selection.  If either spin is deselected, the interatomic container will also be deselected.  Otherwise the container will be selected.
    @type spin_selection:       bool
    @keyword verbose:           A flag which if True will result in printouts of the created interatomic data containers.
    @type verbose:              bool
    """

    # The data pipe.
    if pipe == None:
        pipe = pipes.cdp_name()

    # Get the data pipe.
    dp = pipes.get_pipe(pipe)

    # Initialise data structures for storing spin data.
    ids = []
    spins = []
    spin_selections = []

    # Pre-supplied spins.
    if spin1 and spin2:
        # Store the IDs for the printout.
        ids.append([spin_id1, spin_id2])

        # Store the spin data.
        spins.append([spin1, spin2])
        spin_selections.append([spin1.select, spin2.select])

    # Use the structural data to find connected atoms.
    elif hasattr(dp, 'structure'):
        # The selection objects (operating on the supplied pipe rather than the current one).
        selection1 = dp.structure.selection(atom_id=spin_id1)
        selection2 = dp.structure.selection(atom_id=spin_id2)

        # Loop over the atoms of the first spin selection.
        for mol_name1, res_num1, res_name1, atom_num1, atom_name1, mol_index1, atom_index1 in dp.structure.atom_loop(selection=selection1, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
            # Generate the first spin ID.
            id1 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name1, res_num=res_num1, res_name=res_name1, spin_num=atom_num1, spin_name=atom_name1)

            # Does the spin exist?
            spin1 = return_spin(spin_id=id1)
            if not spin1:
                continue

            # Loop over the atoms of the second spin selection.
            for mol_name2, res_num2, res_name2, atom_num2, atom_name2, mol_index2, atom_index2 in dp.structure.atom_loop(selection=selection2, model_num=1, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, mol_index_flag=True, index_flag=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # Skip non-bonded atom pairs.
                    if not dp.structure.are_bonded_index(mol_index1=mol_index1, atom_index1=atom_index1, mol_index2=mol_index2, atom_index2=atom_index2):
                        continue

                # Generate the second spin ID.
                id2 = generate_spin_id_unique(pipe_cont=dp, mol_name=mol_name2, res_num=res_num2, res_name=res_name2, spin_num=atom_num2, spin_name=atom_name2)

                # Does the spin exist?
                spin2 = return_spin(spin_id=id2)
                if not spin2:
                    continue

                # Store the IDs for the printout.
                ids.append([id1, id2])

                # Store the spin data.
                spins.append([spin1, spin2])
                spin_selections.append([spin1.select, spin2.select])

    # No structural data present or the spin IDs are not in the structural data, so use spin loops and some basic rules.
    if ids == []:
        for spin1, mol_name1, res_num1, res_name1, id1 in spin_loop(spin_id1, pipe=pipe, full_info=True, return_id=True):
            for spin2, mol_name2, res_num2, res_name2, id2 in spin_loop(spin_id2, pipe=pipe, full_info=True, return_id=True):
                # Directly bonded atoms.
                if direct_bond:
                    # Different molecules.
                    if mol_name1 != mol_name2:
                        continue

                    # No element info.
                    if not hasattr(spin1, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id1)
                    if not hasattr(spin2, 'element'):
                        raise RelaxError("The spin '%s' does not have the element type set." % id2)

                    # Backbone NH and CH pairs.
                    pair = False
                    if (spin1.element == 'N' and spin2.element == 'H') or (spin2.element == 'N' and spin1.element == 'H'):
                        pair = True
                    elif (spin1.element == 'C' and spin2.element == 'H') or (spin2.element == 'C' and spin1.element == 'H'):
                        pair = True

                    # Not in the same residue, so skip.
                    if pair and res_num1 != None and res_num1 != res_num2:
                        continue
                    elif pair and res_num1 == None and res_name1 != res_name2:
                        continue

                # Store the IDs for the printout.
                ids.append([id1, id2])

                # Store the spin data.
                spins.append([spin1, spin2])
                spin_selections.append([spin1.select, spin2.select])

    # No matches, so fail!
    if not len(ids):
        # Find the problem.
        count1 = 0
        count2 = 0
        for spin in spin_loop(spin_id1):
            count1 += 1
        for spin in spin_loop(spin_id2):
            count2 += 1

        # Report the problem.
        if count1 == 0 and count2 == 0:
            raise RelaxError("Neither of the spin IDs '%s' and '%s' match any spins." % (spin_id1, spin_id2))
        elif count1 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id1)
        elif count2 == 0:
            raise RelaxError("The spin ID '%s' matches no spins." % spin_id2)
        else:
            raise RelaxError("Unknown error.")

    # Define the interaction.
    for i in range(len(ids)):
        # Unpack.
        id1, id2 = ids[i]
        spin1, spin2 = spins[i]

        # Get the interatomic data object, if it exists.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash, pipe=pipe)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=id1, spin_id2=id2, spin1=spin1, spin2=spin2, pipe=pipe)

        # Check that this has not already been set up.
        if interatom.dipole_pair:
            raise RelaxError("The magnetic dipole-dipole interaction already exists between the spins '%s' and '%s'." % (id1, id2))

        # Set a flag indicating that a dipole-dipole interaction is present.
        interatom.dipole_pair = True

        # Set the selection.
        if spin_selection:
            interatom.select = False
            if spin_selections[i][0] and spin_selections[i][1]:
                interatom.select = True

    # Printout.
    if verbose:
        # Convert the IDs to their representations.
        for i in range(len(ids)):
            ids[i][0] = repr(ids[i][0])
            ids[i][1] = repr(ids[i][1])

        # The printout.
        print("Interatomic interactions are now defined for the following spins:\n")
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
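
# Illustrative usage (hypothetical spin IDs): with spin_selection=True, each
# new interatomic container inherits its selection state from the two spins,
# being selected only if both spins are selected.
#
#     define_dipole_pair(spin_id1='@N', spin_id2='@H', direct_bond=True,
#                        spin_selection=True)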
def copy(pipe_from=None, pipe_to=None, spin_id1=None, spin_id2=None, verbose=True):
    """Copy the interatomic data from one data pipe to another.

    @keyword pipe_from: The data pipe to copy the interatomic data from.  This defaults to the current data pipe.
    @type pipe_from:    str
    @keyword pipe_to:   The data pipe to copy the interatomic data to.  This defaults to the current data pipe.
    @type pipe_to:      str
    @keyword spin_id1:  The spin ID string of the first atom.
    @type spin_id1:     str
    @keyword spin_id2:  The spin ID string of the second atom.
    @type spin_id2:     str
    @keyword verbose:   A flag which if True will cause info about each spin pair to be printed out.
    @type verbose:      bool
    """

    # Defaults.
    if pipe_from == None and pipe_to == None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from == None:
        pipe_from = pipes.cdp_name()
    elif pipe_to == None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    check_pipe(pipe_from)
    check_pipe(pipe_to)

    # Check that the spin IDs exist.
    if spin_id1:
        if count_spins(selection=spin_id1, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_from)
        if count_spins(selection=spin_id1, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_to)
    if spin_id2:
        if count_spins(selection=spin_id2, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_from)
        if count_spins(selection=spin_id2, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_to)

    # Check for the sequence data in the target pipe if no spin IDs are given.
    if not spin_id1 and not spin_id2:
        for spin, spin_id in spin_loop(pipe=pipe_from, return_id=True):
            if not return_spin(spin_id=spin_id, pipe=pipe_to):
                raise RelaxNoSpinError(spin_id, pipe_to)

    # Test if pipe_from contains interatomic data (skipping the rest of the function if it is missing).
    if not exists_data(pipe_from):
        return

    # Loop over the interatomic data of the pipe_from data pipe.
    ids = []
    for interatom in interatomic_loop(selection1=spin_id1, selection2=spin_id2, pipe=pipe_from):
        # Create a new container.
        new_interatom = create_interatom(spin_id1=interatom.spin_id1, spin_id2=interatom.spin_id2, pipe=pipe_to)

        # Duplicate all the objects of the container.
        for name in dir(interatom):
            # Skip special objects.
            if search('^_', name):
                continue

            # Skip the spin IDs.
            if name in ['spin_id1', 'spin_id2']:
                continue

            # Skip class methods.
            if name in interatom.__class__.__dict__:
                continue

            # Duplicate all other objects.
            obj = deepcopy(getattr(interatom, name))
            setattr(new_interatom, name, obj)

        # Store the IDs for the printout.
        ids.append([repr(interatom.spin_id1), repr(interatom.spin_id2)])

        # Reconfigure the spin hashes.
        hash_update(interatom=new_interatom, pipe=pipe_to)

    # Print out.
    if verbose:
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
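
# Illustrative usage (hypothetical pipe names): duplicate all interatomic data
# containers from one data pipe into another.
#
#     copy(pipe_from='original', pipe_to='backup')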
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack the line (the columns are 1-based).
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Strip any quoting from the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for the printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J couplings have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
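
# Self-contained sketch (a simplified stand-in, not relax code) of the 1-based
# column extraction and float conversion pattern used by the readers above.
def _parse_float(line, col):
    """Return float(line[col-1]), or None if the column is absent or invalid."""
    if not col or col > len(line):
        return None
    try:
        return float(line[col-1])
    except ValueError:
        return None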
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Read the averaged interatomic distances from a file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack the line (the columns are 1-based).
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist != None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

        # Unit conversion.
        if unit == 'Angstrom':
            ave_dist = ave_dist * 1e-10

        # Get the spins, skipping the line if either cannot be found (mirroring the other readers).
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Get the interatomic data container.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # No container found, so create it.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
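
# Minimal sketch (an assumption, not relax code) of the unit handling above:
# distances are stored internally in meters, so Angstrom input is scaled by
# 1e-10.
def _to_meters(dist, unit='meter'):
    """Convert a distance to meters, where unit is 'meter' or 'Angstrom'."""
    if unit not in ('meter', 'Angstrom'):
        raise ValueError("The unit '%s' is unknown." % unit)
    if unit == 'Angstrom':
        return dist * 1e-10
    return dist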
def get_pos(spin_id=None, str_id=None, ave_pos=False):
    """Load the atomic positions from the structural object into the relax data store.

    @keyword spin_id:   The molecule, residue, and spin identifier string.
    @type spin_id:      str
    @keyword str_id:    The structure identifier.  This can be the file name, model number, or structure number.
    @type str_id:       int or str
    @keyword ave_pos:   A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
    @type ave_pos:      bool
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if the structure exists.
    if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules():
        raise RelaxNoPdbError

    # Loop over all atoms of the spin_id selection.
    data = []
    for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
        # Remove the '+' regular expression character from the mol, res, and spin names!
        if mol_name and search(r'\+', mol_name):
            mol_name = mol_name.replace('+', '')
        if res_name and search(r'\+', res_name):
            res_name = res_name.replace('+', '')
        if atom_name and search(r'\+', atom_name):
            atom_name = atom_name.replace('+', '')

        # The spin identification string.
        id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name)

        # Get the spin container.
        spin_cont = return_spin(id)

        # Skip the spin if it doesn't exist.
        if spin_cont == None:
            continue

        # Add the position vector to the spin container.
        spin_cont.pos = pos

        # Store the data for a printout at the end.
        data.append([id, repr(pos)])

    # No positions found.
    if not len(data):
        raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id)

    # Update the pseudo-atoms.
    for spin in spin_loop():
        if hasattr(spin, 'members'):
            # Get the member spin positions.
            positions = []
            for atom in spin.members:
                # Get the spin container.
                subspin = return_spin(atom)

                # Test that the spin exists.
                if subspin == None:
                    raise RelaxNoSpinError(atom)

                # Test the position.
                if not hasattr(subspin, 'pos') or subspin.pos == None or not len(subspin.pos):
                    raise RelaxError("Positional information is not available for the atom '%s'." % atom)

                # Alias the position.
                pos = subspin.pos

                # Convert a single-model position to a list of lists.
                multi_model = True
                if type(pos[0]) in [float, float64]:
                    multi_model = False
                    pos = [pos]

                # Store the position.
                positions.append([])
                for i in range(len(pos)):
                    positions[-1].append(pos[i].tolist())

            # The averaging.
            if spin.averaging == 'linear':
                # Average the positions.
                ave = linear_ave(positions)

                # Convert back to the correct structure.
                if multi_model:
                    spin.pos = ave
                else:
                    spin.pos = ave[0]

    # Print out.
    write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)
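
# Illustrative sketch (an assumption, not the relax linear_ave() function) of
# the linear averaging applied to the pseudo-atom member positions above, for
# positions given as [member][model][x, y, z] nested lists.
def _linear_average(positions):
    """Return the per-model average of the member position lists."""
    n = float(len(positions))
    n_models = len(positions[0])
    return [[sum(member[m][i] for member in positions) / n for i in range(3)]
            for m in range(n_models)]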
def copy(pipe_from=None, pipe_to=None, spin_id1=None, spin_id2=None, verbose=True):
    """Copy the interatomic data from one data pipe to another.

    @keyword pipe_from: The data pipe to copy the interatomic data from.  This defaults to the current data pipe.
    @type pipe_from:    str
    @keyword pipe_to:   The data pipe to copy the interatomic data to.  This defaults to the current data pipe.
    @type pipe_to:      str
    @keyword spin_id1:  The spin ID string of the first atom.
    @type spin_id1:     str
    @keyword spin_id2:  The spin ID string of the second atom.
    @type spin_id2:     str
    @keyword verbose:   A flag which if True will cause info about each spin pair to be printed out.
    @type verbose:      bool
    """

    # Defaults.
    if pipe_from == None and pipe_to == None:
        raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.")
    elif pipe_from == None:
        pipe_from = pipes.cdp_name()
    elif pipe_to == None:
        pipe_to = pipes.cdp_name()

    # Test if the pipe_from and pipe_to data pipes exist.
    pipes.test(pipe_from)
    pipes.test(pipe_to)

    # Check that the spin IDs exist.
    if spin_id1:
        if count_spins(selection=spin_id1, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_from)
        if count_spins(selection=spin_id1, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id1, pipe_to)
    if spin_id2:
        if count_spins(selection=spin_id2, pipe=pipe_from, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_from)
        if count_spins(selection=spin_id2, pipe=pipe_to, skip_desel=False) == 0:
            raise RelaxNoSpinError(spin_id2, pipe_to)

    # Check for the sequence data in the target pipe if no spin IDs are given.
    if not spin_id1 and not spin_id2:
        for spin, spin_id in spin_loop(pipe=pipe_from, return_id=True):
            if not return_spin(spin_id, pipe=pipe_to):
                raise RelaxNoSpinError(spin_id, pipe_to)

    # Test if pipe_from contains interatomic data (skipping the rest of the function if it is missing).
    if not exists_data(pipe_from):
        return

    # Loop over the interatomic data of the pipe_from data pipe.
    ids = []
    for interatom in interatomic_loop(selection1=spin_id1, selection2=spin_id2, pipe=pipe_from):
        # Create a new container.
        new_interatom = create_interatom(spin_id1=interatom.spin_id1, spin_id2=interatom.spin_id2, pipe=pipe_to)

        # Duplicate all the objects of the container.
        for name in dir(interatom):
            # Skip special objects.
            if search('^_', name):
                continue

            # Skip the spin IDs.
            if name in ['spin_id1', 'spin_id2']:
                continue

            # Skip class methods.
            if name in list(interatom.__class__.__dict__.keys()):
                continue

            # Duplicate all other objects.
            obj = deepcopy(getattr(interatom, name))
            setattr(new_interatom, name, obj)

        # Store the IDs for the printout.
        ids.append([repr(interatom.spin_id1), repr(interatom.spin_id2)])

    # Print out.
    if verbose:
        write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2"], data=ids)
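
# Minimal sketch (a simplified stand-in) of the attribute duplication rule in
# both copy() variants above: deep-copy every public data attribute, skipping
# the spin IDs and anything defined on the class (i.e. methods).
from copy import deepcopy

def _duplicate_attrs(src, dst, skip=('spin_id1', 'spin_id2')):
    """Deep-copy the public, instance-level attributes of src onto dst."""
    for name in dir(src):
        if name.startswith('_') or name in skip:
            continue
        if name in src.__class__.__dict__:
            continue    # class-level objects, including methods
        setattr(dst, name, deepcopy(getattr(src, name)))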