def build(cls, trajectory_handler, matrix_creation_parameters):
    """
    Creates a CondensedMatrix from the geometric centers of the
    "body_selection" coordinates (usually a ligand) once the whole
    trajectory has been iteratively superposed using the
    "dist_fit_selection" coordinates.

    @param trajectory_handler: Handler holding the selection strings, pdb
    info and coordsets.
    @param matrix_creation_parameters: Creation parameters (from the initial
    script). The keys "dist_fit_selection" and "body_selection" are read.

    @return: The created distances matrix.
    """
    # Coordinates that drive the superposition.
    fitting_coords = trajectory_handler.getSelection(
        matrix_creation_parameters["dist_fit_selection"])

    # Coordinates of the body. They are handed to the calculator as
    # calculation coordinates so they get moved along with the fitting ones.
    body_selection = matrix_creation_parameters["body_selection"]
    body_coords = trajectory_handler.getSelection(body_selection)

    superposer = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=fitting_coords,
                                calculationCoordsets=body_coords)

    # The iterative superposition modifies the coordinates it was given.
    superposer.iterativeSuperposition()

    # From now on the body coordinates are the working coordinates (they can
    # be used later, for instance by clustering metrics).
    trajectory_handler.setWorkingCoordinates(body_selection)

    return CondensedMatrix(cls.calculate_geom_center(body_coords))
def build(cls, data_handler, matrix_params):
    """
    Superposes the trajectory and returns the geometrical center of the body
    for each conformation.

    This is the builder for the "distance" matrix type (euclidean distance of
    the geometrical center of one body), whose script parameters are:

        "parameters":{
            "fit_selection": String,
            "body_selection": String,
        }

    "fit_selection": The Prody selection string describing the atoms to be
    superposed.
    "body_selection": Another Prody selection string describing the element
    used to get the euclidean distances.

    @param data_handler: Handler whose get_data() object provides the fitting
    and calculation coordinates.
    @param matrix_params: Matrix parameters (not read by this function).

    @return: The result of prody's calcCenter for the superposed body
    coordinates.
    """
    # Coordinates used to fit ...
    fitting_coords = data_handler.get_data().getFittingCoordinates()
    # ... and the body coordinates, which must move along with them.
    body_coords = data_handler.get_data().getCalculationCoordinates()

    superposer = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=fitting_coords,
                                calculationCoordsets=body_coords)

    # The iterative superposition modifies all the coordinates.
    superposer.iterativeSuperposition()

    # NOTE(review): a previous version computed the centers with
    # body_coords.mean(1) instead of calcCenter.
    from prody.measure import calcCenter
    return calcCenter(body_coords)
def superpose_and_calc_rmsf(ca_pdb_coordsets, cluster):
    """
    Iteratively superposes the conformations of one cluster and calculates
    the RMSF of that cluster.

    @param ca_pdb_coordsets: Coordinate sets of the whole trajectory,
    indexable with the element list of the cluster.
    @param cluster: The cluster; its 'all_elements' attribute selects the
    conformations to use.

    @return: A list with the values produced by 'calc_rmsf_of_cluster'.
    """
    # Work only with the conformations of this cluster. Presumably the
    # indexing yields a copy, so the input coordsets are left untouched --
    # TODO confirm.
    cluster_coords = ca_pdb_coordsets[cluster.all_elements]

    # Iterative superposition: minimum RMSD of all conformations with respect
    # to a mean conformation. It is done in place.
    RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                   fittingCoordsets=cluster_coords).iterativeSuperposition()

    rmsf_values = calc_rmsf_of_cluster(cluster_coords, cluster)
    return list(rmsf_values)
def evaluate(self, clustering):
    """
    Calculates the value of the PCA metric: for each cluster the largest
    eigenvalue of its PCA (the one corresponding to the axis of bigger
    variability) is obtained, the eigenvalues are added up and the sum is
    divided by the total number of elements of the clustering.

    NOTE(review): the eigenvalues are summed without an explicit per-cluster
    weight before dividing by 'total_number_of_elements'. Confirm that this
    is the intended "weighted by the cluster size" mean.

    @param clustering: The clustering we want to calculate the metric.

    @return: the value of the metric.
    """
    # Clusters with more elements than this are randomly subsampled.
    MAX_ELEMENTS = 1000

    pca_mean_val = 0.
    for c in clustering.clusters:
        element_indexes = c.all_elements

        ###################
        # Performance hack
        ###################
        # PCA can be very slow for big clusters (i.e. > 3k elements), so
        # those clusters are compressed before calculating it. It should
        # increase variance but will allow calculations.
        # It should use the kmedoids compressor.
        if len(c.all_elements) > MAX_ELEMENTS:
            element_indexes = c.get_random_sample(MAX_ELEMENTS)
            # Single-argument print: same output in Python 2 and Python 3.
            print("[PCA] Random sampling too big cluster to improve performance (%d elements -> %d elements)." % (len(c.all_elements), MAX_ELEMENTS))
        ###################

        # Pick the coordinates of this cluster.
        fitting_coordinates_of_this_cluster = self.fitting_coordinates[element_indexes]

        # Build exactly one calculator. If there are calculation coordinates
        # they are moved along with the fitting ones and the PCA is done
        # over them; otherwise the fitting coordinates are used.
        if self.calculation_coordinates is None:
            pca_coordinates = fitting_coordinates_of_this_cluster
            calculator = RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                                        fittingCoordsets=fitting_coordinates_of_this_cluster)
        else:
            pca_coordinates = self.calculation_coordinates[element_indexes]
            calculator = RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                                        fittingCoordsets=fitting_coordinates_of_this_cluster,
                                        calculationCoordsets=pca_coordinates)

        # Make an iterative superposition (to get the minimum RMSD of all
        # with respect to a mean conformation). Coordinates change in place.
        calculator.iterativeSuperposition()

        # Covariance matrix and then the eigenvalue we are interested in.
        covariance_matrix = PCAMetric.create_covariance_matrix(pca_coordinates)
        pca_mean_val += PCAMetric.calculate_biggest_eigenvalue(covariance_matrix)

    print("PCA finished")
    return pca_mean_val / clustering.total_number_of_elements
def superpose_and_calc_rmsf(ca_pdb_coordsets, cluster):
    """
    Superposes the conformations belonging to a cluster and returns the RMSF
    calculated for them.

    @param ca_pdb_coordsets: Coordinate sets of all the conformations; it is
    indexed with 'cluster.all_elements'.
    @param cluster: Cluster whose elements are going to be superposed.

    @return: The output of 'calc_rmsf_of_cluster' converted to a list.
    """
    # Coordinates of the elements of the cluster. Presumably this indexing
    # creates a copy of the coordinates -- TODO confirm.
    coords_of_cluster = ca_pdb_coordsets[cluster.all_elements]

    superposer = RMSDCalculator(
        calculatorType="QTRFIT_SERIAL_CALCULATOR",
        fittingCoordsets=coords_of_cluster)

    # In-place iterative superposition (minimum RMSD of all conformations
    # with respect to a mean conformation).
    superposer.iterativeSuperposition()

    rmsf = calc_rmsf_of_cluster(coords_of_cluster, cluster)
    return list(rmsf)
def superimpose_coordinates(all_coordsets, iterpose = True, num_threads = 4):
    """
    Superimposes, one by one, each of the coordinate sets (trajectories) of
    'all_coordsets'.

    @param all_coordsets: Iterable with the coordinate sets of each
    trajectory. Each item must have a 'shape' attribute.
    @param iterpose: If True an iterative superposition is used, which
    modifies each coordinate set in place (the very same object is stored in
    the result). If False all frames are superimposed with the first frame
    and the superposed coordinates returned by the calculator are stored.
    @param num_threads: Number of OpenMP threads used by the calculator
    (defaults to 4, the value that was previously hard-coded).

    @return: A list with the superimposed coordinate sets, in input order.
    """
    all_superimposed_coordsets = []
    for coordsets in all_coordsets:
        calculator = RMSDCalculator(calculatorType = "QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets = coordsets)
        calculator.setNumberOfOpenMPThreads(num_threads)

        # The messages are printed through a single formatted string so that
        # the output is the same under Python 2 and Python 3. The double
        # space after "shape" reproduces what the original comma-separated
        # print statement emitted.
        if iterpose:
            print("\t- Using iterposition on trajectory (shape  %s )" % (coordsets.shape,))
            calculator.iterativeSuperposition()
            all_superimposed_coordsets.append(coordsets)
        else:
            print("\t- Superimposing with first trajectory frame (shape  %s )" % (coordsets.shape,))
            _, superimposed_coordsets = calculator.oneVsTheOthers(0, get_superposed_coordinates = True)
            all_superimposed_coordsets.append(superimposed_coordsets)
    return all_superimposed_coordsets
def superimpose_coordinates(all_coordsets, iterpose=True, num_threads=4):
    """
    Superimposes each coordinate set (trajectory) of 'all_coordsets'
    independently.

    @param all_coordsets: Iterable of coordinate sets; each one must expose a
    'shape' attribute.
    @param iterpose: True to do an iterative superposition (the coordinate
    set is modified in place and that same object is stored in the result),
    False to superimpose every frame with the first one (the superposed
    coordinates returned by the calculator are stored).
    @param num_threads: Number of OpenMP threads the calculator will use
    (defaults to 4, the value that was previously hard-coded).

    @return: List of superimposed coordinate sets, in the input order.
    """
    all_superimposed_coordsets = []
    for coordsets in all_coordsets:
        calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets=coordsets)
        calculator.setNumberOfOpenMPThreads(num_threads)

        # A single formatted string keeps the output identical in Python 2
        # and Python 3; the two spaces after "shape" match the output of the
        # original comma-separated print statement.
        if iterpose:
            print("\t- Using iterposition on trajectory (shape  %s )" % (coordsets.shape,))
            calculator.iterativeSuperposition()
            all_superimposed_coordsets.append(coordsets)
        else:
            print("\t- Superimposing with first trajectory frame (shape  %s )" % (coordsets.shape,))
            _, superimposed_coordsets = calculator.oneVsTheOthers(
                0, get_superposed_coordinates=True)
            all_superimposed_coordsets.append(superimposed_coordsets)
    return all_superimposed_coordsets
def coords_rmsf(ca_coords):
    """
    Iteratively superposes the conformations in 'ca_coords' and calculates
    the RMSF of each atom with respect to the mean conformation.

    @param ca_coords: Array of conformations. Assumed to have the shape
    (conformations, atoms, 3) -- TODO confirm. It is handed to the calculator
    as fitting coordinates, so it may be modified in place.

    @return: Array with one RMSF value per atom.
    """
    calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=ca_coords)
    calculator.setNumberOfOpenMPThreads(4)

    # Other users of the calculator rely on the superposition being done in
    # place over the fitting coordinates. If nothing is returned, fall back
    # to the (already superposed) input instead of failing on None.
    new_ca_coords = calculator.iterativeSuperposition()
    if new_ca_coords is None:
        new_ca_coords = ca_coords

    # Calculate the actual rmsf: squared fluctuations around the mean
    # conformation, summed over conformations and coordinates, averaged over
    # the number of conformations.
    mean_conformation = new_ca_coords.mean(0)
    ssqf = ((new_ca_coords - mean_conformation) ** 2).sum(0)
    return (ssqf.sum(1) / new_ca_coords.shape[0]) ** 0.5
# Sequence selected by the 'ref' option
original_sequence = sequences[options.ref]

# Extract the coordinates we know
known_residues = get_seq_positions_with_known_residues(sequences)
# Extract the coordinates properly
known_coords = extract_coordinates_from_known_residues(known_residues, coords)

# Do an iterative superposition of that coordinates, but move all coordinates.
# Rows are flat (x, y, z, x, y, z, ...) so they are reshaped to
# (conformations, atoms, 3). Floor division keeps the number of atoms an
# integer regardless of the division semantics in use.
known_coords = numpy.reshape(
    known_coords, (known_coords.shape[0], known_coords.shape[1] // 3, 3))
coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] // 3, 3))
calculator = RMSDCalculator("QTRFIT_SERIAL_CALCULATOR", known_coords, coords)
calculator.iterativeSuperposition()

# Reshape iterposed coordinates (back to one flat row per conformation)
coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] * 3))

# Calculate known coordinates mean
known_mean_coords = calc_mean_of_known_atoms(sequences, coords)

# Change unknown coordinates by mean
change_unknown_by_mean(sequences, coords, known_mean_coords)
numpy.savetxt("coords_mean", coords, fmt="%.4f")

# Recalc mean for all values
recalcd_mean = numpy.mean(coords, axis=0)
numpy.savetxt("mean", recalcd_mean, fmt="%.4f")