Пример #1
0
    def evaluate(self, clustering):
        """
        Calculates the value of the PCA metric: for every cluster, the largest eigenvalue of its
        covariance matrix (the one corresponding to the axis of bigger variability) is obtained
        after an iterative superposition; the eigenvalues are summed and the sum is divided by
        the total number of elements of the clustering.

        @param clustering: The clustering we want to calculate the metric for. Its 'clusters' and
        'total_number_of_elements' attributes are used.

        @return: The value of the metric.
        """
        # NOTE(review): the previous description said the eigenvalues are "weighted by the cluster
        # size", but every eigenvalue is added unweighted below - confirm which one is intended.
        MAX_ELEMENTS = 1000
        pca_mean_val = 0.
        for c in clustering.clusters:
            element_indexes = c.all_elements

            # Performance hack: PCA can be very slow for big clusters (i.e. > 3k elements), so
            # clusters with more than MAX_ELEMENTS elements are replaced by a random sample. It
            # should increase variance, but allows the calculation to finish.
            # TODO: it should use the kmedoids compressor instead.
            if len(c.all_elements) > MAX_ELEMENTS:
                element_indexes = c.get_random_sample(MAX_ELEMENTS)
                # Parenthesised single-argument form: same output with the Python 2 print
                # statement, and valid syntax for Python 3.
                print("[PCA] Random sampling too big cluster to improve performance (%d elements -> %d elements)." % (
                    len(c.all_elements), MAX_ELEMENTS))

            # Pick the coordinates of this cluster (presumably the indexing returns a copy, so
            # the superposition does not touch the stored coordinates - TODO confirm).
            fitting_coordinates_of_this_cluster = self.fitting_coordinates[
                element_indexes]

            # Build the calculator only once; the coordinates used for the PCA are the
            # calculation coordinates when they exist, and the fitting ones otherwise.
            if self.calculation_coordinates is None:
                pca_coordinates = fitting_coordinates_of_this_cluster
                calculator = RMSDCalculator(
                    calculatorType="QTRFIT_SERIAL_CALCULATOR",
                    fittingCoordsets=fitting_coordinates_of_this_cluster)
            else:
                pca_coordinates = self.calculation_coordinates[element_indexes]
                calculator = RMSDCalculator(
                    calculatorType="QTRFIT_SERIAL_CALCULATOR",
                    fittingCoordsets=fitting_coordinates_of_this_cluster,
                    calculationCoordsets=pca_coordinates)

            # Make an iterative superposition (to get the minimum RMSD of all with respect to a
            # mean conformation)
            calculator.iterativeSuperposition()

            # Covariance matrix of the superposed coordinates, and then the eigenvalue we are
            # interested in
            covariance_matrix = PCAMetric.create_covariance_matrix(
                pca_coordinates)
            pca_mean_val += PCAMetric.calculate_biggest_eigenvalue(
                covariance_matrix)
        print("PCA finished")
        return pca_mean_val / clustering.total_number_of_elements
    def build(cls, trajectory_handler, matrix_creation_parameters):
        """
        Creates the CondensedMatrix holding the all vs all geometric center distances of the
        calculation ("body") selection, which will usually be a ligand.

        @param trajectory_handler: The handler containing selection strings, pdb info and coordsets.
        @param matrix_creation_parameters: The creation parameters (from the initial script). They
        are not read by this method.

        @return: The created distances matrix.
        """
        # Coordinates used to drive the superposition ...
        superposition_coordsets = trajectory_handler.getSelection(
            trajectory_handler.fitting_selection)

        # ... and coordinates of the body, which must be moved along with the fitting ones.
        body_coordsets = trajectory_handler.getSelection(
            trajectory_handler.calculation_selection)

        # The iterative superposition modifies all the coordinates.
        superposer = RMSDCalculator(
            calculatorType="QTRFIT_OMP_CALCULATOR",
            fittingCoordsets=superposition_coordsets,
            calculationCoordsets=body_coordsets)
        superposer.iterativeSuperposition()

        # From now on the working coordinates are the body coordinates (to be used later, for
        # instance, by clustering metrics).
        trajectory_handler.setWorkingCoordinates(
            trajectory_handler.calculation_selection)

        return CondensedMatrix(cls.calculate_geom_center(body_coordsets))
Пример #3
0
    def build(cls, data_handler, matrix_params):
        """
        "distance" method: works with the geometrical center of one body.

        All conformations are iteratively superposed using the fitting coordinates (the
        calculation coordinates are moved along with them) and the centers of the calculation
        coordinates are then obtained with Prody's 'calcCenter'.

        The expected script parameters are:

                "parameters":{
                    "fit_selection":  String,
                    "body_selection": String,
                }

                "fit_selection": The Prody selection string used to describe the atoms to be superposed.
                "body_selection": Another Prody selection string that describes the element that will be used
                to get the euclidean distances.

        @param data_handler: Object whose 'get_data()' result provides the fitting and calculation
        coordinates.
        @param matrix_params: The creation parameters. They are not read by this method.

        @return: The centers calculated by 'calcCenter' for the calculation coordinates.
        """
        # Coordinates used to superpose and coordinates of the body (moved along with the first)
        superposition_coordsets = data_handler.get_data().getFittingCoordinates()
        body_coordsets = data_handler.get_data().getCalculationCoordinates()

        # The iterative superposition modifies all the coordinates
        superposer = RMSDCalculator(
            calculatorType="QTRFIT_OMP_CALCULATOR",
            fittingCoordsets=superposition_coordsets,
            calculationCoordsets=body_coordsets)
        superposer.iterativeSuperposition()

        # Prody is only needed here, so it is imported as late as possible
        from prody.measure import calcCenter
        return calcCenter(body_coordsets)
Пример #4
0
    def build(cls, trajectory_handler, matrix_creation_parameters):
        """
        Generates the pairwise RMSD matrix of the conformations stored in the handler.

        @param trajectory_handler: The handler used to get the coordinates of a selection
        ('getSelection') and to set its working coordinates ('setWorkingCoordinates').
        @param matrix_creation_parameters: The creation parameters. "fit_selection" is mandatory;
        "calculator_type" (defaults to "QTRFIT_OMP_CALCULATOR"), "calc_selection" and "symmetries"
        are optional.

        @return: The created matrix (a CondensedMatrix holding the pairwise RMSD values).
        """
        fit_selection_string = matrix_creation_parameters["fit_selection"]
        fit_selection_coordsets = trajectory_handler.getSelection(
            fit_selection_string)
        trajectory_handler.setWorkingCoordinates(fit_selection_string)

        if "calculator_type" in matrix_creation_parameters:
            calculator_type = matrix_creation_parameters["calculator_type"]
        else:
            calculator_type = "QTRFIT_OMP_CALCULATOR"

        if "calc_selection" in matrix_creation_parameters:
            calc_selection_string = matrix_creation_parameters["calc_selection"]
        else:
            calc_selection_string = ""

        # The calculation selection is only applied when it was defined and it is different from
        # the fitting selection. The calculator is built exactly once, in the branch that applies.
        if calc_selection_string != "" and calc_selection_string != fit_selection_string:
            calc_selection_coordsets = trajectory_handler.getSelection(
                calc_selection_string)
            trajectory_handler.setWorkingCoordinates(calc_selection_string)

            symm_groups = []
            if "symmetries" in matrix_creation_parameters:
                # Then prepare it to handle calculation symmetries
                # Description of equivalences must have the same number of atoms
                symm_groups = cls.process_symm_groups(
                    matrix_creation_parameters, trajectory_handler,
                    calc_selection_coordsets)
                # Single formatted argument: same output with the Python 2 print statement,
                # and valid syntax for Python 3.
                print("%s %s" % ("Using symmetries", symm_groups))

            calculator = RMSDCalculator(
                calculatorType=calculator_type,
                fittingCoordsets=fit_selection_coordsets,
                calculationCoordsets=calc_selection_coordsets,
                calcSymmetryGroups=symm_groups)
        else:
            calculator = RMSDCalculator(
                calculatorType=calculator_type,
                fittingCoordsets=fit_selection_coordsets)

        rmsds = calculator.pairwiseRMSDMatrix()

        return CondensedMatrix(rmsds)
Пример #5
0
def superpose_and_calc_rmsf(ca_pdb_coordsets, cluster):
    """
    Iteratively superposes the conformations of a cluster and calculates its RMSF.

    @param ca_pdb_coordsets: The coordinate sets, indexed with the elements of the cluster.
    @param cluster: The cluster whose elements ('all_elements') are going to be used.

    @return: A list with the values produced by 'calc_rmsf_of_cluster' for the superposed
    coordinates of the cluster.
    """
    # Coordinates of the elements of this cluster (presumably a copy of the original ones, as
    # they are picked by index - TODO confirm)
    cluster_coordsets = ca_pdb_coordsets[cluster.all_elements]

    # Iterative superposition (to get the minimum RMSD of all with respect to a mean conformation)
    superposer = RMSDCalculator(
        calculatorType="QTRFIT_SERIAL_CALCULATOR",
        fittingCoordsets=cluster_coordsets)
    superposer.iterativeSuperposition()

    rmsf_values = calc_rmsf_of_cluster(cluster_coordsets, cluster)
    return list(rmsf_values)
Пример #6
0
def coords_rmsf(ca_coords):
    """
    Calculates the RMSF of a set of conformations after superposing them iteratively.

    @param ca_coords: The coordinate sets to be superposed (assumed to be indexed as
    conformation x atom x coordinate - TODO confirm).

    @return: An array with the square root of the summed squared fluctuations (sum along
    axis 1) divided by the number of conformations.
    """
    superposer = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=ca_coords)
    superposer.setNumberOfOpenMPThreads(4)

    # The value returned by the iterative superposition is used as the superposed coordinates
    superposed_coords = superposer.iterativeSuperposition()

    # Mean conformation, used as reference for the fluctuations
    average_conformation = superposed_coords.mean(0)

    # Accumulate the squared deviations of every conformation from the mean one
    squared_fluctuations = numpy.zeros(average_conformation.shape)
    for conformation in superposed_coords:
        deviation = conformation - average_conformation
        squared_fluctuations += deviation**2

    number_of_conformations = superposed_coords.shape[0]
    return (squared_fluctuations.sum(1) / number_of_conformations)**0.5
Пример #7
0
def superimpose_coordinates(all_coordsets, iterpose=True):
    """
    Superimposes the conformations of each one of the given coordinate sets.

    @param all_coordsets: An iterable of coordinate sets (each one must have a 'shape'
    attribute); every item is superimposed independently.
    @param iterpose: If True an iterative superposition is performed and the very same input
    object is stored in the result (it is expected to be modified in place - TODO confirm).
    If False, all conformations are superposed onto the first one and the coordinates returned
    by the calculator are stored.

    @return: A list with one superimposed coordinate set per input item.
    """
    all_superimposed_coordsets = []
    for coordsets in all_coordsets:
        calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets=coordsets)
        calculator.setNumberOfOpenMPThreads(4)

        # The messages are formatted into one single string so that the output is the same as
        # the one of the Python 2 multi-item print statement, with syntax valid for Python 3.
        if iterpose:
            print("%s %s %s" % ("\t- Using iterposition on trajectory (shape ", coordsets.shape, ")"))
            calculator.iterativeSuperposition()
            superimposed_coordsets = coordsets
        else:
            print("%s %s %s" % ("\t- Superimposing with first trajectory frame (shape ", coordsets.shape, ")"))
            _, superimposed_coordsets = calculator.oneVsTheOthers(
                0, get_superposed_coordinates=True)
        all_superimposed_coordsets.append(superimposed_coordsets)
    return all_superimposed_coordsets
def process_after_perturb_max_and_mean_disp(data):
    """
    Calculates the maximum and mean displacement produced by each perturbation.

    Every "before" conformation is paired with its "after" conformation, the pair is superposed
    (using the "before" conformation as reference) and the translation between both is stored.

    @param data: Dictionary with the keys "coords_before" and "coords_after". Each one holds a
    sequence of flattened coordinate sets (x, y, z values in a row), all with the same length.

    @return: A tuple with the maximum and the mean (along axis 1) of the norms of the
    translations. 'norm' is expected to return one value per atom - TODO confirm.
    """
    number_of_sets = len(data["coords_before"])
    num_coords = len(data["coords_before"][0])

    # Floor division: reshape needs integer dimensions, and '/' produces a float when true
    # division is in use (Python 3). For integers the result is the same under Python 2.
    coordsets_shape = (number_of_sets, num_coords // 3, 3)
    coordsets_before = numpy.reshape(numpy.array(data["coords_before"]),
                                     coordsets_shape)
    coordsets_after = numpy.reshape(numpy.array(data["coords_after"]),
                                    coordsets_shape)

    superimposed_translations = []
    for before, after in zip(coordsets_before, coordsets_after):
        coords = numpy.array([before, after])

        calculator = RMSDCalculator(calculatorType = "QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets = coords)
        # Superpose the "after" conformation onto the "before" one and keep the displacement
        _, rearranged_coords = calculator.oneVsTheOthers(0, get_superposed_coordinates = True)
        superimposed_translations.append(rearranged_coords[1]-rearranged_coords[0])

    translations = numpy.array(superimposed_translations)
    norms = numpy.array([norm(t) for t in translations])
    return numpy.max(norms, axis = 1), numpy.mean(norms, axis = 1)
            coords = [coord for coord in atom.coord]
            structure_coords.append(numpy.array(coords))
        coordinates.append(numpy.array(structure_coords))
    coordinates = numpy.array(coordinates, dtype=numpy.float64)

    # Computation with Biopython
    start_t = time.time()
    biopython_rmsd_values = []
    super_imposer = Bio.PDB.Superimposer()
    for i in range(0, len(structures) - 1):
        reference = structures[i]
        for j in range(i + 1, len(structures)):
            mobile = structures[j]
            super_imposer.set_atoms(list(reference.get_atoms()),
                                    list(mobile.get_atoms()))
            biopython_rmsd_values.append(super_imposer.rms)
    stop_t = time.time()
    print "Biopython's computation time:", stop_t - start_t

    # Computation with pyRMSD, Biopython uses KABSCH's
    start_t = time.time()
    pyrmsd_rmsd_values = RMSDCalculator(
        coordinates, "KABSCH_SERIAL_CALCULATOR").pairwiseRMSDMatrix()
    stop_t = time.time()
    print "pyRMSD's computation time:", stop_t - start_t

    # Comparison
    numpy.testing.assert_array_almost_equal(biopython_rmsd_values,
                                            pyrmsd_rmsd_values, 12)
    print "Done"
Пример #10
0
    def build(cls, data_handler, matrix_creation_parameters):
        """
        Generates the pairwise RMSD matrix of the structure stored in the data handler.

        @param data_handler: Object whose 'get_data()' result provides the fitting coordinates
        and, optionally, the calculation coordinates.
        @param matrix_creation_parameters: The creation parameters. "calculator_type" (defaults to
        "QTRFIT_OMP_CALCULATOR"), "calculator_options" and "symmetries" are read from it.

        @return: The created matrix (a CondensedMatrix holding the pairwise RMSD values).
        """
        calculator_type = matrix_creation_parameters.get_value(
            "calculator_type", default_value="QTRFIT_OMP_CALCULATOR")

        calculator_options = matrix_creation_parameters.get_value(
            "calculator_options",
            default_value=ProtocolParameters({
                "number_of_threads": 8,
                "blocks_per_grid": 8,
                "threads_per_block": 32
            }))
        # Wrapped again because the stored value may not be a ProtocolParameters instance
        # (presumably a plain dictionary coming from the script - TODO confirm).
        calculator_options = ProtocolParameters(calculator_options)

        structure = data_handler.get_data()
        fit_selection_coordsets = structure.getFittingCoordinates()
        calc_selection_coordsets = structure.getCalculationCoordinates()

        if calc_selection_coordsets is None:
            calculator = RMSDCalculator(
                calculatorType=calculator_type,
                fittingCoordsets=fit_selection_coordsets)
        else:
            symm_groups = []
            if "symmetries" in matrix_creation_parameters:
                # Then prepare it to handle calculation symmetries
                # Description of equivalences must have the same number of atoms
                symm_groups = cls.process_symm_groups(
                    matrix_creation_parameters, structure,
                    calc_selection_coordsets)
                # Single formatted argument: same output with the Python 2 print statement,
                # and valid syntax for Python 3.
                print("%s %s" % ("Using symmetries", symm_groups))

            calculator = RMSDCalculator(
                calculatorType=calculator_type,
                fittingCoordsets=fit_selection_coordsets,
                calculationCoordsets=calc_selection_coordsets,
                calcSymmetryGroups=symm_groups)

        # The tuning options are applied in a best-effort way: a KeyError raised while applying
        # them is deliberately ignored and the calculator keeps its own settings.
        try:
            calculator.setNumberOfOpenMPThreads(
                calculator_options.get_value("number_of_threads",
                                             default_value=8))
        except KeyError:
            pass

        try:
            calculator.setCUDAKernelThreadsPerBlock(
                calculator_options.get_value("threads_per_block",
                                             default_value=32),
                calculator_options.get_value("blocks_per_grid",
                                             default_value=8))
        except KeyError:
            pass

        rmsds = calculator.pairwiseRMSDMatrix()
        return CondensedMatrix(rmsds)
Пример #11
0
def print_matrix(input_coordsets, output):
    """
    Calculates the pairwise RMSD matrix of a set of conformations and renders it as an image.

    @param input_coordsets: The coordinate sets used to calculate the matrix.
    @param output: Path of the image to be written, without extension (".png" is appended).
    """
    rmsd_calculator = RMSDCalculator(calculatorType="QCP_OMP_CALCULATOR",
                                     fittingCoordsets=input_coordsets)
    rmsd_matrix = CondensedMatrix(rmsd_calculator.pairwiseRMSDMatrix())
    image_path = output + ".png"
    matrixToImage(rmsd_matrix, image_path)
Пример #12
0
        open("data/expect_script",
             "r").readlines())) % (TRAJECTORY_FILE, TRAJECTORY_FILE)
    open("data/expect_script_tmp", "w").write(expect_script_str)
    # Use 'expect' to spawn the program
    start_t = time.time()
    subprocess.call(["expect", "data/expect_script_tmp"])
    # Load the matrix
    g_rmsd_matrix = XPMConverter().convert(open("data/matrix.xpm", "r"))
    stop_t = time.time()
    print "g_rms's computation time:", stop_t - start_t
    os.system("rm data/matrix.xpm data/rmsd.xvg data/expect_script_tmp")

    # Computation with pyRMSD, g_rms uses KABSCH's
    start_t = time.time()
    coordinates = Reader().readThisFile(TRAJECTORY_FILE).read()
    RMSDCalculator(coordinates, "KABSCH_SERIAL_CALCULATOR").oneVsFollowing(
        0)  # This is to mimic g_rmsd pipeline
    pyrmsd_rmsd_values = RMSDCalculator(
        coordinates, "QTRFIT_SERIAL_CALCULATOR").pairwiseRMSDMatrix()
    stop_t = time.time()
    print "pyRMSD's computation time:", stop_t - start_t

    # Convert g_rmsd matrix to 'condensed'
    dim = len(g_rmsd_matrix)
    c_m_values = []
    for i in range(dim - 1):
        for j in range(i + 1, dim):
            c_m_values.append(g_rmsd_matrix[i][j])

    rmsd = numpy.sqrt((
        (numpy.array(c_m_values) - numpy.array(pyrmsd_rmsd_values))**2).sum() /
                      len(c_m_values))
Пример #13
0
    sequences = load_sequences(options.input + ".seq")
    original_coords = numpy.copy(coords[options.ref])
    original_sequence = sequences[options.ref]

    # Extract the coordinates we know
    known_residues = get_seq_positions_with_known_residues(sequences)

    # Extraer bien las coordenadas
    known_coords = extract_coordinates_from_known_residues(
        known_residues, coords)

    # Do an iterative superposition of that coordinates, but move all coordinates
    known_coords = numpy.reshape(
        known_coords, (known_coords.shape[0], known_coords.shape[1] / 3, 3))
    coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] / 3, 3))
    calculator = RMSDCalculator("QTRFIT_SERIAL_CALCULATOR", known_coords,
                                coords)
    calculator.iterativeSuperposition()

    # Reshape iterposed coordinates
    coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] * 3))

    # Calculate known coordinates mean
    known_mean_coords = calc_mean_of_known_atoms(sequences, coords)

    # Change unknown coordinates by mean
    change_unknown_by_mean(sequences, coords, known_mean_coords)
    numpy.savetxt("coords_mean", coords, fmt="%.4f")

    # Recalc mean for all values
    recalcd_mean = numpy.mean(coords, axis=0)
    numpy.savetxt("mean", recalcd_mean, fmt="%.4f")
Пример #14
0
        result = numpy.array(tmp_result)
        if options.skip_step:
            # skip first number
            result = result[:, 1:]

    if options.plot_type == "rmsd":
        v1, v2 = needs_two_files(options.input1, options.input2)
        v1, v2 = v1[:, 1:], v2[:, 1:]
        v1, v2 = can_have_different_numberof_rows(v1, v2)

        result = []
        for i in range(len(v1)):
            coordset1 = numpy.resize(v1[i], (len(v1[i]) / 3, 3))
            coordset2 = numpy.resize(v2[i], (len(v2[i]) / 3, 3))
            coordsets = numpy.array([coordset1, coordset2])
            calculator = RMSDCalculator("QCP_SERIAL_CALCULATOR", coordsets)
            result.append(calculator.pairwise(0, 1))

    elif options.plot_type == "normal":
        result = needs_one_file(options.input1)

        if options.skip_step:
            # skip first number
            result = result[:, 1:]

    if options.plot_type in ["ccdist", "absdiff", "diff", "normal"]:
        number_of_plots = options.to - options._from + 1
        subplot_shape = (number_of_plots, 1)
        plt.title('----')
        for i in range(number_of_plots):
            ax = plt.subplot2grid(subplot_shape, (i, 0))