Example #1
    def write_superposed_pdbs(self, output_pdb_folder, alignments: dict = None):
        """
        Superposes PDBs according to the alignment and writes the transformed
        PDBs to files (view with PyMOL)

        Parameters
        ----------
        output_pdb_folder
            folder to write the superposed PDB files to (created if missing)
        alignments
            mapping of structure name to alignment positions; defaults to
            self.alignment
        """
        if alignments is None:
            alignments = self.alignment
        output_pdb_folder = Path(output_pdb_folder)
        if not output_pdb_folder.exists():
            output_pdb_folder.mkdir()
        reference_name = self.structures[0].name
        reference_pdb = pd.parsePDB(
            str(self.output_folder / f"cleaned_pdb/{self.structures[0].name}.pdb")
        )
        core_indices = np.array(
            [
                i
                for i in range(len(alignments[reference_name]))
                if -1 not in [alignments[n][i] for n in alignments]
            ]
        )
        aln_ref = alignments[reference_name]
        ref_coords_core = (
            reference_pdb[helper.get_alpha_indices(reference_pdb)]
            .getCoords()
            .astype(np.float64)[np.array([aln_ref[c] for c in core_indices])]
        )
        ref_centroid = helper.nb_mean_axis_0(ref_coords_core)
        ref_coords_core -= ref_centroid
        transformation = pd.Transformation(np.eye(3), -ref_centroid)
        reference_pdb = pd.applyTransformation(transformation, reference_pdb)
        pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb)
        for i in range(1, len(self.structures)):
            name = self.structures[i].name
            pdb = pd.parsePDB(
                str(self.output_folder / f"cleaned_pdb/{self.structures[i].name}.pdb")
            )
            aln_name = alignments[name]
            common_coords_2 = (
                pdb[helper.get_alpha_indices(pdb)]
                .getCoords()
                .astype(np.float64)[np.array([aln_name[c] for c in core_indices])]
            )
            (
                rotation_matrix,
                translation_matrix,
            ) = superposition_functions.svd_superimpose(
                ref_coords_core, common_coords_2
            )
            transformation = pd.Transformation(rotation_matrix.T, translation_matrix)
            pdb = pd.applyTransformation(transformation, pdb)
            pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), pdb)
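superposition_functions.svd_superimpose is called above but not shown. Below is a minimal sketch of SVD-based (Kabsch) superposition, assuming the function returns a rotation matrix and translation vector that map the mobile coordinates onto the reference; the real function's row/column convention may differ (note the caller passes rotation_matrix.T to pd.Transformation).

import numpy as np

def svd_superimpose_sketch(reference, mobile):
    """Hypothetical stand-in for superposition_functions.svd_superimpose.

    Returns (rotation, translation) such that
    mobile @ rotation.T + translation approximates reference.
    """
    ref_centroid = reference.mean(axis=0)
    mob_centroid = mobile.mean(axis=0)
    ref_centered = reference - ref_centroid
    mob_centered = mobile - mob_centroid
    # 3x3 covariance of the centered coordinate sets
    covariance = mob_centered.T @ ref_centered
    u, _, vt = np.linalg.svd(covariance)
    # Correct a possible reflection so the result is a proper rotation
    d = np.sign(np.linalg.det(vt.T @ u.T))
    rotation = vt.T @ np.diag([1.0, 1.0, d]) @ u.T
    translation = ref_centroid - mob_centroid @ rotation.T
    return rotation, translation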
Example #2
    def write_superposed_pdbs_reference(self, output_pdb_folder, alignments):
        """
        Superposes PDBs according to reference structure and writes transformed PDBs to files
        (View with Pymol)

        Parameters
        ----------
        alignments
        output_pdb_folder
        """
        output_pdb_folder = Path(output_pdb_folder)
        output_pdb_folder.mkdir(exist_ok=True)
        reference_name = self.structures[self.reference_structure_index].name
        reference_pdb = pd.parsePDB(
            str(self.output_folder / f"cleaned_pdb/{reference_name}.pdb")
        )
        aln_ref = alignments[reference_name]
        reference_coords = (
            reference_pdb[helper.get_alpha_indices(reference_pdb)]
            .getCoords()
            .astype(np.float64)
        )
        pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb)
        for i in range(len(self.structures)):
            if i == self.reference_structure_index:
                continue
            name = self.structures[i].name
            pdb = pd.parsePDB(
                str(self.output_folder / f"cleaned_pdb/{name}.pdb")
            )
            aln_name = alignments[name]
            common_coords_1, common_coords_2 = get_common_coordinates(
                reference_coords,
                pdb[helper.get_alpha_indices(pdb)].getCoords().astype(np.float64),
                aln_ref,
                aln_name,
            )
            rotation_matrix, translation_matrix = superposition_functions.svd_superimpose(
                common_coords_1, common_coords_2
            )
            transformation = pd.Transformation(rotation_matrix.T, translation_matrix)
            pdb = pd.applyTransformation(transformation, pdb)
            pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), pdb)
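get_common_coordinates is not defined in this snippet. Based on how Example #1 treats alignments (per-structure lists of coordinate indices with -1 marking a gap), a plausible sketch is:

import numpy as np

def get_common_coordinates_sketch(coords_1, coords_2, aln_1, aln_2, gap=-1):
    """Hypothetical sketch of get_common_coordinates: keep only alignment
    columns where neither structure has a gap, and return the paired
    coordinates in matching order."""
    assert len(aln_1) == len(aln_2)
    pairs = [(i, j) for i, j in zip(aln_1, aln_2) if i != gap and j != gap]
    idx_1 = np.array([p[0] for p in pairs])
    idx_2 = np.array([p[1] for p in pairs])
    return coords_1[idx_1], coords_2[idx_2]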
Example #3
    def apply_transformation(self, pdb_file, ligand_resnum,
                             target_fragment_atom_serials,
                             transformation_matrix):
        """
        Apply transformation to the target ligand-protein complex.

        Also considering:
        * Only work with residues with CA within 8A of ligand
        * Write all transformed PDBs to a new working directory?

        :param transformation_matrix: 
        :param target_pdb: 
        :return: 
        """
        # Only work with residues within 12A of target ligand
        target_pdb = prody.parsePDB(pdb_file)
        target_shell = target_pdb.select(
            '(protein and within 12 of (serial {0}) and not resnum {1}) or (serial {0})'
            .format(' '.join(target_fragment_atom_serials), ligand_resnum))

        transformed_pdb = prody.applyTransformation(transformation_matrix,
                                                    target_shell)

        return transformed_pdb
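A hypothetical usage sketch; the file name, residue number, serial numbers, and transformation below are placeholders, and the serials are strings because they are joined directly into the selection text:

# `transformation` would come from a prior fragment alignment step
serials = ['2101', '2102', '2103']  # fragment atom serial numbers, as strings
shell = align.apply_transformation('1abc.pdb', 401, serials, transformation)
prody.writePDB('1abc_aligned_shell.pdb', shell)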
Example #4
        args = get_arguments()

        protein = parsePDB(args.pdb_structure)
        protein = protein.select('protein').copy()
        logger.info('%s loaded' % args.pdb_structure)

        if args.to_center:
            logger.info('Moving original structure to the center')
            moveAtoms(protein, to=np.zeros(3), ag=True)

        if args.random_rotation:
            logger.info('Rotating the structure randomly')
            random_rotation_matrix = get_affine(get_random_rotation_matrix())
            random_rotation = Transformation(random_rotation_matrix)
            applyTransformation(random_rotation, protein)

        ca_atoms = protein.select('protein and name CA')
        protein_anm = ANM('%s ca' % args.structure_name)
        protein_anm.buildHessian(ca_atoms)
        protein_anm.calcModes(n_modes=args.normal_modes)

        logger.info('Normal modes calculated')
        
        protein_anm_ext, protein_all = extendModel(protein_anm, ca_atoms, protein, norm=True)

        logger.info('Normal modes extended')
        if args.save_models:
            saveAtoms(protein, args.structure_name)
            saveModel(protein_anm, args.structure_name)
            saveModel(protein_anm_ext, args.structure_name)
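get_random_rotation_matrix and get_affine are referenced above but not shown. One plausible sketch, assuming prody.Transformation accepts a 4x4 affine matrix:

import numpy as np

def get_random_rotation_matrix_sketch():
    """Hypothetical stand-in: draw an approximately uniform random rotation
    via QR decomposition of a Gaussian matrix."""
    a = np.random.normal(size=(3, 3))
    q, r = np.linalg.qr(a)
    # Fix column signs, then reflect if needed so det(q) == +1
    q = q @ np.diag(np.sign(np.diag(r)))
    if np.linalg.det(q) < 0:
        q[:, 0] *= -1
    return q

def get_affine_sketch(rotation):
    """Hypothetical stand-in for get_affine: embed a 3x3 rotation in a
    4x4 affine matrix with zero translation."""
    affine = np.eye(4)
    affine[:3, :3] = rotation
    return affine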
Example #5
def score_interaction_and_dump(parsed, ifgresn, vdmresn, ifg_contact_atoms,
                               vdm_contact_atoms, method, targetresi, cutoff,
                               pdbix, pdbname):
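    """Score one iFG/vdM interaction: superpose lookup vdMs onto the query
    atoms, count nearest neighbors within the RMSD cutoff, greedily cluster
    the matches, and write low-RMSD interactamer PDBs (at most 30) to
    ./output_data/pdbfiles/.
    """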
    cutoff = float(cutoff)
    ifgtype, vdmtype, ifginfo, vdminfo = get_ifg_vdm(parsed, ifgresn, vdmresn,
                                                     ifg_contact_atoms,
                                                     vdm_contact_atoms, method)

    if ifgtype[1] != ['N', 'CA', 'C'] and ifgtype[1] != ['CA', 'C', 'O']:
        ifgresn = constants.AAname_rev[ifgtype[0]]
        vdmresn = constants.AAname_rev[vdmtype[0]]
        ifgatoms = ifgtype[1]
        vdmatoms = vdmtype[1]

        # filter for only vdmresn vdms of ifgresn with ifgatoms
        # and vdmatoms directly involved in interactions
        num_all_vdms, lookupdf = filter_contact(ifgresn, vdmresn, ifgatoms,
                                                vdmatoms)
        query = []
        for atom in ifgatoms:
            query.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    ifginfo[0], ifginfo[1], atom)).getCoords()[0])
        for atom in vdmatoms:
            query.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    vdminfo[0], vdminfo[1], atom)).getCoords()[0])

        query = np.array(query)
        lookup_path = (
            '/home/gpu/Sophia/combs/st_wd/Lookups/refinedvdms/coords_of_{}.pkl'
            .format(ifgtype[0]))
        with open(lookup_path, 'rb') as lookup_file:
            lookupcoords = pkl.load(lookup_file)

        ifglists = flip(ifgatoms, ifgresn)
        vdmlists = flip(vdmatoms, vdmresn)
        rmsds = []
        num_atoms = len(query)
        coords_ls = [
            item for item in lookupcoords if item[0] in lookupdf.index
        ]
        lookupatoms_to_clus = []
        counter = 0  # to keep count of how many pdbs are being output
        for item in coords_ls:
            if len(item) == 3:
                compare_rmsds = []
                ifg_vdm_ind = []
                for ifg_ind, ifgls in enumerate(ifglists):
                    for vdm_ind, vdmls in enumerate(vdmlists):
                        lookupatoms = get_order_of_atoms(
                            item, ifgresn, vdmresn, ifgls, vdmls)
                        moved, transf = pr.superpose(lookupatoms, query)
                        temp_rmsd = pr.calcRMSD(moved, query)
                        compare_rmsds.append(temp_rmsd)
                        ifg_vdm_ind.append([moved, temp_rmsd])
                # item[0] is df index
                rmsds.append([item[0], min(compare_rmsds)])
                # get index of which one had min rmsd
                for which_ind, each in enumerate(ifg_vdm_ind):
                    if each[1] == min(compare_rmsds):
                        lookupatoms_to_clus.append(each[0])
                        ########################################################################
                        #                   output pdb if low rmsd
                        ########################################################################
                        if each[1] < cutoff and counter < 30 and which_ind == 0:
                            # this is to ensure rmsd is below cutoff when not flipped
                            # bc don't want to take care of that in prody to output pdb
                            row = lookupdf.loc[item[0]]
                            try:
                                db_dir = '/home/gpu/Sophia/STcombs/20171118/database/reduce/'
                                par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb')
                            except Exception:
                                # fall back to the alternate database location
                                db_dir = '/home/gpu/Sophia/combs/st_wd/20180207_db_molprobity_biolassem/'
                                par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb')

                            ifgchid, ifgresnum = row['chid_ifg'], row['resnum_ifg']
                            vdmchid, vdmresnum = row['chid_vdm'], row['resnum_vdm']
                            printout = copy.deepcopy(par)
                            printout = printout.select(
                                '(chain {} and resnum {}) or (chain {} and resnum {})'
                                .format(ifgchid, ifgresnum, vdmchid,
                                        vdmresnum))
                            printout.select('chain {} and resnum {}'.format(
                                ifgchid, ifgresnum)).setChids('Y')
                            printout.select('chain {} and resnum {}'.format(
                                vdmchid, vdmresnum)).setChids('X')
                            printout.select('all').setResnums(10)
                            printout_interactamer = []
                            integrin_interactamer = []
                            try:  # skip the ones that have segment ids; will probably need updating
                                # for the newly combed structures
                                for atom in ifgatoms:
                                    integrin_interactamer.append(
                                        parsed.select(
                                            'chain {} and resnum {} and name {}'
                                            .format(ifginfo[0], ifginfo[1],
                                                    atom)))
                                    printout_interactamer.append(
                                        printout.select(
                                            'chain Y and resnum 10 and name {}'
                                            .format(atom)))
                                for atom in vdmatoms:
                                    integrin_interactamer.append(
                                        parsed.select(
                                            'chain {} and resnum {} and name {}'
                                            .format(vdminfo[0], vdminfo[1],
                                                    atom)))
                                    printout_interactamer.append(
                                        printout.select(
                                            'chain X and resnum 10 and name {}'
                                            .format(atom)))

                                integrin_interactamer = sum(
                                    integrin_interactamer[1:],
                                    integrin_interactamer[0])
                                printout_interactamer = sum(
                                    printout_interactamer[1:],
                                    printout_interactamer[0])
                                try:
                                    assert len(integrin_interactamer) == len(
                                        printout_interactamer)

                                    interact_res = printout.select(
                                        '(chain X and resnum 10) or (chain Y and resnum 10)'
                                    )
                                    interactamer_transf = pr.applyTransformation(
                                        transf, printout_interactamer)
                                    outdir = './output_data/pdbfiles/'

                                    threecode = constants.AAname[ifgresn]

                                    pr.writePDB(
                                        outdir +
                                        '{}_{}_{}_{}{}_{}{}_{}_{}'.format(
                                            pdbix, pdbname, targetresi,
                                            ifginfo[1], ifgresn, vdminfo[1],
                                            vdmresn, cutoff, row.name),
                                        interactamer_transf)
                                    counter += 1
                                except Exception:
                                    # superposition or PDB writing failed; skip this vdM
                                    pass
                            except Exception:
                                traceback.print_exc()

            else:
                rmsds.append([int(item[0]), 100000])

        # count how many nearest neighbors the query interaction has
        num_nn, norm_metrics = get_NN(lookupatoms_to_clus, num_atoms, rmsds,
                                      query, cutoff, num_all_vdms)
        print('num NN')
        print(num_nn)

        exp_list = norm_metrics[-1]
        print('======= FOR NEAREST NEIGHBORS ==========')
        print('avg with single')
        print(exp_list[0])
        print('avg without single')
        print(exp_list[1])
        print('median with single')
        print(exp_list[2])
        print('median without single')
        print(exp_list[3])

        # do greedy clustering
        D = make_pairwise_rmsd_mat(
            np.array(lookupatoms_to_clus).astype('float32'))
        D = make_square(D)
        adj_mat = make_adj_mat(D, 0.5)
        mems, centroids = greedy(adj_mat)
        print('======= FOR GREEDY CLUS ==========')
        print('avg with singletons')
        print(np.mean([len(x) for x in mems]))
        print('avg without singletons')
        print(np.mean([len(x) for x in mems if len(x) > 1]))
        print('median with singletons')
        print(np.median([len(x) for x in mems]))
        print('median without singletons')
        print(np.median([len(x) for x in mems if len(x) > 1]))


        return (ifginfo[0], ifginfo[1], ifgresn, vdminfo[0], vdminfo[1],
                vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics)
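make_adj_mat and greedy are not shown in this snippet. A minimal sketch of the greedy clustering step, assuming a square pairwise-RMSD matrix and the 0.5 A cutoff used above:

import numpy as np

def make_adj_mat_sketch(dist_mat, cutoff):
    """Hypothetical make_adj_mat: two entries are adjacent when their
    pairwise RMSD is within the cutoff."""
    return dist_mat <= cutoff

def greedy_sketch(adj_mat):
    """Hypothetical greedy clustering: repeatedly pick the point with the
    most unassigned neighbors as a centroid and claim those neighbors."""
    adj = np.asarray(adj_mat, dtype=bool).copy()
    np.fill_diagonal(adj, True)
    unassigned = np.ones(len(adj), dtype=bool)
    members, centroids = [], []
    while unassigned.any():
        # neighbor counts restricted to still-unassigned points
        counts = (adj & unassigned).sum(axis=1)
        counts[~unassigned] = -1
        center = int(np.argmax(counts))
        cluster = np.where(adj[center] & unassigned)[0]
        members.append(cluster.tolist())
        centroids.append(center)
        unassigned[cluster] = False
    return members, centroids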
Example #6
    def alignment_monstrosity(self,
                              rmsd_cutoff=0.5,
                              use_local_pdb_database=False,
                              verify_substructure=True):
        """
        Consequences of not thinking ahead...
        For each fragment, align all fragment-containing ligands to fragment
        Generate PDBs with aligned coordinate systems
        :param args:
        :param rmsd_cutoff: fragment alignment RMSD cutoff, anything higher gets rejected
        :return:
        """

        # Create directory for processed PDBs
        rejected_dict = self.load_previously_rejected_pdbs()

        # Create directories...
        if not use_local_pdb_database:
            os.makedirs(self.pdb_bank_dir, exist_ok=True)
        os.makedirs(self.processed_PDBs_path, exist_ok=True)

        # If use_local_pdb_database=False, use PDB FTP to download all structures
        # Otherwise, all relevant structures should be found in the local PDB database
        if not use_local_pdb_database:
            prody.pathPDBFolder(folder=self.pdb_bank_dir)

            for current_fragment in self.pdb_ligand_json:

                # Only download PDBs that aren't already in PDB bank directory
                existing_PDBs = [
                    pdb[:4].lower() for pdb in os.listdir(self.pdb_bank_dir)
                ]
                PDBs_to_download = list(
                    set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                    set(existing_PDBs))

                if len(PDBs_to_download) > 0:
                    print(f'Downloading PDBs for {current_fragment}...\n')
                    prody.fetchPDBviaFTP(*PDBs_to_download)
                else:
                    print(
                        f'All relevant PDBs for {current_fragment} found in {self.pdb_bank_dir}!\n'
                    )

        # Fragment_1, Fragment_2, ...
        for current_fragment in self.pdb_ligand_json:

            # Create directory for processed PDBs
            processed_dir = os.path.join(self.processed_PDBs_path,
                                         current_fragment)
            processed_dir_exists = os.path.exists(processed_dir)
            os.makedirs(processed_dir, exist_ok=True)

            # Get list of already processed PDBs for current_fragment
            already_processed_pdbs = [
                file[:4].lower() for file in os.listdir(processed_dir)
            ]

            # Save ideal_ligand_containers for each fragment so things are only downloaded once
            ideal_ligand_dict = {'Ligands': dict(), 'Failed': list()}

            # Align_PDB class holds all information for the current fragment
            align = Align_PDB(self.user_defined_dir,
                              current_fragment,
                              self.sanitized_smiles_dict[current_fragment],
                              verify_substructure=verify_substructure)

            # Get PDB IDs that are viable for extracting protein-fragment contacts
            reject_pdbs = list(rejected_dict.get(current_fragment, list()))
            if not processed_dir_exists:
                reject_pdbs = list()

            viable_pdbs = list(
                set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                set(reject_pdbs) - set(already_processed_pdbs))

            # For each PDB containing a fragment-containing compound
            for pdbid in viable_pdbs:

                # Return path of PDB file to use for processing
                found_pdb, pdb_path = self.return_PDB_to_use_for_alignments(
                    pdbid, use_local_pdb_database=use_local_pdb_database)

                if not found_pdb:
                    print(f'Cannot find {pdbid}!')
                    continue

                # Proceed with processing if the current PDB passes all filters
                print("\n\nProcessing {}...".format(pdbid))

                # --- Check which ligands contain relevant fragments --- #

                relevant_ligands = self.return_substructure_containing_ligands(
                    pdb_path, self.pdb_ligand_json, current_fragment)

                # Set things up! Get ligands from Ligand Expo if haven't already tried and failed
                for ligand in relevant_ligands:

                    if (not ideal_ligand_dict['Ligands'].get(ligand)
                            and ligand not in ideal_ligand_dict['Failed']):
                        ideal_ligand_container = Ideal_Ligand_PDB_Container(ligand)

                        if ideal_ligand_container.success:
                            ideal_ligand_dict['Ligands'][ligand] = ideal_ligand_container
                        else:
                            ideal_ligand_dict['Failed'].append(ligand)

                # Create a temp list for ligands that will be pulled from the current PDB
                ligand_container_dict_for_current_pdb = {
                    lig: ideal_ligand_dict['Ligands'][lig]
                    for lig in ideal_ligand_dict['Ligands']
                    if lig in relevant_ligands
                }
                relevant_ligands_prody_dict = align.extract_ligand_records(
                    pdb_path, ligand_container_dict_for_current_pdb)

                # Reject if no ligands with all atoms represented can be found for the given PDB
                if len(relevant_ligands_prody_dict) < 1:
                    rejected_dict.setdefault(current_fragment, []).append(pdbid)
                    print('REJECTED - no target ligands were fully represented in the PDB')
                    continue

                # --- Perform alignment of PDB fragment substructure (mobile) onto defined fragment (target) --- #

                # ...if PDB has not been processed, rejected, or excluded by the user

                else:

                    # Iterate over ligands found to contain fragments as substructures
                    for ligand_resname, ligand_chain, ligand_resnum in relevant_ligands_prody_dict:

                        # Mapping of fragment atoms to target ligand atoms
                        target_ligand_ideal_smiles = ligand_container_dict_for_current_pdb[
                            ligand_resname].smiles

                        # todo: catch ligands with missing SMILES strings earlier...
                        if target_ligand_ideal_smiles is None:
                            continue

                        target_ligand_pdb_string = io.StringIO()
                        target_ligand_prody = relevant_ligands_prody_dict[(
                            ligand_resname, ligand_chain,
                            ligand_resnum)].select('not hydrogen')
                        prody.writePDBStream(target_ligand_pdb_string,
                                             target_ligand_prody)

                        mapping_successful, fragment_target_map = align.fragment_target_mapping(
                            target_ligand_ideal_smiles,
                            target_ligand_pdb_string)

                        if not mapping_successful:
                            rejected_dict.setdefault(current_fragment, []).append(pdbid)
                            print('REJECTED - failed atom mapping between target and reference fragment')
                            continue

                        print(
                            f'\n{len(fragment_target_map)} possible mapping(s) of fragment onto {pdbid}:{ligand_resname} found...\n'
                        )

                        # Iterate over possible mappings of fragment onto current ligand
                        rmsd_success = False
                        for count, mapping in enumerate(fragment_target_map):

                            # todo: refactor to use RDKit's atom.GetMonomerInfo() for atom selections...
                            # Determine translation vector and rotation matrix
                            target_coords_and_serials, frag_atom_coords, transformation_matrix = align.determine_rotation_and_translation(
                                mapping, target_ligand_prody)
                            trgt_atom_coords, target_fragment_atom_serials = target_coords_and_serials

                            # Apply the transformation to the protein-ligand complex if the RMSD is below the cutoff
                            # Use information from the PubChem fragment SMILES to determine correct mappings
                            # Actually, map the fragment onto the source ligand and use valence information to determine correct mappings
                            rmsd = prody.calcRMSD(
                                frag_atom_coords,
                                prody.applyTransformation(
                                    transformation_matrix, trgt_atom_coords))
                            print('RMSD of target onto reference fragment:\t{}'.format(rmsd))

                            if rmsd < rmsd_cutoff:
                                transformed_pdb = align.apply_transformation(
                                    pdb_path, ligand_resnum,
                                    target_fragment_atom_serials,
                                    transformation_matrix)

                                # Skip this mapping if nothing but the ligand remains after the selection
                                if transformed_pdb.select(
                                        f'not (resname {ligand_resname})'
                                ) is None:
                                    continue

                                transformed_pdb_name = f'{pdbid}_{ligand_resname}_{ligand_chain}_{ligand_resnum}-{count}.pdb'
                                prody.writePDB(
                                    os.path.join(processed_dir,
                                                 transformed_pdb_name),
                                    transformed_pdb)
                                rmsd_success = True

                            else:
                                print('REJECTED - high RMSD upon alignment to reference fragment')

                        if not rmsd_success:
                            rejected_dict.setdefault(current_fragment, []).append(pdbid)

        # Remember rejected PDBs
        with open(self.rejected_dict_pickle, 'wb') as reject_pickle:
            pickle.dump(rejected_dict, reject_pickle)
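load_previously_rejected_pdbs, called at the top of alignment_monstrosity, is the counterpart of the pickle.dump above. A minimal sketch, assuming self.rejected_dict_pickle is the same path used for dumping:

import os
import pickle

def load_previously_rejected_pdbs_sketch(rejected_dict_pickle):
    """Hypothetical sketch: return the saved {fragment: [pdbid, ...]} dict,
    or an empty dict on the first run."""
    if os.path.exists(rejected_dict_pickle):
        with open(rejected_dict_pickle, 'rb') as reject_pickle:
            return pickle.load(reject_pickle)
    return dict()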