Пример #1
0
def get_residue_name(imol, res_spec):
    """Return the name of the residue described by res_spec in molecule imol.

    The chain id and residue number are taken from res_spec; the insertion
    code handed to coot.residue_name() is always the empty string.
    """
    chain_id = rsu.residue_spec_to_chain_id(res_spec)
    res_no = rsu.residue_spec_to_res_no(res_spec)
    blank_ins_code = ""
    return coot.residue_name(imol, chain_id, res_no, blank_ins_code)
Пример #2
0
    def find_the_sites(self, file_name_comp_id_list):
        """Read the given coordinate files and cluster their ligand atoms into sites.

        Each element of file_name_comp_id_list is indexed as
        [file_name, comp_id].  Every file is read into a new molecule, the
        residue specs for comp_id are looked up in it, and the positions of
        all atoms of those residues are collected.  The positions are fitted
        with a Gaussian mixture model, mergeable clusters are merged, and the
        per-atom cluster assignments are handed to coot.

        Returns False when fewer than 3 atom positions were collected (no
        clustering is attempted and self.sites is not assigned).  Otherwise
        self.sites is set to the value returned by
        coot.chemical_feature_clusters_accept_site_clusters_info_py() and the
        method returns None.

        The print calls take a single pre-formatted argument so that they
        produce the same text under Python 2 and Python 3.
        """

        # one [position, imol, residue_spec] entry per ligand atom
        coords_with_spec = []

        for fn_comp_id in file_name_comp_id_list:
            fn = fn_comp_id[0]
            comp_id = fn_comp_id[1]
            imol = coot.handle_read_draw_molecule_with_recentre(fn, 0)
            # what are the residue specs for the given comp_ids?
            residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
            print("%s %s" % (fn, residue_specs))

            for spec in residue_specs:
                chain_id = rsu.residue_spec_to_chain_id(spec)
                res_no = rsu.residue_spec_to_res_no(spec)
                # the insertion code of the spec is not used, a blank one is passed
                ins_code = ""

                res_info = coot.residue_info_py(imol, chain_id, res_no, ins_code)

                for atom in res_info:
                    coords_with_spec.append([rsu.residue_atom_to_position(atom), imol, spec])

        # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites

        if len(coords_with_spec) < 3:
            return False

        coords = [x[0] for x in coords_with_spec]
        positions_np = np.array(coords)
        n_components = self.optimize_n(positions_np, len(positions_np))
        print("optimize_n for sites:::::::::::: %s" % (n_components,))
        dpgmm = mixture.GMM(n_components, covariance_type="full", n_iter=40)
        dpgmm.fit(positions_np)

        cluster_assignments = dpgmm.predict(positions_np)
        means = dpgmm.means_
        weights = dpgmm.weights_

        print(cluster_assignments)
        print(means)
        print(weights)

        print("cluster_assignments %s" % (cluster_assignments,))

        merge_map = self.find_mergeable_clusters(means, weights)
        # which key (i.e. cluster index) has the most number of other clusters
        # that can be merged in?
        #
        # convert to a list of ints (not <type 'numpy.int64'>) (because, on decoding Python->C++ object
        # we do a PyInt_Check for the site_idx (and a <type 'numpy.int64'> fails that test)
        #
        new_cluster_assignments = [int(x) for x in self.merge_clusters(cluster_assignments, merge_map)]
        print("new cluster_assignments %s" % (new_cluster_assignments,))

        # pair every atom's site index with its [imol, residue_spec]
        specs = [x[1:] for x in coords_with_spec]
        # Materialise the pairs: on Python 3 zip() is a lazy iterator, and the
        # C++ side presumably needs a real list to decode (on Python 2 this is
        # just a copy of the list zip() already returned).
        cluster_assignments_with_specs = list(zip(new_cluster_assignments, specs))

        sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(cluster_assignments_with_specs)

        # Debug aid: put a star at every cluster mean in a generic object.
        # The object is created but not displayed here; use
        # coot.set_display_generic_object(o, 1) to show it when debugging.
        o = coot.new_generic_object_number("site clusters")
        for mean in means:
            cluster_star_obj(o, mean, 2, 2)

        self.sites = sites
Пример #3
0
def cfc_process_site(site_number, imol_ligand_specs, first_ligand_spec):
    """Cluster the waters and chemical features of one ligand site and pass the result to coot.

    site_number: index of the site.  The least-squares superposition and
        the recentring on the ligand are only done for site 0.
    imol_ligand_specs: list of items whose element [0] is a molecule number;
        the molecule of the first item is the reference that the molecules
        of the other items are fitted to.
    first_ligand_spec: residue spec of the ligand in the reference molecule.

    Returns None.  If coot.chemical_feature_clusters_py() raises TypeError,
    or does not return something indexable with at least two elements, a
    message is printed and the function returns without passing anything
    to coot.

    The print calls take a single pre-formatted argument so that they
    produce the same text under Python 2 and Python 3.
    """

    imol_first = imol_ligand_specs[0][0]  # others are lsq fitted to this

    env_residue_specs = coot.residues_near_residue_py(imol_first, first_ligand_spec, 6)

    # everything in the environment that is not a water
    protein_res_specs = [r for r in env_residue_specs if get_residue_name(imol_first, r) != "HOH"]

    # only lsq the first (0th) one - that one has the most ligands in the site
    #
    if site_number == 0:
        print("DEBUG:: protein_res_specs (for lsqing):")
        for spec in protein_res_specs:
            print("    %s %s" % (spec, get_residue_name(imol_first, spec)))

        # match every environment residue onto the residue with the same
        # chain id and residue number in the other molecule
        for res_spec in protein_res_specs:
            chain_id = rsu.residue_spec_to_chain_id(res_spec)
            res_no = rsu.residue_spec_to_res_no(res_spec)
            coot.add_lsq_match(res_no, res_no, chain_id, res_no, res_no, chain_id, 1)

        for imol_and_spec in imol_ligand_specs[1:]:  # lsq fit others to the first in the list
            coot.apply_lsq_matches_py(imol_first, imol_and_spec[0])

        ligand_centre = coot.residue_centre_py(
            imol_first,
            rsu.residue_spec_to_chain_id(first_ligand_spec),
            rsu.residue_spec_to_res_no(first_ligand_spec),
            "",
        )
        coot.set_go_to_atom_molecule(imol_first)
        coot.set_rotation_centre(*ligand_centre)

    # we have a large radius for the water selection
    radius = 10  # water must be within radius of its own ligand
    radius_2 = 5  # water must be within radius_2 of any ligand atom (not just its own)

    try:
        combo_list = coot.chemical_feature_clusters_py(env_residue_specs, imol_ligand_specs, radius, radius_2)
    except TypeError as e:
        # nothing below can work without the combo list, so stop here
        # instead of failing later with an unrelated IndexError
        print(e)
        return

    try:
        water_position_list = combo_list[0]
        chemical_feature_list = combo_list[1]
    except (TypeError, IndexError):
        print("WARNING:: cfc_process_site: no usable result from chemical_feature_clusters_py for site %s"
              % (site_number,))
        return

    # ----------- handle waters -----------

    water_positions = [wat[2] for wat in water_position_list]

    # The real water positions go first, so that cluster_assignments[i]
    # below still lines up with water_position_list[i].
    w_positions_list = list(water_positions)

    # Then, for every water, six extra points displaced by +/- delta along
    # z, y and x (presumably to give each water some spread for the mixture
    # fit - TODO confirm).
    delta = 0.1
    for pos in water_positions:
        x, y, z = pos[0], pos[1], pos[2]
        w_positions_list.extend([
            [x, y, z + delta],
            [x, y, z - delta],
            [x, y + delta, z],
            [x, y - delta, z],
            [x + delta, y, z],
            [x - delta, y, z],
        ])

    w_positions_np = np.array(w_positions_list)

    # the number of clusters is highly related to the dist_cutoff (the
    # distance of an accepted water atom to any any atom in any of the
    # ligands = currently 4.2)
    #
    gmm, cluster_assignments = cluster_and_display_waters(site_number, w_positions_np)

    means = gmm.means_
    cvs = gmm._get_covars()
    weights = gmm.weights_

    print("water means:")
    for mean in means:
        print("    %s" % (mean,))

    # each water has been assigned a cluster, that is the cluster_assignments
    #
    # need to convert the array cluster_assignments to a list of items:
    #   [imol water_residue_spec cluster_number]
    #
    water_cluster_info_for_input = [
        [water_pos[0], water_pos[1], cluster_assignments[i]]
        for i, water_pos in enumerate(water_position_list)
    ]

    # cluster_info is a list of
    #  list of water cluster info
    #      list of [mean, weight, length]  where length is the eigenvalue v[0],
    #              (same as v[1], v[2] - all the same for spherical model)
    #      list of cluster predictions for the input positions
    #
    # list() so that a real list (not a Python 3 zip iterator) is handed on
    ci = list(zip([[l[0], l[1], l[2]] for l in means], weights, [cv[0][0] for cv in cvs]))
    water_cluster_info = [ci, water_cluster_info_for_input]

    coot.set_display_generic_objects_as_solid(1)

    # ----------- handle chemical features -----------

    # group the chemical features by their type (item[0]); features of any
    # other type are ignored
    feature_types = ("Donor", "Acceptor", "Aromatic", "Hydrophobe", "LumpedHydrophobe")
    chemical_features_dict = {}
    for item in chemical_feature_list:
        for feature_type in feature_types:
            if item[0] == feature_type:
                chemical_features_dict.setdefault(feature_type, []).append(item[1:])

    # list of [type, features-annotated-by-cluster-number, cluster_means]
    chemical_feature_clusters_info = [
        cluster_and_display_chemical_features(site_number, key, chemical_features_dict[key])
        for key in chemical_features_dict
    ]

    # give those results back to c++ so that we can use them for display
    cluster_info = [water_cluster_info, chemical_feature_clusters_info]

    coot.chemical_feature_clusters_accept_info_py(site_number, protein_res_specs, imol_ligand_specs, cluster_info)
Пример #4
0
def get_residue_name(imol, res_spec):
    """Look up the residue name for res_spec in molecule imol.

    Only the chain id and the residue number of res_spec are used; an
    empty insertion code is always passed to coot.residue_name().
    """
    spec_chain_id = rsu.residue_spec_to_chain_id(res_spec)
    spec_res_no = rsu.residue_spec_to_res_no(res_spec)
    return coot.residue_name(imol, spec_chain_id, spec_res_no, "")
Пример #5
0
    def find_the_sites(self, file_name_comp_id_list):
        """Read the given coordinate files and cluster their ligand atoms into sites.

        Each element of file_name_comp_id_list is indexed as
        [file_name, comp_id].  Every file is read into a new molecule, the
        residue specs for comp_id are looked up in it, and the positions of
        all atoms of those residues are collected.  The positions are fitted
        with a Gaussian mixture model, mergeable clusters are merged, and the
        per-atom cluster assignments are handed to coot.

        Returns False when fewer than 3 atom positions were collected (no
        clustering is attempted and self.sites is not assigned).  Otherwise
        self.sites is set to the value returned by
        coot.chemical_feature_clusters_accept_site_clusters_info_py() and the
        method returns None.
        """

        # one [position, imol, residue_spec] entry per ligand atom
        coords_with_spec = []

        for fn_comp_id in file_name_comp_id_list:
            fn = fn_comp_id[0]
            comp_id = fn_comp_id[1]
            imol = coot.handle_read_draw_molecule_with_recentre(fn, 0)
            # what are the residue specs for the given comp_ids?
            residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
            print(fn, residue_specs)

            for spec in residue_specs:
                chain_id = rsu.residue_spec_to_chain_id(spec)
                res_no = rsu.residue_spec_to_res_no(spec)
                # the insertion code of the spec is not used, a blank one is passed
                ins_code = ''

                res_info = coot.residue_info_py(imol, chain_id, res_no,
                                                ins_code)

                for atom in res_info:
                    coords_with_spec.append(
                        [rsu.residue_atom_to_position(atom), imol, spec])

        # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites

        if len(coords_with_spec) < 3:
            return False

        coords = [x[0] for x in coords_with_spec]
        positions_np = np.array(coords)
        n_components = self.optimize_n(positions_np, len(positions_np))
        print("optimize_n for sites::::::::::::", n_components)
        dpgmm = mixture.GMM(n_components,
                            covariance_type='full',
                            n_iter=40)
        dpgmm.fit(positions_np)

        cluster_assignments = dpgmm.predict(positions_np)
        means = dpgmm.means_
        weights = dpgmm.weights_

        print(cluster_assignments)
        print(means)
        print(weights)

        print("cluster_assignments", cluster_assignments)

        merge_map = self.find_mergeable_clusters(means, weights)
        # which key (i.e. cluster index) has the most number of other clusters
        # that can be merged in?
        #
        # convert to a list of ints (not <type 'numpy.int64'>) (because, on decoding Python->C++ object
        # we do a PyInt_Check for the site_idx (and a <type 'numpy.int64'> fails that test)
        #
        new_cluster_assignments = [
            int(x)
            for x in self.merge_clusters(cluster_assignments, merge_map)
        ]
        print("new cluster_assignments", new_cluster_assignments)

        # pair every atom's site index with its [imol, residue_spec]
        specs = [x[1:] for x in coords_with_spec]
        # zip() is a lazy iterator in Python 3; materialise it because the
        # C++ side presumably needs a real list to decode.
        cluster_assignments_with_specs = list(
            zip(new_cluster_assignments, specs))

        sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(
            cluster_assignments_with_specs)

        # Debug aid: put a star at every cluster mean in a generic object.
        # The object is created but not displayed here; use
        # coot.set_display_generic_object(o, 1) to show it when debugging.
        o = coot.new_generic_object_number("site clusters")
        for mean in means:
            cluster_star_obj(o, mean, 2, 2)

        self.sites = sites
Пример #6
0
def cfc_process_site(site_number, imol_ligand_specs, imol_first,
                     first_ligand_spec):
    """Cluster the waters and chemical features of one ligand site and pass the result to coot.

    site_number: index of the site.  The least-squares superposition and
        the recentring on the ligand are only done for site 0.
    imol_ligand_specs: list of (imol, residue_spec) pairs; the molecules of
        all items after the first are fitted onto imol_first.
    imol_first: molecule number of the reference molecule.
    first_ligand_spec: residue spec of the ligand in the reference molecule.

    Returns None.  If coot.chemical_feature_clusters_py() raises TypeError,
    or does not return something indexable with at least two elements, a
    message is printed and the function returns without passing anything
    to coot.
    """

    print("debug:: in cfc_process_site with imol_ligand_specs",
          imol_ligand_specs)
    print("debug:: in cfc_process_site with non-first imol_ligand_specs",
          imol_ligand_specs[1:])

    env_residue_specs = coot.residues_near_residue_py(imol_first,
                                                      first_ligand_spec, 6)
    # everything in the environment that is not a water
    protein_res_specs = [
        r for r in env_residue_specs
        if get_residue_name(imol_first, r) != "HOH"
    ]

    # only lsq the first (0th) one - that one has the most ligands in the site
    #
    if site_number == 0:
        # match every environment residue onto the residue with the same
        # chain id and residue number in the other molecule
        for res_spec in protein_res_specs:
            chain_id = rsu.residue_spec_to_chain_id(res_spec)
            res_no = rsu.residue_spec_to_res_no(res_spec)
            coot.add_lsq_match(res_no, res_no, chain_id, res_no, res_no,
                               chain_id, 1)

        # lsq fit others to the first in the list
        for imol_and_spec in imol_ligand_specs[1:]:
            print('============================ lsq-match ', imol_first,
                  imol_and_spec, imol_and_spec[0])
            imol, spec = imol_and_spec
            coot.apply_lsq_matches_py(imol_first, imol)
            make_ball_and_stick_by_spec(imol, spec)

        print("Here with first_ligand_spec:", first_ligand_spec)
        ligand_centre = coot.residue_centre_py(
            imol_first, rsu.residue_spec_to_chain_id(first_ligand_spec),
            rsu.residue_spec_to_res_no(first_ligand_spec), '')
        coot.set_go_to_atom_molecule(imol_first)
        coot.set_rotation_centre(*ligand_centre)

    # we have a large radius for the water selection
    radius = 10  # water must be within radius of its own ligand
    radius_2 = 5  # water must be within radius_2 of any ligand atom (not just its own)

    try:
        combo_list = coot.chemical_feature_clusters_py(env_residue_specs,
                                                       imol_ligand_specs,
                                                       radius, radius_2)
    except TypeError as e:
        # nothing below can work without the combo list, so stop here
        # instead of failing later with an unrelated IndexError
        print(e)
        return

    try:
        water_position_list = combo_list[0]
        chemical_feature_list = combo_list[1]
    except (TypeError, IndexError):
        print("WARNING:: cfc_process_site: no usable result from "
              "chemical_feature_clusters_py for site", site_number)
        return

    # ----------- handle waters -----------

    water_positions = [wat[2] for wat in water_position_list]

    # The real water positions go first, so that cluster_assignments[i]
    # below still lines up with water_position_list[i].
    w_positions_list = list(water_positions)

    # Then, for every water, six extra points displaced by +/- delta along
    # z, y and x (presumably to give each water some spread for the mixture
    # fit - TODO confirm).
    delta = 0.1
    for pos in water_positions:
        x, y, z = pos[0], pos[1], pos[2]
        w_positions_list.extend([
            [x, y, z + delta],
            [x, y, z - delta],
            [x, y + delta, z],
            [x, y - delta, z],
            [x + delta, y, z],
            [x - delta, y, z],
        ])

    w_positions_np = np.array(w_positions_list)

    # the number of clusters is highly related to the dist_cutoff (the
    # distance of an accepted water atom to any any atom in any of the
    # ligands = currently 4.2)
    #
    gmm, cluster_assignments = cluster_and_display_waters(
        site_number, w_positions_np)

    means = gmm.means_
    cvs = gmm._get_covars()
    weights = gmm.weights_

    print("water means:")
    for mean in means:
        print("   ", mean)

    # each water has been assigned a cluster, that is the cluster_assignments
    #
    # need to convert the array cluster_assignments to a list of items:
    #   [imol water_residue_spec cluster_number]
    #
    water_cluster_info_for_input = [
        [water_pos[0], water_pos[1], cluster_assignments[i]]
        for i, water_pos in enumerate(water_position_list)
    ]

    # cluster_info is a list of
    #  list of water cluster info
    #      list of [mean, weight, length]  where length is the eigenvalue v[0],
    #              (same as v[1], v[2] - all the same for spherical model)
    #      list of cluster predictions for the input positions
    #
    ci = list(
        zip([[l[0], l[1], l[2]] for l in means], weights,
            [cv[0][0] for cv in cvs]))
    water_cluster_info = [ci, water_cluster_info_for_input]

    coot.set_display_generic_objects_as_solid(1)

    # ----------- handle chemical features -----------

    # group the chemical features by their type (item[0]); features of any
    # other type are ignored
    feature_types = ('Donor', 'Acceptor', 'Aromatic', 'Hydrophobe',
                     'LumpedHydrophobe')
    chemical_features_dict = {}
    for item in chemical_feature_list:
        for feature_type in feature_types:
            if item[0] == feature_type:
                chemical_features_dict.setdefault(feature_type,
                                                  []).append(item[1:])

    # list of [type, features-annotated-by-cluster-number, cluster_means]
    chemical_feature_clusters_info = [
        cluster_and_display_chemical_features(site_number, key,
                                              chemical_features_dict[key])
        for key in chemical_features_dict
    ]

    # give those results back to c++ so that we can use them for display
    cluster_info = [water_cluster_info, chemical_feature_clusters_info]

    coot.chemical_feature_clusters_accept_info_py(site_number,
                                                  protein_res_specs,
                                                  imol_ligand_specs,
                                                  cluster_info)