Exemplo n.º 1
0
    def __coo_atom(self, comp, m_ele):
        '''
        Map every atom of element ``m_ele`` found in ``comp`` to the labels of
        its neighbouring atoms.

        A single-atom substructure query for ``m_ele`` is run against ``comp``;
        for each hit the labels of the matched atom's ``neighbours`` are
        gathered into a set. If two matched atoms share a label, the later hit
        overwrites the earlier one.

        :param comp: <ccdc.molecule.Molecule>molecule to search
        :param m_ele: <str>metal element symbol
        :return:<dict>{<str>metal label: <set>{<str> coordinated atoms label}}
        '''
        # Build a query that matches one atom of the requested element.
        metal_query = search.QuerySubstructure()
        metal_query.add_atom(QueryAtom(m_ele))
        searcher = search.SubstructureSearch()
        searcher.add_substructure(metal_query)

        neighbours_by_metal = {}
        for hit in searcher.search(comp):
            # The query holds a single atom, so the match has one entry.
            centre = hit.match_atoms()[0]
            neighbours_by_metal[centre.label] = {
                neighbour.label for neighbour in centre.neighbours
            }

        return neighbours_by_metal
Exemplo n.º 2
0
    def delete_anion(self, path_anion):
        '''
        Remove the anions defined by ``*.mol2`` files from every entry.

        Each ``*.mol2`` file in ``path_anion`` supplies one anion template (the
        first component of the first molecule in the file). For every entry of
        ``self.entry_reader`` that has a 3D structure, a copy of its molecule
        without metal atoms and without bond-less atoms is split into
        components; a component that a template matches with as many atoms as
        the component contains is removed from the entry's molecule. Entries
        without a 3D structure are dropped, and ``self.entry_reader`` is
        replaced by the list of processed entries.

        :param path_anion: directory holding the anion definition files
        :return: None
        :raises FileExistsError: if ``path_anion`` is not an existing directory
            (exception type kept unchanged so existing callers still work)
        '''
        if not os.path.isdir(path_anion):
            raise FileExistsError('do not find the path!')

        # One search object per anion template, built once and reused for
        # every component of every entry.
        anion_searches = []
        for f in glob.glob(os.path.join(path_anion, '*.mol2')):
            ani_search = search.SubstructureSearch()
            ani_search.add_substructure(
                search.MoleculeSubstructure(
                    io.MoleculeReader(f)[0].components[0]))
            anion_searches.append(ani_search)

        list_crystals_remove_anion = []
        p_bar = tqdm(self.entry_reader)
        # The description never changes, so set it once instead of on every
        # iteration.
        p_bar.set_description('Anions removing...')

        for entry in p_bar:
            if not entry.has_3d_structure:
                continue

            # Ensure labels are unique; they are used below to map atoms of
            # the working copy back onto the real molecule.
            mol = entry.molecule
            mol.normalise_labels()
            # Work on a copy with metals and bond-less atoms removed.
            clone = mol.copy()
            clone.remove_atoms(a for a in clone.atoms
                               if a.is_metal or not a.bonds)

            for c in clone.components:
                n_atoms = len(c.atoms)
                anion_atoms = None
                for ani_search in anion_searches:
                    for hit in ani_search.search(c):
                        hit_atoms = hit.match_atoms()
                        # Only a hit that covers the whole component counts.
                        if len(hit_atoms) == n_atoms:
                            anion_atoms = hit_atoms
                            break
                    if anion_atoms is not None:
                        break
                # Remove the component at most once: a further full-size hit
                # (another mapping of the same template, or a duplicate
                # template) would look up labels that are no longer in `mol`.
                if anion_atoms is not None:
                    mol.remove_atoms(
                        mol.atom(a.label) for a in anion_atoms)

            entry.crystal.molecule = self.__delete_isolated_atoms(mol)
            list_crystals_remove_anion.append(entry)

        self.entry_reader = list_crystals_remove_anion
Exemplo n.º 3
0
    def get_all_function_groups(path_mols, path_con):
        """Count how often each defined functional group occurs in every *.mol2 file.

        Every ``*.con`` file in ``path_con`` defines one functional group. Each
        ``*.mol2`` file in ``path_mols`` is read (first molecule only) and
        searched for every group.

        :param path_mols: directory containing the ``*.mol2`` files (expected
            to have had their solvent removed already)
        :param path_con: directory containing the ``*.con`` group definitions
        :return: tuple ``(dict_result, list_con_names)``; ``dict_result`` maps
            each ``*.mol2`` file name to a list of hit counts, one per group,
            and ``list_con_names`` lists the ``*.con`` file names in the same
            order as those counts
        """
        list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

        # Functional groups are defined by the *.con files. Names are derived
        # from the very paths that are loaded, so that names and counts cannot
        # drift apart when the directory holds other files.
        path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
        list_con_names = [os.path.basename(path) for path in path_conner_list]

        # One reusable search per functional group.
        list_group_searches = []
        for path in path_conner_list:
            substructure_search = search.SubstructureSearch()
            substructure_search.add_substructure(
                search.ConnserSubstructure(path))
            list_group_searches.append(substructure_search)

        dict_result = dict()
        pbar = tqdm(list_path_mols)
        # Label the bar before iterating (setting it after the loop has no
        # visible effect). The text means "counting all specified groups".
        pbar.set_description('正在统计所有的指定基团:')
        for path_mol in pbar:
            mol_temp = io.MoleculeReader(path_mol)[0]  # first molecule of the file
            # One count per functional group, in the order of list_con_names.
            dict_result[os.path.basename(path_mol)] = [
                len(group_search.search(mol_temp))
                for group_search in list_group_searches
            ]

        return dict_result, list_con_names
Exemplo n.º 4
0
    def get_neighbor_function_groups(path_mols, path_con, query_atom):
        """Count, per *.mol2 file, the functional groups bonded to ``query_atom`` atoms.

        Every ``*.con`` file in ``path_con`` defines one functional group. For
        each ``*.mol2`` file in ``path_mols`` (first molecule only) and each
        component that contains at least one ``query_atom`` atom, a group hit is
        counted when at least one of its matched atoms is a neighbour of such
        an atom. Hits whose matched-atom list has the same string form are
        counted once per component.

        :param path_mols: directory containing the ``*.mol2`` files (expected
            to have had their solvent removed already)
        :param path_con: directory containing the ``*.con`` group definitions
        :param query_atom: value passed to ``QueryAtom`` to select the metal atoms
        :return: tuple ``(dict_result, list_con_names)``; ``dict_result`` maps
            each ``*.mol2`` file name to a list of counts, one per group, and
            ``list_con_names`` lists the ``*.con`` file names in the same order
            as those counts
        """
        list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

        # Functional groups are defined by the *.con files. Names are derived
        # from the very paths that are loaded, so that names and counts cannot
        # drift apart when the directory holds other files.
        path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
        list_con_names = [os.path.basename(path) for path in path_conner_list]

        # One reusable search per functional group.
        list_group_searches = []
        for path in path_conner_list:
            group_search = search.SubstructureSearch()
            group_search.add_substructure(search.ConnserSubstructure(path))
            list_group_searches.append(group_search)

        # Single-atom query used to locate the metal atoms.
        metal_query = search.QuerySubstructure()
        metal_query.add_atom(QueryAtom(query_atom))
        metal_search = search.SubstructureSearch()
        metal_search.add_substructure(metal_query)

        dict_result = dict()
        for path_mol in tqdm(list_path_mols):
            mol = io.MoleculeReader(path_mol)[0]
            # Normalise labels BEFORE splitting into components, so that the
            # components carry the normalised labels; the de-duplication below
            # compares str() of the matched atoms (presumably label-based).
            mol.normalise_labels()

            # The metal neighbourhoods do not depend on the functional group,
            # so compute them once per component and skip metal-free ones.
            list_metal_components = []
            for component in mol.components:
                list_neighbour_sets = [
                    set(metal_hit.match_atoms()[0].neighbours)
                    for metal_hit in metal_search.search(component)
                ]
                if list_neighbour_sets:
                    list_metal_components.append(
                        (component, list_neighbour_sets))

            list_temp = []  # one count per functional group for this file
            for group_search in list_group_searches:
                count_temp = 0
                for component, list_neighbour_sets in list_metal_components:
                    set_temp = set()  # string forms of the coordinating hits
                    for hit in group_search.search(component):
                        temp_hit_atoms = hit.match_atoms()
                        set_hit_atoms = set(temp_hit_atoms)
                        # Keep the hit if it shares an atom with the
                        # neighbours of any metal atom.
                        if any(neighbours & set_hit_atoms
                               for neighbours in list_neighbour_sets):
                            set_temp.add(str(temp_hit_atoms))
                    count_temp += len(set_temp)
                list_temp.append(count_temp)
            dict_result[os.path.basename(path_mol)] = list_temp

        return dict_result, list_con_names
Exemplo n.º 5
0
    def coordination_bond_length(self, element=None):
        '''
        Build one record per metal-neighbour bond for every entry of
        ``self.entry_reader`` and group the records by bond type.

        For each entry the molecule loses its hydrogens and its bond-less
        atoms, gets normalised labels, and is processed component by component.
        A record starts as the list that ``__measurement_bond_length`` returns
        under the key ``'<metal label>-<atom label>'`` (presumably the ideal
        and measured bond lengths -- confirm against that helper) and is then
        extended, in this order, with:

        * a substructure name from ``self.dict_substructure`` (names tried in
          reverse-sorted order) plus the value of ``__main_type_sub_connect``
          for the hit, added only while the record has fewer than four items;
          or ``'NaN', 'NaN'`` when the atom is in no accepted hit;
        * the number of this metal's coordinated atoms found in the same
          fragment once every ``element`` atom is removed from the component
          (0 when the atom is in none of the fragments);
        * the metal label, the coordinated atom label and the entry object.

        Finally ``'<metal label>-<atom label>'`` and then
        ``'<element>-<atom symbol>'`` are inserted at the front, so index 0 of
        a record is its bond type.

        :param element: <str>element symbol of the metal of interest
        :return: <dict>{bond type: <list> of bond records}
        '''
        p_bar = tqdm(self.entry_reader)
        bonds_by_type = dict()
        for entry in p_bar:
            mol = entry.molecule
            mol.remove_hydrogens()
            # Labels must be unique: they are the lookup keys used throughout.
            mol.normalise_labels()
            # Drop atoms that have no bonds at all.
            mol.remove_atoms([atom for atom in mol.atoms if not atom.bonds])

            # Bond records of the whole entry, keyed by '<metal>-<atom>'.
            entry_records = dict()
            for comp in mol.components:
                coordinated = self.__coo_atom(comp, element)
                records = self.__measurement_bond_length(comp, element)

                for metal, coo_labels in coordinated.items():
                    # Non-carbon labels already claimed by an accepted hit.
                    claimed_labels = set()
                    # Coordinated atoms that fall inside an accepted hit.
                    coo_in_sub = set()

                    # --- which substructure each coordinated atom belongs to
                    for sub_name in sorted(self.dict_substructure,
                                           reverse=True):
                        searcher = search.SubstructureSearch()
                        searcher.add_substructure(
                            self.dict_substructure[sub_name])
                        hits = searcher.search(comp)
                        if not hits:
                            continue
                        for hit in hits:
                            matched = hit.match_atoms()
                            hit_labels = {atom.label for atom in matched}
                            # Skip hits overlapping an already claimed atom.
                            if not claimed_labels.isdisjoint(hit_labels):
                                continue
                            claimed_labels.update(
                                atom.label for atom in matched
                                if atom.atomic_symbol != 'C')
                            shared = coo_labels & hit_labels
                            if not shared:
                                continue
                            coo_in_sub.update(shared)
                            for label in shared:
                                record = records[metal + '-' + label]
                                # Attach substructure info only once.
                                if len(record) < 4:
                                    record.extend([
                                        sub_name,
                                        self.__main_type_sub_connect(hit),
                                    ])

                    # Coordinated atoms outside every accepted substructure.
                    for label in coo_labels - coo_in_sub:
                        records[metal + '-' + label].extend(['NaN', 'NaN'])

                    # --- how the metal is connected to its ligands
                    stripped = comp.copy()
                    stripped.remove_atoms(atom for atom in stripped.atoms
                                          if atom.atomic_symbol == element)
                    coo_in_fragment = set()
                    for fragment in stripped.components:
                        fragment_labels = {
                            atom.label for atom in fragment.atoms
                        }
                        shared = fragment_labels & coo_labels
                        coo_in_fragment.update(shared)
                        shared_count = len(shared)
                        for label in shared:
                            records[metal + '-' + label].append(shared_count)
                    # Coordinated atoms that are in none of the fragments.
                    for label in coo_labels - coo_in_fragment:
                        records[metal + '-' + label].append(0)

                # Basic information: trailing identifiers, leading key/type.
                for metal, coo_labels in coordinated.items():
                    for label in coo_labels:
                        bond_key = metal + '-' + label
                        record = records[bond_key]
                        record.extend([metal, label, entry])
                        record.insert(0, bond_key)
                        record.insert(
                            0, element + '-' + mol.atom(label).atomic_symbol)

                entry_records.update(records)

            # Group this entry's records by bond type (first item).
            for record in entry_records.values():
                bonds_by_type.setdefault(record[0], []).append(record)

        return bonds_by_type