示例#1
0
    def from_qc_json(cls, qc_json) -> "ReadInput":
        """
        Build an instance from a QC JSON object.

        The element symbols, connectivity and geometry are read from ``qc_json``. The flat
        geometry is reshaped to one row of three values per symbol and scaled by ``BOHR_TO_ANGS``.

        NOTE: the atom list and topology graph assembled here are not forwarded to the
        constructor; only the coordinates are passed on.

        TODO: we need to be able to read mapped smiles for this to work with stereochem and aromaticity.
        """
        graph = nx.Graph()
        atom_list = []

        for index, symbol in enumerate(qc_json.symbols):
            new_atom = Atom(
                atomic_number=Element().number(symbol),
                atom_index=index,
                atom_name=f"{symbol}{index}",
            )
            atom_list.append(new_atom)
            graph.add_node(index)

        # Only the first two entries of each connectivity record are the bonded atom indices.
        for bond in qc_json.connectivity:
            graph.add_edge(*bond[:2])

        geometry = np.array(qc_json.geometry)
        coords = geometry.reshape((len(atom_list), 3)) * BOHR_TO_ANGS
        return cls(coords=coords, rdkit_mol=None, name=None)
示例#2
0
    def _read_pdb(self):
        """
        Internal pdb reader. Only called when RDKit failed to read the pdb.
        Extracts the topology, atoms and coords of the molecule.

        Reads the file at ``self.mol_input`` and stores the results on the instance:
        ``self.topology`` (graph with one node per atom record and one edge per CONECT pair),
        ``self.coords`` (numpy array with an x, y, z row per atom record) and
        ``self.atoms`` (list of ``Atom`` objects, or ``None`` if no atom record was found).
        """
        coords = []
        self.topology = nx.Graph()
        atoms = []

        with open(self.mol_input) as pdb:
            for line in pdb:
                # Split once per line; the whitespace-separated fields are reused below.
                fields = line.split()

                if 'ATOM' in line or 'HETATM' in line:
                    atom_name = str(fields[2])

                    # The element symbol is read from the fixed columns 77-78, with digits removed.
                    atomic_symbol = re.sub('[0-9]+', '', line[76:78]).strip()

                    # If the element column is missing from the pdb, extract the atomic_symbol from
                    # the atom name (last character dropped, then any digits removed).
                    if not atomic_symbol:
                        atomic_symbol = re.sub('[0-9]+', '', atom_name[:-1])

                    atomic_number = Element().number(atomic_symbol)

                    # Atoms are indexed from zero in file order; the same index is the graph node.
                    atom_index = len(atoms)
                    atoms.append(Atom(atomic_number, atom_index, atom_name))
                    self.topology.add_node(atom_index)

                    coords.append([
                        float(line[30:38]),
                        float(line[38:46]),
                        float(line[46:54])
                    ])

                if 'CONECT' in line:
                    # Serial numbers in the file are one-based; shift them to zero-based indices.
                    atom_index = int(fields[1]) - 1
                    # Search the connectivity section and add all edges to the graph corresponding to the bonds.
                    for term in fields[2:]:
                        serial = int(term)
                        if serial != 0:
                            bonded_index = serial - 1
                            self.topology.add_edge(atom_index, bonded_index)
                            atoms[atom_index].add_bond(bonded_index)
                            atoms[bonded_index].add_bond(atom_index)

        # put the object back into the correct place
        self.coords = np.array(coords)
        self.atoms = atoms or None
示例#3
0
    def _read_pdb_protein(self):
        """
        Parse the protein pdb file found at ``self.mol_input``.

        The following attributes are set on the instance:
        ``atoms`` (one ``Atom`` per ATOM/HETATM record), ``coords`` (numpy array of x, y, z),
        ``topology`` (graph with a node per atom and an edge per CONECT pair),
        ``pdb_names`` (atom name field of every atom record),
        ``Residues`` (residue name field of every atom record) and
        ``residues`` (``Residues`` with consecutive repeats collapsed).

        :return: None, everything is stored on the instance.
        """
        with open(self.mol_input, 'r') as pdb_file:
            records = pdb_file.readlines()

        xyz = []
        protein_atoms = []
        self.topology = nx.Graph()
        self.Residues = []
        self.pdb_names = []

        for record in records:
            if not ('ATOM' in record or 'HETATM' in record):
                if 'CONECT' in record:
                    terms = record.split()
                    # serial numbers in the file are one-based, graph nodes are zero-based
                    for partner in terms[2:]:
                        if int(partner):
                            self.topology.add_edge(int(terms[1]) - 1, int(partner) - 1)
                continue

            tokens = record.split()

            # element symbol from columns 77-78 with any digits removed
            element = re.sub('[0-9]+', '', str(record[76:78])).strip()

            # fall back to the atom name when the element column is empty
            if not element:
                element = re.sub('[0-9]+', '', str(tokens[2]))

            # anything other than Cl/Br is reduced to its first character
            if element.lower() not in ('cl', 'br'):
                element = element[0]

            # the running atom index doubles as the graph node id
            index = len(protein_atoms)
            protein_atoms.append(Atom(Element().number(element), index, f'{element}{index}'))

            self.pdb_names.append(str(tokens[2]))

            # keep the residue of every atom so the file can be rewritten in the same order
            self.Residues.append(str(tokens[3]))

            self.topology.add_node(index)
            xyz.append([float(record[start:start + 8]) for start in (30, 38, 46)])

        self.atoms = protein_atoms
        self.coords = np.array(xyz)
        self.residues = [residue for residue, _ in groupby(self.Residues)]
示例#4
0
    def _read_qc_json(self):
        """
        Populate topology, atoms and coords from the QC JSON object held in ``self.mol_input``.

        ``self.topology`` gets one node per symbol and one edge per connectivity record,
        ``self.coords`` is the geometry reshaped to (number of atoms, 3) and scaled by
        ``constants.BOHR_TO_ANGS``, and ``self.atoms`` is the list of ``Atom`` objects
        (``None`` when there are no symbols).
        """
        self.topology = nx.Graph()
        qc_mol = self.mol_input
        built_atoms = []

        for index, symbol in enumerate(qc_mol.symbols):
            number = Element().number(symbol)
            built_atoms.append(
                Atom(atomic_number=number, atom_index=index, atom_name=f'{symbol}{index}')
            )
            self.topology.add_node(index)

        # only the first two entries of a connectivity record are used as the edge end points
        for bond in qc_mol.connectivity:
            self.topology.add_edge(*bond[:2])

        geometry = np.array(qc_mol.geometry)
        self.coords = geometry.reshape((len(built_atoms), 3)) * constants.BOHR_TO_ANGS
        self.atoms = built_atoms if built_atoms else None
示例#5
0
    def from_pdb(cls, file_name: str, name: Optional[str] = None):
        """
        Build an instance from a protein pdb file.

        Every ATOM/HETATM record yields an ``Atom`` (formal charge 0, not aromatic), its atom
        name, its residue name and its coordinates; every CONECT pair yields a ``Bond`` of
        order 1. Consecutive repeats of the residue name are collapsed before being passed on.

        :param file_name: path of the pdb file to read.
        :param name: name given to the instance; the file stem is used when it is None.
        :return: the instance created by ``cls`` from the parsed data.
        """
        with open(file_name, "r") as pdb_file:
            records = pdb_file.readlines()

        xyz = []
        atom_list = []
        bond_list = []
        residue_per_atom = []
        atom_names = []

        for record in records:
            if not ("ATOM" in record or "HETATM" in record):
                if "CONECT" in record:
                    terms = record.split()
                    # serial numbers in the file are one-based, atom indices are zero-based
                    for partner in terms[2:]:
                        if int(partner):
                            bond_list.append(
                                Bond(
                                    atom1_index=int(terms[1]) - 1,
                                    atom2_index=int(partner) - 1,
                                    bond_order=1,
                                    aromatic=False,
                                )
                            )
                continue

            tokens = record.split()

            # element symbol from columns 77-78 with any digits removed
            element = re.sub("[0-9]+", "", str(record[76:78])).strip()

            # fall back to the atom name when the element column is empty
            if not element:
                element = re.sub("[0-9]+", "", str(tokens[2]))

            # anything other than Cl/Br is reduced to its first character
            if element.lower() not in ("cl", "br"):
                element = element[0]

            index = len(atom_list)
            # TODO should we use a protein pdb package for this?
            atom_list.append(
                Atom(
                    atomic_number=Element().number(element),
                    atom_index=index,
                    atom_name=f"{element}{index}",
                    formal_charge=0,
                    aromatic=False,
                )
            )

            atom_names.append(str(tokens[2]))

            # keep the residue of every atom so the file can be rewritten in the same order
            residue_per_atom.append(str(tokens[3]))

            xyz.append([float(record[start:start + 8]) for start in (30, 38, 46)])

        coords = np.array(xyz)
        residues = [residue for residue, _ in groupby(residue_per_atom)]
        name = Path(file_name).stem if name is None else name
        return cls(
            atoms=atom_list,
            bonds=bond_list,
            coords=coords,
            pdb_names=atom_names,
            residues=residues,
            name=name,
        )
示例#6
0
    def _read_mol2(self):
        """
        Internal mol2 reader. Only called when RDKit failed to read the mol2.
        Extracts the topology, atoms and coords of the molecule.

        Reads the file at ``self.mol_input`` and stores the results on the instance:
        ``self.topology`` (graph with one node per atom line and one edge per bond line),
        ``self.coords`` (numpy array with an x, y, z row per atom) and
        ``self.atoms`` (list of ``Atom`` objects with ``atom_type`` set, or ``None`` if the
        file has no atom lines).

        Atom indices are zero-based and match both the graph node ids and the bond indices.
        """

        coords = []
        self.topology = nx.Graph()
        atoms = []

        with open(self.mol_input, 'r') as mol2:

            atom_flag = False
            bond_flag = False

            for line in mol2:
                # Any record type indicator starts a new section, so only the ATOM and BOND
                # sections are parsed and every other section switches parsing off.
                if '@<TRIPOS>' in line:
                    atom_flag = '@<TRIPOS>ATOM' in line
                    bond_flag = '@<TRIPOS>BOND' in line
                    continue

                fields = line.split()
                # Blank lines (e.g. trailing ones at the end of the file) carry no data.
                if not fields:
                    continue

                if atom_flag:
                    # Collect the atom names
                    atom_name = str(fields[1])

                    # The element is taken from the first two characters of the atom name,
                    # with digits removed and title-cased.
                    atomic_symbol = re.sub('[0-9]+', '', atom_name[:2]).strip().title()
                    atomic_number = Element().number(atomic_symbol)

                    coords.append([float(fields[2]), float(fields[3]), float(fields[4])])

                    # The index must be taken before the atom is stored so that it is zero-based,
                    # matching the graph node and the ``- 1`` shifted bond indices below.
                    atom_index = len(atoms)
                    self.topology.add_node(atom_index)

                    # Make the qube_atom; the atom type is stored with the "." removed.
                    qube_atom = Atom(atomic_number, atom_index, atom_name)
                    qube_atom.atom_type = fields[5].replace(".", "")

                    atoms.append(qube_atom)

                elif bond_flag:
                    # Add edges to the topology network; ids in the file are one-based.
                    atom_index, bonded_index = int(fields[1]) - 1, int(fields[2]) - 1
                    self.topology.add_edge(atom_index, bonded_index)
                    atoms[atom_index].add_bond(bonded_index)
                    atoms[bonded_index].add_bond(atom_index)

        # put the object back into the correct place
        self.coords = np.array(coords)
        self.atoms = atoms or None