def parse(filename, structure=False): """ Parses a mol2 file (or mol3) file Parameters ---------- filename : str or file-like Name of the file to parse or file-like object to parse from structure : bool, optional If True, the return value is a :class:`Structure` instance. If False, it is either a :class:`ResidueTemplate` or :class:`ResidueTemplateContainter` instance, depending on whether there is one or more than one residue defined in it. Default is False Returns ------- molecule : :class:`Structure`, :class:`ResidueTemplate`, or :class:`ResidueTemplateContainer` The molecule defined by this mol2 file Raises ------ Mol2Error If the file format is not recognized or non-numeric values are present where integers or floating point numbers are expected. Also raises Mol2Error if you try to parse a mol2 file that has multiple @<MOLECULE> entries with ``structure=True``. """ if isinstance(filename, string_types): f = genopen(filename, 'r') own_handle = True else: f = filename own_handle = False rescont = ResidueTemplateContainer() struct = Structure() restemp = ResidueTemplate() mol_info = [] multires_structure = False try: section = None last_residue = None headtail = 'head' molecule_number = 0 for line in f: if line.startswith('#'): continue if not line.strip() and section is None: continue if line.startswith('@<TRIPOS>'): section = line[9:].strip() if section == 'MOLECULE' and (restemp.atoms or rescont): if structure: raise Mol2Error('Cannot convert MOL2 with multiple ' '@<MOLECULE>s to a Structure') # Set the residue name from the MOL2 title if the # molecule had only 1 residue and it was given a name in # the title if not multires_structure and mol_info[0]: restemp.name = mol_info[0] multires_structure = False rescont.append(restemp) restemp = ResidueTemplate() struct = Structure() last_residue = None molecule_number += 1 mol_info = [] continue if section is None: raise Mol2Error('Bad mol2 file format') if section == 'MOLECULE': # Section formatted as follows: # mol_name # num_atoms [num_bonds [num_substr [num_feat [num_sets]]]] # mol_type # charge_type # [status_bits] # [mol_comment] # TODO: Do something with the name. if len(mol_info) == 0: mol_info.append(line.strip()) elif len(mol_info) == 1: mol_info.append([int(x) for x in line.split()]) elif len(mol_info) == 2: mol_info.append(line.strip()) elif len(mol_info) == 3: mol_info.append(line.strip()) # Ignore the rest continue if section == 'ATOM': # Section formatted as follows: # atom_id -- serial number of atom # atom_name -- name of the atom # x -- X-coordinate of the atom # y -- Y-coordinate of the atom # z -- Z-coordinate of the atom # atom_type -- type of the atom # subst_id -- Residue serial number # subst_name -- Residue name # charge -- partial atomic charge # status_bit -- ignored words = line.split() id = int(words[0]) name = words[1] x = float(words[2]) y = float(words[3]) z = float(words[4]) typ = words[5] try: resid = int(words[6]) except IndexError: resid = 0 try: resname = words[7] except IndexError: resname = 'UNK' if 'NO_CHARGES' not in mol_info: try: charge = float(words[8]) except IndexError: charge = 0 else: charge = 0 if last_residue is None: last_residue = (resid, resname) restemp.name = resname atom = Atom(name=name, type=typ, number=id, charge=charge) atom.xx, atom.xy, atom.xz = x, y, z struct.add_atom(atom, resname, resid) if last_residue != (resid, resname): rescont.append(restemp) restemp = ResidueTemplate() restemp.name = resname last_residue = (resid, resname) multires_structure = True restemp.add_atom(copy.copy(atom)) continue if section == 'BOND': # Section formatted as follows: # bond_id -- serial number of bond (ignored) # origin_atom_id -- serial number of first atom in bond # target_atom_id -- serial number of other atom in bond # bond_type -- string describing bond type (ignored) # status_bits -- ignored words = line.split() int(words[0]) # Bond serial number... redundant and ignored a1 = int(words[1]) a2 = int(words[2]) atom1 = struct.atoms.find_original_index(a1) atom2 = struct.atoms.find_original_index(a2) struct.bonds.append(Bond(atom1, atom2)) # Now add it to our residue container # See if it's a head/tail connection if atom1.residue is not atom2.residue: if atom1.residue.idx == len(rescont): res1 = restemp elif atom1.residue.idx < len(rescont): res1 = rescont[atom1.residue.idx] else: raise Mol2Error('Bad bonding pattern detected') if atom2.residue.idx == len(rescont): res2 = restemp elif atom1.residue.idx < len(rescont): res2 = rescont[atom2.residue.idx] else: raise Mol2Error('Bad bonding pattern detected') assert res1 is not res2, 'BAD identical residues' idx1 = atom1.idx - atom1.residue[0].idx idx2 = atom2.idx - atom2.residue[0].idx if atom1.residue.idx < atom2.residue.idx: res1.tail = res1[idx1] res2.head = res2[idx2] else: res1.head = res1[idx1] res2.tail = res2[idx2] elif not multires_structure: restemp.add_bond(a1-1, a2-1) else: # Same residue, add the bond offset = atom1.residue[0].idx if atom1.residue.idx == len(rescont): res = restemp else: res = rescont[atom1.residue.idx] res.add_bond(atom1.idx-offset, atom2.idx-offset) continue if section == 'CRYSIN': # Section formatted as follows: # a -- length of first unit cell vector # b -- length of second unit cell vector # c -- length of third unit cell vector # alpha -- angle b/w b and c # beta -- angle b/w a and c # gamma -- angle b/w a and b # space group -- number of space group (ignored) # space group setting -- ignored words = line.split() box = [float(x) for x in words[:6]] if len(box) != 6: raise ValueError('%d box dimensions found; needed 6' % len(box)) struct.box = copy.copy(box) rescont.box = copy.copy(box) continue if section == 'SUBSTRUCTURE': # Section formatted as follows: # subst_id -- residue number # subst_name -- residue name # root_atom -- first atom of residue # subst_type -- ignored (usually 'RESIDUE') # dict_type -- type of substructure (ignored) # chain -- chain ID of residue # sub_type -- type of the chain # inter_bonds -- # of inter-substructure bonds # status -- ignored # comment -- ignored words = line.split() if not words: continue id = int(words[0]) resname = words[1] try: chain = words[5] except IndexError: chain = '' # Set the chain ID for res in struct.residues: if res.number == id and res.name == resname: res.chain = chain continue # MOL3 sections if section == 'HEADTAIL': atname, residx = line.split() residx = int(residx) if residx in (0, 1) or residx - 1 == len(rescont): res = restemp elif residx - 1 < len(rescont): res = rescont[residx-1] else: raise Mol2Error('Residue out of range in head/tail') for atom in res: if atom.name == atname: if headtail == 'head': res.head = atom headtail = 'tail' else: res.tail = atom headtail = 'head' break else: if headtail == 'head': headtail = 'tail' else: headtail = 'head' continue if section == 'RESIDUECONNECT': words = line.split() residx = int(words[0]) if residx - 1 == len(rescont): res = restemp elif residx - 1 < len(rescont): res = rescont[residx-1] else: raise Mol2Error('Residue out of range in ' 'residueconnect') for a in words[3:]: if a == '0': continue for atom in res: if atom.name == a: atom.connections.append(atom) break else: raise Mol2Error('Residue connection atom %s not ' 'found in residue %d' % (a, residx)) if structure: return struct elif len(rescont) > 0: if not multires_structure and mol_info[0]: restemp.name = mol_info[0] rescont.append(restemp) return rescont else: return restemp except ValueError as e: raise Mol2Error('String conversion trouble: %s' % e) finally: if own_handle: f.close()
def write(struct, dest, mol3=False, split=False): """ Writes a mol2 file from a structure or residue template Parameters ---------- struct : :class:`Structure` or :class:`ResidueTemplate` or :class:`ResidueTemplateContainer` The input structure to write the mol2 file from dest : str or file-like obj Name of the file to write or open file handle to write to mol3 : bool, optional If True and ``struct`` is a ResidueTemplate or container, write HEAD/TAIL sections. Default is False split : bool, optional If True and ``struct`` is a ResidueTemplateContainer or a Structure with multiple residues, each residue is printed in a separate @<MOLECULE> section that appear sequentially in the output file """ own_handle = False if not hasattr(dest, 'write'): own_handle = True dest = genopen(dest, 'w') if split: # Write sequentially if it is a multi-residue container or Structure if isinstance(struct, ResidueTemplateContainer): for res in struct: Mol2File.write(res, dest, mol3) return elif isinstance(struct, Structure) and len(struct.residues) > 1: for res in ResidueTemplateContainer.from_structure(struct): Mol2File.write(res, dest, mol3) return try: if isinstance(struct, ResidueTemplateContainer): natom = sum([len(c) for c in struct]) # To find the number of bonds, we need to total number of bonds # + the number of bonds that would be formed by "stitching" # together residues via their head and tail bonds = [] charges = [] bases = [1 for res in struct] for i, res in enumerate(struct): if i < len(struct) - 1: bases[i+1] = bases[i] + len(res) for i, res in enumerate(struct): for bond in res.bonds: bonds.append((bond.atom1.idx+bases[i], bond.atom2.idx+bases[i])) if i < len(struct)-1 and (res.tail is not None and struct[i+1].head is not None): bonds.append((res.tail.idx+bases[i], struct[i+1].head.idx+bases[i+1])) charges.extend([a.charge for a in res]) residues = struct if not struct.name: name = struct[0].name else: name = struct.name else: natom = len(struct.atoms) bonds = [(b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds] if isinstance(struct, ResidueTemplate): residues = [struct] name = struct.name else: residues = struct.residues name = struct.residues[0].name charges = [a.charge for a in struct.atoms] dest.write('@<TRIPOS>MOLECULE\n') dest.write('%s' % name) dest.write('\n') dest.write('%d %d %d 0 1\n' % (natom, len(bonds), len(residues))) if len(residues) == 1: dest.write('SMALL\n') else: for residue in residues: if AminoAcidResidue.has(residue.name): dest.write('PROTEIN\n') break if (RNAResidue.has(residue.name) or DNAResidue.has(residue.name)): dest.write('NUCLEIC\n') break else: dest.write('BIOPOLYMER\n') if not any(charges): dest.write('NO_CHARGES\n') printchg = False else: dest.write('USER_CHARGES\n') printchg = True # Now do ATOM section dest.write('@<TRIPOS>ATOM\n') j = 1 for i, res in enumerate(residues): for atom in res: try: x = atom.xx except AttributeError: x = 0 try: y = atom.xy except AttributeError: y = 0 try: z = atom.xz except AttributeError: z = 0 dest.write('%8d %-8s %10.4f %10.4f %10.4f %-8s %6d %-8s' % ( j, atom.name, x, y, z, atom.type.strip() or atom.name, i+1, res.name)) if printchg: dest.write(' %10.6f\n' % atom.charge) else: dest.write('\n') j += 1 dest.write('@<TRIPOS>BOND\n') for i, bond in enumerate(bonds): dest.write('%8d %8d %8d 1\n' % (i+1, bond[0], bond[1])) dest.write('@<TRIPOS>SUBSTRUCTURE\n') first_atom = 0 for i, res in enumerate(residues): if not hasattr(res, 'chain') or not res.chain: chain = '****' else: chain = res.chain intresbonds = 0 if isinstance(res, ResidueTemplate): if i != len(residues)-1 and (res.tail is not None and residues[i+1].head is not None): intresbonds += 1 if i != 0 and (res.head is not None and residues[i-1].tail is not None): intresbonds += 1 else: for atom in res: for a2 in atom.bond_partners: if a2.residue is not res: intresbonds += 1 dest.write('%8d %-8s %8d RESIDUE %4d %-4s ROOT %6d\n' % (i+1, res.name, first_atom+1, 0, chain[:4], intresbonds)) first_atom += len(res) if mol3: dest.write('@<TRIPOS>HEADTAIL\n') for i, res in enumerate(residues): if isinstance(res, ResidueTemplate): if res.head is not None: dest.write('%s %d\n' % (res.head.name, i+1)) else: dest.write('0 0\n') if res.tail is not None: dest.write('%s %d\n' % (res.tail.name, i+1)) else: dest.write('0 0\n') else: head = tail = None for atom in res: for a2 in atom.bond_partners: if a2.residue.idx == res.idx - 1: head = atom if a2.residue.idx == res.idx + 1: tail = atom if head is not None: dest.write('%s %d\n' % (head.name, i+1)) else: dest.write('0 0\n') if tail is not None: dest.write('%s %d\n' % (tail.name, i+1)) else: dest.write('0 0\n') dest.write('@<TRIPOS>RESIDUECONNECT\n') for i, res in enumerate(residues): if isinstance(res, ResidueTemplate): con = [res.head, res.tail, None, None, None, None] for i, a in enumerate(res.connections): con[i+2] = a else: con = [None, None, None, None, None, None] ncon = 2 for atom in res: for a2 in atom.bond_partners: if a2.residue.idx == res.idx - 1: con[0] = atom elif a2.residue.idx == res.idx + 1: con[1] = atom elif a2.residue.idx != res.idx: con[ncon] = atom ncon += 1 dest.write('%d' % (i+1)) for a in con: if a is not None: dest.write(' %s' % a.name) else: dest.write(' 0') dest.write('\n') finally: if own_handle: dest.close()