def read_cartesian(self, fname):
    """Build a Molecule from a whitespace-separated cartesian file.

    Every line that splits into exactly four tokens is read as
    ``<symbol> <x> <y> <z>``; all other lines (blank lines, headers,
    comments with a different token count) are skipped silently.

    The resulting molecule is stored on ``self.mol`` and also returned.

    Raises:
        IOError/OSError: if ``fname`` cannot be opened.
        ValueError: if a four-token line has a non-numeric coordinate.
    """
    molc = Molecule()
    # The context manager guarantees the handle is released even when a
    # coordinate fails to parse (the original never closed the file).
    with open(fname, 'r') as f:
        for line in f:
            sp = line.split()
            if len(sp) != 4:
                continue
            # Symbol is lower-cased; the second Atom argument is always 0
            # here (presumably an atom index/number -- confirm against Atom).
            molc.add_atom(Atom(sp[0].lower(), 0,
                               float(sp[1]), float(sp[2]), float(sp[3])))
    self.mol = molc
    return molc
def load(file_name):
    """Read all molecules from an SDF-style file and return them as a list.

    The file is processed line by line. ``line_number`` is the 1-based
    position inside the *current* molecule record and is what the
    ``SDFReader.is_*`` predicates use to decide which section a line
    belongs to (header, counts line, atom table, bond table, properties).
    A line starting with ``$$$$`` ends the current record.

    Args:
        file_name: path of the file to read.

    Returns:
        A list of the molecules read. The list is empty when the file
        cannot be opened (the problem is reported through ``error``) or
        when it contains no complete record.

    Raises:
        ValueError: if a counts, atom or bond line does not contain the
            expected numeric fields.
    """
    # we read a list of molecules
    read_molecules = []
    try:
        sdf_file = open(file_name, "r")
    except IOError as e:
        # file not found or something... not much we could do other than
        # print an error and return an empty molecule list
        error(">>> Error while opening %s" % file_name)
        error(">>> I/O error code %d: %s" % (e.errno, e.strerror))
        error(">>> No molecules were loaded.")
        return read_molecules

    # TODO: extend this table; symbols missing from it get a mass of None.
    atomic_masses = {
        "C": 12.0107,
        "H": 1.00794,
        "Cl": 35.4527,
        "N": 14.00674,
        "O": 15.9994,
        "S": 32.066,
        "F": 18.9984032
    }

    line_number = 1
    molecule_name = ""
    n_atoms = 0
    n_bonds = 0
    molecule = None
    property_name = None
    reading_property = False

    try:
        for line in sdf_file:
            # decide where we are based on the line number
            if SDFReader.is_header(line_number):
                # we append the header lines to the name of the molecule
                if line_number > 1:
                    molecule_name += ','
                molecule_name += line.strip()
            elif SDFReader.is_counts_line(line_number):
                # we can create the molecule and add its name
                molecule = Molecule(molecule_name)
                # reset the name for the next record
                molecule_name = ''
                # number of atoms are the first three characters and
                # number of bonds the next three
                n_atoms = int(line[0:3])
                n_bonds = int(line[3:6])
            elif SDFReader.is_atom_table(line_number, n_atoms):
                # the coordinates are given in the first 30 characters,
                # 10 per each dimension
                x_pos = float(line[0:10])
                y_pos = float(line[10:20])
                z_pos = float(line[20:30])
                # then the name of the atom
                symbol = line[30:33].strip()
                atomic_mass = atomic_masses.get(symbol)
                molecule.add_atom(
                    Atom(symbol, Point3D(x_pos, y_pos, z_pos), atomic_mass))
            elif SDFReader.is_bond_table(line_number, n_atoms, n_bonds):
                # characters 1-3: first atom, 4-6: second atom, 7-9: bond type
                atom_one_index = int(line[0:3])
                atom_two_index = int(line[3:6])
                bond_type = int(line[6:9])
                # python indices start from 0, while in mol files they
                # start from 1!
                molecule.add_bond(atom_one_index - 1, atom_two_index - 1,
                                  bond_type)
            elif SDFReader.is_properties_block(line_number, n_atoms, n_bonds):
                if line.split() == ["M", "END"]:
                    # End-of-connection-table marker: ignore. Compared by
                    # tokens because the raw line still carries its newline
                    # (and possibly extra spaces), so an exact string
                    # comparison could never match.
                    pass
                elif line.startswith('>'):
                    # property header; this is what the line looks like:
                    # > <PUBCHEM_COMPOUND_CID>
                    reading_property = True
                    property_name = line[3:line.rfind('>')]
                elif reading_property:
                    # the name has been read, we take the whole line as the
                    # value (remove line break); multi-line property values
                    # are not supported
                    molecule.add_property(property_name, line.rstrip('\n'))
                    reading_property = False
                elif line.strip().startswith("$$$$"):
                    # end of molecule! restart the per-record line counter
                    # (it is incremented back to 1 just below)
                    read_molecules.append(molecule)
                    line_number = 0
            # done reading this line
            line_number += 1
    finally:
        # Always release the handle, also when a malformed line raised.
        try:
            sdf_file.close()
        except IOError:
            # best effort: a failure while closing a read-only file must not
            # discard the molecules that were already parsed
            pass

    return read_molecules
class GeomReader:
    """Read a molecular geometry from a cartesian or z-matrix file.

    The molecule that was read is available as ``self.mol``. Files whose
    name ends in ``.zmat`` are parsed as z-matrices, everything else as
    cartesian coordinates.
    """

    def __init__(self, fname):
        """Load ``fname``, choosing the parser from the file extension."""
        if fname.endswith(".zmat"):
            self.read_zmatrix(fname)
        else:
            self.read_cartesian(fname)

    def read_cartesian(self, fname):
        """Read ``<symbol> <x> <y> <z>`` lines into a new Molecule.

        Lines that do not split into exactly four tokens are skipped.
        The molecule is stored on ``self.mol`` and returned.
        """
        molc = Molecule()
        # 'with' closes the handle even if a coordinate fails to parse.
        with open(fname, 'r') as f:
            for line in f:
                sp = line.split()
                if len(sp) != 4:
                    continue
                molc.add_atom(Atom(sp[0].lower(), 0,
                                   float(sp[1]), float(sp[2]), float(sp[3])))
        self.mol = molc
        return molc

    def _read_value(self, token):
        """Return ``token`` as a float, or look it up as a named variable.

        Z-matrix entries may be literal numbers or variable names defined
        later in the same file; the latter are resolved by ``lookup_var``
        (which returns None when the name is unknown).
        """
        try:
            return float(token)
        except ValueError:
            return self.lookup_var(token)

    def read_zmatrix(self, fname):
        """Read a z-matrix file and convert it to cartesian coordinates.

        Layout handled here (one atom per line, whitespace separated):
            line 1: name
            line 2: name  bond_to  length
            line 3: name  bond_to  length  angle_to  angle
            line 4+: name  bond_to  length  angle_to  angle  dihed_to  dihedral
        A blank line (or end of file) ends the matrix. Length, angle and
        dihedral may be numbers or variable names. Angles are in degrees.

        The first atom is placed at the origin, the second on the z axis
        and the third in the xz plane. The molecule is stored on
        ``self.mol`` and returned.

        NOTE(review): the handle is kept on ``self.file`` and is not closed
        here, because the public ``lookup_var`` reads from it; closing it
        would break callers that resolve variables after loading.
        """
        f = open(fname, 'r')
        self.file = f

        # --- atom 1: at the origin ---------------------------------------
        name = f.readline().strip()
        self.mol = Molecule()
        self.mol.add_atom(Atom(name.lower(), 1, 0, 0, 0, connect=0))

        # --- atom 2: on the z axis, 'bl' away from the origin ------------
        sp = f.readline().split()
        if sp == []:
            return self.mol
        sp[0] = sp[0].lower()
        bl = self._read_value(sp[2])
        self.mol.add_atom(Atom(sp[0], 2, 0, 0, bl, int(sp[1])))
        self.mol.add_bond(2, int(sp[1]))

        # --- atom 3: in the xz plane --------------------------------------
        sp = f.readline().split()
        if sp == []:
            return self.mol
        sp[0] = sp[0].lower()
        bl = self._read_value(sp[2])
        ba = self._read_value(sp[4])
        # z is measured from the atom this one is bonded to.
        a = Atom(sp[0], 3,
                 math.sin(ba*math.pi/180.0)*bl,
                 0,
                 self.mol.atoms[int(sp[1]) - 1].z - math.cos(ba*math.pi/180.0)*bl,
                 int(sp[1]))
        self.mol.add_atom(a)
        self.mol.add_bond(3, int(sp[1]))

        # --- atoms 4..n: bond length + angle + dihedral -------------------
        atom_number = 4
        while True:
            sp = f.readline().split()
            if sp == []:
                print('end of matrix')
                break
            sp[0] = sp[0].lower()
            bl = self._read_value(sp[2])
            ba = self._read_value(sp[4])
            dh = self._read_value(sp[6])
            connect = int(sp[1])
            angle_connect = int(sp[3])
            dihed_connect = int(sp[5])
            atoms = self.mol.atoms

            # vector1: from the angle atom to the bonded atom
            # vector2: from the dihedral atom to the bonded atom
            vector1 = vec_minus(atoms[connect-1].xyz, atoms[angle_connect-1].xyz)
            vector2 = vec_minus(atoms[connect-1].xyz, atoms[dihed_connect-1].xyz)
            # Two unit vectors perpendicular to vector1; combining them with
            # sin/cos of the dihedral picks the direction around vector1.
            norm1 = vec_cross(vector1, vector2)
            norm2 = vec_cross(vector1, norm1)
            norm1 = normalise(norm1)
            norm2 = normalise(norm2)
            norm1 = vec_times(norm1, -1*math.sin(dh*math.pi/180))
            norm2 = vec_times(norm2, math.cos(dh*math.pi/180))
            # Component of the new bond perpendicular to vector1 ...
            vector3 = vec_add(norm1, norm2)
            vector3 = normalise(vector3)
            vector3 = vec_times(vector3, bl*math.sin(ba*math.pi/180.0))
            # ... and the component along vector1.
            vector1 = normalise(vector1)
            vector1 = vec_times(vector1, bl*math.cos(ba*math.pi/180.0))
            # New position = bonded atom + perpendicular part - parallel part
            vector2 = vec_add(atoms[connect - 1].xyz, vector3)
            vector2 = vec_minus(vector2, vector1)

            a = Atom(sp[0], atom_number,
                     vector2[0], vector2[1], vector2[2], int(sp[1]))
            self.mol.add_atom(a)
            self.mol.add_bond(atom_number, int(sp[1]))
            atom_number += 1
        return self.mol

    def lookup_var(self, name):
        """Resolve a named z-matrix variable from the rest of ``self.file``.

        Scans forward from the current file position for a line whose first
        token equals ``name`` and returns its second token as a float. A
        leading ``-`` on ``name`` negates the value. The file position is
        restored before returning, so parsing can continue where it was.

        Returns:
            The value as a float, or None (after printing a message) when
            the name is not found before end of file.
        """
        mark = self.file.tell()
        # The original called name.strip() and discarded the result.
        name = name.strip()
        mul = 1
        # startswith() is also safe for an empty name (name[0] would raise).
        if name.startswith('-'):
            mul = -1
            name = name[1:]
        while True:
            line = self.file.readline()
            if line == '':
                print('Lookup error')
                break
            sp = line.split()
            if sp == []:
                continue
            if sp[0] == name:
                self.file.seek(mark)
                return float(sp[1])*mul
        print('Couldnt look up  %s' % name)
        self.file.seek(mark)
        return None

    def write_atoms(self, fname):
        """Write ``name x y z`` for every atom of ``self.mol`` to ``fname``."""
        with open(fname, 'w') as f:
            for at in self.mol.atoms:
                f.write('%s %f %f %f\n' % (at.name, at.x, at.y, at.z))

    def print_mol_ats(self):
        """Print how many atoms of each name ``self.mol`` contains."""
        key = {}
        for at in self.mol.atoms:
            key[at.name] = key.get(at.name, 0) + 1
        for i in key:
            print('%s %i' % (i, key[i]))