Пример #1
0
 def read_cartesian(self,fname):
     """Read a whitespace-separated Cartesian geometry file into a Molecule.

     Every line that splits into exactly four tokens is interpreted as
     ``<name> <x> <y> <z>``; lines with any other token count (blank lines,
     headers, comments) are skipped silently.  The atom name is lower-cased
     and every atom is created with ``0`` as its second constructor argument.

     The resulting molecule is stored on ``self.mol`` and also returned.

     Raises:
         IOError/OSError: if ``fname`` cannot be opened.
         ValueError: if a four-token line has a non-numeric coordinate.
     """
     molc = Molecule()
     # The context manager guarantees the handle is released even when a
     # coordinate fails to parse (the original never closed the file).
     with open(fname, 'r') as geom_file:
         for line in geom_file:
             fields = line.split()
             if len(fields) != 4:
                 continue
             molc.add_atom(Atom(fields[0].lower(), 0,
                                float(fields[1]), float(fields[2]), float(fields[3])))
     self.mol = molc
     return molc
Пример #2
0
    def load(file_name):
        """Parse an SDF/MOL file and return the list of molecules it contains.

        The file is processed line by line; ``line_number`` is the 1-based
        position inside the *current* molecule record and is reset whenever a
        ``$$$$`` record terminator is seen.  The ``SDFReader.is_*`` predicates
        decide which section of the record a given line number belongs to.

        A molecule is appended to the result only when its ``$$$$`` terminator
        is reached.  Each property block (``> <NAME>`` followed by one value
        line) is stored through ``molecule.add_property``; multi-line property
        values are not supported.

        If the file cannot be opened, the error is reported through ``error``
        and an empty list is returned.  Parse errors (``ValueError`` from
        ``int``/``float``) propagate to the caller, but the file is always
        closed first.
        """
        # we read a list of molecules
        read_molecules = []
        try:
            sdf_file = open(file_name, "r")
        except IOError as e:
            # file not found or something... not much we could do other than print an error and return an empty
            # molecule list
            error(">>> Error while opening %s" % file_name)
            error(">>> I/O error code %d: %s" % (e.errno, e.strerror))
            error(">>> No molecules were loaded.")
            return read_molecules

        # Masses for the element symbols this reader knows about; any other
        # symbol yields None from .get() below.
        atomic_masses = {
            "C": 12.0107,
            "H": 1.00794,
            "Cl": 35.4527,
            "N": 14.00674,
            "O": 15.9994,
            "S": 32.066,
            "F": 18.9984032
        }

        line_number = 1
        molecule_name = ""
        n_atoms = 0
        n_bonds = 0
        molecule = None
        property_name = None
        reading_property = False
        try:
            for line in sdf_file:
                # decide where are we based on the line number
                if SDFReader.is_header(line_number):
                    # we append the header lines to the name of the molecule
                    if line_number > 1:
                        molecule_name += ','
                    molecule_name += line.strip()
                elif SDFReader.is_counts_line(line_number):
                    # we can create the molecule and add its name
                    molecule = Molecule(molecule_name)
                    # reset the name
                    molecule_name = ''
                    # extract the number of atoms and bonds
                    # number of atoms are the first three characters and number of bonds the next three
                    n_atoms = int(line[0:3])
                    n_bonds = int(line[3:6])
                elif SDFReader.is_atom_table(line_number, n_atoms):
                    # the coordinates are given in the first 30 characters, 10 per each dimension
                    x_pos = float(line[0:10])
                    y_pos = float(line[10:20])
                    z_pos = float(line[20:30])
                    # then the name of the atom
                    symbol = line[30:33].strip()
                    # insert the atom in the molecule
                    # TODO: use a table to find the atomic mass based on the atomic symbol!
                    atomic_mass = atomic_masses.get(symbol)
                    molecule.add_atom(
                        Atom(symbol, Point3D(x_pos, y_pos, z_pos), atomic_mass))
                elif SDFReader.is_bond_table(line_number, n_atoms, n_bonds):
                    # the first three characters represent the first atom
                    atom_one_index = int(line[0:3])
                    # the 4th-6th characters represent the second atom
                    atom_two_index = int(line[3:6])
                    # the 7th-9th characters represent the bond type
                    bond_type = int(line[6:9])
                    # remember that in python indices start from 0, while in mol files, they start from 1!!!
                    molecule.add_bond(atom_one_index - 1, atom_two_index - 1,
                                      bond_type)
                elif SDFReader.is_properties_block(line_number, n_atoms, n_bonds):
                    # Lines read from a file keep their trailing newline, so the
                    # terminator must be compared after stripping it; a raw
                    # comparison against "M  END" would never match.
                    if line.rstrip() == "M  END":
                        # ignore
                        pass
                    elif line.startswith('>'):
                        # property
                        reading_property = True
                        # this is what the line looks like_
                        # > <PUBCHEM_COMPOUND_CID>
                        property_name = line[3:line.rfind('>')]
                    elif reading_property:
                        # the name has been read, we take the whole line as the value (remove line break)
                        # right now, no support for multi-line property values is available...
                        molecule.add_property(property_name, line.rstrip('\n'))
                        reading_property = False
                    elif line.strip().startswith("$$$$"):
                        # end of molecule!
                        read_molecules.append(molecule)
                        # becomes 1 again after the increment below
                        line_number = 0

                # done reading this line
                line_number += 1
        finally:
            # done reading file (or a parse error occurred): always close it
            try:
                sdf_file.close()
            except IOError:
                # ignore the error
                pass

        return read_molecules
Пример #3
0
class GeomReader:
    """Build a ``Molecule`` from a geometry file.

    Files whose name ends in ``.zmat`` are parsed as Z-matrices (internal
    coordinates); everything else is parsed as plain Cartesian
    ``name x y z`` lines.  The parsed molecule is kept on ``self.mol``.
    """

    def __init__(self, fname):
        """Parse ``fname`` immediately, choosing the parser by file extension."""
        if fname.endswith(".zmat"):
            self.read_zmatrix(fname)
        else:
            self.read_cartesian(fname)

    def read_cartesian(self, fname):
        """Read ``name x y z`` lines from ``fname`` into a new Molecule.

        Lines that do not split into exactly four tokens are skipped.  Atom
        names are lower-cased.  The molecule is stored on ``self.mol`` and
        returned.  Raises ``ValueError`` if a coordinate is not numeric.
        """
        molc = Molecule()
        # 'with' closes the handle even if a coordinate fails to parse.
        with open(fname, 'r') as geom_file:
            for line in geom_file:
                fields = line.split()
                if len(fields) != 4:
                    continue
                molc.add_atom(Atom(fields[0].lower(), 0,
                                   float(fields[1]), float(fields[2]), float(fields[3])))
        self.mol = molc
        return molc

    def _value(self, token):
        """Return ``token`` as a float, or resolve it as a Z-matrix variable.

        Falls back to ``lookup_var`` when ``token`` is not a numeric literal,
        so the result is ``None`` if the variable cannot be found either.
        """
        try:
            return float(token)
        except ValueError:
            return self.lookup_var(token)

    def read_zmatrix(self, fname):
        """Read a Z-matrix from ``fname`` and convert it to Cartesian atoms.

        Expected layout, one atom per line, terminated by a blank line or EOF:

        * line 1: ``name``
        * line 2: ``name connect bond_length``
        * line 3: ``name connect bond_length angle_atom bond_angle``
        * line 4+: ``name connect bond_length angle_atom bond_angle
          dihedral_atom dihedral``

        Atom references are 1-based indices; angles are in degrees.  Any of
        the numeric values may instead be a variable name (optionally
        prefixed with ``-``) that is defined further down the file as a
        ``name value`` line; see ``lookup_var``.

        The molecule is stored on ``self.mol`` and returned.  The file is
        closed when this method returns, so ``self.file`` is only usable
        while parsing is in progress.
        """
        with open(fname, 'r') as f:
            # lookup_var reads ahead in the same handle and seeks back.
            # readline() is used throughout (not ``for line in f``) because
            # iterating a text file disables tell() on Python 3.
            self.file = f

            # First atom sits at the origin.
            name = f.readline().strip()
            self.mol = Molecule()
            self.mol.add_atom(Atom(name.lower(), 1, 0, 0, 0, connect=0))

            # Second atom: placed on the z axis at the bond length.
            sp = f.readline().split()
            if not sp:
                return self.mol
            sp[0] = sp[0].lower()
            bl = self._value(sp[2])
            self.mol.add_atom(Atom(sp[0], 2, 0, 0, bl, int(sp[1])))
            self.mol.add_bond(2, int(sp[1]))

            # Third atom: placed in the xz plane from bond length and angle.
            sp = f.readline().split()
            if not sp:
                return self.mol
            sp[0] = sp[0].lower()
            bl = self._value(sp[2])
            ba = self._value(sp[4])
            a = Atom(sp[0], 3,
                     math.sin(ba*math.pi/180.0)*bl,
                     0,
                     self.mol.atoms[int(sp[1]) - 1].z - math.cos(ba*math.pi/180.0)*bl,
                     int(sp[1]))
            self.mol.add_atom(a)
            self.mol.add_bond(3, int(sp[1]))

            # Remaining atoms need bond length, bond angle and dihedral.
            atom_number = 4
            while True:
                sp = f.readline().split()
                if not sp:
                    # blank line or EOF ends the coordinate section
                    print('end of matrix')
                    break
                sp[0] = sp[0].lower()
                bl = self._value(sp[2])
                ba = self._value(sp[4])
                dh = self._value(sp[6])

                connect = int(sp[1])
                angle_connect = int(sp[3])
                dihed_connect = int(sp[5])

                atoms = self.mol.atoms
                # Vectors from the two reference atoms to the bonded atom.
                vector1 = vec_minus(atoms[connect-1].xyz, atoms[angle_connect-1].xyz)
                vector2 = vec_minus(atoms[connect-1].xyz, atoms[dihed_connect-1].xyz)
                # Two unit vectors perpendicular to vector1, used as the basis
                # in which the dihedral rotation is expressed.
                norm1 = vec_cross(vector1, vector2)
                norm2 = vec_cross(vector1, norm1)
                norm1 = normalise(norm1)
                norm2 = normalise(norm2)

                norm1 = vec_times(norm1, -1*math.sin(dh*math.pi/180))
                norm2 = vec_times(norm2, math.cos(dh*math.pi/180))

                # Component of the new bond perpendicular to vector1.
                vector3 = vec_add(norm1, norm2)
                vector3 = normalise(vector3)
                vector3 = vec_times(vector3, bl*math.sin(ba*math.pi/180.0))

                # Component of the new bond along vector1.
                vector1 = normalise(vector1)
                vector1 = vec_times(vector1, bl*math.cos(ba*math.pi/180.0))

                vector2 = vec_add(atoms[connect - 1].xyz, vector3)
                vector2 = vec_minus(vector2, vector1)

                a = Atom(sp[0], atom_number, vector2[0], vector2[1], vector2[2], int(sp[1]))
                self.mol.add_atom(a)
                self.mol.add_bond(atom_number, int(sp[1]))

                atom_number += 1
            return self.mol

    def lookup_var(self, name):
        """Resolve a Z-matrix variable by scanning ahead in ``self.file``.

        Reads forward from the current file position looking for a line whose
        first token equals ``name`` and returns its second token as a float.
        A leading ``-`` on ``name`` negates the result.  The file position is
        restored before returning, so the caller's parsing is undisturbed.

        Returns ``None`` (after printing a diagnostic) when the variable is
        not found before end of file.
        """
        mark = self.file.tell()
        # str.strip() returns a new string; the result must be assigned.
        name = name.strip()
        mul = 1
        if name.startswith('-'):
            mul = -1
            name = name[1:]

        while True:
            line = self.file.readline()
            if line == '':
                print('Lookup error')
                break
            sp = line.split()
            if not sp:
                continue
            if sp[0] == name:
                self.file.seek(mark)
                return float(sp[1])*mul
        # Same text as the Python 2 statement  print 'Couldnt look up ', name
        print('Couldnt look up  %s' % (name,))
        self.file.seek(mark)
        return None

    def write_atoms(self, fname):
        """Write ``self.mol`` to ``fname`` as ``name x y z`` lines."""
        with open(fname, 'w') as f:
            for at in self.mol.atoms:
                f.write('%s %f %f %f\n' % (at.name, at.x, at.y, at.z))

    def print_mol_ats(self):
        """Print each distinct atom name in ``self.mol`` with its count."""
        key = {}
        for at in self.mol.atoms:
            key[at.name] = key.get(at.name, 0) + 1
        for i in key:
            print('%s %i' % (i, key[i]))