def parseLines(self, lines): """Parse list of lines in XYZ format. Return Structure object or raise StructureFormatError. """ linefields = [l.split() for l in lines] # prepare output structure stru = Structure() # find first valid record start = 0 for field in linefields: if len(field) == 0 or field[0] == "#": start += 1 else: break # first valid line gives number of atoms try: lfs = linefields[start] w1 = linefields[start][0] if len(lfs) == 1 and str(int(w1)) == w1: p_natoms = int(w1) #try to get lattice vectors from description line try: latticeVecs = map(float, linefields[start + 1]) assert len(latticeVecs) == 9 from matter.Lattice import Lattice reshaped = [ latticeVecs[0:3], latticeVecs[3:6], latticeVecs[6:9] ] stru.lattice = Lattice(base=reshaped) needsDescription = True except: needsDescription = False stru.description = lines[start + 1].strip() start += 2 else: emsg = ("%d: invalid XYZ format, missing number of atoms" % (start + 1)) raise StructureFormatError(emsg) except (IndexError, ValueError): exc_type, exc_value, exc_traceback = sys.exc_info() emsg = ("%d: invalid XYZ format, missing number of atoms" % (start + 1)) raise StructureFormatError, emsg, exc_traceback # find the last valid record stop = len(lines) while stop > start and len(linefields[stop - 1]) == 0: stop -= 1 # get out for empty structure if p_natoms == 0 or start >= stop: return stru # here we have at least one valid record line nfields = len(linefields[start]) if nfields != 4 and nfields != 5: emsg = "%d: invalid XYZ format, expected 4 or 5 columns" % (start + 1) raise StructureFormatError(emsg) # now try to read all record lines try: p_nl = start for fields in linefields[start:]: p_nl += 1 if fields == []: continue elif len(fields) != 4 and len(fields) != 5: emsg = ('%d: all lines must have ' + 'a symbol, position, and optionally charge') % p_nl raise StructureFormatError(emsg) symbol = fields[0] symbol = symbol[0].upper() + symbol[1:].lower() xyz = [float(f) for f in fields[1:4]] if len(fields) == 5: charge = float(fields[4]) else: charge = 0.0 stru.addNewAtom(symbol, xyz=xyz, charge=charge) except ValueError: exc_type, exc_value, exc_traceback = sys.exc_info() emsg = "%d: invalid number format" % p_nl raise StructureFormatError, emsg, exc_traceback # finally check if all the atoms have been read if p_natoms is not None and len(stru) != p_natoms: emsg = "expected %d atoms, read %d" % (p_natoms, len(stru)) raise StructureFormatError(emsg) if needsDescription: stru.generateDescription() return stru
def parseLines(self, lines): """Parse list of lines in PDB format. Return Structure instance or raise StructureFormatError. """ try: stru = Structure() scale = numpy.identity(3, dtype=float) scaleU = numpy.zeros(3, dtype=float) p_nl = 0 for line in lines: p_nl += 1 # skip blank lines if not line.strip(): continue # make sure line has 80 characters if len(line) < 80: line = "%-80s" % line words = line.split() record = words[0] if record == "TITLE": continuation = line[8:10] if continuation.strip(): stru.description += line[10:].rstrip() else: stru.description = line[10:].rstrip() elif record == "CRYST1": a = float(line[7:15]) b = float(line[15:24]) c = float(line[24:33]) alpha = float(line[33:40]) beta = float(line[40:47]) gamma = float(line[47:54]) stru.lattice.setLatPar(a, b, c, alpha, beta, gamma) scale = numpy.transpose(stru.lattice.recbase) elif record == "SCALE1": sc = numpy.zeros((3,3), dtype=float) sc[0,:] = [float(x) for x in line[10:40].split()] scaleU[0] = float(line[45:55]) elif record == "SCALE2": sc[1,:] = [float(x) for x in line[10:40].split()] scaleU[1] = float(line[45:55]) elif record == "SCALE3": sc[2,:] = [float(x) for x in line[10:40].split()] scaleU[2] = float(line[45:55]) base = numpy.transpose(numpy.linalg.inv(sc)) abcABGcryst = numpy.array(stru.lattice.abcABG()) stru.lattice.setLatBase(base) abcABGscale = numpy.array(stru.lattice.abcABG()) reldiff = numpy.fabs(1.0 - abcABGscale/abcABGcryst) if not numpy.all(reldiff < self.epsilon): emsg = "%d: " % p_nl + \ "SCALE and CRYST1 are not consistent." raise StructureFormatError(emsg) if numpy.any(scaleU != 0.0): emsg = "Origin offset not yet implemented." raise NotImplementedError(emsg) elif record in ("ATOM", "HETATM"): name = line[12:16].strip() rc = [float(x) for x in line[30:54].split()] xyz = numpy.dot(scale, rc) + scaleU try: occupancy = float(line[54:60]) except ValueError: occupancy = 1.0 try: B = float(line[60:66]) U = numpy.identity(3)*B/(8*pi**2) except ValueError: U = numpy.zeros((3,3), dtype=float) symbol = line[76:78].strip() if symbol == "": # get symbol from the first 2 characters of name symbol = line[12:14].strip() symbol = symbol[0].upper() + symbol[1:].lower() #stru.addNewAtom(symbol, occupancy=occupancy, name=name, U=U) stru.addNewAtom(symbol, occupancy=occupancy, label=name, U=U) last_atom = stru.getLastAtom() last_atom.xyz_cartn = rc elif record == "SIGATM": sigrc = [float(x) for x in line[30:54].split()] sigxyz = numpy.dot(scale, sigrc) try: sigo = float(line[54:60]) except ValueError: sigo = 0.0 try: sigB = float(line[60:66]) sigU = numpy.identity(3)*sigB/(8*pi**2) except ValueError: sigU = numpy.zeros((3,3), dtype=float) last_atom.sigxyz = sigxyz last_atom.sigo = sigo last_atom.sigU = sigU elif record == "ANISOU": Uij = [ float(x)*1.0e-4 for x in line[28:70].split() ] for i in range(3): last_atom.U[i,i] = Uij[i] last_atom.U[0,1] = last_atom.U[1,0] = Uij[3] last_atom.U[0,2] = last_atom.U[2,0] = Uij[4] last_atom.U[1,2] = last_atom.U[2,1] = Uij[5] elif record == "SIGUIJ": sigUij = [ float(x)*1.0e-4 for x in line[28:70].split() ] for i in range(3): last_atom.sigU[i,i] = sigUij[i] last_atom.sigU[0,1] = last_atom.sigU[1,0] = sigUij[3] last_atom.sigU[0,2] = last_atom.sigU[2,0] = sigUij[4] last_atom.sigU[1,2] = last_atom.sigU[2,1] = sigUij[5] elif record in P_pdb.validRecords: pass else: emsg = "%d: invalid record name '%r'" % (p_nl, record) raise StructureFormatError(emsg) except (ValueError, IndexError): emsg = "%d: invalid PDB record" % p_nl exc_type, exc_value, exc_traceback = sys.exc_info() raise StructureFormatError, emsg, exc_traceback return stru
def parseLines(self, lines): """Parse list of lines in XYZ format. Return Structure object or raise StructureFormatError. """ linefields = [l.split() for l in lines] # prepare output structure stru = Structure() # find first valid record start = 0 for field in linefields: if len(field) == 0 or field[0] == "#": start += 1 else: break # first valid line gives number of atoms try: lfs = linefields[start] w1 = linefields[start][0] if len(lfs) == 1 and str(int(w1)) == w1: p_natoms = int(w1) #try to get lattice vectors from description line try: latticeVecs = map(float, linefields[start+1]) assert len(latticeVecs)==9 from matter.Lattice import Lattice reshaped = [latticeVecs[0:3], latticeVecs[3:6], latticeVecs[6:9]] stru.lattice = Lattice(base=reshaped) needsDescription = True except: needsDescription = False stru.description = lines[start+1].strip() start += 2 else: emsg = ("%d: invalid XYZ format, missing number of atoms" % (start + 1)) raise StructureFormatError(emsg) except (IndexError, ValueError): exc_type, exc_value, exc_traceback = sys.exc_info() emsg = ("%d: invalid XYZ format, missing number of atoms" % (start + 1)) raise StructureFormatError, emsg, exc_traceback # find the last valid record stop = len(lines) while stop > start and len(linefields[stop-1]) == 0: stop -= 1 # get out for empty structure if p_natoms == 0 or start >= stop: return stru # here we have at least one valid record line nfields = len(linefields[start]) if nfields != 4 and nfields != 5: emsg = "%d: invalid XYZ format, expected 4 or 5 columns" % (start + 1) raise StructureFormatError(emsg) # now try to read all record lines try: p_nl = start for fields in linefields[start:] : p_nl += 1 if fields == []: continue elif len(fields) != 4 and len(fields) !=5: emsg = ('%d: all lines must have ' + 'a symbol, position, and optionally charge') % p_nl raise StructureFormatError(emsg) symbol = fields[0] symbol = symbol[0].upper() + symbol[1:].lower() xyz = [ float(f) for f in fields[1:4] ] if len(fields)==5: charge = float(fields[4]) else: charge = 0.0 stru.addNewAtom(symbol, xyz=xyz, charge=charge) except ValueError: exc_type, exc_value, exc_traceback = sys.exc_info() emsg = "%d: invalid number format" % p_nl raise StructureFormatError, emsg, exc_traceback # finally check if all the atoms have been read if p_natoms is not None and len(stru) != p_natoms: emsg = "expected %d atoms, read %d" % (p_natoms, len(stru)) raise StructureFormatError(emsg) if needsDescription: stru.generateDescription() return stru