def parseLines(self, lines):
    """Parse list of lines in RAWXYZ format.

    lines -- list of strings, one per input line.  Leading blank lines
             and lines whose first token is "#" are skipped, trailing
             blank lines are ignored.

    The column layout is taken from the first record, which must have
    either 3 columns (x, y, z) or 4 columns.  When the first three
    columns are numbers they are read as coordinates and the element
    is left empty; otherwise the layout is element, x, y, z.  Every
    following non-blank line must have the same number of columns.

    Return Structure object or raise StructureFormatError.
    """
    linefields = [line.split() for line in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for fields in linefields:
        if fields and fields[0] != "#":
            break
        start += 1
    # find the last valid record
    stop = len(lines)
    while stop > start and not linefields[stop - 1]:
        stop -= 1
    # get out for empty structure
    if start >= stop:
        return stru
    # here we have at least one valid record line
    # figure out xyz layout from the first line for plain and raw formats
    floatfields = [isfloat(f) for f in linefields[start]]
    nfields = len(linefields[start])
    if nfields not in (3, 4):
        emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                (start + 1))
        raise StructureFormatError(emsg)
    if floatfields[:3] == [True, True, True]:
        el_idx, x_idx = (None, 0)
    elif floatfields[:4] == [False, True, True, True]:
        el_idx, x_idx = (0, 1)
    else:
        emsg = "%d: invalid RAWXYZ format" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    # p_nl is the 1-based number of the line being processed
    p_nl = start
    try:
        # lines past `stop` are all blank, so they need not be visited
        for fields in linefields[start:stop]:
            p_nl += 1
            if not fields:
                continue
            if len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            element = fields[el_idx] if el_idx is not None else ""
            # the column count was validated above, thus the slice
            # always holds exactly three values
            xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        emsg = "%d: invalid number" % p_nl
        e = StructureFormatError(emsg)
        # preserve the traceback of the original conversion error
        six.reraise(StructureFormatError, e, sys.exc_info()[2])
    return stru
def parseLines(self, lines):
    """Parse list of lines in RAWXYZ format.

    lines -- list of strings, one per input line.  Leading blank lines
             and lines whose first token is "#" are skipped, trailing
             blank lines are ignored.

    The first record fixes the layout for the whole input.  It must
    have 3 or 4 columns; if its first three columns are numeric they
    are the coordinates and atoms get an empty element, otherwise the
    columns are element, x, y, z.  All other non-blank lines must
    have the same number of columns as the first record.

    Return Structure object or raise StructureFormatError.
    """
    linefields = [line.split() for line in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for fields in linefields:
        if len(fields) == 0 or fields[0] == "#":
            start += 1
        else:
            break
    # find the last valid record
    stop = len(lines)
    while stop > start and len(linefields[stop - 1]) == 0:
        stop -= 1
    # get out for empty structure
    if start >= stop:
        return stru
    # here we have at least one valid record line
    # figure out xyz layout from the first line for plain and raw formats
    firstrecord = linefields[start]
    floatfields = [isfloat(f) for f in firstrecord]
    nfields = len(firstrecord)
    if nfields not in (3, 4):
        emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                (start + 1))
        raise StructureFormatError(emsg)
    if floatfields[:3] == [True, True, True]:
        el_idx, x_idx = (None, 0)
    elif floatfields[:4] == [False, True, True, True]:
        el_idx, x_idx = (0, 1)
    else:
        emsg = "%d: invalid RAWXYZ format" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    try:
        # p_nl is the 1-based number of the line being processed
        p_nl = start
        for fields in linefields[start:]:
            p_nl += 1
            if fields == []:
                continue
            elif len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            if el_idx is None:
                element = ""
            else:
                element = fields[el_idx]
            # nfields is 3 or 4 and matches the detected layout,
            # so there are always three coordinate columns here
            xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        emsg = "%d: invalid number" % p_nl
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg)
        # re-raise as format error while keeping the original traceback
        six.reraise(StructureFormatError, e, exc_traceback)
    return stru
class P_cif(StructureParser):
    """Simple parser for CIF structure format.

    Reads Structure from the first block containing _atom_site_label key.
    Following blocks, if any are ignored.

    Data members:

    format      -- structure format name
    ciffile     -- instance of CifFile from PyCifRW
    stru        -- Structure instance used for cif input or output

    Data members used for input only:

    spacegroup  -- instance of SpaceGroup used for symmetry expansion
    eps         -- resolution in fractional coordinates for non-equal
                   positions.  Use for expansion of asymmetric unit.
    eau         -- instance of ExpandAsymmetricUnit from SymmetryUtilities
    asymmetric_unit -- list of atom instances for the original asymmetric
                   unit in the CIF file
    labelindex  -- dictionary mapping unique atom label to index of atom
                   in self.asymmetric_unit
    cif_sgname  -- space group name obtained by looking up the value of
                   _space_group_name_Hall, _symmetry_space_group_name_Hall,
                   _space_group_name_H-M_alt, _symmetry_space_group_name_H-M
                   items.  None when neither is defined.
    """

    # static data and methods ------------------------------------------------

    # dictionary set of class methods for translating CIF values
    # to Atom attributes

    _atom_setters = dict.fromkeys((
        '_tr_ignore',
        '_tr_atom_site_label',
        '_tr_atom_site_type_symbol',
        '_tr_atom_site_fract_x',
        '_tr_atom_site_fract_y',
        '_tr_atom_site_fract_z',
        '_tr_atom_site_cartn_x',
        '_tr_atom_site_cartn_y',
        '_tr_atom_site_cartn_z',
        '_tr_atom_site_U_iso_or_equiv',
        '_tr_atom_site_B_iso_or_equiv',
        '_tr_atom_site_adp_type',
        '_tr_atom_site_thermal_displace_type',
        '_tr_atom_site_occupancy',
        '_tr_atom_site_aniso_U_11',
        '_tr_atom_site_aniso_U_22',
        '_tr_atom_site_aniso_U_33',
        '_tr_atom_site_aniso_U_12',
        '_tr_atom_site_aniso_U_13',
        '_tr_atom_site_aniso_U_23',
        '_tr_atom_site_aniso_B_11',
        '_tr_atom_site_aniso_B_22',
        '_tr_atom_site_aniso_B_33',
        '_tr_atom_site_aniso_B_12',
        '_tr_atom_site_aniso_B_13',
        '_tr_atom_site_aniso_B_23',
    ))
    # make _atom_setters case insensitive: both the exact and the
    # lower-cased method name map to the exact method name
    for k in list(_atom_setters.keys()):
        _atom_setters[k] = _atom_setters[k.lower()] = k
    del k

    # conversion factor from B to U displacement parameters
    BtoU = 1.0/(8 * numpy.pi**2)

    @staticmethod
    def _tr_ignore(a, value):
        """Setter for CIF items that have no Atom counterpart."""
        return

    @staticmethod
    def _tr_atom_site_label(a, value):
        """Set atom label and, if still empty, guess the element from it."""
        a.label = str(value)
        # set element when not specified by _atom_site_type_symbol
        if not a.element:
            P_cif._tr_atom_site_type_symbol(a, value)

    # 3 regexp groups for nucleon number, atom symbol, and oxidation state
    _psymb = re.compile(r'(\d+-)?([a-zA-Z]+)(\d[+-])?')

    @staticmethod
    def _tr_atom_site_type_symbol(a, value):
        """Set atom element from the leading symbol pattern in value.

        The whole text matched by _psymb is used; value is used as is
        when it does not match.  The result is capitalized.
        """
        rx = P_cif._psymb.match(value)
        smbl = str(rx.group(0) if rx else value)
        a.element = smbl[:1].upper() + smbl[1:].lower()

    @staticmethod
    def _tr_atom_site_fract_x(a, value):
        a.xyz[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_y(a, value):
        a.xyz[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_z(a, value):
        a.xyz[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_x(a, value):
        a.xyz_cartn[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_y(a, value):
        a.xyz_cartn[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_z(a, value):
        a.xyz_cartn[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_U_iso_or_equiv(a, value):
        a.Uisoequiv = leading_float(value)

    @staticmethod
    def _tr_atom_site_B_iso_or_equiv(a, value):
        a.Uisoequiv = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_adp_type(a, value):
        """Flag atom as anisotropic unless value is "Uiso" or "Biso"."""
        a.anisotropy = value not in ("Uiso", "Biso")

    # the older CIF item name is handled by the same setter
    _tr_atom_site_thermal_displace_type = _tr_atom_site_adp_type

    @staticmethod
    def _tr_atom_site_occupancy(a, value):
        a.occupancy = leading_float(value, 1.0)

    @staticmethod
    def _tr_atom_site_aniso_U_11(a, value):
        a.U11 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_22(a, value):
        a.U22 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_33(a, value):
        a.U33 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_12(a, value):
        a.U12 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_13(a, value):
        a.U13 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_23(a, value):
        a.U23 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_11(a, value):
        a.U11 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_22(a, value):
        a.U22 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_33(a, value):
        a.U33 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_12(a, value):
        a.U12 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_13(a, value):
        a.U13 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_23(a, value):
        a.U23 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _get_atom_setters(cifloop):
        """Find translators of CifLoop items to data in Atom instance.
        Static method.

        cifloop -- instance of CifLoop

        Return a list of setter functions in the order of cifloop.keys().
        Items without a dedicated setter are mapped to _tr_ignore.
        """
        rv = []
        for p in cifloop.keys():
            lcname = "_tr" + p.lower()
            fncname = P_cif._atom_setters.get(lcname, '_tr_ignore')
            rv.append(getattr(P_cif, fncname))
        return rv

    # normal methods ---------------------------------------------------------

    def __init__(self, eps=None):
        """Initialize the parser for CIF structure files.

        eps  -- fractional coordinates cutoff for duplicate positions.
                When None use the default for ExpandAsymmetricUnit.
        """
        StructureParser.__init__(self)
        self.format = "cif"
        self.ciffile = None
        self.stru = None
        self.spacegroup = None
        self.eps = eps
        self.eau = None
        self.asymmetric_unit = None
        self.labelindex = {}
        self.cif_sgname = None

    def parse(self, s):
        """Create Structure instance from a string in CIF format.

        Return Structure instance or raise StructureFormatError.
        """
        self.ciffile = None
        self.filename = ''
        fp = six.StringIO(s)
        return self._parseCifDataSource(fp)

    def parseLines(self, lines):
        """Parse list of lines in CIF format.

        lines -- list of strings stripped of line terminator

        Return Structure instance or raise StructureFormatError.
        """
        s = "\n".join(lines) + '\n'
        return self.parse(s)

    def parseFile(self, filename):
        """Create Structure from an existing CIF file.

        filename -- path to structure file

        Return Structure object.
        Raise StructureFormatError or IOError.
        """
        self.ciffile = None
        self.filename = filename
        fileurl = _fixIfWindowsPath(filename)
        return self._parseCifDataSource(fileurl)

    def _parseCifDataSource(self, datasource):
        """\
        Open and process CIF data from the specified `datasource`.

        Parameters
        ----------
        datasource : str or a file-like object
            This is used as an argument to the CifFile class.  The CifFile
            instance is stored in `ciffile` attribute of this Parser.

        Returns
        -------
        Structure
            The Structure object loaded from the specified data source.
            This is None when no block contains _atom_site_label.

        Raises
        ------
        StructureFormatError
            When the data do not constitute a valid CIF format.
        """
        from CifFile import CifFile, StarError
        self.stru = None
        try:
            with _suppressCifParserOutput():
                self.ciffile = CifFile(datasource)
                for blockname in self.ciffile.keys():
                    self._parseCifBlock(blockname)
                    # stop after reading the first structure
                    if self.stru is not None:
                        break
        except (StarError, ValueError, IndexError) as err:
            emsg = str(err).strip()
            e = StructureFormatError(emsg)
            # keep the traceback of the underlying parsing error
            six.reraise(StructureFormatError, e, sys.exc_info()[2])
        return self.stru

    def _parseCifBlock(self, blockname):
        """Translate CIF file block, skip blocks without _atom_site_label.
        Updates data members stru, eau.

        blockname  -- name of top level block in self.ciffile

        No return value.
        """
        block = self.ciffile[blockname]
        if '_atom_site_label' not in block:
            return
        # here block contains structure, initialize output data
        self.stru = Structure()
        self.labelindex.clear()
        # execute specialized block parsers
        self._parse_lattice(block)
        self._parse_atom_site_label(block)
        self._parse_atom_site_aniso_label(block)
        self._parse_space_group_symop_operation_xyz(block)
        return

    def _parse_lattice(self, block):
        """Obtain lattice parameters from a CifBlock.
        This method updates self.stru.lattice.  Nothing is done when the
        block has no _cell_length_a item.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when some other cell item is missing.
        """
        if '_cell_length_a' not in block:
            return
        # obtain lattice parameters
        try:
            latpars = (
                leading_float(block['_cell_length_a']),
                leading_float(block['_cell_length_b']),
                leading_float(block['_cell_length_c']),
                leading_float(block['_cell_angle_alpha']),
                leading_float(block['_cell_angle_beta']),
                leading_float(block['_cell_angle_gamma']),
            )
        except KeyError as err:
            emsg = str(err)
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, sys.exc_info()[2])
        self.stru.lattice = Lattice(*latpars)
        return

    def _parse_atom_site_label(self, block):
        """Obtain atoms in asymmetric unit from a CifBlock.
        This method inserts Atom instances to self.stru and
        updates labelindex dictionary.

        block -- instance of CifBlock

        No return value.
        """
        # process _atom_site_label
        atom_site_loop = block.GetLoop('_atom_site_label')
        # get a list of setters for atom_site values
        prop_setters = P_cif._get_atom_setters(atom_site_loop)
        # index of the _atom_site_label item for the labelindex dictionary
        ilb = atom_site_loop.keys().index('_atom_site_label')
        # loop through the values and pass them to the setters
        sitedatalist = zip(*atom_site_loop.values())
        for values in sitedatalist:
            curlabel = values[ilb]
            # skip entries that have invalid label
            if curlabel == '?':
                continue
            self.labelindex[curlabel] = len(self.stru)
            self.stru.addNewAtom()
            a = self.stru.getLastAtom()
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return

    def _parse_atom_site_aniso_label(self, block):
        """Obtain value of anisotropic thermal displacements from a CifBlock.
        This method updates U members of Atom instances in self.stru.
        The labelindex dictionary has to be defined beforehand.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when the loop refers to an atom label
        that is not in the labelindex dictionary.
        """
        if '_atom_site_aniso_label' not in block:
            return
        # was anisotropy processed in the _atom_site_label loop?
        isotropy_done = _hasAtomSiteADPType(block)
        # something to do here:
        adp_loop = block.GetLoop('_atom_site_aniso_label')
        # index of the _atom_site_label column
        ilb = adp_loop.keys().index('_atom_site_aniso_label')
        # get a list of setters for this loop
        prop_setters = P_cif._get_atom_setters(adp_loop)
        sitedatalist = zip(*adp_loop.values())
        for values in sitedatalist:
            curlabel = values[ilb]
            # skip entries that have invalid label; such entries are
            # never indexed by _parse_atom_site_label either
            if curlabel == '?':
                continue
            if curlabel not in self.labelindex:
                emsg = ("Unknown atom label {!r} in "
                        "_atom_site_aniso_label loop.")
                raise StructureFormatError(emsg.format(curlabel))
            a = self.stru[self.labelindex[curlabel]]
            if not isotropy_done:
                a.anisotropy = True
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return

    def _parse_space_group_symop_operation_xyz(self, block):
        """Process symmetry operations from a CifBlock.  The method
        updates spacegroup and eau data according to symmetry
        operations defined in _space_group_symop_operation_xyz or
        _symmetry_equiv_pos_as_xyz items in CifBlock.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when the space group cannot be
        determined.
        """
        from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
        from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
        from diffpy.structure.spacegroups import FindSpaceGroup
        self.asymmetric_unit = list(self.stru)
        sym_synonyms = ('_space_group_symop_operation_xyz',
                        '_symmetry_equiv_pos_as_xyz')
        sym_loop_name = [n for n in sym_synonyms if n in block]
        # recover explicit list of symmetry operations
        symop_list = []
        if sym_loop_name:
            # sym_loop exists here and we know its cif name
            sym_loop_name = sym_loop_name[0]
            sym_loop = block.GetLoop(sym_loop_name)
            for eqxyz in sym_loop[sym_loop_name]:
                opcif = getSymOp(eqxyz)
                symop_list.append(opcif)
        # determine space group number
        sg_nameHall = (block.get('_space_group_name_Hall', '') or
                       block.get('_symmetry_space_group_name_Hall', ''))
        sg_nameHM = (block.get('_space_group_name_H-M_alt', '') or
                     block.get('_symmetry_space_group_name_H-M', ''))
        self.cif_sgname = (sg_nameHall or sg_nameHM or None)
        # prefer the numeric identifier, fall back to the H-M name
        sgid = (int(block.get('_space_group_IT_number', '0')) or
                int(block.get('_symmetry_Int_Tables_number', '0')) or
                sg_nameHM)
        self.spacegroup = None
        # try to reuse existing space group from symmetry operations
        if symop_list:
            try:
                self.spacegroup = FindSpaceGroup(symop_list)
            except ValueError:
                # no match here, other lookups are tried below
                pass
        # otherwise lookup the space group from its identifier
        if self.spacegroup is None and sgid and IsSpaceGroupIdentifier(sgid):
            self.spacegroup = GetSpaceGroup(sgid)
        # define new spacegroup when symmetry operations were listed, but
        # there is no match to an existing definition
        if symop_list and self.spacegroup is None:
            new_short_name = "CIF " + (sg_nameHall or 'data')
            new_crystal_system = (
                block.get('_space_group_crystal_system') or
                block.get('_symmetry_cell_setting') or
                'TRICLINIC').upper()
            self.spacegroup = SpaceGroup(
                short_name=new_short_name,
                crystal_system=new_crystal_system,
                symop_list=symop_list)
        if self.spacegroup is None:
            emsg = "CIF file has unknown space group identifier {!r}."
            raise StructureFormatError(emsg.format(sgid))
        self._expandAsymmetricUnit(block)
        return

    def _expandAsymmetricUnit(self, block):
        """Perform symmetry expansion of self.stru using self.spacegroup.

        This method updates data in stru and eau.

        Parameters
        ----------
        block : CifBlock
            The top-level block containing crystal structure data.
        """
        from diffpy.structure.symmetryutilities import ExpandAsymmetricUnit
        corepos = [a.xyz for a in self.stru]
        coreUijs = [a.U for a in self.stru]
        self.eau = ExpandAsymmetricUnit(self.spacegroup, corepos, coreUijs,
                                        eps=self.eps)
        # setup anisotropy according to symmetry requirements
        # was isotropy flag already processed
        isotropy_done = (_hasAtomSiteADPType(block) or
                         '_atom_site_aniso_label' in block)
        if not isotropy_done:
            for ca, uisotropy in zip(self.stru, self.eau.Uisotropy):
                ca.anisotropy = not uisotropy
        # build a nested list of new atoms:
        newatoms = []
        for i, ca in enumerate(self.stru):
            eca = []    # expanded core atom
            for j in range(self.eau.multiplicity[i]):
                a = Atom(ca)
                a.xyz = self.eau.expandedpos[i][j]
                # the first copy keeps the original label, the other
                # copies get suffix _2, _3, ...
                if j > 0:
                    a.label += '_' + str(j + 1)
                if a.anisotropy:
                    a.U = self.eau.expandedUijs[i][j]
                eca.append(a)
            newatoms.append(eca)
        # insert new atoms where they belong
        self.stru[:] = sum(newatoms, [])
        return

    # conversion to CIF ------------------------------------------------------

    def toLines(self, stru):
        """Convert Structure stru to a list of lines in basic CIF format.
        The structure is always written in P1 symmetry.

        Return list of strings.
        """
        import time
        lines = []
        # may be replaced with filtered Structure.title
        # for now, we can add the title as a comment
        if stru.title.strip() != "":
            title_lines = stru.title.split('\n')
            lines.extend(["# " + line.strip() for line in title_lines])
            lines.append("")
        lines.append("data_3D")
        iso_date = "%04i-%02i-%02i" % time.gmtime()[:3]
        lines.extend([
            "%-31s %s" % ("_audit_creation_date", iso_date),
            "%-31s %s" % ("_audit_creation_method", "P_cif.py"),
            "",
            "%-31s %s" % ("_symmetry_space_group_name_H-M", "'P1'"),
            "%-31s %s" % ("_symmetry_Int_Tables_number", "1"),
            "%-31s %s" % ("_symmetry_cell_setting", "triclinic"),
            "",
        ])
        # there should be no need to specify equivalent positions for P1
        # _symmetry_equiv_pos_as_xyz x,y,z
        lines.extend([
            "%-31s %.6g" % ("_cell_length_a", stru.lattice.a),
            "%-31s %.6g" % ("_cell_length_b", stru.lattice.b),
            "%-31s %.6g" % ("_cell_length_c", stru.lattice.c),
            "%-31s %.6g" % ("_cell_angle_alpha", stru.lattice.alpha),
            "%-31s %.6g" % ("_cell_angle_beta", stru.lattice.beta),
            "%-31s %.6g" % ("_cell_angle_gamma", stru.lattice.gamma),
            "",
        ])
        # build a list of site labels and adp (displacement factor) types
        element_count = {}
        a_site_label = []
        a_adp_type = []
        for a in stru:
            cnt = element_count[a.element] = \
                element_count.get(a.element, 0) + 1
            a_site_label.append("%s%i" % (a.element, cnt))
            # U proportional to identity matrix means isotropic atom
            if numpy.all(a.U == a.U[0, 0] * numpy.identity(3)):
                a_adp_type.append("Uiso")
            else:
                a_adp_type.append("Uani")
        # list all atoms
        lines.extend([
            "loop_",
            " _atom_site_label",
            " _atom_site_type_symbol",
            " _atom_site_fract_x",
            " _atom_site_fract_y",
            " _atom_site_fract_z",
            " _atom_site_U_iso_or_equiv",
            " _atom_site_adp_type",
            " _atom_site_occupancy",
        ])
        for a, label, adp_type in zip(stru, a_site_label, a_adp_type):
            line = " %-5s %-3s %11.6f %11.6f %11.6f %11.6f %-5s %.4f" % (
                label, a.element, a.xyz[0], a.xyz[1], a.xyz[2],
                a.Uisoequiv, adp_type, a.occupancy)
            lines.append(line)
        # find anisotropic atoms
        aniso_sites = [(label, a)
                       for a, label, adp_type
                       in zip(stru, a_site_label, a_adp_type)
                       if adp_type != "Uiso"]
        if aniso_sites:
            lines.extend([
                "loop_",
                " _atom_site_aniso_label",
                " _atom_site_aniso_U_11",
                " _atom_site_aniso_U_22",
                " _atom_site_aniso_U_33",
                " _atom_site_aniso_U_12",
                " _atom_site_aniso_U_13",
                " _atom_site_aniso_U_23",
            ])
            for label, a in aniso_sites:
                line = " %-5s %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f" % (
                    label, a.U[0, 0], a.U[1, 1], a.U[2, 2],
                    a.U[0, 1], a.U[0, 2], a.U[1, 2])
                lines.append(line)
        return lines
def parseLines(self, lines):
    """Parse list of lines in XCFG format.

    lines -- list of strings, one per input line.  Trailing blank lines
             are ignored.

    The header must start with the "Number of particles =" line and
    define all 9 components of the H0 tensor.  The number of columns in
    atom records must agree with the entry_count header item.

    Return Structure object or raise StructureFormatError.
    """
    xcfg_Number_of_particles = None
    xcfg_A = None
    xcfg_H0 = numpy.zeros((3, 3), dtype=float)
    xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
    xcfg_NO_VELOCITY = False
    xcfg_entry_count = None
    # p_nl is the 1-based number of the line being processed
    p_nl = 0
    p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
    p_auxiliary = {}
    stru = Structure()
    # ignore trailing blank lines
    stop = len(lines)
    for line in reversed(lines):
        if line.strip():
            break
        stop -= 1
    # iterator over the valid data lines
    ilines = iter(lines[:stop])
    try:
        # read XCFG header
        for line in ilines:
            p_nl += 1
            stripped_line = line.strip()
            # blank lines and lines starting with # are ignored
            if stripped_line == "" or line[0] == '#':
                continue
            elif xcfg_Number_of_particles is None:
                if not line.startswith("Number of particles ="):
                    emsg = ("%d: first line must " +
                            "contain 'Number of particles ='") % p_nl
                    raise StructureFormatError(emsg)
                xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                p_natoms = xcfg_Number_of_particles
            elif line.startswith("A ="):
                xcfg_A = float(line[3:].split(None, 1)[0])
            elif line.startswith("H0("):
                # tensor indices are single digits in "H0(i,j)"
                i, j = (int(line[3]) - 1, int(line[5]) - 1)
                xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                xcfg_H0_set[i, j] = True
            elif line.startswith(".NO_VELOCITY."):
                xcfg_NO_VELOCITY = True
            elif line.startswith("entry_count ="):
                xcfg_entry_count = int(line[13:].split(None, 1)[0])
            else:
                m = p_auxiliary_re.match(line)
                if m is None:
                    # first line that is not a header item ends the
                    # header; it is consumed here and is not seen by
                    # the data loop below
                    break
                idx = int(m.group(1))
                p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
        # check header for consistency
        if not numpy.all(xcfg_H0_set):
            emsg = "H0 tensor is not properly defined"
            raise StructureFormatError(emsg)
        # number of auxiliary columns is given by the largest index
        p_auxnum = max(p_auxiliary) + 1 if p_auxiliary else 0
        # assign default names to auxiliaries that were not declared
        for i in range(p_auxnum):
            if i not in p_auxiliary:
                p_auxiliary[i] = "aux%d" % i
        sorted_aux_keys = sorted(p_auxiliary.keys())
        if p_auxnum != 0:
            stru.xcfg = {
                'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
            }
        # 3 coordinates, 3 more columns for velocities unless disabled
        ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
        if ecnt != xcfg_entry_count:
            emsg = ("%d: auxiliary fields are "
                    "not consistent with entry_count") % p_nl
            raise StructureFormatError(emsg)
        # define proper lattice
        stru.lattice.setLatBase(xcfg_H0)
        # here we are inside the data block
        p_element = None
        for line in ilines:
            p_nl += 1
            words = line.split()
            # ignore atom mass
            if len(words) == 1 and isfloat(words[0]):
                continue
            # parse element allowing empty symbol
            elif len(words) <= 1:
                w = line.strip()
                p_element = w[:1].upper() + w[1:].lower()
            elif len(words) == xcfg_entry_count and p_element is not None:
                fields = [float(w) for w in words]
                # coordinates are scaled by A, which is None when the
                # header has no "A =" item; report it as a format error
                # instead of failing with TypeError in the product below
                if xcfg_A is None:
                    emsg = "%d: length scale 'A =' is not defined" % p_nl
                    raise StructureFormatError(emsg)
                xyz = [xcfg_A * xi for xi in fields[:3]]
                stru.addNewAtom(p_element, xyz=xyz)
                a = stru[-1]
                _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary,
                                    no_velocity=xcfg_NO_VELOCITY)
            else:
                emsg = "%d: invalid record" % p_nl
                raise StructureFormatError(emsg)
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        emsg = "%d: file is not in XCFG format" % p_nl
        e = StructureFormatError(emsg)
        # keep the traceback of the original error
        six.reraise(StructureFormatError, e, sys.exc_info()[2])
    return stru
def parseLines(self, lines):
    """Parse list of lines in PDB format.

    lines -- list of strings, one per input line.  Blank lines are
             skipped and shorter lines are padded with spaces to
             80 characters before their fixed columns are read.

    Return Structure instance or raise StructureFormatError.
    Raise NotImplementedError when SCALEn records contain
    a nonzero origin offset.
    """
    try:
        stru = Structure()
        scale = numpy.identity(3, dtype=float)
        scaleU = numpy.zeros(3, dtype=float)
        # sc is created by the SCALE1 record and last_atom by ATOM or
        # HETATM records.  None means the required record was not seen
        # yet, so that dependent records can be rejected with a proper
        # format error rather than fail on an unbound variable.
        sc = None
        last_atom = None
        emsg_noscale = "%d: %s record without preceding SCALE1"
        emsg_noatom = "%d: %s record without preceding ATOM or HETATM"
        # p_nl is the 1-based number of the line being processed
        p_nl = 0
        for line in lines:
            p_nl += 1
            # skip blank lines
            if not line.strip():
                continue
            # make sure line has 80 characters
            if len(line) < 80:
                line = "%-80s" % line
            words = line.split()
            record = words[0]
            if record == "TITLE":
                continuation = line[8:10]
                if continuation.strip():
                    stru.title += line[10:].rstrip()
                else:
                    stru.title = line[10:].rstrip()
            elif record == "CRYST1":
                a = float(line[7:15])
                b = float(line[15:24])
                c = float(line[24:33])
                alpha = float(line[33:40])
                beta = float(line[40:47])
                gamma = float(line[47:54])
                stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                scale = numpy.transpose(stru.lattice.recbase)
            elif record == "SCALE1":
                sc = numpy.zeros((3, 3), dtype=float)
                sc[0, :] = [float(x) for x in line[10:40].split()]
                scaleU[0] = float(line[45:55])
            elif record == "SCALE2":
                if sc is None:
                    raise StructureFormatError(
                        emsg_noscale % (p_nl, record))
                sc[1, :] = [float(x) for x in line[10:40].split()]
                scaleU[1] = float(line[45:55])
            elif record == "SCALE3":
                if sc is None:
                    raise StructureFormatError(
                        emsg_noscale % (p_nl, record))
                sc[2, :] = [float(x) for x in line[10:40].split()]
                scaleU[2] = float(line[45:55])
                # the scale matrix is complete here; use it to define
                # the lattice and verify it agrees with CRYST1 values
                base = numpy.transpose(numpy.linalg.inv(sc))
                abcABGcryst = numpy.array(stru.lattice.abcABG())
                stru.lattice.setLatBase(base)
                abcABGscale = numpy.array(stru.lattice.abcABG())
                reldiff = numpy.fabs(1.0 - abcABGscale/abcABGcryst)
                if not numpy.all(reldiff < 1.0e-4):
                    emsg = "%d: " % p_nl + \
                        "SCALE and CRYST1 are not consistent."
                    raise StructureFormatError(emsg)
                if numpy.any(scaleU != 0.0):
                    emsg = "Origin offset not yet implemented."
                    raise NotImplementedError(emsg)
            elif record in ("ATOM", "HETATM"):
                name = line[12:16].strip()
                rc = [float(x) for x in line[30:54].split()]
                # blank occupancy or B-factor columns get default values
                try:
                    occupancy = float(line[54:60])
                except ValueError:
                    occupancy = 1.0
                try:
                    B = float(line[60:66])
                    uiso = B/(8*pi**2)
                except ValueError:
                    uiso = 0.0
                element = line[76:78].strip()
                if element == "":
                    # get element from the first 2 characters of name
                    element = line[12:14].strip()
                    element = element[0].upper() + element[1:].lower()
                stru.addNewAtom(element,
                                occupancy=occupancy, label=name)
                last_atom = stru.getLastAtom()
                last_atom.xyz_cartn = rc
                last_atom.Uisoequiv = uiso
            elif record == "SIGATM":
                if last_atom is None:
                    raise StructureFormatError(
                        emsg_noatom % (p_nl, record))
                sigrc = [float(x) for x in line[30:54].split()]
                sigxyz = numpy.dot(scale, sigrc)
                try:
                    sigo = float(line[54:60])
                except ValueError:
                    sigo = 0.0
                try:
                    sigB = float(line[60:66])
                    sigU = numpy.identity(3)*sigB/(8*pi**2)
                except ValueError:
                    sigU = numpy.zeros((3, 3), dtype=float)
                last_atom.sigxyz = sigxyz
                last_atom.sigo = sigo
                last_atom.sigU = sigU
            elif record == "ANISOU":
                if last_atom is None:
                    raise StructureFormatError(
                        emsg_noatom % (p_nl, record))
                last_atom.anisotropy = True
                # record values are scaled by 1.0e-4 on input
                Uij = [float(x)*1.0e-4 for x in line[28:70].split()]
                Ua = last_atom.U
                for i in range(3):
                    Ua[i, i] = Uij[i]
                Ua[0, 1] = Ua[1, 0] = Uij[3]
                Ua[0, 2] = Ua[2, 0] = Uij[4]
                Ua[1, 2] = Ua[2, 1] = Uij[5]
            elif record == "SIGUIJ":
                if last_atom is None:
                    raise StructureFormatError(
                        emsg_noatom % (p_nl, record))
                sigUij = [float(x)*1.0e-4 for x in line[28:70].split()]
                for i in range(3):
                    last_atom.sigU[i, i] = sigUij[i]
                last_atom.sigU[0, 1] = last_atom.sigU[1, 0] = sigUij[3]
                last_atom.sigU[0, 2] = last_atom.sigU[2, 0] = sigUij[4]
                last_atom.sigU[1, 2] = last_atom.sigU[2, 1] = sigUij[5]
            elif record in P_pdb.validRecords:
                # known record that carries no data used here
                pass
            else:
                emsg = "%d: invalid record name '%r'" % (p_nl, record)
                raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        emsg = "%d: invalid PDB record" % p_nl
        e = StructureFormatError(emsg)
        # keep the traceback of the original error
        six.reraise(StructureFormatError, e, sys.exc_info()[2])
    return stru
def parseLines(self, lines):
    """Parse list of lines in XYZ format.

    lines -- list of strings, one per input line.  Leading blank lines
             and lines whose first token is "#" are skipped.

    The first valid line must contain just the integer number of atoms
    and the line after it is stored as the structure title.  Atom
    records that follow must have 4 columns: element, x, y, z.

    Return Structure object or raise StructureFormatError.
    """
    linefields = [text.split() for text in lines]
    # prepare output structure
    stru = Structure()
    # skip over the leading blank and "#" lines
    start = 0
    for tokens in linefields:
        if tokens and tokens[0] != "#":
            break
        start += 1
    # first valid line gives number of atoms
    # start is not changed before any failure below, so the message
    # can be prepared in advance
    emsg_natoms = ("%d: invalid XYZ format, missing number of atoms" %
                   (start + 1))
    try:
        header = linefields[start]
        w1 = header[0]
        # the only token must be an integer in its canonical form
        if len(header) != 1 or str(int(w1)) != w1:
            raise StructureFormatError(emsg_natoms)
        p_natoms = int(w1)
        stru.title = lines[start + 1].strip()
        start += 2
    except (IndexError, ValueError):
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg_natoms)
        six.reraise(StructureFormatError, e, exc_traceback)
    # find the last valid record
    stop = len(lines)
    while stop > start and not linefields[stop - 1]:
        stop -= 1
    # get out for empty structure
    if p_natoms == 0 or start >= stop:
        return stru
    # here we have at least one valid record line
    nfields = len(linefields[start])
    if nfields != 4:
        emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    try:
        # p_nl is the 1-based number of the line being processed
        p_nl = start
        for p_nl, fields in enumerate(linefields[start:], start + 1):
            if not fields:
                continue
            if len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            symbol = fields[0]
            element = symbol[0].upper() + symbol[1:].lower()
            xyz = [float(f) for f in fields[1:4]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        exc_traceback = sys.exc_info()[2]
        emsg = "%d: invalid number format" % p_nl
        e = StructureFormatError(emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    # finally check if all the atoms have been read
    if len(stru) != p_natoms:
        emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
        raise StructureFormatError(emsg)
    return stru
class P_cif(StructureParser):
    """Simple parser for CIF structure format.

    Reads Structure from the first block containing _atom_site_label key.
    Following blocks, if any, are ignored.

    Data members:

    format      -- structure format name
    ciffile     -- instance of CifFile from PyCifRW
    stru        -- Structure instance used for cif input or output

    Data members used for input only:

    spacegroup  -- instance of SpaceGroup used for symmetry expansion
    eps         -- resolution in fractional coordinates for non-equal
                   positions.  Use for expansion of asymmetric unit.
    eau         -- instance of ExpandAsymmetricUnit from SymmetryUtilities
    asymmetric_unit -- list of atom instances for the original asymmetric
                   unit in the CIF file
    labelindex  -- dictionary mapping unique atom label to index of atom
                   in self.asymmetric_unit
    cif_sgname  -- space group name obtained by looking up the value of
                   _space_group_name_Hall, _symmetry_space_group_name_Hall,
                   _space_group_name_H-M_alt, _symmetry_space_group_name_H-M
                   items.  None when neither is defined.
    """

    # static data and methods ------------------------------------------------

    # dictionary set of class methods for translating CIF values
    # to Atom attributes
    _atom_setters = dict.fromkeys((
        '_tr_ignore',
        '_tr_atom_site_label',
        '_tr_atom_site_type_symbol',
        '_tr_atom_site_fract_x',
        '_tr_atom_site_fract_y',
        '_tr_atom_site_fract_z',
        '_tr_atom_site_cartn_x',
        '_tr_atom_site_cartn_y',
        '_tr_atom_site_cartn_z',
        '_tr_atom_site_U_iso_or_equiv',
        '_tr_atom_site_B_iso_or_equiv',
        '_tr_atom_site_adp_type',
        '_tr_atom_site_thermal_displace_type',
        '_tr_atom_site_occupancy',
        '_tr_atom_site_aniso_U_11',
        '_tr_atom_site_aniso_U_22',
        '_tr_atom_site_aniso_U_33',
        '_tr_atom_site_aniso_U_12',
        '_tr_atom_site_aniso_U_13',
        '_tr_atom_site_aniso_U_23',
        '_tr_atom_site_aniso_B_11',
        '_tr_atom_site_aniso_B_22',
        '_tr_atom_site_aniso_B_33',
        '_tr_atom_site_aniso_B_12',
        '_tr_atom_site_aniso_B_13',
        '_tr_atom_site_aniso_B_23',
    ))
    # make _atom_setters case insensitive: both the exact and the lowercase
    # spelling of each setter name map to the exact method name
    for k in list(_atom_setters.keys()):
        _atom_setters[k] = _atom_setters[k.lower()] = k
    del k

    # conversion factor from B to U displacement parameters
    BtoU = 1.0/(8 * numpy.pi**2)

    @staticmethod
    def _tr_ignore(a, value):
        """Setter for CIF items that have no Atom counterpart; do nothing."""
        return

    @staticmethod
    def _tr_atom_site_label(a, value):
        """Set atom label and, when still unset, derive element from it."""
        a.label = str(value)
        # set element when not specified by _atom_site_type_symbol
        if not a.element:
            P_cif._tr_atom_site_type_symbol(a, value)

    # 3 regexp groups for nucleon number, atom symbol, and oxidation state
    _psymb = re.compile(r'(\d+-)?([a-zA-Z]+)(\d[+-])?')

    @staticmethod
    def _tr_atom_site_type_symbol(a, value):
        """Set atom element from the leading part of value matched by _psymb.

        The whole match is used, thus any nucleon number or oxidation state
        is kept.  The value is used as is when there is no match.
        """
        rx = P_cif._psymb.match(value)
        # a successful match is never empty, it has at least one letter
        smbl = str(rx.group(0) if rx else value)
        a.element = smbl[:1].upper() + smbl[1:].lower()

    @staticmethod
    def _tr_atom_site_fract_x(a, value):
        a.xyz[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_y(a, value):
        a.xyz[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_z(a, value):
        a.xyz[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_x(a, value):
        a.xyz_cartn[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_y(a, value):
        a.xyz_cartn[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_z(a, value):
        a.xyz_cartn[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_U_iso_or_equiv(a, value):
        a.Uisoequiv = leading_float(value)

    @staticmethod
    def _tr_atom_site_B_iso_or_equiv(a, value):
        a.Uisoequiv = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_adp_type(a, value):
        # any type other than "Uiso" or "Biso" is taken as anisotropic
        a.anisotropy = value not in ("Uiso", "Biso")

    _tr_atom_site_thermal_displace_type = _tr_atom_site_adp_type

    @staticmethod
    def _tr_atom_site_occupancy(a, value):
        a.occupancy = leading_float(value, 1.0)

    @staticmethod
    def _tr_atom_site_aniso_U_11(a, value):
        a.U11 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_22(a, value):
        a.U22 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_33(a, value):
        a.U33 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_12(a, value):
        a.U12 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_13(a, value):
        a.U13 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_23(a, value):
        a.U23 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_11(a, value):
        a.U11 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_22(a, value):
        a.U22 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_33(a, value):
        a.U33 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_12(a, value):
        a.U12 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_13(a, value):
        a.U13 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_23(a, value):
        a.U23 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _get_atom_setters(cifloop):
        """Find translators of CifLoop items to data in Atom instance.
        Static method.

        cifloop -- instance of CifLoop

        Return a list of setter functions in the order of cifloop.keys().
        Items without a dedicated setter are handled by _tr_ignore.
        """
        rv = []
        for p in cifloop.keys():
            lcname = "_tr" + p.lower()
            fncname = P_cif._atom_setters.get(lcname, '_tr_ignore')
            rv.append(getattr(P_cif, fncname))
        return rv

    # normal methods ---------------------------------------------------------

    def __init__(self, eps=None):
        """Initialize the parser for CIF structure files.

        eps  -- fractional coordinates cutoff for duplicate positions.
                When None use the default for ExpandAsymmetricUnit.
        """
        StructureParser.__init__(self)
        self.format = "cif"
        self.ciffile = None
        self.stru = None
        self.spacegroup = None
        self.eps = eps
        self.eau = None
        self.asymmetric_unit = None
        self.labelindex = {}
        self.cif_sgname = None

    def parse(self, s):
        """Create Structure instance from a string in CIF format.

        s    -- string with the CIF data

        Return Structure instance or raise StructureFormatError.
        """
        self.ciffile = None
        self.filename = ''
        fp = six.StringIO(s)
        rv = self._parseCifDataSource(fp)
        return rv

    def parseLines(self, lines):
        """Parse list of lines in CIF format.

        lines -- list of strings stripped of line terminator

        Return Structure instance or raise StructureFormatError.
        """
        s = "\n".join(lines) + '\n'
        return self.parse(s)

    def parseFile(self, filename):
        """Create Structure from an existing CIF file.

        filename -- path to structure file

        Return Structure object.
        Raise StructureFormatError or IOError.
        """
        self.ciffile = None
        self.filename = filename
        fileurl = _fixIfWindowsPath(filename)
        rv = self._parseCifDataSource(fileurl)
        # all good here
        return rv

    def _parseCifDataSource(self, datasource):
        """\
        Open and process CIF data from the specified `datasource`.

        Parameters
        ----------
        datasource : str or a file-like object
            This is used as an argument to the CifFile class.  The CifFile
            instance is stored in `ciffile` attribute of this Parser.

        Returns
        -------
        Structure
            The Structure object loaded from the specified data source.
            This is None when no block contains _atom_site_label.

        Raises
        ------
        StructureFormatError
            When the data do not constitute a valid CIF format.
        """
        from CifFile import CifFile, StarError
        self.stru = None
        try:
            with _suppressCifParserOutput():
                self.ciffile = CifFile(datasource)
                for blockname in self.ciffile.keys():
                    self._parseCifBlock(blockname)
                    # stop after reading the first structure
                    if self.stru is not None:
                        break
        except (StarError, ValueError, IndexError) as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err).strip()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return self.stru

    def _parseCifBlock(self, blockname):
        """Translate CIF file block, skip blocks without _atom_site_label.
        Updates data members stru, eau.

        blockname -- name of top level block in self.ciffile

        No return value.
        """
        block = self.ciffile[blockname]
        if '_atom_site_label' not in block:
            return
        # here block contains structure, initialize output data
        self.stru = Structure()
        self.labelindex.clear()
        # execute specialized block parsers
        self._parse_lattice(block)
        self._parse_atom_site_label(block)
        self._parse_atom_site_aniso_label(block)
        self._parse_space_group_symop_operation_xyz(block)
        return

    def _parse_lattice(self, block):
        """Obtain lattice parameters from a CifBlock.
        This method updates self.stru.lattice.  The lattice is left
        unchanged when the block has no _cell_length_a item.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when _cell_length_a is present, but
        any other cell parameter is missing.
        """
        if '_cell_length_a' not in block:
            return
        # obtain lattice parameters
        try:
            latpars = (
                leading_float(block['_cell_length_a']),
                leading_float(block['_cell_length_b']),
                leading_float(block['_cell_length_c']),
                leading_float(block['_cell_angle_alpha']),
                leading_float(block['_cell_angle_beta']),
                leading_float(block['_cell_angle_gamma']),
            )
        except KeyError as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err)
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        self.stru.lattice = Lattice(*latpars)
        return

    def _parse_atom_site_label(self, block):
        """Obtain atoms in asymmetric unit from a CifBlock.
        This method inserts Atom instances to self.stru and
        updates labelindex dictionary.

        block -- instance of CifBlock

        No return value.
        """
        # process _atom_site_label
        atom_site_loop = block.GetLoop('_atom_site_label')
        # get a list of setters for atom_site values
        prop_setters = P_cif._get_atom_setters(atom_site_loop)
        # index of the _atom_site_label item for the labelindex dictionary.
        # Compare in lowercase to agree with the case-insensitive setters;
        # building a list also works when keys() is not a list.
        loopkeys = [k.lower() for k in atom_site_loop.keys()]
        ilb = loopkeys.index('_atom_site_label')
        # loop through the values and pass them to the setters
        for values in zip(*atom_site_loop.values()):
            curlabel = values[ilb]
            # skip entries that have invalid label
            if curlabel == '?':
                continue
            self.labelindex[curlabel] = len(self.stru)
            self.stru.addNewAtom()
            a = self.stru.getLastAtom()
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return

    def _parse_atom_site_aniso_label(self, block):
        """Obtain value of anisotropic thermal displacements from a CifBlock.
        This method updates U members of Atom instances in self.stru.
        The labelindex dictionary has to be defined beforehand.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when some _atom_site_aniso_label has
        no counterpart among the _atom_site_label values.
        """
        if '_atom_site_aniso_label' not in block:
            return
        # was anisotropy set in the _atom_site_label loop?
        atom_site_loop = block.GetLoop('_atom_site_label')
        anisotropy_already_set = (
            '_atom_site_adp_type' in atom_site_loop or
            '_atom_site_thermal_displace_type' in atom_site_loop)
        # something to do here:
        adp_loop = block.GetLoop('_atom_site_aniso_label')
        # index of the _atom_site_aniso_label column, matched in lowercase
        # to agree with the case-insensitive setters
        loopkeys = [k.lower() for k in adp_loop.keys()]
        ilb = loopkeys.index('_atom_site_aniso_label')
        # get a list of setters for this loop
        prop_setters = P_cif._get_atom_setters(adp_loop)
        for values in zip(*adp_loop.values()):
            lb = values[ilb]
            # skip entries that have invalid label, these are not loaded
            # by _parse_atom_site_label either
            if lb == '?':
                continue
            # report unknown label as a format error rather than KeyError
            if lb not in self.labelindex:
                emsg = ("_atom_site_aniso_label {!r} is not defined "
                        "in _atom_site_label.")
                raise StructureFormatError(emsg.format(lb))
            a = self.stru[self.labelindex[lb]]
            if not anisotropy_already_set:
                a.anisotropy = True
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return

    def _parse_space_group_symop_operation_xyz(self, block):
        """Process symmetry operations from a CifBlock.

        The method updates spacegroup and eau data according to symmetry
        operations defined in _space_group_symop_operation_xyz or
        _symmetry_equiv_pos_as_xyz items in CifBlock.  It also sets
        asymmetric_unit and cif_sgname and expands self.stru.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when the space group cannot be
        determined.
        """
        from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
        from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
        self.asymmetric_unit = list(self.stru)
        sym_synonyms = ('_space_group_symop_operation_xyz',
                        '_symmetry_equiv_pos_as_xyz')
        sym_names = [n for n in sym_synonyms if n in block]
        # recover explicit list of symmetry operations
        symop_list = []
        if sym_names:
            # sym_loop exists here and we know its cif name
            sym_loop_name = sym_names[0]
            sym_loop = block.GetLoop(sym_loop_name)
            for eqxyz in sym_loop[sym_loop_name]:
                opcif = getSymOp(eqxyz)
                symop_list.append(opcif)
        # determine space group number
        sg_nameHall = (block.get('_space_group_name_Hall', '') or
                       block.get('_symmetry_space_group_name_Hall', ''))
        sg_nameHM = (block.get('_space_group_name_H-M_alt', '') or
                     block.get('_symmetry_space_group_name_H-M', ''))
        self.cif_sgname = (sg_nameHall or sg_nameHM or None)
        # prefer the number, fall back to the Hermann-Mauguin name
        sgid = (int(block.get('_space_group_IT_number', '0')) or
                int(block.get('_symmetry_Int_Tables_number', '0')) or
                sg_nameHM)
        # try to reuse existing space group
        self.spacegroup = None
        if sgid and IsSpaceGroupIdentifier(sgid):
            sgstd = GetSpaceGroup(sgid)
            oprep_std = [str(op) for op in sgstd.iter_symops()]
            oprep_std.sort()
            oprep_cif = [str(op) for op in symop_list]
            oprep_cif.sort()
            # same set of operations: use a copy of the standard group,
            # but keep the symmetry operations in the order of the CIF
            if oprep_std == oprep_cif:
                self.spacegroup = copy.copy(sgstd)
                self.spacegroup.symop_list = symop_list
            # use standard definition when symmetry operations were not listed
            elif not symop_list:
                self.spacegroup = sgstd
        # define new spacegroup when symmetry operations were listed, but
        # there is no match to an existing definition
        if symop_list and self.spacegroup is None:
            new_short_name = "CIF " + (sg_nameHall or 'data')
            new_crystal_system = (
                block.get('_space_group_crystal_system') or
                block.get('_symmetry_cell_setting') or
                'TRICLINIC').upper()
            self.spacegroup = SpaceGroup(
                short_name=new_short_name,
                crystal_system=new_crystal_system,
                symop_list=symop_list)
        if self.spacegroup is None:
            emsg = "CIF file has unknown space group identifier {!r}."
            raise StructureFormatError(emsg.format(sgid))
        self._expandAsymmetricUnit()
        return

    def _expandAsymmetricUnit(self):
        """Perform symmetry expansion of self.stru using self.spacegroup.
        This method updates data in stru and eau.

        No return value.
        """
        from diffpy.structure.symmetryutilities import ExpandAsymmetricUnit
        # positions and displacement tensors of the asymmetric unit
        corepos = [a.xyz for a in self.stru]
        coreUijs = [a.U for a in self.stru]
        self.eau = ExpandAsymmetricUnit(self.spacegroup, corepos, coreUijs,
                                        eps=self.eps)
        # build a flat list of new atoms, with all equivalent sites of
        # each core atom kept together
        newatoms = []
        for i, ca in enumerate(self.stru):
            for j in range(self.eau.multiplicity[i]):
                a = Atom(ca)
                a.xyz = self.eau.expandedpos[i][j]
                # the first site keeps the original label, the others
                # get the suffix "_2", "_3", ...
                if j > 0:
                    a.label += '_' + str(j + 1)
                if a.anisotropy:
                    a.U = self.eau.expandedUijs[i][j]
                newatoms.append(a)
        # insert new atoms where they belong
        self.stru[:] = newatoms
        return

    # conversion to CIF ------------------------------------------------------

    def toLines(self, stru):
        """Convert Structure stru to a list of lines in basic CIF format.

        stru -- Structure instance to be converted.  The output always
                declares space group P1.

        Return list of strings.
        """
        import time
        lines = []
        # may be replaced with filtered Structure.title
        # for now, we can add the title as a comment
        if stru.title.strip():
            title_lines = stru.title.split('\n')
            lines.extend(["# " + line.strip() for line in title_lines])
            lines.append("")
        lines.append("data_3D")
        iso_date = "%04i-%02i-%02i" % time.gmtime()[:3]
        lines.extend([
            "%-31s %s" % ("_audit_creation_date", iso_date),
            "%-31s %s" % ("_audit_creation_method", "P_cif.py"),
            "",
            "%-31s %s" % ("_symmetry_space_group_name_H-M", "'P1'"),
            "%-31s %s" % ("_symmetry_Int_Tables_number", "1"),
            "%-31s %s" % ("_symmetry_cell_setting", "triclinic"),
            "",
        ])
        # there should be no need to specify equivalent positions for P1
        #   _symmetry_equiv_pos_as_xyz x,y,z
        lines.extend([
            "%-31s %.6g" % ("_cell_length_a", stru.lattice.a),
            "%-31s %.6g" % ("_cell_length_b", stru.lattice.b),
            "%-31s %.6g" % ("_cell_length_c", stru.lattice.c),
            "%-31s %.6g" % ("_cell_angle_alpha", stru.lattice.alpha),
            "%-31s %.6g" % ("_cell_angle_beta", stru.lattice.beta),
            "%-31s %.6g" % ("_cell_angle_gamma", stru.lattice.gamma),
            "",
        ])
        # build a list of site labels and adp (displacement factor) types
        element_count = {}
        a_site_label = []
        a_adp_type = []
        for a in stru:
            # label is the element followed by its running count
            cnt = element_count[a.element] = element_count.get(a.element, 0) + 1
            a_site_label.append("%s%i" % (a.element, cnt))
            # U proportional to identity matrix means isotropic site
            if numpy.all(a.U == a.U[0, 0] * numpy.identity(3)):
                a_adp_type.append("Uiso")
            else:
                a_adp_type.append("Uani")
        # list all atoms
        lines.extend([
            "loop_",
            " _atom_site_label",
            " _atom_site_type_symbol",
            " _atom_site_fract_x",
            " _atom_site_fract_y",
            " _atom_site_fract_z",
            " _atom_site_U_iso_or_equiv",
            " _atom_site_adp_type",
            " _atom_site_occupancy",
        ])
        for a, label, adp_type in zip(stru, a_site_label, a_adp_type):
            line = " %-5s %-3s %11.6f %11.6f %11.6f %11.6f %-5s %.4f" % (
                label, a.element, a.xyz[0], a.xyz[1], a.xyz[2],
                a.Uisoequiv, adp_type, a.occupancy)
            lines.append(line)
        # find anisotropic atoms
        aniso_sites = [(label, a) for a, label, adp_type
                       in zip(stru, a_site_label, a_adp_type)
                       if adp_type != "Uiso"]
        if aniso_sites:
            lines.extend([
                "loop_",
                " _atom_site_aniso_label",
                " _atom_site_aniso_U_11",
                " _atom_site_aniso_U_22",
                " _atom_site_aniso_U_33",
                " _atom_site_aniso_U_12",
                " _atom_site_aniso_U_13",
                " _atom_site_aniso_U_23",
            ])
            for label, a in aniso_sites:
                line = " %-5s %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f" % (
                    label, a.U[0, 0], a.U[1, 1], a.U[2, 2],
                    a.U[0, 1], a.U[0, 2], a.U[1, 2])
                lines.append(line)
        return lines
def parseLines(self, lines):
    """Build a Structure from the lines of an XYZ file.

    Leading lines that are blank or whose first word is "#" are skipped.
    The next line must contain a single plain integer, the number of
    atoms, and the line right after it is stored as the structure title.
    Every following non-blank line is one atom record with 4 columns:
    element symbol and the x, y, z coordinates.

    lines -- list of strings, one per line of the XYZ data

    Return Structure object or raise StructureFormatError.
    """
    words = [ln.split() for ln in lines]
    stru = Structure()
    # locate the header, i.e., the first line that is not blank or "#"
    start = 0
    for tokens in words:
        if tokens and tokens[0] != "#":
            break
        start += 1
    # the header line holds the number of atoms, the title comes next
    header_emsg = ("%d: invalid XYZ format, missing number of atoms" %
                   (start + 1))
    try:
        header = words[start]
        w1 = header[0]
        # round trip via int rejects forms such as "+3" or "03"
        if len(header) != 1 or str(int(w1)) != w1:
            raise StructureFormatError(header_emsg)
        p_natoms = int(w1)
        stru.title = lines[start + 1].strip()
        start += 2
    except (IndexError, ValueError):
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(header_emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    # move stop just past the last non-blank line
    stop = len(lines)
    while stop > start and not words[stop - 1]:
        stop -= 1
    # nothing to read for an empty structure
    if p_natoms == 0 or start >= stop:
        return stru
    # the first record line determines the expected column count
    nfields = len(words[start])
    if nfields != 4:
        emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
        raise StructureFormatError(emsg)
    # read the atom records, p_nl is the 1-based number of the current line
    try:
        for p_nl, fields in enumerate(words[start:], start + 1):
            if not fields:
                continue
            if len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            symbol = fields[0]
            element = symbol[0].upper() + symbol[1:].lower()
            xyz = [float(f) for f in fields[1:4]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError("%d: invalid number format" % p_nl)
        six.reraise(StructureFormatError, e, exc_traceback)
    # the declared atom count must agree with the number of records read
    nread = len(stru)
    if nread != p_natoms:
        emsg = "expected %d atoms, read %d" % (p_natoms, nread)
        raise StructureFormatError(emsg)
    return stru