def to_atoms(self):
    """Build an Atoms object from the geometry block of this datafile.

    Each geometry line is split on whitespace: the first field is taken as
    the element label (or atomic number) and the next three fields as the
    Cartesian coordinates of that atom.

    Returns:
        The newly created Atoms object with elements and positions set.

    Raises:
        ValueError: if the datafile has no GEOMETRY entry.
    """
    # NOTE(review): presence is tested with 'GEOMETRY' but the block is read
    # from 'geometry='; presumably the container normalises keys - confirm.
    if not self.has_key('GEOMETRY'):
        raise ValueError("datafile has no GEOMETRY block to convert to Atoms")
    block = self['geometry=']

    atoms = Atoms(n=len(block))
    field_list = [line.split() for line in block]

    # Any atom labels beyond the element symbol are lost here; something
    # more clever is needed if they are to be used for molecule
    # specification.
    labels = [fields[0] for fields in field_list]

    # Symbols are looked up with atomic_number(); purely numeric labels are
    # passed through unchanged (as is the label itself if the lookup returns
    # a falsy value).
    elements = [
        label if label.isdigit() else (atomic_number(label) or label)
        for label in labels
    ]
    atoms.set_atoms(elements)

    # One row per atom is built, then transposed before assignment.
    atoms.pos[:, :] = farray(
        [[float(x) for x in fields[1:4]] for fields in field_list]).T
    return atoms
def _gradient_node_to_forces(gradient_node):
    """Turn the text of one <gradient> element into rows of force components.

    Blank lines are dropped; every remaining value is multiplied by
    -HARTREE / BOHR (force is minus the gradient).
    """
    text_lines = gradient_node.childNodes[0].data.split('\n')
    rows = [str(line).split() for line in text_lines]
    scale = -1.0 * HARTREE / BOHR
    return [[scale * float(value) for value in row] for row in rows if row]


def read_xml_output(xmlfile, energy_from=None, extract_forces=False,
                    extract_dipole=False, datafile=None, cluster=None):
    """Parse a Molpro XML output file and return an Atoms object with results.

    Args:
        xmlfile: XML output file, passed to minidom.parse().
        energy_from: name of the method whose energy is stored as
            cluster.params['Energy']. If None a critical message listing the
            known options is logged and no 'Energy' entry is written.
        extract_forces: if true, read the <gradient> elements into the
            'force' property (and 'force2', 'force3', ... for further
            gradients).
        extract_dipole: if true, store 'Dipole moment' properties in
            cluster.params.
        datafile: tells which methods to look for. If None it is built from
            the XML file itself via MolproDatafile(xml=xmlfile), and
            extract_forces is switched on when it contains 'FORCE'.
        cluster: Atoms object to update and return. If None a new one is
            created from the atom array echoed in the XML file.

    Returns:
        The (new or supplied) cluster with energies, and optionally dipole
        moments and forces, filled in.
    """
    log = logging.getLogger('molpro_driver')

    # NOTE(review): the FORCE check is nested under "datafile is None" in
    # this reconstruction of the block - confirm against the intended layout.
    if datafile is None:
        datafile = MolproDatafile(xml=xmlfile)
        if 'FORCE' in datafile:
            extract_forces = True

    # Property name carrying the energy for each supported method keyword.
    energy_names = {
        'CCSD(T)-F12': ["total energy"],
        'CCSD(T)': ["total energy"],
        'MP2': ["total energy"],
        'DF-MP2': ["total energy"],
        'DF-RMP2': ["energy"],
        'RKS': ["Energy"],
        'RHF': ["Energy"],
        'DF-RHF': ["Energy"],
        'HF': ["Energy"],
        'DF-HF': ["Energy"],
    }

    # Method names as they appear in the XML for each datafile keyword;
    # ordered because the order is used for the log message and for `calcs`.
    all_methods = OrderedDict()
    all_methods['HF'] = ["RHF"]
    all_methods['DF-HF'] = ["RHF"]
    all_methods['RHF'] = ["RHF"]
    all_methods['DF-RHF'] = ["RHF"]
    all_methods['MP2'] = ["MP2"]
    all_methods['DF-MP2'] = ["MP2"]
    all_methods['DF-RMP2'] = ["DF-RMP2"]
    all_methods['RKS'] = ["RKS"]
    all_methods['CCSD(T)-F12'] = ["CCSD(T)-F12a", "CCSD(T)-F12b"]
    all_methods['CCSD(T)'] = ["CCSD(T)"]

    if energy_from is None:
        log.critical("don't know which energy to extract, use keyword energy_from with options "+str([all_methods[k] for k in iter(all_methods)]).replace('[','').replace(']',''))

    # Keys of the methods that were actually requested in the datafile.
    data_keys_upper = [key.upper() for key in datafile._keys]
    calcs = [key for key in all_methods if key in data_keys_upper]

    dom = minidom.parse(xmlfile)

    def ascii_attr(node, name):
        # Attribute value with non-ASCII characters dropped.
        return node.attributes[name].value.encode('ascii', 'ignore')

    elements = []
    position_matrix = []
    cml = dom.documentElement.getElementsByTagName('cml:atomArray')
    for node in cml[0].childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            continue
        elements.append(atomic_number(ascii_attr(node, 'elementType')))
        position_matrix.append(
            [float(ascii_attr(node, axis)) for axis in ('x3', 'y3', 'z3')])

    # NOTE(review): positions are only written when a new cluster is created
    # in this reconstruction - confirm against the intended layout.
    if cluster is None:
        cluster = Atoms(n=len(elements))
        cluster.set_atoms(elements)
        position_matrix = farray(position_matrix).T
        if not 'ANGSTROM' in datafile._keys and not 'angstrom' in datafile._keys:
            # NOTE(review): dividing by 0.529177249 (Bohr radius in Angstrom)
            # - confirm this is the intended conversion direction.
            position_matrix = position_matrix * (1.0/0.529177249)
        cluster.pos[:, :] = position_matrix
        # note this leaves the lattice undefined

    # Look for each requested energy among the <property> elements.
    energy_found = False
    prop_method = None  # stays None if the file has no <property> elements
    for prop in dom.documentElement.getElementsByTagName('property'):
        prop_name = ascii_attr(prop, 'name')
        prop_method = ascii_attr(prop, 'method')
        for calc in calcs:
            if prop_name in energy_names[calc] and prop_method in all_methods[calc]:
                energy_param_name = "_".join([prop_method, prop_name]).replace(" ", "_")
                energy_value = float(ascii_attr(prop, 'value')) * HARTREE

                # Avoid overwriting an earlier entry: append _2, _3, ...
                my_energy = energy_param_name
                i_en = 1
                while my_energy in cluster.params.iterkeys():
                    i_en += 1
                    my_energy = '_'.join([energy_param_name, str(i_en)])
                cluster.params[my_energy] = energy_value

                if prop_method == energy_from:
                    cluster.params['Energy'] = energy_value
                    energy_found = True
            elif extract_dipole and prop_name == 'Dipole moment':
                dipole_param_name = "_".join([prop_method, prop_name]).replace(" ", "_")
                log.info("found dipole moment: "+dipole_param_name)
                cluster.params[dipole_param_name] = ascii_attr(prop, 'value')

    if not energy_found:
        # %-style arguments so that energy_from=None (or a file without any
        # properties) is reported instead of raising TypeError/NameError.
        log.critical("couldn't find energy from %s prop method : %s",
                     energy_from, prop_method)

    # Read gradients if requested.
    if extract_forces:
        if not cluster.has_property('force'):
            cluster.add_property('force', 0.0, n_cols=3)

        grads = dom.documentElement.getElementsByTagName('gradient')
        cluster.force[:] = farray(_gradient_node_to_forces(grads[0])).T

        # Any further gradients become extra properties force2, force3, ...
        for k in range(1, len(grads)):
            my_force = 'force%s' % str(k + 1)
            cluster.add_property(
                my_force, farray(_gradient_node_to_forces(grads[k])).T)

    return cluster
def param_to_xml(params, encoding='iso-8859-1'):
    """Serialise a parameter dictionary to a <TS_params> XML string.

    Args:
        params: mapping with the global settings ('rcut', 'nspecies',
            'betapol', 'maxipol', 'tolpol', 'pred_order', 'yukalpha',
            'yuksmoothlength', 'tewald', 'raggio', 'a_ew', 'gcut' and
            optionally 'iesr', which defaults to [0, 0, 0]), the per-species
            sequences 'species', 'pol' and 'z', and the per-pair sequences
            'd_ms', 'gamma_ms', 'r_ms', 'bpol' and 'cpol'.
        encoding: encoding passed to the XML generator and declared in the
            XML header.

    Returns:
        The generated XML document as a string.
    """
    from xml.sax.saxutils import XMLGenerator
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    output = StringIO()
    xml = XMLGenerator(output, encoding)
    xml.startDocument()

    n_species = params['nspecies']

    xml.startElement(
        'TS_params', {
            'cutoff': ' '.join([str(x) for x in params['rcut']]),
            'n_types': str(n_species),
            'betapol': str(params['betapol']),
            'maxipol': str(params['maxipol']),
            'tolpol': str(params['tolpol']),
            'pred_order': str(params['pred_order']),
            'yukalpha': str(params['yukalpha']),
            'yuksmoothlength': str(params['yuksmoothlength']),
            'tewald': 'T' if params['tewald'] else 'F',
            'raggio': str(params['raggio']),
            'a_ew': str(params['a_ew']),
            'gcut': str(params['gcut']),
            'iesr': ' '.join([str(x) for x in params.get('iesr', [0, 0, 0])])
        })

    # Pair data is stored for tj <= ti only, in the order
    # (0,0), (1,0), (1,1), (2,0), ... so a running counter gives the index.
    pair_index = 0
    for ti in range(n_species):
        zi = atomic_number(params['species'][ti])
        xml.startElement(
            'per_type_data', {
                'type': str(ti + 1),
                'atomic_num': str(zi),
                'pol': str(params['pol'][ti]),
                'z': str(params['z'][ti])
            })
        xml.endElement('per_type_data')

        for tj in range(ti + 1):
            zj = atomic_number(params['species'][tj])
            xml.startElement(
                'per_pair_data', {
                    'atnum_i': str(zi),
                    'atnum_j': str(zj),
                    'D_ms': str(params['d_ms'][pair_index]),
                    'gamma_ms': str(params['gamma_ms'][pair_index]),
                    'R_ms': str(params['r_ms'][pair_index]),
                    'B_pol': str(params['bpol'][pair_index]),
                    'C_pol': str(params['cpol'][pair_index]),
                })
            xml.endElement('per_pair_data')
            pair_index += 1

    xml.endElement('TS_params')
    xml.endDocument()
    return output.getvalue()
def _poscel_source_available(source):
    """Return True if *source* is an existing path or an already-open stream."""
    if source is None:
        return False
    if isinstance(source, str):
        return os.path.exists(source)
    return True


def PosCelReader(basename=None, pos='pos.in', cel='cel.in', force='force.in',
                 energy='energy.in', stress='stress.in',
                 species_map={'O': 1, 'Si': 2}, cel_angstrom=False,
                 pos_angstrom=False, rydberg=True, format=None):
    """Generate one Atoms object per frame from a set of pos/cel files.

    Args:
        basename: if given, its extension is stripped and the five input
            names become <basename>.pos/.cel/.ene/.str/.for, overriding the
            individual arguments.
        pos, cel: position and cell input; a filename or an iterable of
            lines.
        force, energy, stress: optional inputs; a filename, an iterable of
            lines, or None. A filename that does not exist (or None)
            disables that quantity.
        species_map: maps species name to the integer type code found in the
            fourth column of the position lines. It is only read, never
            modified.
        cel_angstrom: if false, the lattice is multiplied by BOHR.
        pos_angstrom: if false, the positions are multiplied by BOHR.
        rydberg: if true, energies are multiplied by RYDBERG and forces by
            RYDBERG / BOHR.
        format: accepted but not used.

    Yields:
        An Atoms object per frame, with 'energy' and 'virial' params and a
        'force' property set when the corresponding input is available.

    Raises:
        ValueError: if a force frame does not have one line per atom.
    """
    if basename is not None:
        basename = os.path.splitext(basename)[0]
        pos = '%s.pos' % basename
        cel = '%s.cel' % basename
        energy = '%s.ene' % basename
        stress = '%s.str' % basename
        force = '%s.for' % basename

    # Only call os.path.exists on real paths so that open streams can be
    # passed for the optional inputs as well.
    doenergy = _poscel_source_available(energy)
    doforce = _poscel_source_available(force)
    dostress = _poscel_source_available(stress)

    opened = []  # files opened here, which we are responsible for closing

    def line_iter(source):
        if isinstance(source, str):
            source = open(source)
            opened.append(source)
        return iter(source)

    try:
        pos = line_iter(pos)
        cel = line_iter(cel)
        if doenergy:
            energy = line_iter(energy)
        if doforce:
            force = line_iter(force)
        if dostress:
            stress = line_iter(stress)

        # Throw away the blank line at the start; the default keeps an empty
        # input from raising StopIteration inside the generator.
        next(pos, None)
        if doforce:
            next(force, None)

        rev_species_map = dict((code, name)
                               for name, code in species_map.items())

        def in_frame(line):
            text = line.strip()
            return text != '' and not text.startswith('STEP')

        while True:
            poslines = list(itertools.takewhile(in_frame, pos))
            if not poslines:
                break

            # Four lines per cell frame: the first is skipped, the next
            # three hold the lattice.
            cellines = list(itertools.islice(cel, 4))
            lattice = fzeros((3, 3))
            # NOTE(review): column indices 1..3 - presumably fzeros returns
            # a 1-based Fortran-style array; confirm.
            for i in (1, 2, 3):
                lattice[:, i] = [float(x) for x in cellines[i].split()]
            if not cel_angstrom:
                lattice *= BOHR

            at = Atoms(n=len(poslines), lattice=lattice)
            at.pos[:] = farray([[float(x) for x in L.split()[0:3]]
                                for L in poslines]).T
            if not pos_angstrom:
                at.pos[:] *= BOHR

            species = [rev_species_map[int(L.split()[3])] for L in poslines]
            # Names are looked up with atomic_number(); purely numeric names
            # are passed through (as is the name if the lookup is falsy).
            elements = [
                el if el.isdigit() else (atomic_number(el) or el)
                for el in species
            ]
            at.set_atoms(elements)

            if doenergy:
                at.params['energy'] = float(next(energy).split()[0])
                if rydberg:
                    at.params['energy'] *= RYDBERG

            if dostress:
                stress_lines = list(itertools.islice(stress, 4))
                virial = farray([[float(x) for x in L.split()]
                                 for L in stress_lines[1:4]])
                virial *= at.cell_volume() / (10.0 * GPA)
                at.params['virial'] = virial

            if doforce:
                at.add_property('force', 0.0, n_cols=3)
                force_lines = list(
                    itertools.takewhile(lambda L: L.strip() != '', force))
                if len(force_lines) != at.n:
                    raise ValueError("len(force_lines) (%d) != at.n (%d)" %
                                     (len(force_lines), at.n))
                at.force[:] = farray([[float(x) for x in L.split()[0:3]]
                                      for L in force_lines]).T
                if rydberg:
                    at.force[:] *= RYDBERG / BOHR

            yield at
    finally:
        for handle in opened:
            handle.close()