def parse_poscar(file):
    ''' Parse Vasp's POSCAR file and return a Crys object.

    Lines are consumed in this order: title (ignored), scaling factor,
    three lattice vectors, element symbols, number of atoms per element,
    coordinate-type keyword, one coordinate line per atom.

    The lattice vectors are multiplied by the scaling factor before being
    stored in c.matrix. Coordinates are stored exactly as read: in
    c.atom_fract when the keyword is 'direct', in c.atom_xyz when it is
    'cartesian' (case-insensitive). With any other keyword no coordinates
    are read.
    '''
    c = Crys()
    file.readline()  # title line, not used
    # The scaling factor is the first token of the second line.
    scaling_factor = float(file.readline().split()[0])
    for i in range(3):
        data = file.readline().split()
        for j in range(3):
            c.matrix[i][j] = float(data[j]) * scaling_factor
    poscar_atomtypes = file.readline().split()
    poscar_atomnumbers = file.readline().split()
    # Expand "Si O" / "1 2" into one entry per atom: Si, O, O.
    for i, element in enumerate(poscar_atomtypes):
        c.atom_type.extend([element] * int(poscar_atomnumbers[i]))
    c.natom = len(c.atom_type)
    coord_type = file.readline().split()[0].lower()
    if coord_type == 'direct':
        for _ in range(c.natom):
            data = file.readline().split()
            c.atom_fract.append(
                [float(data[0]), float(data[1]), float(data[2])])
    elif coord_type == 'cartesian':
        for _ in range(c.natom):
            data = file.readline().split()
            c.atom_xyz.append(
                [float(data[0]), float(data[1]), float(data[2])])
    return c
def parse_pdb(file):
    ''' Read a .pdb file and build the corresponding Crys object.

    Reading stops at end of file or at the first END / ENDMDL record.
    The CRYST1 record provides the cell lengths and angles (fixed-column
    fields); every ATOM / HETATM record provides one atom, whose type is
    the third whitespace-separated token and whose Cartesian coordinates
    are taken from columns 30:38, 38:46 and 46:54.
    '''
    c = Crys()
    # iter(readline, "") yields lines until readline returns "" (EOF).
    for line in iter(file.readline, ""):
        tokens = line.split()
        if not tokens:
            continue  # blank line: nothing to do
        record = tokens[0]
        if record in ('END', 'ENDMDL'):
            break
        if record == 'CRYST1':
            c.length = [
                float(line[6:15]),
                float(line[15:24]),
                float(line[24:33]),
            ]
            c.angle_deg = [
                float(line[33:40]),
                float(line[40:47]),
                float(line[47:54]),
            ]
        elif record in ("ATOM", "HETATM"):
            c.atom_type.append(tokens[2])  # maybe read to tokens[-1]
            c.atom_xyz.append([
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            ])
    return c
def parse_xyz_tm3(file):
    ''' Parse .xyz file (tailor made #3) and return Crys object.

    Layout as read by this function:
      line 1: ignored (it holds the file name)
      line 2: one leading token, then a b c alpha beta gamma
      line 3: number of atoms as first token
      then one line per atom: type, three fractional coordinates and,
      optionally, extra columns with the charge in the sixth one.
    '''
    c = Crys()
    file.readline()  # the first line has the filename
    data = file.readline().split()
    c.length = [float(data[1]), float(data[2]), float(data[3])]
    c.angle_deg = [float(data[4]), float(data[5]), float(data[6])]
    c.natom = int(file.readline().split()[0])
    for _ in range(c.natom):
        data = file.readline().split()
        c.atom_type.append(data[0])
        c.atom_fract.append([float(data[1]), float(data[2]), float(data[3])])
        # The charge column is missing when the Qeq method crashed.
        # NOTE(review): the charge is read from the sixth column as in the
        # original code; confirm against a sample file that it is not the
        # fifth one.
        if len(data) > 5:
            c.atom_charge.append(float(data[5]))
    return c
def parse_cssr(file):
    ''' Read a .cssr file and build the corresponding Crys object.

    File format description:
    http://www.chem.cmu.edu/courses/09-560/docs/msi/modenv/D_Files.html#944777

    Line 1 gives the three cell lengths, line 2 the three cell angles,
    line 3 starts with the number of atoms and line 4 is skipped. Each atom
    line gives the atom type in column 2 and the coordinates (stored as
    fractional) in columns 3-5; when a line has exactly 14 columns the last
    one is stored as the atomic charge.
    '''
    c = Crys()

    header = file.readline().split()
    c.length = [float(header[k]) for k in range(3)]

    header = file.readline().split()
    c.angle_deg = [float(header[k]) for k in range(3)]

    c.natom = int(file.readline().split()[0])
    file.readline()  # fourth line is not used

    read = 0
    while read < c.natom:
        cols = file.readline().split()
        c.atom_type.append(cols[1])
        c.atom_fract.append([float(cols[k]) for k in (2, 3, 4)])
        if len(cols) == 14:
            c.atom_charge.append(float(cols[13]))
        read += 1
    return c
def parse_car(file):
    ''' Read a .car file and build the corresponding Crys object.
    TESTED just for Chongli Zhong's hCOF database.

    Everything before the 'PBC' line is ignored. The 'PBC' line gives the
    cell lengths (tokens 1-3) and angles (tokens 4-6); every following line
    is read as an atom (type, x, y, z) until a line whose first token is
    'end' or the end of the file.
    '''
    c = Crys()
    in_atom_section = False
    # iter(readline, "") yields lines until readline returns "" (EOF).
    for line in iter(file.readline, ""):
        tokens = line.split()
        keyword = tokens[0] if tokens else None
        if keyword == 'end':
            break
        if keyword == 'PBC':
            c.length = [float(tokens[k]) for k in (1, 2, 3)]
            c.angle_deg = [float(tokens[k]) for k in (4, 5, 6)]
            in_atom_section = True
        elif in_atom_section:
            c.atom_type.append(tokens[0])
            c.atom_xyz.append([float(tokens[k]) for k in (1, 2, 3)])
    return c
def parse_cube(file):
    ''' Parse .cube file and return a Crys object.

    Two header lines are skipped. The third line starts with the number of
    atoms. Each of the next three lines holds the number of grid points
    along one axis followed by the three components of the corresponding
    voxel vector: the cell vector is their product. Every atom line holds
    atomic number, one unused column and x y z.

    All lengths are divided by ANGS2BOHR before being stored (presumably
    Bohr -> Angstrom; confirm against the constant's definition).
    '''
    c = Crys()
    file.readline()  # header1
    file.readline()  # header2
    data = file.readline().split()
    c.natom = int(data[0])
    for i in range(3):
        data = file.readline().split()
        npoints = float(data[0])
        for j in range(3):
            # data[0] is the grid-point count, the vector starts at data[1]
            c.matrix[i][j] = npoints * float(data[j + 1]) / ANGS2BOHR
    for _ in range(c.natom):
        data = file.readline().split()
        # convert from atomic number to element
        c.atom_type.append(ptab_atnum_inv[int(data[0])])
        c.atom_xyz.append([
            float(data[2]) / ANGS2BOHR,
            float(data[3]) / ANGS2BOHR,
            float(data[4]) / ANGS2BOHR
        ])
    return c
def parse_axsf(file):
    ''' Parse .axsf and .xsf files and return a Crys object.

    The file is scanned for a line starting with 'PRIMVEC', followed by the
    three cell vectors, and then for a line starting with 'PRIMCOORD',
    followed by a line starting with the number of atoms and by one line
    per atom (type, x, y, z). The atom type can be an element symbol or an
    atomic number.

    Raises IndexError if a blank line or the end of the file is met while
    searching for one of the two keywords.
    '''
    c = Crys()
    while True:
        if file.readline().split()[0] == 'PRIMVEC':
            break
    for i in range(3):
        data = file.readline().split()
        for j in range(3):
            c.matrix[i][j] = float(data[j])
    while True:
        if file.readline().split()[0] == 'PRIMCOORD':
            break
    c.natom = int(file.readline().split()[0])
    for _ in range(c.natom):
        data = file.readline().split()
        # In this format the atom type can be given as element or atomic number
        if is_number(data[0]):
            # convert from atomic number to element: the lookup table is
            # indexed by integer, as done in parse_cube
            c.atom_type.append(ptab_atnum_inv[int(data[0])])
        else:
            c.atom_type.append(data[0])
        c.atom_xyz.append([float(data[1]), float(data[2]), float(data[3])])
    return c
def parse_xyz(file):
    ''' Read an .xyz file and build the corresponding Crys object.

    The first line starts with the number of atoms. The second (comment)
    line must describe the cell in one of these flavours:
      'CELL:  a b c alpha beta gamma'        -> lengths and angles
      'cell:  9 numbers'                     -> cell matrix, row by row
      'jmolscript: ...' (Chargemol)          -> matrix from tokens 10-12,
                                                15-17 and 20-22
      'Lattice="9 numbers"' (ASE)            -> cell matrix, row by row
    otherwise the program exits. Each atom line holds type, x, y, z; a line
    with exactly five columns also provides the charge.
    '''
    c = Crys()
    c.natom = int(file.readline().split()[0])

    data = file.readline().split()
    ncol = len(data)
    tag = data[0] if ncol > 0 else ""

    def row(first):
        # three consecutive tokens, starting at `first`, as floats
        return [float(data[first]), float(data[first + 1]),
                float(data[first + 2])]

    if ncol >= 7 and tag == 'CELL:':
        c.length = row(1)
        c.angle_deg = row(4)
    elif ncol >= 10 and tag == 'cell:':
        c.matrix[0] = row(1)
        c.matrix[1] = row(4)
        c.matrix[2] = row(7)
    elif ncol >= 23 and tag == 'jmolscript:':  # Chargemol
        c.matrix[0] = row(10)
        c.matrix[1] = row(15)
        c.matrix[2] = row(20)
    elif ncol >= 9 and tag.split("\"")[0] == "Lattice=":  # ASE
        # the first and the last number are glued to the double quotes
        c.matrix[0] = [
            float(tag.split("\"")[1]),
            float(data[1]),
            float(data[2]),
        ]
        c.matrix[1] = row(3)
        c.matrix[2] = [
            float(data[6]),
            float(data[7]),
            float(data[8].split("\"")[0]),
        ]
    else:
        sys.exit('WARNING: xyz file with no cell provided! EXIT.')

    for _ in range(c.natom):
        data = file.readline().split()
        c.atom_type.append(data[0])
        c.atom_xyz.append(row(1))
        if len(data) == 5:  # an extra column is read as charge
            c.atom_charge.append(float(data[4]))
    return c
def parse_pwo(file):
    ''' Parse Quantum Espresso's .pwo and .pwi files and return Crys object.

    Cell and coordinates are taken from the LAST 'CELL_PARAMETERS' and
    'ATOMIC_POSITIONS' cards printed in the file (no need to be converged).
    ONLY if a card is not found, the corresponding data are read from the
    summary at the beginning of the output: the 'celldm(1)=' block for the
    cell and the 'positions' table (printed in alat units) for the atoms.

    The file is scanned several times, rewinding it with seek(0). It is
    never closed here: the handle belongs to the caller.

    Raises ValueError if the cell or the atomic positions can not be
    located, or if no ATOMIC_POSITIONS card uses angstrom or crystal units.
    '''
    c = Crys()

    # Parse cell: look for the last CELL_PARAMETERS card.
    cell_line = None
    for num, line in enumerate(file, 1):
        if 'CELL_PARAMETERS' in line:
            cell_line = num
    file.seek(0)
    if cell_line is not None:  # read cell in vc-relax calculation
        for _ in range(cell_line):
            file.readline()  # skip up to the card title line (included)
        for i in range(3):
            data = file.readline().split()
            for j in range(3):
                c.matrix[i][j] = float(data[j])
    else:  # read cell in scf or relax calculation
        while True:
            line = file.readline()
            if line == "":  # EOF: do not loop forever
                raise ValueError(
                    "parse_pwo: neither CELL_PARAMETERS nor celldm(1)= found")
            data = line.split()
            if len(data) > 0 and data[0] == "celldm(1)=":
                celldm1 = float(data[1]) / ANGS2BOHR
                for _ in range(3):
                    file.readline()  # lines between celldm(1) and the axes
                for i in range(3):
                    # axis line: 'a(1) = ( x y z )', values from token 3
                    data = file.readline().split()
                    for j in range(3):
                        c.matrix[i][j] = float(data[3 + j]) * celldm1
                break
    file.seek(0)

    # Parse atomic coordinates: look for the last ATOMIC_POSITIONS card.
    atomicpositions_line = None
    readfractional = None
    for num, line in enumerate(file, 1):
        if 'ATOMIC_POSITIONS' in line:
            atomicpositions_line = num
            tokens = line.split()
            unit = tokens[1] if len(tokens) > 1 else ''
            if unit in ('angstrom', '(angstrom)'):
                readfractional = False
            elif unit in ('crystal', '(crystal)'):
                readfractional = True
    file.seek(0)
    if atomicpositions_line is not None:  # vc-relax, relax and input files
        if readfractional is None:
            raise ValueError(
                "parse_pwo: ATOMIC_POSITIONS units must be angstrom or crystal")
        for _ in range(atomicpositions_line):
            file.readline()  # skip up to the card title line (included)
        while True:
            data = file.readline().split()
            if len(data) < 4:  # if the coordinates are finished, break
                break
            c.atom_type.append(data[0])
            coord = [float(data[1]), float(data[2]), float(data[3])]
            if readfractional:
                c.atom_fract.append(coord)
            else:
                c.atom_xyz.append(coord)
    else:  # read atomic in scf calculation
        celldm1 = None
        while True:
            line = file.readline()
            if line == "":  # EOF: do not loop forever
                raise ValueError("parse_pwo: atomic positions not found")
            data = line.split()
            if len(data) > 0 and data[0] == "celldm(1)=":
                celldm1 = float(data[1]) / ANGS2BOHR
            if len(data) > 3 and data[3] == "positions":
                if celldm1 is None:
                    raise ValueError(
                        "parse_pwo: celldm(1)= not found before the positions")
                while True:
                    data = file.readline().split()
                    if len(data) < 10:  # if the table is finished stop
                        break
                    c.atom_type.append(data[1])
                    # this table is printed in alat units: scale by celldm1
                    c.atom_xyz.append([
                        float(data[6]) * celldm1,
                        float(data[7]) * celldm1,
                        float(data[8]) * celldm1
                    ])
                break
    return c
def _cp2k_value_offset(data, unit):
    ''' Return 0 if data[1] is already a value, 1 if it is the expected
    bracketed unit (e.g. "[angstrom]"); exit for any other unit. '''
    if data[1][0] != "[":  # No unit specified: the default one is assumed.
        return 0
    if data[1].lower() == unit:
        return 1
    sys.exit("WARNING: in parsing CP2K, weird units. EXIT")


def parse_cp2k(file):
    ''' Parse any CP2K input file and return a Crys object.

    Constraints:
    - &CELL should be before &COORD
    - can read both A B C (cell) and ABC ALPHA_BETA_GAMMA (CELL)
    - it complains and exit if units are not [angstrom] and [deg]
    - if SCALED coord, the flag should be before the fract coordinates

    Blank lines inside &COORD are skipped. The program exits if the file
    ends before &COORD is found or before &COORD is closed by &END.
    '''
    c = Crys()
    cell_dict = {"A": 0, "B": 1, "C": 2}
    while True:
        line = file.readline()
        if line == "":  # EOF: do not loop forever
            sys.exit("WARNING: in parsing CP2K, &COORD not found. EXIT")
        data = line.split()
        # Read cell: A B C
        if len(data) > 0 and data[0] in cell_dict:
            shift = _cp2k_value_offset(data, "[angstrom]")
            for i in range(3):
                c.matrix[cell_dict[data[0]]][i] = float(data[1 + i + shift])
        # Read CELL: ABC
        if len(data) > 3 and data[0] == 'ABC':
            shift = _cp2k_value_offset(data, "[angstrom]")
            c.length[0] = float(data[1 + shift])
            c.length[1] = float(data[2 + shift])
            c.length[2] = float(data[3 + shift])
        # Read CELL: ALPHA_BETA_GAMMA
        if len(data) > 3 and data[0] == 'ALPHA_BETA_GAMMA':
            shift = _cp2k_value_offset(data, "[deg]")
            c.angle_deg[0] = float(data[1 + shift])
            c.angle_deg[1] = float(data[2 + shift])
            c.angle_deg[2] = float(data[3 + shift])
        if len(data) > 0 and data[0] == "&COORD":
            break
    scaled_coord = False  # Default
    while True:
        line = file.readline()
        if line == "":  # EOF before the end of the section
            sys.exit("WARNING: in parsing CP2K, &COORD not closed. EXIT")
        data = line.split()
        if len(data) == 0:
            # must be tested before data[0] is accessed
            continue
        if data[0] == "SCALED" \
          and (len(data) == 1 or data[1].lower() in ["t", "true", ".true."]):
            scaled_coord = True
        elif data[0] == "SCALED" \
          and data[1].lower() in ["f", "false", ".false."]:
            scaled_coord = False
        elif data[0] == "&END":  # End of &COORD section
            break
        else:
            c.atom_type.append(data[0])
            coord = [float(data[1]), float(data[2]), float(data[3])]
            if scaled_coord:
                c.atom_fract.append(coord)
            else:
                c.atom_xyz.append(coord)
    return c
def parse_cif(file):
    ''' Read a .cif file and build the corresponding Crys object.

    Constraints:
    - only valid for P1 symmetry
    - the cell tags can come before or after the atomic coordinates
    - the coordinates are read until a line with less than 4 columns is
      found (so the list must not be interrupted)
    '''
    c = Crys()

    # cell tag -> (attribute of c, position inside that attribute)
    cell_slots = {
        "_cell_length_a": ("length", 0),
        "_cell_length_b": ("length", 1),
        "_cell_length_c": ("length", 2),
        "_cell_angle_alpha": ("angle_deg", 0),
        "_cell_angle_beta": ("angle_deg", 1),
        "_cell_angle_gamma": ("angle_deg", 2),
    }

    def is_atom_tag(token):
        # True for tags such as '_atom_site_label'
        pieces = token.split("_")
        return len(pieces) > 1 and pieces[1] == "atom"

    reading_coord = False
    # iter(readline, "") yields lines until readline returns "" (EOF).
    for line in iter(file.readline, ""):
        data = line.split()

        if len(data) > 0:
            pieces = data[0].split("_")
            if len(pieces) > 1 and 'cell' in pieces:
                reading_coord = False
                if data[0] in cell_slots:
                    attribute, position = cell_slots[data[0]]
                    value = parsefloat(data[1])
                    getattr(c, attribute)[position] = value

        # when the "_atom_***" tags start, remember the order of the columns
        if len(data) > 0 and is_atom_tag(data[0]):
            data_order_dic = {}
            order = 0
            while is_atom_tag(data[0]):
                data_order_dic[data[0]] = order
                order += 1
                data = file.readline().split()
            reading_coord = True  # entering in 'reading_coordinate MODE'

        # with less than 'Atom x y z' it can not be a coordinate line
        if len(data) < 4:
            reading_coord = False
        if not reading_coord:
            continue

        # atom type: "type_symbol" is preferred, "label" is the fallback
        if "_atom_site_type_symbol" in data_order_dic:
            c.atom_type.append(data[data_order_dic["_atom_site_type_symbol"]])
        elif "_atom_site_label" in data_order_dic:
            c.atom_type.append(data[data_order_dic["_atom_site_label"]])
        else:
            sys.exit("EXIT: in cif missing type_symbol and label")

        # coordinates: fractional are preferred, Cartesian are the fallback
        if "_atom_site_fract_x" in data_order_dic:
            fract_tags = ("_atom_site_fract_x", "_atom_site_fract_y",
                          "_atom_site_fract_z")
            c.atom_fract.append(
                [float(data[data_order_dic[tag]]) for tag in fract_tags])
        elif "_atom_site_Cartn_x" in data_order_dic:
            cartn_tags = ("_atom_site_Cartn_x", "_atom_site_Cartn_y",
                          "_atom_site_Cartn_z")
            c.atom_xyz.append(
                [float(data[data_order_dic[tag]]) for tag in cartn_tags])
        else:
            sys.exit("EXIT: in cif missing fract_ and Cartn_ coordinates")

        if "_atom_site_charge" in data_order_dic:
            c.atom_charge.append(
                float(data[data_order_dic["_atom_site_charge"]]))
    return c