def _parse_cntrl(f: TextIO) -> Respin.Cntrl:
    """Parse the body of the ``&cntrl`` section of a respin file

    Lines are consumed from `f` until a line equal to `` &end`` is read or
    the file is exhausted. Empty lines are skipped; every other line must
    have the form `` key = value``.

    Parameters
    ----------
    f : TextIO
        File object from which the section body is read, line by line.

    Raises
    ------
    InputFormatError
        Raised when a line does not have the ``key = value`` form, when a
        value cannot be converted to a number or when ``nmol`` is specified
        and different from 1.

    Returns
    -------
    Respin.Cntrl
        Object created from the parsed options (excluding ``nmol``).
    """
    # Raw string: `\w` and `\s` are regex escapes, not string escapes.
    line_re = re.compile(r" (\w+) =\s+([0-9.]+)")
    kwargs: Dict[str, Union[int, float]] = {}

    for line in f:
        stripped = line.rstrip('\n')
        if stripped == " &end":
            break
        if stripped == "":
            continue

        line_match = line_re.match(line)
        if line_match is None:
            raise InputFormatError(
                f"Failed parsing cntrl section of respin file:\n{line}")

        key, value = line_match.groups()
        try:
            # `qwt` is the only option parsed as a float, all others as int.
            kwargs[key] = float(value) if key == "qwt" else int(value)
        except ValueError as e:
            # The pattern admits strings like "1.5" or "1.2.3", which the
            # numeric constructors may reject; report this as a format error.
            raise InputFormatError(
                f"Failed parsing cntrl section of respin file:\n{line}") from e

    # nmol is not a parameter of Cntrl.__init__ and must be equal to 1.
    nmol = kwargs.pop("nmol", None)
    if nmol is not None and nmol != 1:
        raise InputFormatError("Parsing multiple structures is not supported")

    return Respin.Cntrl(**kwargs)  # type: ignore # (not sure why not recognized)
def parse_gaussian_esp(f: TextIO) -> GaussianEspData:
    """Parse a file in the Gaussian .esp file format

    The sections are read in a fixed order: a three-line prelude, one line
    per atom, the dipole moment (header and one line), the traceless
    quadrupole moment (header and two lines) and finally the ESP points
    section, whose header declares the number of points that follow.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the .esp file to be parsed.
        The file can be generated with Gaussian by specifying the
        ``IOp(6/50=1)`` override.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Notes
    -----
    This function has only been tested with the output of Gaussian 09.

    Returns
    -------
    GaussianEspData
        A dataclass representing the information in the given .esp file.
    """
    charge, multiplicity, atom_count = _parse_prelude([get_line(f) for _ in range(3)])

    molecule = Molecule([_parse_atom(get_line(f)) for _ in range(atom_count)])

    if get_line(f) != " DIPOLE MOMENT:":
        raise InputFormatError("Expected dipole moment section header.")

    dipole_moment = _parse_dipole(get_line(f))

    if get_line(f) != " TRACELESS QUADRUPOLE MOMENT:":
        raise InputFormatError("Expected quadrupole moment section header.")

    quadrupole_moment = _parse_quadrupole([get_line(f), get_line(f)])

    # Raw string: `\s` is a regex escape, not a string escape.
    points_header_re = re.compile(
        r" ESP VALUES AND GRID POINT COORDINATES. #POINTS =\s+([0-9]+)")
    points_header_match = points_header_re.match(get_line(f))

    if points_header_match is None:
        raise InputFormatError("Expected ESP points section header.")

    point_count = int(points_header_match.group(1))
    field = _parse_esp_points(f)

    # Cross-check the declared number of points against what was parsed.
    if len(field.mesh) != point_count:
        raise InputFormatError(
            f"The number of ESP points ({len(field.mesh)}) does not agree with that "
            f"specified in section header ({point_count})."
        )

    return GaussianEspData(
        charge,
        multiplicity,
        molecule,
        dipole_moment,
        quadrupole_moment,
        field
    )
def parse_resp_charges(f: TextIO) -> List[Charge]:
    """Parse a file in the ``resp`` charges format

    Every line is read with the ``8F10.6`` record format. Values which the
    record reader reports as ``None`` are skipped (presumably fields missing
    from a line holding fewer than eight values).

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing charges in the ``resp``
        format.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    typing.List[Charge]
        List of charges described in the given input file.
    """
    formatter = FR("8F10.6")

    try:
        # A flat comprehension keeps this linear in the number of values;
        # summing the per-line lists pairwise would copy the accumulated
        # list on every line.
        return [
            Charge(value)
            for line in f
            for value in formatter.read(line)
            if value is not None
        ]
    except ValueError as e:
        raise InputFormatError(e) from e
def _get_charges_sections(
    f: TextIO,
    charges_section_parser: ChargesSectionParser
) -> List[List[str]]:
    """Collect every charges section which *may* be of the given type

    The file is scanned line by line (with the trailing newline removed). A
    section opens on a line for which the parser's `is_section_start` is true
    and closes on a line for which `is_section_end` is true; both delimiting
    lines are included in the section. Sections which are never closed are
    not returned.

    Whether a section really holds charges of the requested type can only be
    established by parsing it, so callers must verify that separately.

    Raises
    ------
    InputFormatError
        Raised when a section start is found inside an open section.
    """
    collected: List[List[str]] = []
    open_section: Optional[List[str]] = None

    for raw_line in f:
        text = raw_line.rstrip('\n')

        if charges_section_parser.is_section_start(text):
            if open_section is not None:
                raise InputFormatError(
                    "Encountered start of new charge section start while "
                    "parsing a charge section. Please submit a bug report "
                    "attaching the input file that failed parsing.")
            open_section = []

        if open_section is None:
            # Outside of any section; nothing to record.
            continue

        open_section.append(text)

        # End-of-section lines are less specific than start lines, so they
        # are only looked for while a section is open.
        if charges_section_parser.is_section_end(text):
            collected.append(open_section)
            open_section = None

    return collected
def parse_section(self, section: List[str]) -> EspChargesSectionData:
    """Parse the lines of an ESP charges section

    The section is searched for the line reporting the fit statistics (RMS
    and RRMS). The two lines directly after it are skipped (presumably
    sub-headers) and each following line, up to the section end, is expected
    to consist of three whitespace-separated fields of which the last one is
    the charge.

    Parameters
    ----------
    section : List[str]
        Lines of the section, without trailing newlines.

    Raises
    ------
    InputFormatError
        Raised when the line with fit statistics is missing or when a line
        with a charge does not consist of exactly three fields.

    Returns
    -------
    EspChargesSectionData
        The parsed charges together with the RMS and RRMS values.
    """
    # Raw string: `\s`, `\d` and `\.` are regex escapes, not string escapes.
    charges_and_stats_re = re.compile(
        r" Charges from ESP fit, RMS=\s+(\d+\.\d+) RRMS=\s+(\d+\.\d+):$")

    for i, line in enumerate(section):
        matched_charges_and_stats = charges_and_stats_re.match(line)
        if matched_charges_and_stats is not None:
            rms = Esp(matched_charges_and_stats.group(1))
            rrms = float(matched_charges_and_stats.group(2))
            break
    else:
        # Without this branch `rms`, `rrms` (and `i` for an empty section)
        # would be unbound below and surface as an UnboundLocalError.
        raise InputFormatError(
            "Failed to find the line with ESP fit statistics (RMS and RRMS) "
            "in the charges section.")

    charges = []
    # `i + 3` skips the statistics line and the two lines following it.
    for line in section[i + 3:]:
        if self.is_section_end(line):
            break

        try:
            _label, _symbol, charge = line.split()
        except ValueError as e:
            raise InputFormatError(
                f"Failed to parse the charge on atom from the following line:\n{line}"
            ) from e

        charges.append(Charge(charge))

    return EspChargesSectionData(charges, rms, rrms)
def make_value(info: Cube.Info, value: str) -> FieldValue:
    """Create a field value, optionally checking the cube title first

    `verify_title`, `expected_title_start` and `value_ctor` are not defined
    in this function; they are presumably provided by the enclosing scope.

    Raises
    ------
    InputFormatError
        Raised when `verify_title` is set and the title line of the cube
        does not start with `expected_title_start`.
    """
    if verify_title:
        title_matches = info.title_line.startswith(expected_title_start)
        if not title_matches:
            raise InputFormatError(
                f'Title of cube file does not start with "{expected_title_start}".'
            )

    return value_ctor(value)
def _parse_dipole(line: str) -> DipoleMoment:
    """Parse the line specifying the dipole moment components

    The line must specify the ``X``, ``Y``, ``Z`` and ``Total`` values, in
    this order. ``Total`` must be present but its value is not used.

    Parameters
    ----------
    line : str
        The line to be parsed, without the trailing newline.

    Raises
    ------
    InputFormatError
        Raised when the line does not follow the expected format.

    Returns
    -------
    DipoleMoment
        Dipole moment created from the X, Y and Z components.
    """
    # Raw strings: `\s` is a regex escape, not a string escape.
    dipole_line_re = re.compile(
        r" X=\s+([-+0-9.D]+) Y=\s+([-+0-9.D]+)"
        r" Z=\s+([-+0-9.D]+) Total=\s+([-+0-9.D]+)"
    )
    dipole_line_match = dipole_line_re.match(line)

    if dipole_line_match is None:
        raise InputFormatError("Failed parsing dipole specification.")

    # The exponent marker 'D' is replaced with 'E' before the value is
    # handed to the constructor.
    x, y, z = (
        DipoleMomentValue(dipole_line_match.group(index).replace('D', 'E'))
        for index in (1, 2, 3)
    )
    return DipoleMoment(x, y, z)
def _verify_charges_section(
    charges_section: ChargesSectionData,
    verify_against: Optional[Molecule[Atom]]
) -> None:
    """Check parsed charges against a molecule, if one is given

    The only check performed is that the number of charges equals the number
    of atoms in the molecule. Nothing is checked when `verify_against` is
    None.

    Raises
    ------
    InputFormatError
        Raised when the number of charges differs from the number of atoms.
    """
    # TODO: This could be extended to check atom identities if those get parsed
    if verify_against is None:
        return

    atom_count = len(verify_against.atoms)
    charge_count = len(charges_section.charges)

    if atom_count != charge_count:
        raise InputFormatError(
            "Charges from log file failed verification against given molecule."
        )
def _parse_grid_prelude(line: str) -> _GridPrelude:
    """Parse the third line of a cube file

    The line must consist of four or five whitespace-separated fields: the
    atom count, three coordinates of the origin and, optionally, the number
    of values per point (NVal), which is taken to be 1 when not given.

    Parameters
    ----------
    line : str
        The line to be parsed.

    Raises
    ------
    InputFormatError
        Raised when the number of fields is neither four nor five.

    Returns
    -------
    _GridPrelude
        The atom count, the origin coordinates and NVal.
    """
    line_split = line.split()

    if len(line_split) not in (4, 5):
        # The last fragment must be an f-string, otherwise the placeholder
        # is shown verbatim instead of the number of fields found.
        raise InputFormatError(
            "Cube file incorrectly formatted! Expected four or five fields "
            "(atom count, 3*origin coordinates, [NVal]) on line 3, found "
            f"{len(line_split)} fields.")

    atom_count, *origin_coords = line_split[:4]
    nval = line_split[4] if len(line_split) == 5 else "1"

    return _GridPrelude(int(atom_count), Coords(origin_coords), int(nval))
def parse_resp_esp(f: TextIO) -> EspData:
    """Parse a file in the .esp file format defined by ``resp``

    The first line gives the number of atoms and the number of ESP points.
    It is followed by one line of coordinates per atom and then by one line
    per point, in which the first field is the ESP value and the remaining
    fields are the coordinates of the point.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the .esp file to be parsed.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    EspData
        A dataclass representing the information in the given .esp file.
    """
    header_fields = get_line(f).split()

    if len(header_fields) != 2:
        raise InputFormatError(
            "Expected atom and point counts on the first line of .esp file in the `resp` format"
        )

    atom_count, point_count = (int(count) for count in header_fields)

    # Atom coordinates, one atom per line.
    atoms_coords = []
    for _ in range(atom_count):
        atoms_coords.append(Coords(get_line(f).split()))

    # ESP points: value first, coordinates after.
    mesh_coords: List[Coords] = []
    esp_values: List[Esp] = []

    for _ in range(point_count):
        esp_value, *point_coords = get_line(f).split()
        mesh_coords.append(Coords(point_coords))
        esp_values.append(Esp(esp_value))

    field = Field(Mesh(mesh_coords), esp_values)
    return EspData(atoms_coords, field)
def _parse_prelude(lines: List[str]) -> Tuple[int, int, int]:
    """Parse the three-line prelude of a Gaussian .esp file

    Parameters
    ----------
    lines : List[str]
        Exactly three lines, without trailing newlines: the file header, the
        line specifying charge and multiplicity and the header of the
        molecule section, which specifies the atom count.

    Raises
    ------
    InputFormatError
        Raised when any of the lines does not follow the expected format.

    Returns
    -------
    Tuple[int, int, int]
        Charge, multiplicity and atom count.
    """
    # Internal invariant; callers are expected to always pass three lines.
    assert len(lines) == 3

    # Line 1
    if lines[0] != " ESP FILE - ATOMIC UNITS":
        raise InputFormatError("Unexpected first line of .esp file.")

    # Line 2
    # Raw string: `\s` is a regex escape, not a string escape.
    charge_and_multiplicity_re = re.compile(
        r" CHARGE =\s+([-0-9.]+) - MULTIPLICITY =\s+([0-9.]+)")
    charge_and_multiplicity = charge_and_multiplicity_re.match(lines[1])
    if charge_and_multiplicity is None:
        raise InputFormatError("Failed parsing line 2 (charge and multiplicity expected).")

    try:
        # The pattern admits strings like "1.0" or "--", which are not ints.
        charge = int(charge_and_multiplicity.group(1))
        multiplicity = int(charge_and_multiplicity.group(2))
    except ValueError as e:
        raise InputFormatError(
            "Failed parsing line 2 (charge and multiplicity expected).") from e

    # Line 3
    atom_count_re = re.compile(
        r" ATOMIC COORDINATES AND ESP CHARGES. #ATOMS =\s+([0-9.]+)")
    atom_count = atom_count_re.match(lines[2])
    if atom_count is None:
        raise InputFormatError("Failed parsing line 3 (molecule header and atom count).")

    try:
        atom_count_value = int(atom_count.group(1))
    except ValueError as e:
        raise InputFormatError(
            "Failed parsing line 3 (molecule header and atom count).") from e

    return charge, multiplicity, atom_count_value
def _parse_quadrupole(lines: List[str]) -> QuadrupoleMoment:
    """Parse the two lines specifying the quadrupole moment components

    Parameters
    ----------
    lines : List[str]
        Exactly two lines, without trailing newlines. The first one must
        specify the ``XX``, ``YY`` and ``ZZ`` components and the second one
        the ``XY``, ``XZ`` and ``YZ`` components.

    Raises
    ------
    InputFormatError
        Raised when either line does not follow the expected format.

    Returns
    -------
    QuadrupoleMoment
        Quadrupole moment created from the components in the order
        XX, YY, ZZ, XY, XZ, YZ.
    """
    # Internal invariant; callers are expected to always pass two lines.
    assert len(lines) == 2

    line1_components = ("XX", "YY", "ZZ")
    line2_components = ("XY", "XZ", "YZ")

    # Raw string: `\s` is a regex escape, not a string escape. The `{}`
    # placeholders are filled in with the component labels.
    get_line_re: Callable[[Tuple[str, str, str]], Pattern[str]] = lambda components: re.compile(
        r" {}=\s+([-+0-9.D]+) {}=\s+([-+0-9.D]+) {}=\s+([-+0-9.D]+)".format(*components)
    )

    line1_match = get_line_re(line1_components).match(lines[0])
    line2_match = get_line_re(line2_components).match(lines[1])

    if line1_match is None or line2_match is None:
        raise InputFormatError("Failed parsing quadrupole specification.")

    # The exponent marker 'D' is replaced with 'E' before each value is
    # handed to the constructor.
    values = [
        QuadrupoleMomentValue(line_match.group(index).replace('D', 'E'))
        for line_match in (line1_match, line2_match)
        for index in (1, 2, 3)
    ]
    return QuadrupoleMoment(*values)
def parse_cube(
    f: TextIO,
    make_value: Callable[[Cube.Info, str], FieldValue]
) -> Cube[FieldValue]:
    """Parse a file in the Gaussian "cube" file format

    Unless your cube file describes neither electron density nor ESP, you
    probably want `parse_ed_cube` or `parse_esp_cube` instead.

    The values in the file must be separated by whitespace. For cube files
    which were not produced by Gaussian, make sure that the coordinates are
    expressed in bohr.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the cube file to be parsed.
    make_value : Callable[[Cube.Info, str], FieldValue]
        A function of two parameters, the cube information and the string
        representation of a single field value, returning the value in the
        desired internal representation, for example an `Esp` object.
        The cube information is passed so that the function can verify the
        type of the cube file, if required.

        Example
        -------
        The simplest possible function is::

            lambda _, str_: float(str_)

        It disregards the cube information (no verification is performed)
        and converts the string to a float.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    Cube[FieldValue]
        Data from the parsed cube file.
    """
    # Lines 1-2
    input_line = get_line(f)
    title_line = get_line(f)
    info = Cube.Info(input_line=input_line, title_line=title_line)

    # Line 3
    grid_prelude = _parse_grid_prelude(get_line(f))
    if grid_prelude.nval != 1:
        raise InputFormatError(
            "Number of values per point (NVal) is different than 1, which isn't currently supported."
        )

    # Lines 4-6
    origin = grid_prelude.origin
    grid_lines = [get_line(f) for _ in range(3)]
    grid = _parse_grid(origin, grid_lines)

    # Molecule
    atoms = [_parse_atom(get_line(f)) for _ in range(grid_prelude.atom_count)]
    molecule = Molecule(atoms)

    # Field values: everything left in the file, split on whitespace.
    values = [make_value(info, token) for token in f.read().split()]

    # `values` are assumed to be in the same order as the points of `grid`.
    # This holds because a GridMesh orders its points in the same way as a
    # cube file does.
    field = Field(grid, values)

    return Cube(info, molecule, field)
def parse_respin(f: TextIO) -> Respin:
    """Parse a file in the "respin" format (input format of ``resp``)

    Note that only files describing a single structure fit are currently
    supported.

    The first line is the title. Lines up to and including `` &cntrl`` are
    then skipped and the cntrl section is parsed. It is followed by the
    ``wtmol`` line (which must be equal to 1.0), the subtitle, a line with
    the charge and ``iuniq`` and finally one line with two ints per atom,
    terminated by an empty line or the end of file.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the "respin" file.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    Respin
        Object representing the fitting instructions for the ``resp`` program.
    """
    title = get_line(f)

    for line in f:
        if line == " &cntrl\n":
            break
    else:
        # Without this check parsing would carry on with an exhausted file
        # and fail later with an unrelated error.
        raise InputFormatError(
            "Failed to find the start of the cntrl section (` &cntrl`) in "
            "the respin file.")

    cntrl = _parse_cntrl(f)

    wtmol = get_line(f).strip()
    try:
        wtmol_value = float(wtmol)
    except ValueError as e:
        raise InputFormatError(
            f"Expected a float for the line specifying `wtmol`, found:\n{wtmol}"
        ) from e

    if not math.isclose(wtmol_value, 1.0, rel_tol=0, abs_tol=1e-6):
        raise InputFormatError(
            f"Encountered value of `wtmol` different from 1.0 ({wtmol}) but "
            f"parsing is supported only for single-structure respin files.")

    subtitle = get_line(f)

    charge_and_iuniq = get_line(f)
    charge_and_iuniq_split = charge_and_iuniq.split()
    if len(charge_and_iuniq_split) != 2:
        raise InputFormatError(
            f"Expected two ints for the line specifying charge and iuniq, found:\n{charge_and_iuniq}"
        )

    try:
        charge = int(charge_and_iuniq_split[0])
        iuniq = int(charge_and_iuniq_split[1])
    except ValueError as e:
        # Two fields were found but they are not both ints.
        raise InputFormatError(
            f"Expected two ints for the line specifying charge and iuniq, found:\n{charge_and_iuniq}"
        ) from e

    atoms: List[Atom] = []
    ivary = Respin.Ivary([])

    for line in f:
        if line.rstrip('\n') == "":
            break

        line_split = line.split()
        if len(line_split) != 2:
            raise InputFormatError(
                f"Expected two ints for the line specifying atom and ivary, found:\n{line}"
            )

        try:
            atom_value = int(line_split[0])
            ivary_value = int(line_split[1])
        except ValueError as e:
            # Two fields were found but they are not both ints.
            raise InputFormatError(
                f"Expected two ints for the line specifying atom and ivary, found:\n{line}"
            ) from e

        atoms.append(Atom(atom_value))
        # `respgen` uses a value of -99 but internally we use -1 as per resp spec.
        ivary.values.append(ivary_value if ivary_value != -99 else -1)

    if len(atoms) != iuniq:
        raise InputFormatError(
            f"The value of `iuniq` ({iuniq}) is different from the number of "
            f"atoms in the described molecule ({len(atoms)}).")

    return Respin(
        title,
        cntrl,
        subtitle,
        charge,
        Molecule(atoms),
        ivary
    )