Пример #1
0
    def read_gau_log(self, gau_log):
        """
        Read the coordinates from the archive section of a gaussian log file.

        Collecting lines is switched on by a line that starts with
        " 1", backslash, "1", backslash and switched off by a line that ends
        with two backslashes followed by "@" (that terminating line itself is
        not collected). The collected lines are stripped, joined to one string
        and split at every backslash. Entries with exactly four
        comma-separated fields (site name, x, y, z) that are found between the
        third and the fourth empty entry are read as atoms.

        For every atom an mds.Atom is appended to self.atoms; the frame (a
        list of numpy arrays, empty if nothing was found) is appended to
        self.ts_coords.

        Parameters
        ----------
        gau_log : str
            Path of the gaussian log file.
        """
        archive_parts = []
        in_archive = False

        # a separate name for the handle keeps the path argument intact
        with open(gau_log, "r") as opened_log:
            for line in opened_log:
                if line.startswith(" 1\\1\\"):
                    in_archive = True
                elif line.endswith("\\@\n"):
                    # NOTE(review): the terminating line is dropped as a whole,
                    # entries on it are never parsed - confirm this is intended
                    in_archive = False

                if in_archive:
                    # lines may be oddly wrapped, so they are glued together
                    # before the string is split into its entries
                    archive_parts.append(line.strip())

        # split string by its entries (an empty file yields one empty entry)
        entries = "".join(archive_parts).split("\\")

        cframe = []
        empty_entries = 0

        for entry in entries:
            if entry == "":
                empty_entries += 1

            # the geometry is located after the third empty entry
            if empty_entries != 3:
                continue

            fields = entry.split(",")

            if len(fields) == 4:
                ccoords = np.array([float(i) for i in fields[1:4]])

                # store info
                self.atoms.append(mds.Atom(sitnam=fields[0]))
                cframe.append(ccoords)

        self.ts_coords.append(cframe)
Пример #2
0
    def read_xyz(self, xyzfile, overwrite_data=False):
        """
        Read the contents of a xyz-file.
        XYZ-Format: https://en.wikipedia.org/wiki/XYZ_file_format

        Each frame consists of a line with the number of atoms, a comment line
        and one line per atom (site name, x, y, z and optionally a charge in
        the fifth column). The coordinates of every frame are appended to
        self.ts_coords as a list of numpy arrays holding x, y and z.

        A boxtype may be given in the comment line. The line has to look like the following
        Boxtype lattice a 52.213276 b 52.213276 c 52.213276 alpha 90.000000 beta 90.000000 gamma 90.000000

        Parameters
        ----------
        xyzfile : str
            Path of the xyz-file.
        overwrite_data : bool
            If True, id, site name and (if given) charge of already existing
            atoms are replaced and self.ts_boxes is emptied before a box read
            from the file is appended. If False, only attributes that an
            existing atom does not have yet are set.
        """
        with open(xyzfile, "r") as xyz_in:
            for line in xyz_in:

                # an empty (or whitespace-only) line where the number of atoms
                # is expected ends the parsing
                if not line.strip():
                    break

                # line with number of atoms (mandatory)
                num_atms = int(line.split()[0])
                # comment line (may be empty)
                comment_line = next(xyz_in)

                # read box information if there is any
                if "Boxtype" in comment_line:
                    box_fields = comment_line.split()
                    boxtype = box_fields[1]
                    # every second field, starting with the fourth, is a
                    # value; "None" and values that round to zero become None
                    box = [
                        None if i == "None" else
                        None if round(float(i), 8) == 0 else float(i)
                        for i in box_fields[3::2]
                    ]

                    if boxtype == "cartesian":
                        current_box = mdb.Box(
                            boxtype=boxtype,
                            crt_a=box[0],
                            crt_b=box[1],
                            crt_c=box[2],
                        )
                    elif boxtype == "lattice":
                        current_box = mdb.Box(
                            boxtype=boxtype,
                            ltc_a=box[0],
                            ltc_b=box[1],
                            ltc_c=box[2],
                            ltc_alpha=np.radians(box[3]),
                            ltc_beta=np.radians(box[4]),
                            ltc_gamma=np.radians(box[5]),
                        )
                    else:
                        current_box = mdb.Box(
                            boxtype=boxtype,
                            lmp_xlo=box[0],
                            lmp_xhi=box[1],
                            lmp_ylo=box[2],
                            lmp_yhi=box[3],
                            lmp_zlo=box[4],
                            lmp_zhi=box[5],
                            lmp_xy=box[6],
                            lmp_xz=box[7],
                            lmp_yz=box[8],
                        )

                    # overwrite given boxes
                    # NOTE(review): this empties the list for every frame with
                    # box information, so only the last such box survives -
                    # confirm this is intended for multi-frame files
                    if overwrite_data is True:
                        self.ts_boxes = []

                    self.ts_boxes.append(current_box)

                cframe = []

                # parse coordinates section
                for iid in range(num_atms):
                    catm = next(xyz_in).split()
                    csitnam = catm[0]
                    # only x, y, z; a fifth column is the charge and must not
                    # end up in the coordinates
                    ccoords = np.array([float(i) for i in catm[1:4]])

                    # charge of current atom
                    ccharge = None
                    if len(catm) > 4:
                        ccharge = float(catm[4])

                    # check if an instance of Atom with index iid already exists;
                    # overwrite info if it does or create a new one if it does not
                    try:
                        known_atm = self.atoms[iid]
                    except IndexError:
                        new_atm = mds.Atom(atm_id=iid, sitnam=csitnam)
                        if ccharge is not None:
                            new_atm.chge = ccharge
                        self.atoms.append(new_atm)
                    else:
                        if overwrite_data is True:
                            # overwrite data
                            known_atm.atm_id = iid
                            known_atm.sitnam = csitnam

                            if ccharge is not None:
                                known_atm.chge = ccharge
                        else:
                            # complement data
                            if not hasattr(known_atm, "atm_id"):
                                known_atm.atm_id = iid

                            if not hasattr(known_atm, "sitnam"):
                                known_atm.sitnam = csitnam

                            if (not hasattr(known_atm, "chge")
                                    and ccharge is not None):
                                known_atm.chge = ccharge

                    cframe.append(ccoords)

                # append current frame
                self.ts_coords.append(cframe)
Пример #3
0
    def read_pwin(self, pwin):
        """
        Read an input file for pw.x.

        The namelists CONTROL, SYSTEM, ELECTRONS, IONS and CELL are stored as
        key/value pairs in self.pw_entries[<namelist>] (values that look like
        a plain float or int are converted, everything else stays a string).
        The namelist header may be indented and is matched regardless of
        case; a namelist ends with a line whose first character is '/'.

        The cards ATOMIC_SPECIES, ATOMIC_POSITIONS, K_POINTS and
        CELL_PARAMETERS fill self.atm_types, self.atoms / self.ts_coords,
        self.pw_entries["K_POINTS"] and self.ts_boxes. ATOMIC_SPECIES and
        ATOMIC_POSITIONS end at the first empty line or at the end of file.

        If the SYSTEM namelist defines A, B, C, cosAB, cosAC and cosBC, a
        lattice box (unit angstrom) is built from them, converted to a
        cartesian box and appended to self.ts_boxes; these six keys are then
        removed from the SYSTEM entries and 'ibrav' is set to 0.

        Warnings are printed if the number of atoms differs from the SYSTEM
        entry 'nat' or if the folder given by 'pseudo_dir' does not exist.

        Parameters
        ----------
        pwin : str
            Path of the pw.x input file.
        """
        # container to supply atoms from "ATOMIC_POSITIONS" with info from "ATOMIC_SPECIES"
        # which may be compared to lammps "Masses" and "Atoms" input
        #TODO write frozen atoms
        atm_types_ptrs = {}
        namelists = ("CONTROL", "SYSTEM", "ELECTRONS", "IONS", "CELL")
        float_pattern = re.compile(r"^\d+?\.\d+?$")

        def convert_value(raw_value):
            """Convert a namelist value to float or int if it looks like one."""
            if float_pattern.match(raw_value):
                return float(raw_value)

            try:
                return int(raw_value)
            except ValueError:
                return raw_value

        def read_namelist(opened_file, section):
            """Store the 'key = value' lines up to the closing '/' (or EOF)."""
            entries = self.pw_entries[section]

            while True:
                nml_line = opened_file.readline()

                # end of file or end of block ('/') is reached
                if nml_line == "" or nml_line.lstrip().startswith("/"):
                    break

                # split line by '=' and ' '
                parts = re.split(r"\s*=\s*", nml_line)

                # empty lines and lines without an assignment carry no entry
                if len(parts) < 2:
                    continue

                entries[parts[0].strip()] = convert_value(parts[1].strip())

        with open(pwin) as opened_pwin:
            line = opened_pwin.readline()

            while line != "":
                keyword = line.strip()
                # fortran namelist names are case insensitive
                namelist = next((name for name in namelists
                                 if keyword.upper().startswith("&" + name)),
                                None)

                if namelist is not None:
                    read_namelist(opened_pwin, namelist)

                elif keyword.startswith("ATOMIC_SPECIES"):
                    # GET ATOM TYPES
                    atmcnt = 0

                    while True:
                        line = opened_pwin.readline()

                        # end of block (empty line or end of file)
                        if not line.strip():
                            break

                        split_line = line.split()
                        # add current atom type to atom types
                        self.atm_types[atmcnt] = mds.Atom(
                            sitnam=split_line[0],
                            weigh=float(split_line[1]),
                            pseudopotential=split_line[2])
                        # "C": 0, "H": 1, and so on
                        atm_types_ptrs[split_line[0]] = atmcnt
                        atmcnt += 1

                elif keyword.startswith("ATOMIC_POSITIONS"):
                    # GET ATOM COORDINATES
                    self.ts_coords.append([])

                    while True:
                        line = opened_pwin.readline()

                        # end of block (empty line or end of file)
                        if not line.strip():
                            break

                        split_line = line.split()
                        cur_atm_sitnam = split_line[0]

                        # omit further information if dictionary is empty (should not happen)
                        if atm_types_ptrs == {}:
                            self.atoms.append(mds.Atom(sitnam=cur_atm_sitnam))
                        else:
                            catom = mds.Atom(
                                sitnam=cur_atm_sitnam,
                                atm_key=atm_types_ptrs[cur_atm_sitnam])

                            # read frozen info if given
                            if len(split_line) == 7:
                                catom.ifrz_x = int(split_line[-3])
                                catom.ifrz_y = int(split_line[-2])
                                catom.ifrz_z = int(split_line[-1])

                            self.atoms.append(catom)

                        # add coordinates from current atom to the current frame
                        self.ts_coords[-1].append(
                            np.array([float(i) for i in split_line[1:4]]))

                elif keyword.startswith("K_POINTS"):
                    kpoints_option = keyword.split()[1]
                    self.pw_entries["K_POINTS"]["option"] = kpoints_option
                    kpoints_style = kpoints_option.strip("{}()")

                    if kpoints_style == "gamma":
                        # no line of the file belongs to this option, so none
                        # is read here (it could be the next card or namelist)
                        self.pw_entries["K_POINTS"]["k_point_grid"] = []
                    elif kpoints_style == "automatic":
                        # the grid is given in the line after the card
                        line = opened_pwin.readline()

                        if not line.startswith("\n"):
                            self.pw_entries["K_POINTS"]["k_point_grid"] = [
                                int(i) for i in line.split()
                            ]
                    else:
                        # explicit k-point lists are not stored, skip them
                        while True:
                            line = opened_pwin.readline()

                            if not line.strip():
                                break

                elif keyword.startswith("CELL_PARAMETERS"):
                    #TODO: calculate from Bohr to Angstrom in place?
                    box_unit = keyword.split()[1].strip("{}()")

                    # get box vectors
                    cbox = mdb.Box(boxtype="cartesian", unit=box_unit)
                    cbox.crt_a = [
                        float(i) for i in opened_pwin.readline().split()
                    ]
                    cbox.crt_b = [
                        float(i) for i in opened_pwin.readline().split()
                    ]
                    cbox.crt_c = [
                        float(i) for i in opened_pwin.readline().split()
                    ]
                    self.ts_boxes.append(cbox)

                line = opened_pwin.readline()

        # convert cell to lattice cell and append it to the current cells
        system = self.pw_entries["SYSTEM"]
        lattice_keys = ("A", "B", "C", "cosAB", "cosAC", "cosBC")

        if all(key in system for key in lattice_keys):
            cbox = mdb.Box(
                ltc_a=float(system["A"]),
                ltc_b=float(system["B"]),
                ltc_c=float(system["C"]),
                ltc_alpha=math.acos(float(system["cosBC"])),
                ltc_beta=math.acos(float(system["cosAC"])),
                ltc_gamma=math.acos(float(system["cosAB"])),
                boxtype="lattice",
                unit="angstrom")

            # delete surplus box entries
            for key in lattice_keys:
                del system[key]

            system["ibrav"] = 0

            # convert lattice box to cartesian
            cbox.box_lat2cart()
            self.ts_boxes.append(cbox)

        # final check
        if len(self.atoms) != system.get("nat"):
            print("***Warning: Number of atoms and SYSTEM entry 'nat' differ!")

        # 'pseudo_dir' is optional, only check the folder if it is given;
        # the value still carries its fortran quotes
        pseudo_dir = self.pw_entries["CONTROL"].get("pseudo_dir")

        if (pseudo_dir is not None
                and os.path.isdir(pseudo_dir.strip("'\"")) is False):
            print("***Warning: Folder for Pseudopotentials does not exist!")
Пример #4
0
    def read_pwout(self,
                   pwout,
                   read_crystal_sections=False,
                   save_all_scf_steps=True):
        """
        CAVEAT: UNDER CONSTRUCTION! Read the output of pw.x.

        Read are the CELL_PARAMETERS sections (appended to self.ts_boxes),
        the ATOMIC_POSITIONS sections (self.atoms is rebuilt for each of them
        and a new frame is appended to self.ts_coords) as well as the total
        energies (multiplied by RYDBERG_EV), densities and new unit-cell
        volumes, which are appended to self.pw_other_info["ENERGIES"],
        ["DENSITIES"] and ["VOLUMES"].
        Boxes given in units of alat are converted with Box.alat2angstrom.
        #TODO READ FROZEN ATOMS

        Parameters
        ----------
        pwout : str
            Path of the pw.x output file.
        read_crystal_sections : bool
            If True, the lattice parameter, the crystal axes and the
            'Cartesian axes' atom listing are read as well (the coordinates
            of that listing are multiplied by the lattice parameter).
        save_all_scf_steps : bool
            If False, only the last frame of self.ts_coords and the last
            entry of every list in self.pw_other_info are kept.

        Raises
        ------
        Warning
            If a CELL_PARAMETERS line names neither alat, bohr nor angstrom.
        """
        # site name -> index of the atom type; filled by "atomic species"
        atm_types_ptrs = {}

        def new_atom(sitnam, atm_id, type_ptrs):
            """Create an atom; the type key is only set for known species."""
            if sitnam in type_ptrs:
                return mds.Atom(sitnam=sitnam,
                                atm_id=atm_id,
                                atm_key=type_ptrs[sitnam])

            return mds.Atom(sitnam=sitnam, atm_id=atm_id)

        with open(pwout) as opened_pwout:
            line = opened_pwout.readline()

            while line != '':
                if line.startswith("CELL_PARAMETERS"):
                    split_line = line.split()

                    # get box vectors
                    cbox = mdb.Box(boxtype="cartesian")

                    if "alat" in line:
                        cbox.unit = "alat"
                    elif "bohr" in line:
                        cbox.unit = "bohr"
                    elif "angstrom" in line:
                        cbox.unit = "angstrom"
                    else:
                        raise Warning(
                            "Keyword for box unknown and not implemented.")

                    cbox.crt_a = [
                        float(i) for i in opened_pwout.readline().split()
                    ]
                    cbox.crt_b = [
                        float(i) for i in opened_pwout.readline().split()
                    ]
                    cbox.crt_c = [
                        float(i) for i in opened_pwout.readline().split()
                    ]

                    # the value of alat is the third field of the card line
                    if cbox.unit == "alat":
                        cbox.alat2angstrom(float(split_line[2].strip(")")))

                    self.ts_boxes.append(cbox)

                elif line.startswith("     atomic species"):
                    line = opened_pwout.readline()
                    atm_types_ptrs = {}
                    atm_type_cntr = 0

                    # the listing ends with an empty line (or the file ends)
                    while line.strip():
                        atm_types_ptrs[line.split()[0]] = atm_type_cntr
                        atm_type_cntr += 1
                        line = opened_pwout.readline()

                elif line.startswith("ATOMIC_POSITIONS"):
                    #TODO: need a smarter way to do this!
                    # overwrite existing atoms
                    self.atoms = []
                    # prepare container for coordinates to come
                    self.ts_coords.append([])
                    atm_cntr = 0

                    # read the coordinates
                    while True:
                        line = opened_pwout.readline()

                        # stop reading when the section (or the file) ends
                        if (not line.strip() or
                                line.startswith("End final coordinates")):
                            break

                        split_line = line.split()

                        self.atoms.append(
                            new_atom(split_line[0], atm_cntr, atm_types_ptrs))
                        # only x, y, z; trailing flags of frozen atoms must
                        # not end up in the coordinates
                        self.ts_coords[-1].append(
                            np.array([float(i) for i in split_line[1:4]]))
                        atm_cntr += 1

                elif line.startswith("!    total energy"):
                    energy = float(line.split()[-2]) * RYDBERG_EV
                    self.pw_other_info["ENERGIES"].append(energy)
                elif line.startswith("     density ="):
                    density = float(line.split()[-2])
                    self.pw_other_info["DENSITIES"].append(density)
                elif line.startswith("     new unit-cell volume"):
                    volume = float(line.split()[-3])
                    self.pw_other_info["VOLUMES"].append(volume)

                # get alat value
                if read_crystal_sections is True:

                    if line.startswith("     lattice parameter (alat)"):
                        # not sure why alat was converted here before, seems to be wrong
                        # when the whole box is converted later anyway
                        alat = float(line.split()[-2])

                    # get box with box vectors
                    elif line.startswith(
                            "     crystal axes: (cart. coord. in units of alat)"
                    ):
                        # get box vectors
                        cbox = mdb.Box(boxtype="cartesian")
                        cbox.unit = "alat"

                        cbox.crt_a = [
                            float(i)
                            for i in opened_pwout.readline().split()[3:6]
                        ]
                        cbox.crt_b = [
                            float(i)
                            for i in opened_pwout.readline().split()[3:6]
                        ]
                        cbox.crt_c = [
                            float(i)
                            for i in opened_pwout.readline().split()[3:6]
                        ]
                        cbox.alat2angstrom(alat)
                        self.ts_boxes.append(cbox)

                    elif line.startswith("   Cartesian axes"):
                        # overwrite existing atoms
                        self.atoms = []
                        # prepare container for coordinates to come
                        self.ts_coords.append([])
                        atm_cntr = 0

                        # skip next two lines
                        opened_pwout.readline()
                        opened_pwout.readline()

                        # read the coordinates
                        while True:
                            line = opened_pwout.readline()

                            # stop reading when end of current entry (or of
                            # the file) is reached
                            if not line.strip():
                                break

                            split_line = line.split()

                            self.atoms.append(
                                new_atom(split_line[1], atm_cntr,
                                         atm_types_ptrs))
                            self.ts_coords[-1].append(
                                np.array(
                                    [float(i) * alat
                                     for i in split_line[6:9]]))
                            atm_cntr += 1

                line = opened_pwout.readline()

        # save only the last frame
        if save_all_scf_steps is False:
            # nothing to cut down if no frame was read at all
            if self.ts_coords:
                self.ts_coords = [self.ts_coords[-1]]

            for key in self.pw_other_info:

                try:
                    self.pw_other_info[key] = [self.pw_other_info[key][-1]]
                except IndexError:
                    pass
Пример #5
0
    def read_gau(self, gauin, overwrite=False):
        """
        Read a gaussian input file.

        The link 0 commands (%chk, %nproc, %mem), the settings of the route
        line (job type, geom, EmpiricalDispersion, method/basis set) as well
        as charge and multiplicity are stored as attributes of the instance,
        but only if the instance does not have the attribute yet.
        The atoms following the charge/multiplicity line are appended to
        self.atoms and their coordinates are appended as one frame to
        self.ts_coords. If self.geom is "connectivity", the bond entries are
        appended to self.bonds (atom ids are stored starting with 0).

        Parameters
        ----------
        gauin : str
            Path of the gaussian input file.
        overwrite : bool
            Currently not used by this method.
        """
        print("***Gau-Info: Reading  Gaussian-Input-File!")

        cframe = []

        with open(gauin, "r") as gau_in:
            for line in gau_in:
                if "%chk" in line:
                    if not hasattr(self, "chk"):
                        self.chk = line.split("=")[1].strip("\n")
                elif "%nproc" in line:
                    if not hasattr(self, "nproc"):
                        self.nproc = int(line.split("=")[1].strip("\n"))
                elif "%mem" in line:
                    if not hasattr(self, "mem"):
                        mem = line.split("=")[1].strip("\n")
                        # keep only the leading number, the unit is dropped
                        self.mem = int(re.findall(r'^\d+', mem)[0])
                elif line.startswith("#"):
                    # classify job settings
                    for setting in line.split():
                        # remove # e.g. #SP
                        if setting.startswith("#"):
                            setting = setting.strip("#")

                        if setting in self.job_types:
                            if not hasattr(self, "job_type"):
                                self.job_type = setting
                        elif "geom" in setting:
                            if not hasattr(self, "geom"):
                                self.geom = setting.split("=")[1]
                        elif "EmpiricalDispersion" in setting:
                            if not hasattr(self, "dispersion"):
                                self.dispersion = setting.split("=")[1]
                        elif "/" in setting:
                            method_basis = setting.split("/")
                            if not hasattr(self, "method"):
                                self.method = method_basis[0]
                            if not hasattr(self, "basis_set"):
                                self.basis_set = method_basis[1]

                elif re.findall(r'^-?\d+ \d$', line):
                    # line with charge and multiplicity
                    charge_mult = line.split()

                    if not hasattr(self, "charge"):
                        self.charge = int(charge_mult[0])

                    if not hasattr(self, "multiplicity"):
                        self.multiplicity = int(charge_mult[1])

                    # keep consuming lines from the same file iterator; the
                    # outer loop continues after the coordinates section
                    for coord_line in gau_in:

                        # quit reading section when empty line occurs
                        if coord_line == "\n":
                            break

                        coord_fields = coord_line.split()
                        csitnam = coord_fields[0]
                        ccoords = np.array(
                            [float(i) for i in coord_fields[1:]])

                        # store info
                        self.atoms.append(mds.Atom(sitnam=csitnam))
                        cframe.append(ccoords)

                    self.ts_coords.append(cframe)

                elif hasattr(self, "geom") and self.geom == "connectivity":
                    # section with bonds-information
                    if re.search(r'(^\d+ \d+ \d+.\d+)|(^\d+\n$)', line):
                        bond_fields = line.split()

                        # get rest of the bond-entries; stop reading current
                        # entry if line is empty
                        while bond_fields:

                            # only process line if it really has bond information
                            if len(bond_fields) > 1:
                                # get atm-id 1 (always the same for current line)
                                catm_id1 = int(bond_fields[0])

                                for idx, cur_subentry in enumerate(
                                        bond_fields[1:]):

                                    # get atm-id 2
                                    if idx % 2 == 0:
                                        # decrease indices by 1 since internally
                                        # atom-indices start with 0
                                        cbnd = mds.Bond(
                                            atm_id1=catm_id1 - 1,
                                            atm_id2=int(cur_subentry) - 1)
                                    else:
                                        cbnd.bnd_order = float(cur_subentry)
                                        self.bonds.append(cbnd)

                            # if there is only one empty line at file bottom;
                            # next() is used since file objects have no
                            # .next() method in python 3
                            try:
                                bond_fields = next(gau_in).split()
                            except StopIteration:
                                break
Пример #6
0
    def read_gau(self,
                 gauin,
                 coordinate_style="cartesian",
                 overwrite=False,
                 debug=False):
        """
        Read a Gaussian input file.

        The file is parsed section by section: link 0 commands, route
        section, title line, charge/multiplicity line, atom lines and (only if
        the route section mentions connectivity) the bond lines.

        Input:
            >   gauin               str; path of the gaussian input file
            >   coordinate_style    str; stored in self.coordinate_style;
                                    only "cartesian" atom lines are converted
                                    to coordinates
            >   overwrite           bool; replace data that is already stored
                                    instead of only complementing missing data
            >   debug               bool; print which section is being parsed
        """
        self.coordinate_style = coordinate_style

        print("***Gau-Info: Reading  Gaussian-Input-File!")

        # (keyword in file, attribute on self, converter); "%oldchk" has to be
        # tested before "%chk" so that the first matching keyword wins
        link0_options = (
            ("%oldchk", "oldchk", str),
            ("%chk", "chk", str),
            ("%nproc", "nproc", int),
            ("%mem", "mem", str),
            ("%rwf", "rwf", str),  # TBD
        )

        with open(gauin, "r") as gau_in:
            line = gau_in.readline()

            if debug is True:
                print("Parsing link 0 and route section")

            # line != "" -> just to be safe this will eventually finish when
            # loading a wrong file by accident
            while line != "":

                # // LINK 0 SECTION
                for keyword, attribute, convert in link0_options:
                    if keyword in line:
                        if not hasattr(self, attribute) or overwrite is True:
                            value = line.split("=")[1].strip("\n")
                            setattr(self, attribute, convert(value))
                        break
                else:
                    #// ROUTE SECTION (may be scattered over several lines)
                    if line.startswith("#"):
                        # decide once for the whole section whether it is
                        # stored; deciding per line would drop every line
                        # after the first one
                        store_route = (self.job_settings == "" or
                                       overwrite is True)

                        if store_route:
                            # overwriting replaces the old route section
                            self.job_settings = ""

                        # keep reading until an empty line (or eof) is reached
                        while line not in ("", "\n"):
                            if store_route:
                                self.job_settings += line.rstrip("\n") + " "
                            line = gau_in.readline()

                        # this break is outside of the for-loop, i.e. it
                        # leaves the enclosing while-loop
                        break

                line = gau_in.readline()

            #// TITLE SECTION
            if debug is True:
                print("Title section")

            # skip all empty lines until title line is reached
            while line == "\n":
                line = gau_in.readline()

            # the title line itself is not stored, continue with the next line
            line = gau_in.readline()

            #// MOLECULE SPECIFICATION SECTION
            if debug is True:
                print("Molecule specification section")

            while line == "\n":
                line = gau_in.readline()

            # integers alternate between charge and multiplicity; the sign has
            # to be part of the match or negative charges lose their minus
            charge_multiplicity = re.findall(r'[-+]?\d+', line)

            if self.gaussian_charges == [] or overwrite is True:
                self.gaussian_charges = [
                    int(i) for i in charge_multiplicity[0::2]
                ]

            if self.gaussian_multiplicities == [] or overwrite is True:
                self.gaussian_multiplicities = [
                    int(i) for i in charge_multiplicity[1::2]
                ]

            line = gau_in.readline()

            #// COORDINATES SECTION
            if debug is True:
                print("Coordinates section")

            g_frame = []  # gaussian coordinates
            # counter to substitute given atoms or complement their attributes
            atom_index = 0

            # an empty line ends the section; "" (eof) is checked as well so a
            # truncated file does not raise an IndexError on an empty split
            while line not in ("", "\n"):
                columns = line.split()
                atom_spec = columns[0]

                # element parameters section
                if "Fragment=" in atom_spec:
                    fragment_id = re.search(r"Fragment=\d+", atom_spec).group(0)
                    fragment_id = int(re.search(r"\d+", fragment_id).group(0))
                    csitnam = atom_spec.split("(")[0]
                else:
                    fragment_id = None
                    csitnam = atom_spec

                # translate atomic number to its element
                if csitnam.isdigit():
                    csitnam = mde.atomicnumber_element[int(csitnam)]

                if "Iso=" in atom_spec:
                    print("Iso not implemented yet")

                if "Spin=" in atom_spec:
                    print("Spin not implemented yet")

                # element freeze section
                if len(columns) > 4 and columns[-4] in ("0", "-1"):
                    cifrz = int(columns[-4])
                else:
                    cifrz = None

                # store element info
                cur_atom = mds.Atom(sitnam=csitnam,
                                    atm_id=atom_index,
                                    ifrz=cifrz,
                                    grp_id=fragment_id)

                try:
                    if overwrite is True:
                        self.atoms[atom_index] = cur_atom
                    else:
                        known_atom = self.atoms[atom_index]

                        # only complement attributes that are not defined yet
                        if not hasattr(known_atom, "sitnam"):
                            known_atom.sitnam = cur_atom.sitnam

                        if hasattr(cur_atom, "ifrz") and not hasattr(
                                known_atom, "ifrz"):
                            known_atom.ifrz = cur_atom.ifrz

                        if hasattr(cur_atom, "grp_id") and not hasattr(
                                known_atom, "grp_id"):
                            known_atom.grp_id = cur_atom.grp_id

                except IndexError:
                    # no atom with this index is stored yet
                    self.atoms.append(cur_atom)

                # add coordinates (always the last three columns)
                if self.coordinate_style == "cartesian":
                    ccoords = np.array([float(i) for i in columns[-3:]])
                    g_frame.append(ccoords)
                else:
                    print("Z-Matrix not implemented yet.")

                line = gau_in.readline()
                atom_index += 1

            # overwrite or append frame (coordinates) to existing one(s)
            g_frame = np.array(g_frame)

            if self.ts_coords != [] and overwrite is True:
                self.ts_coords[-1] = g_frame
            else:
                self.ts_coords.append(g_frame)

            # skip all empty lines until the first bond line is reached
            # (readline returns "" at eof, which ends the loop as well)
            while line == "\n":
                line = gau_in.readline()

            # BOND SECTION
            if debug is True:
                print("Bonds Section")

            if "CONNECTIVITY" in self.job_settings.upper() and re.search(
                    r'(^\d+ \d+ \d+.\d+)|(^\d+\n$)', line) is not None:
                gau_bonds = []

                while line not in ("", "\n"):
                    columns = line.split()

                    # only process line if it really has bond information
                    if len(columns) > 1:
                        # atm-id 1 (always the same for current line);
                        # decrement since internal atom indices start with 0
                        catm_id1 = int(columns[0]) - 1
                        # bond partners and bond orders alternate
                        catms_id2 = [int(i) - 1 for i in columns[1::2]]
                        cbnd_orders = [float(i) for i in columns[2::2]]

                        # append bond to gaussian given bonds
                        for catm_id2, cbnd_order in zip(catms_id2,
                                                        cbnd_orders):
                            cbnd = mds.Bond(atm_id1=catm_id1,
                                            atm_id2=catm_id2,
                                            bnd_order=cbnd_order)
                            gau_bonds.append(cbnd)

                    line = gau_in.readline()

                # complement bond orders that do not exist yet or replace all
                # bonds if overwriting is wanted (or the bond counts differ)
                if len(gau_bonds) == len(self.bonds) and overwrite is False:
                    for gau_bnd in gau_bonds:
                        for universe_bnd in self.bonds:
                            if (universe_bnd.atm_id1 == gau_bnd.atm_id1 and
                                    universe_bnd.atm_id2 == gau_bnd.atm_id2):
                                if not hasattr(universe_bnd, "bnd_order"):
                                    universe_bnd.bnd_order = gau_bnd.bnd_order
                                break
                else:
                    self.bonds = gau_bonds

                # create molecules by bond information
                self.fetch_molecules_by_bonds()
Пример #7
0
    def read_gau_log(self,
                     gau_log,
                     save_all_scf_steps=False,
                     overwrite=False,
                     read_summary=False):
        """
        Read geometries and energies from a gaussian log file.

        For every block that is closed by an "Optimized Parameters" table, the
        frame with the lowest SCF energy is appended to self.ts_coords and its
        energy (multiplied with hartree_eV) to
        self.gaussian_other_info["scf_energies"].

        Input:
            >   gau_log             str; path of the gaussian log file
            >   save_all_scf_steps  bool; currently not used
            >   overwrite           bool; empty self.ts_coords and the stored
                                    energies before reading and replace the
                                    stored atoms by the ones from the log
            >   read_summary        bool; pass the result summary found in the
                                    log to self._read_gau_log_summary
        """
        #TODO read atom by initial coordinates and not by scf cycles

        if overwrite is True:
            self.ts_coords = []

        if "scf_energies" not in self.gaussian_other_info or overwrite is True:
            self.gaussian_other_info["scf_energies"] = []
            self.gaussian_other_info["Counterpoise corrected energy"] = None

        all_scf_cycles_coords = []
        current_scf_cycles_coords = []
        all_scf_cycles_energies = []
        scf_cycles_energies = []
        summary_parts = []
        g_atoms = []
        atom_index = 0
        read_element_numbers = True
        # line that closes the tables which are read below
        table_end = " ----------------------------"

        # read geometries from all scf cycles and their corresponding energies
        with open(gau_log, "r") as opened_gau_log:
            line = opened_gau_log.readline()

            # Every inner loop also stops on line == "" (eof); readline keeps
            # returning "" there, so a truncated log would loop forever.
            while line != "":

                if "Standard orientation" in line:
                    cframe = []
                    # advance 5 lines; the rows of the table are parsed next
                    for _ in range(5):
                        line = opened_gau_log.readline()

                    while line != "" and not line.startswith(table_end):
                        split_line = line.split()
                        cframe.append(
                            np.array([float(i) for i in split_line[3:]]))

                        # get element number, i.e. element name
                        if read_element_numbers is True:
                            element_number = int(split_line[1])
                            element_name = mde.element_name[element_number]
                            g_atoms.append(
                                mds.Atom(sitnam=element_name,
                                         atm_id=atom_index))
                            atom_index += 1

                        line = opened_gau_log.readline()

                    # append current frame to current scf_cycle
                    current_scf_cycles_coords.append(cframe)
                    # stop reading the element numbers after first entry
                    read_element_numbers = False

                elif "SCF Done:" in line:
                    scf_cycles_energies.append(
                        hartree_eV * float(line.split()[4]))

                elif "!   Optimized Parameters   !" in line:
                    # current entry is finished
                    all_scf_cycles_coords.append(current_scf_cycles_coords)
                    all_scf_cycles_energies.append(scf_cycles_energies)
                    # reset coordinates and energies for next run to read
                    current_scf_cycles_coords = []
                    scf_cycles_energies = []

                    # advance 6 lines; the rows of the table are parsed next
                    for _ in range(6):
                        line = opened_gau_log.readline()

                    while line != "" and not line.startswith(table_end):
                        split_line = line.split()
                        parameter_definition = split_line[2]
                        parameter_value = float(split_line[3])
                        # setdefault: the container is missing if no
                        # "Initial Parameters" table was read before
                        self.gaussian_other_info.setdefault(
                            parameter_definition, []).append(parameter_value)

                        line = opened_gau_log.readline()

                # get scanned coordinates (if mod redundant is used)
                elif "Initial Parameters" in line:
                    # advance 5 lines; the rows of the table are parsed next
                    for _ in range(5):
                        line = opened_gau_log.readline()

                    while line != "" and not line.startswith(table_end):
                        # prepare containers for current parameter definition
                        parameter_definition = line.split()[2]
                        self.gaussian_other_info[parameter_definition] = []

                        line = opened_gau_log.readline()

                elif "Counterpoise corrected energy" in line:
                    self.gaussian_other_info[
                        "Counterpoise corrected energy"] = float(
                            line.split()[-1]) * hartree_eV

                # read the summary
                elif line.startswith(" 1\\1\\") is True:
                    # collect every line up to the one that ends with @;
                    # the pieces are joined to one huge string later since
                    # lines could be oddly wrapped
                    while line != "" and not line.endswith("@\n"):
                        summary_parts.append(line.strip())
                        line = opened_gau_log.readline()

                    # add the closing line (that ends with \\@) as well
                    summary_parts.append(line.strip())

                line = opened_gau_log.readline()

        log_resume = "".join(summary_parts)

        # extract geometries with lowest energy, omit other scf geometries
        # and energies
        for scf_cycle_coords, scf_cycle_energy in zip(all_scf_cycles_coords,
                                                      all_scf_cycles_energies):
            if not scf_cycle_energy:
                # nothing to choose from; indexing with None would raise a
                # TypeError which the handler below does not catch
                print("***Warning: No SCF energies found for an optimization "
                      "step, skipping it")
                continue

            # index of the (first) frame with the lowest energy
            min_energy_idx = min(range(len(scf_cycle_energy)),
                                 key=scf_cycle_energy.__getitem__)

            try:
                self.ts_coords.append(scf_cycle_coords[min_energy_idx])
                self.gaussian_other_info["scf_energies"].append(
                    scf_cycle_energy[min_energy_idx])
            except IndexError:
                print(
                    "***Warning: Gaussian run was aborted before it finished")

        if read_summary is True:
            if log_resume != "":
                self._read_gau_log_summary(log_resume, overwrite=overwrite)
            else:
                print("{} has no result summary".format(gau_log))

        # overwrite the stored atoms or complement their missing attributes
        for idx, gatm in enumerate(g_atoms):
            try:
                if overwrite is True:
                    self.atoms[idx] = gatm
                else:
                    if not hasattr(self.atoms[idx], "sitnam"):
                        self.atoms[idx].sitnam = gatm.sitnam

                    if not hasattr(self.atoms[idx], "atm_id"):
                        self.atoms[idx].atm_id = gatm.atm_id

            except IndexError:
                # overwrite the whole entry if one index does not fit
                self.atoms = g_atoms
                break
Пример #8
0
    def read_mae(self, maefile, overwrite_data=False):
        """
        Read Schroedinger Maestro's mae-files.

        Atoms are read from every "m_atom" section (one frame of coordinates
        is appended to self.ts_coords per section), bonds from every "m_bond"
        section. Atom ids of the file are translated to consecutive indices
        starting with 0.

        Input:
            >   maefile         str; path of the mae-file
            >   overwrite_data  bool; replace attributes of atoms and bonds
                                which are already stored instead of only
                                complementing the missing ones
        """
        # atom id given in the file -> internal (0-based) atom index
        atm_id_old_new = {}

        with open(maefile, "r") as mae_in:
            for line in mae_in:

                # find header for atoms-section
                if "m_atom" in line:
                    cframe = []
                    num_atms = int(re.findall(r'\d+', line)[0])

                    # skip lines until coordinates section is reached
                    while ":::" not in line:
                        line = next(mae_in)

                    for iid in range(num_atms):
                        # next() instead of the python 2 only file.next()
                        catm = next(mae_in).split()
                        atm_id_old_new[int(catm[0])] = iid
                        csitnam = catm[-1]
                        ccoords = np.array([float(i) for i in catm[2:5]])

                        # check if an instance of Atom with index iid already
                        # exists; create a new one if it does not
                        try:
                            known_atom = self.atoms[iid]
                        except IndexError:
                            self.atoms.append(
                                mds.Atom(atm_id=iid, sitnam=csitnam))
                        else:
                            if overwrite_data is True:
                                # overwrite data
                                known_atom.atm_id = iid
                                known_atom.sitnam = csitnam
                            else:
                                # complement data
                                if not hasattr(known_atom, "atm_id"):
                                    known_atom.atm_id = iid

                                if not hasattr(known_atom, "sitnam"):
                                    known_atom.sitnam = csitnam

                        cframe.append(ccoords)

                    # append current frame
                    self.ts_coords.append(cframe)

                elif "m_bond" in line:
                    num_bnds = int(re.findall(r'\d+', line)[0])

                    # skip lines until bonds section is reached
                    while ":::" not in line:
                        line = next(mae_in)

                    for iid in range(num_bnds):
                        cbnd = next(mae_in).split()

                        # store section
                        atm_1 = atm_id_old_new[int(cbnd[1])]
                        atm_2 = atm_id_old_new[int(cbnd[2])]
                        bnd_order = int(cbnd[3])

                        # check if an instance of Bond with index iid already
                        # exists; create a new one if it does not
                        try:
                            known_bond = self.bonds[iid]
                        except IndexError:
                            self.bonds.append(
                                mds.Bond(bnd_id=iid,
                                         atm_id1=atm_1,
                                         atm_id2=atm_2,
                                         bnd_order=bnd_order))
                        else:
                            if overwrite_data is True:
                                # overwrite data; the atom ids are replaced
                                # as well, otherwise the new bond order would
                                # be attached to the old pair of atoms
                                known_bond.bnd_id = iid
                                known_bond.atm_id1 = atm_1
                                known_bond.atm_id2 = atm_2
                                known_bond.bnd_order = bnd_order
                            else:
                                # complement data
                                if not hasattr(known_bond, "bnd_id"):
                                    known_bond.bnd_id = iid

                                if not hasattr(known_bond, "atm_id1"):
                                    known_bond.atm_id1 = atm_1

                                if not hasattr(known_bond, "atm_id2"):
                                    known_bond.atm_id2 = atm_2

                                if not hasattr(known_bond, "bnd_order"):
                                    known_bond.bnd_order = bnd_order
Пример #9
0
    def read_pdb(self, pdb, overwrite_data=False, debug=False):
        """
        Read a pdb file. Work in Progress!

        Only HETATM, CONECT and CRYST1 records are evaluated. HETATM and
        CONECT records are split by whitespace, the CRYST1 record is read by
        its fixed columns. Atoms and bonds are appended to self.atoms and
        self.bonds, all coordinates form one frame which is appended to
        self.ts_coords.

        Input:
            >   pdb             str; path of the pdb file
            >   overwrite_data  bool; currently not used
            >   debug           bool; currently not used
        """
        atm_idx = 0
        bnd_idx = 0
        # atom id given in the file -> internal (0-based) atom index
        atm_id_old_new = {}
        all_pdb_coords = []
        # (id_1, id_2) of every bond stored so far; CONECT lists each bond
        # once per partner, a set makes the duplicate check cheap
        known_bnds = set()

        with open(pdb, "r") as pdb_file:
            for line in pdb_file:
                if line.startswith("HETATM"):
                    atom_line = line.split()
                    atm_id = int(atom_line[1])
                    sitnam = atom_line[2]
                    coords = np.array([float(i) for i in atom_line[3:6]])
                    atm_id_old_new[atm_id] = atm_idx
                    self.atoms.append(mds.Atom(atm_id=atm_idx, sitnam=sitnam))
                    all_pdb_coords.append(coords)
                    atm_idx += 1

                elif line.startswith("CONECT"):
                    # convert old indices to new ones
                    new_indices = [
                        atm_id_old_new[int(i)] for i in line.split()[1:]
                    ]

                    # only covalent bonds are of interest (columns 1-4)
                    for atom_index in new_indices[1:5]:
                        # sort by id
                        id_1, id_2 = sorted((new_indices[0], atom_index))

                        if (id_1, id_2) not in known_bnds:
                            known_bnds.add((id_1, id_2))
                            self.bonds.append(
                                mds.Bond(bnd_id=bnd_idx,
                                         atm_id1=id_1,
                                         atm_id2=id_2))
                            bnd_idx += 1

                elif line.startswith("CRYST1"):
                    # fixed columns of the pdb format (1-based, inclusive):
                    # a 7-15, b 16-24, c 25-33, alpha 34-40, beta 41-47,
                    # gamma 48-54
                    a = float(line[6:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    cbox = mdb.Box(
                        boxtype="lattice", ltc_a=a, ltc_b=b, ltc_c=c,
                        ltc_alpha=alpha, ltc_beta=beta, ltc_gamma=gamma)
                    self.ts_boxes.append(cbox)

            # append coordinates
            self.ts_coords.append(all_pdb_coords)

        self.fetch_molecules_by_bonds()
Пример #10
0
    def read_prmtop(self, prmtop):
        """
        Read the contents of the amber prmtop-file. CHARMM-Entries will not be
        read!

        Input:
            >   mode        str; complement|overwrite|append;
                            complement (missing) data, overwrite given data
                            or append to given data
        """
        # /// parse file
        with open(prmtop, "r") as prmtop_in:
            prmtop_version = prmtop_in.readline()

            # parse sections
            for line in prmtop_in:

                if line.startswith("%FLAG TITLE"):
                    # section contains the title of the topology file
                    next(prmtop_in)  # line with formatting info
                    prmtop_title = next(prmtop_in)
                elif line.startswith("%FLAG POINTERS"):
                    # section which contains the information about how many
                    # parameters are present in all of the sections

                    next(prmtop_in)  # line with formatting info

                    line = next(prmtop_in).split()
                    (natom, ntypes, nbonh, mbona, ntheth, mtheta, nphih, mphia,
                     nhparm, nparm) = [int(i) for i in line]

                    line = next(prmtop_in).split()
                    (nnb, nres, nbona, ntheta, nphia, numbnd, numang, nptra,
                     natyp, nphb) = [int(i) for i in line]

                    line = next(prmtop_in).split()
                    (ifpert, nbper, ngper, ndper, mbper, mgper, mdper, ifbox,
                     nmxrs, ifcap) = [int(i) for i in line]

                    line = next(prmtop_in).split()
                    numextra = line[0]

                    try:
                        ncopy = line[1]  # entry "copy" need not to be given
                    except (IndexError):
                        pass

                elif line.startswith("%FLAG ATOM_NAME"):
                    # SECTION: ATOM_NAME
                    # section which contains the atom name for every atom in
                    # the prmtop
                    section_atom_name = agphf.parse_section(prmtop_in, natom)

                elif line.startswith("%FLAG CHARGE"):
                    # SECTION: CHARGE
                    # section which contains the charge for every atom in the prmtop
                    # prmtop-charges must be divided by 18.2223 (reasons unknown)
                    section_charge = agphf.parse_section(prmtop_in, natom)

                elif line.startswith("%FLAG ATOMIC NUMBER"):
                    pass
                elif line.startswith("%FLAG MASS"):
                    # SECTION: MASS
                    # section which  contains the atomic mass of every atom
                    # in g/mol.
                    section_mass = agphf.parse_section(prmtop_in, natom)

                elif line.startswith("%FLAG ATOM_TYPE_INDEX"):
                    # SECTION ATOM TYPE INDEX
                    # section which contains the Lennard-Jones atom type index.
                    # The Lennard-Jones potential contains parameters for every
                    # pair of atoms in the system. All atoms with the same
                    # sigma and epsilon parameters are assigned to the same type (regardless
                    # of whether they have the same AMBER ATOM TYPE).
                    section_atom_type_index = agphf.parse_section(prmtop_in,
                                                                  natom,
                                                                  itype="int")

                elif line.startswith("%FLAG NUMBER_EXCLUDED_ATOMS"):
                    # section which contains the number of atoms that need to be
                    # excluded from the non-bonded calculation loop for atom i
                    # because i is involved in a bond, angle, or torsion with those atoms
                    section_number_excluded_atoms = agphf.parse_section(
                        prmtop_in, natom, itype="int")

                elif line.startswith("%FLAG NONBONDED_PARM_INDEX"):
                    # section which contains the pointers for each pair of LJ
                    # atom types into the LENNARD JONES ACOEF and
                    # LENNARD JONES BCOEF arrays
                    section_nonbonded_parm_index = agphf.parse_section(
                        prmtop_in, ntypes * ntypes, itype="int")

                elif line.startswith("%FLAG RESIDUE_LABEL"):
                    # section which contains the residue name for every residue
                    # in the prmtop
                    section_residue_label = agphf.parse_section(
                        prmtop_in, nres)
                elif line.startswith("%FLAG RESIDUE_POINTER"):
                    section_residue_pointer = agphf.parse_section(prmtop_in,
                                                                  nres,
                                                                  itype="int")
                elif line.startswith("%FLAG BOND_FORCE_CONSTANT"):
                    # section which lists all of the bond force constants
                    # k in kcal/(mol*Angstrom**2) for each unique bond type
                    section_bond_force_constant = agphf.parse_section(
                        prmtop_in, numbnd)
                elif line.startswith("%FLAG BOND_EQUIL_VALUE"):
                    # section which lists all of the bond equilibrium distances
                    # in "Angstrom" for each unique bond type
                    section_bond_equil_value = agphf.parse_section(
                        prmtop_in, numbnd)
                elif line.startswith("%FLAG ANGLE_FORCE_CONSTANT"):
                    # section which contains all of the angle equilibrium angles
                    # in "radians"
                    section_angle_force_constant = agphf.parse_section(
                        prmtop_in, numang)
                elif line.startswith("%FLAG ANGLE_EQUIL_VALUE"):
                    # section which contains all of the angle equilibrium angles
                    # in "radians"
                    section_angle_equil_value = agphf.parse_section(
                        prmtop_in, numang)
                elif line.startswith("%FLAG DIHEDRAL_FORCE_CONSTANT"):
                    # section which lists the torsion force constants in kcal/mol
                    # for each unique torsion type
                    section_dihedral_force_constant = agphf.parse_section(
                        prmtop_in, nptra)
                elif line.startswith("%FLAG DIHEDRAL_PERIODICITY"):
                    # section which lists the periodicity n for each unique
                    # torsion type; only int
                    section_dihedral_periodicity = agphf.parse_section(
                        prmtop_in, nptra)
                elif line.startswith("%FLAG DIHEDRAL_PHASE"):
                    # section which lists the phase shift for each unique
                    # torsion type in "radians"
                    section_dihedral_phase = agphf.parse_section(
                        prmtop_in, nptra)
                elif line.startswith("%FLAG SCEE_SCALE_FACTOR"):
                    # section which lists the factor by which 1-4 electrostatic
                    # interactions are divided (i.e., the two atoms on either
                    # end of a torsion)
                    section_scee_scale_factor = agphf.parse_section(
                        prmtop_in, nptra)
                elif line.startswith("%FLAG SCNB_SCALE_FACTOR"):
                    # section which lists the factor by which 1-4 van der Waals
                    # interactions are divided (i.e., the two atoms on either
                    # end of a torsion)
                    section_scnb_scale_factor = agphf.parse_section(
                        prmtop_in, nptra)
                elif line.startswith("%FLAG SOLTY"):
                    # section which is unused
                    section_solty = agphf.parse_section(prmtop_in, natyp)
                elif line.startswith("%FLAG LENNARD_JONES_ACOEF"):
                    # section contains the LJ A-coefficients for all pairs of
                    # distinct LJ types
                    section_lennard_jones_acoef = agphf.parse_section(
                        prmtop_in,
                        ntypes * (ntypes + 1) / 2)
                elif line.startswith("%FLAG LENNARD_JONES_BCOEF"):
                    # section contains the LJ B-coefficients for all pairs of
                    # distinct LJ types
                    section_lennard_jones_bcoef = agphf.parse_section(
                        prmtop_in,
                        ntypes * (ntypes + 1) / 2)
                elif line.startswith("%FLAG BONDS_INC_HYDROGEN"):
                    # section which contains a list of every bond in the system
                    # in which at least one atom is Hydrogen
                    section_bonds_inc_hydrogen = agphf.parse_section(
                        prmtop_in, 3 * nbonh, chunksize=3, itype="int")
                elif line.startswith("%FLAG BONDS_WITHOUT_HYDROGEN"):
                    # section contains a list of every bond in the system in
                    # which neither atom is a Hydrogen
                    section_bonds_without_hydrogen = agphf.parse_section(
                        prmtop_in, 3 * nbona, chunksize=3, itype="int")
                elif line.startswith("%FLAG ANGLES_INC_HYDROGEN"):
                    # section contains a list of every angle in the system in
                    # which at least one atom is Hydrogen
                    section_angles_inc_hydrogen = agphf.parse_section(
                        prmtop_in, 4 * ntheth, chunksize=4, itype="int")
                elif line.startswith("%FLAG ANGLES_WITHOUT_HYDROGEN"):
                    # section which contains a list of every angle in the system
                    # in which no atom is Hydrogen
                    section_angles_without_hydrogen = agphf.parse_section(
                        prmtop_in, 4 * ntheta, chunksize=4, itype="int")
                elif line.startswith("%FLAG DIHEDRALS_INC_HYDROGEN"):
                    # section contains a list of every torsion in the system in
                    # which at least one atom is Hydrogen
                    section_dihedrals_inc_hydrogen = agphf.parse_section(
                        prmtop_in, 5 * nphih, chunksize=5, itype="int")
                elif line.startswith("%FLAG DIHEDRALS_WITHOUT_HYDROGEN"):
                    section_dihedrals_without_hydrogen = agphf.parse_section(
                        prmtop_in, 5 * nphia, chunksize=5, itype="int")
                elif line.startswith("%FLAG AMBER_ATOM_TYPE"):
                    section_amber_atom_type = agphf.parse_section(
                        prmtop_in, natom)
                elif line.startswith("%FLAG TREE_CHAIN_CLASSIFICATION"):
                    agphf.entry_not_read("TREE_CHAIN_CLASSIFICATION")
                elif line.startswith("%FLAG JOIN_ARRAY"):
                    agphf.entry_not_read("JOIN_ARRAY")
                elif line.startswith("%FLAG IROTAT"):
                    agphf.entry_not_read("IROTAT")
                elif line.startswith("%FLAG RADIUS_SET"):
                    agphf.entry_not_read("RADIUS_SET")
                elif line.startswith("%FLAG RADII"):
                    section_radii = agphf.parse_section(prmtop_in, natom)
                elif line.startswith("%FLAG SCREEN"):
                    agphf.entry_not_read("SCREEN")
                else:
                    pass

        # /// arrange data - force field section ///
        # /// atom-types
        #     (atoms with same epsilon and sigma (lj) have same type)

        # 1. get acoef/bcoef-ids for ii-interactions
        abcoef_ids = agphf.get_AB_ids(ntypes, section_nonbonded_parm_index)

        # 2. calculate sigma from coefs
        # dict to translate between internal and amber indices
        atm_key_old_new = {}

        for cidx, cid in enumerate(abcoef_ids):
            #cidx += 1
            cur_sig, cur_eps = agphf.sig_eps_from_AB(
                section_lennard_jones_acoef[cid],
                section_lennard_jones_bcoef[cid])

            # skip duplicates
            try:
                if self.atm_types[cidx]:
                    print("***Prmtop-Info: Skipping atom type {}".format(cid))
            except KeyError:
                self.atm_types[cidx] = mds.Atom(sigma=cur_sig,
                                                epsilon=cur_eps,
                                                energy_unit="kcal/mol")

            # add old key as key and new key as value
            atm_key_old_new[cidx + 1] = cidx

        # ityp:      atom type as number (= atom key)
        # imass:     mass of atom type
        # itypamb:   amber force field name of atom type
        # atypemass: dictionary with atom type masses and amber names assigned
        #            to atom-keys

        # type-id (key), mass, type-name
        atypemass = {}
        for ityp, imass, itypamb in zip(section_atom_type_index, section_mass,
                                        section_amber_atom_type):
            # iterate through names- and mass-list, overwrite duplicates
            # -> only single atom-types remain
            atypemass[ityp] = [imass, itypamb]

        for akey in atypemass:
            akey_new = atm_key_old_new[akey]  # translate old key
            self.atm_types[akey_new].weigh = atypemass[akey][0]
            self.atm_types[akey_new].sitnam = atypemass[akey][1]

        # /// bond-types
        bnd_key_old_new = {}
        for cbnd_id, (bnd_fconst, bnd_r0) in enumerate(
                zip(section_bond_force_constant, section_bond_equil_value)):
            self.bnd_types[cbnd_id] = mds.Bond(prm1=bnd_fconst,
                                               prm2=bnd_r0,
                                               energy_unit="kcal/mol")
            bnd_key_old_new[cbnd_id + 1] = cbnd_id

        # /// angle-types
        ang_key_old_new = {}
        for cang_id, (ang_fconst, ang_r0) in enumerate(
                zip(section_angle_force_constant, section_angle_equil_value)):
            self.ang_types[cang_id] = mds.Angle(prm1=ang_fconst,
                                                prm2=ang_r0,
                                                energy_unit="kcal/mol",
                                                angle_unit="rad")
            ang_key_old_new[cang_id + 1] = cang_id

        # /// dihedral- and improper-types
        #     assign dihedrals and impropers to "dih" and "imp"
        dih_imp_dict = agphf.unmask_imp(section_dihedrals_inc_hydrogen,
                                        section_dihedrals_without_hydrogen)

        # dicts where we can look up, which old dihedral-/improper-key points to the new key
        dih_old_new = {}
        imp_old_new = {}

        dih_cntr = 0  # dihedral key-counter, for consecutive numbering
        imp_cntr = 0  # improper key-counter, for consecutive numbering

        dih_key_old_new = {}
        #imp_key_old_new = {}
        for cdih_id, (dih_fconst, dih_prd, dih_phase) in enumerate(
                zip(section_dihedral_force_constant,
                    section_dihedral_periodicity, section_dihedral_phase)):
            cur_key = cdih_id + 1
            dih_prd = int(dih_prd)

            if dih_imp_dict[cur_key] == "dih":
                self.dih_types[dih_cntr] = mds.Dihedral(prm_k=dih_fconst,
                                                        prm_n=dih_prd,
                                                        prm_d=dih_phase,
                                                        energy_unit="kcal/mol",
                                                        angle_unit="rad")
                # pointer old-key -> new-key
                dih_old_new[cur_key] = dih_cntr
                dih_key_old_new[dih_cntr + 1] = dih_cntr
                dih_cntr += 1  # right enumeration for dih-key (starting at 1)

            elif dih_imp_dict[cur_key] == "imp":
                self.imp_types[imp_cntr] = mds.Improper(prm_k=dih_fconst,
                                                        prm_n=dih_prd,
                                                        prm_d=dih_phase,
                                                        energy_unit="kcal/mol",
                                                        angle_unit="rad")
                # check if prm_d and prm_n fulfill cvff convention (see lammps)
                agphf.check_cvff_compatibility(dih_phase, dih_prd)
                # pointer old-key -> new-key
                imp_old_new[cur_key] = imp_cntr
                #imp_key_old_new[imp_cntr+1] = imp_cntr
                imp_cntr += 1  # right enumeration for imp-key (starting at 1)
            else:
                raise RuntimeError(
                    "***Undefined dihedral! Something went totally wrong!")

        # /// arrange data - topology section ///
        # /// atoms
        atm_id_old_new = {}
        for iatmkey, (itype, ichge, isitnam) in enumerate(
                zip(section_atom_type_index, section_charge,
                    section_atom_name)):
            # amber ids are 1-based (fortran heritage); internal atom-ids start at 0
            self.atoms.append(
                mds.Atom(atm_id=iatmkey,
                         atm_key=atm_key_old_new[itype],
                         chge=ichge,
                         sitnam=isitnam,
                         grp_id=0))

            # save into the corresponding dictionary to translate amber-ids to internal ids
            #self.atm_idx_id[iatmkey] = iatmkey+1
            #self.atm_id_idx[iatmkey+1] = iatmkey
            atm_id_old_new[iatmkey + 1] = iatmkey

        # gather residue information
        for cur_atm_nr in range(nres):
            cur_res = section_residue_label[cur_atm_nr]  # residue name

            if cur_atm_nr == 0:  # first
                pptr = 0
            else:
                pptr = section_residue_pointer[
                    cur_atm_nr]  # present atom-pointer

            try:
                # atom-pointer after present atom-pointer
                iptr = section_residue_pointer[cur_atm_nr + 1]
            except (IndexError):
                iptr = None  # None, if no pointer after present pointer

            for iatm in self.atoms[pptr:iptr]:
                iatm.res = cur_res

        # /// bonds
        self.bonds = agphf.relocate_parsed(section_bonds_inc_hydrogen,
                                           self.bonds, "bonds",
                                           bnd_key_old_new, atm_id_old_new)

        self.bonds = agphf.relocate_parsed(section_bonds_without_hydrogen,
                                           self.bonds, "bonds",
                                           bnd_key_old_new, atm_id_old_new)

        # /// angles
        self.angles = agphf.relocate_parsed(section_angles_inc_hydrogen,
                                            self.angles, "angles",
                                            ang_key_old_new, atm_id_old_new)

        self.angles = agphf.relocate_parsed(section_angles_without_hydrogen,
                                            self.angles, "angles",
                                            ang_key_old_new, atm_id_old_new)

        # /// dihedrals
        self.dihedrals = agphf.relocate_parsed(section_dihedrals_inc_hydrogen,
                                               self.dihedrals, "dihedrals",
                                               dih_old_new, atm_id_old_new)

        self.dihedrals = agphf.relocate_parsed(
            section_dihedrals_without_hydrogen, self.dihedrals, "dihedrals",
            dih_old_new, atm_id_old_new)

        # /// impropers
        self.impropers = agphf.relocate_parsed(section_dihedrals_inc_hydrogen,
                                               self.impropers, "impropers",
                                               imp_old_new, atm_id_old_new)

        self.impropers = agphf.relocate_parsed(
            section_dihedrals_without_hydrogen, self.impropers, "impropers",
            imp_old_new, atm_id_old_new)

        # delete variables not needed (at the moment)
        del prmtop_version
        del prmtop_title
        del numextra

        # if no ncopy is specified
        try:
            del ncopy
        except UnboundLocalError:
            pass

        # delete unneeded variables (at least at the moment there is no need)
        try:
            del section_number_excluded_atoms
            del section_scee_scale_factor
            del section_scnb_scale_factor
            del section_solty
            del section_radii
        except UnboundLocalError:
            pass

        # convert amber charges to normal ones
        print("***Prmtop-Info: Converting prmtop-charges.")
        self._convert_prmtop_charges()

        # convert impropers to match cvff (i.e. cos of prm_d)
        print("***Prmtop-Info: Converting impropers to cvff-style.")
        self._amb_imp2cvff()

        # form molecules by given bonds
        self.fetch_molecules_by_bonds()