Python where_in примеры, autoparse.find.where_in Python примеры использования

Пример #1

0

Показать файл

Файл: rates.py Проект: Auto-Mech/autoio

def dos_rovib(ke_ped_out):
    """ Parse the rovibrational density of states of the bimolecular
        fragments from the microcanonical pedoutput file, tabulated
        against the energy.

        units: kcal/mol, and for dos mol/kcal

        :param ke_ped_out: string of lines of microcanonical rates output file
        :type ke_ped_out: str

        :return dos_df: dataframe(columns:prod1, prod2, rows:energy [kcal/mol])
                        with the density of states
        :rtype dos_df: dataframe(float)
    """
    all_lines = ke_ped_out.splitlines()

    # The numeric table starts two lines below the section title; the line
    # in between is the header that carries the column labels.
    title_idx = apf.where_in(
        'Bimolecular fragments density of states:', all_lines)[0]
    first_row = title_idx + 2
    header_tokens = all_lines[first_row - 1].strip().split()
    _labels = header_tokens[2:]

    # Everything from the first data row to the end of the input is parsed
    # as floats; transposing puts one table column per row.
    rows = [row.strip().split() for row in all_lines[first_row:]]
    columns = numpy.array(rows, dtype=float).T

    # First column is the energy grid, the remaining ones are the DOS values
    energy = columns[0]
    dos_all = columns[1:].T

    frame = pd.DataFrame(dos_all, index=energy, columns=_labels)

    # drop_duplicates works on rows, so transpose to remove columns that
    # carry identical data and transpose back
    return frame.T.drop_duplicates().T

Пример #2

0

Показать файл

def get_hot_names(input_str):
    """ Collect the names of the hot species declared in the `HotEnergies`
        section of a MESS input string.

        The count is read from the second token of the first line containing
        'HotEnergies'; the name is the first token of each of the following
        lines. Entries stay None if fewer lines than declared are available.

        :param input_str: string of lines of MESS input file
        :type input_str: str
        :return hotspecies: names of the hot species
        :rtype: tuple(str)
    """

    input_lines = input_str.splitlines()

    # Header line of the section: 'HotEnergies <count>'
    header_idx = apf.where_in('HotEnergies', input_lines)[0]
    header_tokens = input_lines[header_idx].strip().split()
    num_hotsp = int(header_tokens[1])

    # Lines right after the header, one per hot species
    start = header_idx + 1
    species_lines = input_lines[start:start + num_hotsp]

    names = [None] * num_hotsp
    for pos, entry in enumerate(species_lines):
        names[pos] = entry.strip().split()[0]

    return tuple(names)

Пример #3

0

Показать файл

def get_ped(pedoutput_str, pedspecies, energy_dct):
    """ Read `PEDOutput` file and extract product energy distribution at T,P.
        Energy in output set with respect to the ground energy of the products

        :param pedoutput_str: string of lines of ped_output file
        :type pedoutput_str: str
        :param pedspecies: (reactant, product) label pairs of interest; each
            pair is joined as 'REAC->PROD' to locate its data in the output
        :type pedspecies: list(list(str))
        :param energy_dct: energies of the species/barriers in the PES;
            it is looked up with the product label of every pair
        :type energy_dct: dct(label: float)
        :return ped_df_dct: dct(dataframe(columns:P, rows:T))
                            with the Series of energy distrib; the dictionary
                            is pre-seeded with the keys of energy_dct (value
                            None) and gains one 'REAC->PROD' key per pair
        :rtype ped_df_dct: {'REAC->PROD': dataframe(series(float))}
    """

    ped_lines = pedoutput_str.splitlines()

    # locate the lines carrying the pressure and temperature values
    pressure_i = apf.where_in('pressure', ped_lines)
    temperature_i = apf.where_in('temperature', ped_lines)

    # get T, P list: the value is whatever follows the '=' sign
    pressure_lst = np.array(
        [ped_lines[idx].strip().split('=')[1] for idx in pressure_i],
        dtype=float)
    temperature_lst = np.array(
        [ped_lines[idx].strip().split('=')[1] for idx in temperature_i],
        dtype=float)

    # empty lines terminate each data block; they do not depend on the
    # species, so they are located once
    empty_i = apf.where_is('', ped_lines)

    # NOTE(review): the keys seeded here come from energy_dct while the
    # entries filled below are keyed by 'REAC->PROD' labels, so the seeded
    # keys stay None unless a label coincides - confirm this is intended.
    ped_df_dct = dict.fromkeys(energy_dct.keys())

    for species in pedspecies:
        # allocate empty dataframe; object dtype so each cell can hold a Series
        ped_df = pd.DataFrame(index=list(set(temperature_lst)),
                              columns=list(set(pressure_lst)),
                              dtype=object)
        _, prods = species
        label = '->'.join(species)

        # 0th of the energy: products energy
        ene0 = -energy_dct[prods]

        # data start on the line after each header containing the label and
        # stop at the first empty line that follows
        species_i = apf.where_in(label, ped_lines) + 1
        final_i = np.array(
            [empty_i[start < empty_i][0] for start in species_i])

        # column of this label in the data rows: the first header token is
        # skipped, so index 0 of the remaining tokens lines up with the
        # energy column of the parsed table
        header_tokens = ped_lines[species_i[0] - 1].strip().split()[1:]
        column_i = apf.where_is(label, header_tokens)[0]

        # extract the data; the i-th block is paired with the i-th
        # pressure/temperature found in the file
        for i, (i_in, i_fin) in enumerate(zip(species_i, final_i)):
            # if labels don't match: go to next loop
            if label not in ped_lines[i_in - 1]:
                continue

            pressure, temp = pressure_lst[i], temperature_lst[i]

            en_prob_all = np.array(
                [line.strip().split() for line in ped_lines[i_in:i_fin]],
                dtype=float).T
            energy = en_prob_all[0] + ene0
            probability = en_prob_all[column_i]
            prob_en = pd.Series(probability, index=energy, dtype=float)

            negative = prob_en[prob_en < 0]
            if len(negative) > 0:
                # cut the distribution at the first negative probability
                # NOTE(review): slicing a float-indexed Series with [] is
                # label based and includes the end label, so the first
                # negative value itself is retained - confirm intended.
                prob_en = prob_en[:negative.index[0]]

            prob_en = prob_en.sort_index()
            # normalize with the trapezoidal-rule integral
            norm_factor = np.trapz(prob_en.values, x=prob_en.index)

            # Single-step scalar access: the former chained form
            # `ped_df.loc[temp][pressure] = ...` writes into a temporary row
            # and is lost when pandas copy-on-write is active.
            ped_df.at[temp, pressure] = prob_en / norm_factor

        ped_df_dct[label] = ped_df

    return ped_df_dct

Пример #4

0

Показать файл

Файл: statmodels.py Проект: Auto-Mech/mechanalyzer

def get_dof_info(block, ask_for_ts=False):
    """ Build a table with the vibrational and rotational degrees of freedom
        and the molecular weight of each species block.

        :param block: bimol species of which you want the dofs
        :type block: list(str1, str2)
        :param ask_for_ts: build the dof info also for the ts
        :type ask_for_ts: bool
        :return dof_info: dataframe with vibrat/rot degrees of freedom
            and molecular weight
        :rtype: dataframe(index=species, columns=['vib dof', 'rot dof', 'mw'])
    """
    # one row per species, plus one for the TS when requested
    n_rows = 2 + int(ask_for_ts)
    info_array = np.zeros((n_rows, 3))
    keys = []
    atoms_ts = 0

    for row, species_str in enumerate(block):
        lines = species_str.splitlines()
        name_idx = find.where_in('Species', lines)[0]
        hind_idxs = find.where_in('Hindered', lines)
        geom_idx = find.where_in('Geometry', lines)[0]

        # the atom count is the second token of the 'Geometry' line
        n_atoms = int(lines[geom_idx].strip().split()[1])
        atoms_ts += n_atoms

        # the species name is the second token of the 'Species' line
        keys.append(lines[name_idx].strip().split()[1])

        try:
            freq_idx = find.where_in('Frequencies', lines)[0]
            n_freqs = int(lines[freq_idx].strip().split()[1])
            vib_dof = n_freqs + len(hind_idxs)
            # 3N-6 internal dofs -> 3 rotations, otherwise 2;
            # this avoids analyzing the geometry
            rot_dof = 3 if 3 * n_atoms - vib_dof == 6 else 2
        except IndexError:
            # if 1 atom only: no 'Frequencies', set to 0
            vib_dof = 0
            rot_dof = 0

        info_array[row, 0] = vib_dof
        info_array[row, 1] = rot_dof

        # MW from type of atoms: the element symbol is the first token of
        # each of the n_atoms lines following the 'Geometry' line
        first_atom = geom_idx + 1
        geom_lines = lines[first_atom:first_atom + n_atoms]
        symbols = np.array([entry.strip().split()[0] for entry in geom_lines])
        masses = np.array([MW_dct_elements[sym] for sym in symbols],
                          dtype=float)
        info_array[row, 2] = np.sum(masses)

    # if ask for ts: assume first 2 blocks are 2 reactants of bimol reaction
    # and derive the DOFs of the TS
    if ask_for_ts:
        keys.append('TS')

        # assume there are no linear TSs
        ts_vib_dof = 3 * atoms_ts - 7
        ts_mw = info_array[0, 2] + info_array[1, 2]
        info_array[2, :] = [ts_vib_dof, 3, ts_mw]

    return pd.DataFrame(info_array,
                        index=keys,
                        columns=['vib dof', 'rot dof', 'mw'])

Пример #5

0

Показать файл

def _clean_branching_row(branch_ratio_arr, sp_i):
    """ Validate, filter and renormalize one row of branching ratios.

        :param branch_ratio_arr: branching ratios read from one line
        :type branch_ratio_arr: numpy.ndarray(float)
        :param sp_i: position(s) of the hot species among the columns
        :type sp_i: numpy.ndarray(int)
        :return: renormalized ratios, or None if the row must be discarded
        :rtype: numpy.ndarray(float) or None
    """
    if sp_i.size > 0:
        # a negative branching of the hot species itself invalidates the row
        if branch_ratio_arr[sp_i] < 0:
            return None
        # clamp values > 1 to 1 so that they survive the filter below
        if branch_ratio_arr[sp_i] > 1:
            branch_ratio_arr[sp_i] = 1

    # zero out every value outside (1e-5, 1]
    br_filter = np.array(
        [abs(x * int(1e-5 < x <= 1)) for x in branch_ratio_arr], dtype=float)

    # if all invalid: do not save
    if all(br_filter == 0):
        return None

    return br_filter / np.sum(br_filter)


def extract_hot_branching(hotenergies_str, hotspecies_lst, species_lst, temps,
                          pressures):
    """ Extract the hot branching fractions of each hot species
        :param hotenergies_str: string of mess log file
        :type hotenergies_str: str
        :param hotspecies_lst: list of hot species
        :type hotspecies_lst: list
        :param species_lst: list of all species on the PES
        :type species_lst: list
        :param temps: temperatures, used as row labels of the output tables
        :type temps: list
        :param pressures: pressures, used as column labels of the output
            tables
        :type pressures: list
        :return hoten_dct: hot branching fractions for hotspecies; each cell
            of the T x P table holds a dataframe with the hot energies as
            rows and species_lst as columns
        :rtype hoten_dct: dct{hotspecies: df[P][T]:df[allspecies][energies]}
    """
    lines = hotenergies_str.splitlines()

    # preallocation: one T x P table per hot species
    hoten_dct = {
        s: pd.DataFrame(index=temps, columns=pressures)
        for s in hotspecies_lst
    }

    # 1. locate the P,T lines and the start/end of each branching block
    pt_i_array = apf.where_in(['Pressure', 'Temperature'], lines)
    hot_i_array = apf.where_in(['Hot distribution branching ratios'], lines)
    end_hot_i_array = apf.where_in(['prompt', 'isomerization', 'dissociation'],
                                   lines)

    # 2. process each 'Hot distribution branching ratios' block
    for i, hot_i in enumerate(hot_i_array):

        # data rows start two lines below the title; the line in between is
        # the header with the species the fractions refer to
        lines_block = lines[hot_i + 2:end_hot_i_array[i]]

        # tokens 2 and 6 of the i-th P,T line are the two values
        _press, _temp = [
            float(var) for var in lines[pt_i_array[i]].strip().split()[2:7:4]
        ]

        species_bf_i = lines[hot_i + 1].strip().split()[3:]

        # for each hotspecies: read BFs
        for hotspecies in hotspecies_lst:
            hot_e_lvl, branch_ratio = [], []
            sp_i = apf.where_in(hotspecies, species_bf_i)

            for line in lines_block:
                line = line.strip()
                if not line.startswith(hotspecies):
                    continue

                fields = line.split()
                hot_e = float(fields[1])
                # keep only the first occurrence of each energy level
                if hot_e in hot_e_lvl:
                    continue

                br_renorm = _clean_branching_row(
                    np.array(fields[2:], dtype=float), sp_i)
                if br_renorm is None:
                    continue

                branch_ratio.append(br_renorm)
                hot_e_lvl.append(hot_e)

            hot_e_lvl = np.array(hot_e_lvl)
            branch_ratio = np.array(branch_ratio)

            # 3. allocate in the dataframe: species missing from the block
            # header keep the initial value 0
            bf_hotspecies = pd.DataFrame(0,
                                         index=hot_e_lvl,
                                         columns=species_lst)
            bf_hotspecies[species_bf_i] = branch_ratio

            # Single-step scalar access: the former chained form
            # `df[_press][_temp] = ...` writes into a temporary column and is
            # lost when pandas copy-on-write is active.
            hoten_dct[hotspecies].at[_temp, _press] = bf_hotspecies

    return hoten_dct

Python where_in примеры использования