Example #1
    def generate_group_additivity_values(self, training_set, kunits, method='Arrhenius'):
        """
        Generate the group additivity values using the given `training_set`,
        a list of 2-tuples of the form ``(template, kinetics)``. You must also
        specify the `kunits` for the family and the `method` to use when
        generating the group values. Returns ``True`` if the group values have
        changed significantly since the last time they were fitted, or ``False``
        otherwise.
        """
        warnings.warn("Group additivity is no longer supported and may be"
                      " removed in version 2.3.", DeprecationWarning)
        # keep track of previous values so we can detect if they change
        old_entries = dict()
        for label, entry in self.entries.items():
            if entry.data is not None:
                old_entries[label] = entry.data

        # Determine a complete list of the entries in the database, sorted as in the tree
        group_entries = self.top[:]
        for entry in self.top:
            group_entries.extend(self.descendants(entry))

        # Determine a unique list of the groups we will be able to fit parameters for
        group_list = []
        for template, kinetics in training_set:
            for group in template:
                if group not in self.top:
                    group_list.append(group)
                    group_list.extend(self.ancestors(group)[:-1])
        group_list = list(set(group_list))
        group_list.sort(key=lambda x: x.index)

        if method == 'KineticsData':
            # Fit a discrete set of k(T) data points by training against k(T) data

            Tdata = np.array([300, 400, 500, 600, 800, 1000, 1500, 2000])

            # Initialize dictionaries of fitted group values and uncertainties
            group_values = {}
            group_uncertainties = {}
            group_counts = {}
            group_comments = {}
            for entry in group_entries:
                group_values[entry] = []
                group_uncertainties[entry] = []
                group_counts[entry] = []
                group_comments[entry] = set()

            # Generate least-squares matrix and vector
            A = []
            b = []

            kdata = []
            for template, kinetics in training_set:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.get_rate_coefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.get_rate_coefficient(T, 0) for T in Tdata]
                else:
                    raise TypeError('Unexpected kinetics model of type {0} for template '
                                    '{1}.'.format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = get_all_combinations(combinations)
                # Add a row to the matrix for each combination
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in group_list]
                    Arow.append(1)
                    brow = [math.log10(k) for k in kd]
                    A.append(Arow)
                    b.append(brow)

                    for group in groups:
                        group_comments[group].add("{0!s}".format(template))

            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; '
                                'no valid data found.'.format(self.label))
                return
            A = np.array(A)
            b = np.array(b)
            kdata = np.array(kdata)

            x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)
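            # x has one row of log10(k) group contributions per entry in group_list,
            # plus a final row for the family's top node; its columns correspond to Tdata.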

            for t, T in enumerate(Tdata):

                # Determine error in each group (on log scale)
                stdev = np.zeros(len(group_list) + 1, np.float64)
                count = np.zeros(len(group_list) + 1, int)

                for index in range(len(training_set)):
                    template, kinetics = training_set[index]
                    kd = math.log10(kdata[index, t])
                    km = x[-1, t] + sum([x[group_list.index(group), t] for group in template if group in group_list])
                    variance = (km - kd) ** 2
                    for group in template:
                        groups = [group]
                        groups.extend(self.ancestors(group))
                        for g in groups:
                            if g not in self.top:
                                ind = group_list.index(g)
                                stdev[ind] += variance
                                count[ind] += 1
                    stdev[-1] += variance
                    count[-1] += 1
                stdev = np.sqrt(stdev / (count - 1))
                import scipy.stats
                ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

                # Update dictionaries of fitted group values and uncertainties
                for entry in group_entries:
                    if entry == self.top[0]:
                        group_values[entry].append(10 ** x[-1, t])
                        group_uncertainties[entry].append(10 ** ci[-1])
                        group_counts[entry].append(count[-1])
                    elif entry in group_list:
                        index = group_list.index(entry)
                        group_values[entry].append(10 ** x[index, t])
                        group_uncertainties[entry].append(10 ** ci[index])
                        group_counts[entry].append(count[index])
                    else:
                        group_values[entry] = None
                        group_uncertainties[entry] = None
                        group_counts[entry] = None

            # Store the fitted group values and uncertainties on the associated entries
            for entry in group_entries:
                if group_values[entry] is not None:
                    entry.data = KineticsData(Tdata=(Tdata, "K"), kdata=(group_values[entry], kunits))
                    if not any(np.isnan(np.array(group_uncertainties[entry]))):
                        entry.data.kdata.uncertainties = np.array(group_uncertainties[entry])
                        entry.data.kdata.uncertainty_type = '*|/'
                    entry.short_desc = "Group additive kinetics."
                    entry.long_desc = "Fitted to {0} rates.\n".format(group_counts[entry])
                    entry.long_desc += "\n".join(group_comments[entry])
                else:
                    entry.data = None

        elif method == 'Arrhenius':
            # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

            Tdata = np.array([300, 400, 500, 600, 800, 1000, 1500, 2000])
            logTdata = np.log(Tdata)
            Tinvdata = 1000. / (constants.R * Tdata)
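            # Linearized Arrhenius form: ln k = ln A + n*ln(T) - Ea/(R*T).
            # With Tinv = 1000/(R*T), the coefficient fitted against -Tinv is Ea in kJ/mol.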

            A = []
            b = []

            kdata = []
            for template, kinetics in training_set:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.get_rate_coefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.get_rate_coefficient(T, 0) for T in Tdata]
                else:
                    raise TypeError('Unexpected kinetics model of type {0} for template '
                                    '{1}.'.format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = get_all_combinations(combinations)

                # Add a row to the matrix for each combination at each temperature
                for t, T in enumerate(Tdata):
                    logT = logTdata[t]
                    Tinv = Tinvdata[t]
                    for groups in combinations:
                        Arow = []
                        for group in group_list:
                            if group in groups:
                                Arow.extend([1, logT, -Tinv])
                            else:
                                Arow.extend([0, 0, 0])
                        Arow.extend([1, logT, -Tinv])
                        brow = math.log(kd[t])
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; '
                                'no valid data found.'.format(self.label))
                return
            A = np.array(A)
            b = np.array(b)
            kdata = np.array(kdata)

            x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)
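            # x holds three fitted parameters (ln A, n, Ea in kJ/mol) per group in
            # group_list order, followed by three entries for the family's top node.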

            # Store the results
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-3]), kunits),
                n=x[-2],
                Ea=(x[-1], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(group_list):
                group.data = Arrhenius(
                    A=(math.exp(x[3 * i]), kunits),
                    n=x[3 * i + 1],
                    Ea=(x[3 * i + 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        elif method == 'Arrhenius2':
            # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

            A = []
            b = []

            for template, kinetics in training_set:

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = get_all_combinations(combinations)

                # Add a row to the matrix for each parameter
                if (isinstance(kinetics, Arrhenius) or
                        (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0)):
                    for groups in combinations:
                        Arow = []
                        for group in group_list:
                            if group in groups:
                                Arow.append(1)
                            else:
                                Arow.append(0)
                        Arow.append(1)
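                        # Targets for this row: ln A, the temperature exponent n, and
                        # Ea converted from J/mol (SI) to kJ/mol.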
                        Ea = kinetics.E0.value_si if isinstance(kinetics, ArrheniusEP) else kinetics.Ea.value_si
                        brow = [math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000.]
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; '
                                'no valid data found.'.format(self.label))
                return
            A = np.array(A)
            b = np.array(b)

            x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)

            # Store the results
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-1, 0]), kunits),
                n=x[-1, 1],
                Ea=(x[-1, 2], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(group_list):
                group.data = Arrhenius(
                    A=(math.exp(x[i, 0]), kunits),
                    n=x[i, 1],
                    Ea=(x[i, 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        # Add a note to the history of each changed item indicating that we've generated new group values
        changed = False
        for label, entry in self.entries.items():
            if entry.data is not None and label in old_entries:
                if (isinstance(entry.data, KineticsData) and
                        isinstance(old_entries[label], KineticsData) and
                        len(entry.data.kdata.value_si) == len(old_entries[label].kdata.value_si) and
                        all(abs(entry.data.kdata.value_si / old_entries[label].kdata.value_si - 1) < 0.01)):
                    # New group values within 1% of old
                    pass
                elif (isinstance(entry.data, Arrhenius) and
                        isinstance(old_entries[label], Arrhenius) and
                        abs(entry.data.A.value_si / old_entries[label].A.value_si - 1) < 0.01 and
                        abs(entry.data.n.value_si / old_entries[label].n.value_si - 1) < 0.01 and
                        abs(entry.data.Ea.value_si / old_entries[label].Ea.value_si - 1) < 0.01 and
                        abs(entry.data.T0.value_si / old_entries[label].T0.value_si - 1) < 0.01):
                    # New group values within 1% of old
                    pass
                else:
                    changed = True
                    break
            else:
                changed = True
                break

        return changed
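
The 'KineticsData' branch above reduces to an ordinary linear least-squares problem: each row of A flags which groups appear in a training template (plus a trailing constant column for the family's top node), and the targets are log10 k at each temperature. The following is a minimal, self-contained sketch of that idea, using made-up numbers in place of RMG-Py objects:

import numpy as np

# Three hypothetical training templates over two groups (G1, G2); the last
# column is the constant contribution of the family's top node.
A = np.array([
    [1, 0, 1],   # template containing G1 only
    [0, 1, 1],   # template containing G2 only
    [1, 1, 1],   # template containing both groups
], dtype=float)

# log10(k) targets at two temperatures for each training reaction (made-up values).
b = np.log10(np.array([
    [1.0e3, 5.0e4],
    [2.0e3, 8.0e4],
    [3.0e3, 2.0e5],
]))

# One row of log10(k) contributions per group plus the top node, one column per
# temperature -- the same layout the method above stores into group_values.
x, residues, rank, s = np.linalg.lstsq(A, b, rcond=None)
print(x)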
Example #2
File: rules.py  Project: zhedian/RMG-Py
    def fill_rules_by_averaging_up(self,
                                   root_template,
                                   already_done,
                                   verbose=False):
        """
        Fill in gaps in the kinetics rate rules by averaging child nodes.
        If `verbose` is True, the exact sources of the kinetics are saved in the
        kinetics comments (warning: this uses a lot of memory because the
        comments become very long).
        """
        root_label = ';'.join([g.label for g in root_template])

        if root_label in already_done:
            return already_done[root_label]

        # Generate the distance 1 pairings which must be averaged for this root template.
        # The distance 1 template is created by taking the parent node from one or more trees
        # and creating the combinations with children from a single remaining tree.
        # i.e. for some node (A,B), we want to fetch all combinations for the pairing of (A, B's children)
        # and (A's children, B). For node (A,B,C), we would retrieve all combinations of (A, B, C's children),
        # (A, B's children, C), etc.
        # If a particular node has no children, it is skipped in the children expansion altogether.

        children_list = []
        distance_list = []
        for i, parent in enumerate(root_template):
            # Start with the root template, and replace the ith member with its children
            if parent.children:
                children_set = [[group] for group in root_template]
                children_set[i] = parent.children
                children_list.extend(get_all_combinations(children_set))
                distance_list.extend(
                    [k.nodal_distance for k in parent.children])
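                # Each child yields exactly one combination here (the other slots hold a
                # single group), so children_list and distance_list stay index-aligned.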

        if distance_list != []:  # average the minimum distance neighbors
            min_dist = min(distance_list)
            close_children_list = [
                children_list[i] for i in range(len(children_list))
                if distance_list[i] == min_dist
            ]
        else:
            close_children_list = []

        kinetics_list = []
        for template in children_list:
            label = ';'.join([g.label for g in template])

            if label in already_done:
                kinetics = already_done[label]
            else:
                kinetics = self.fill_rules_by_averaging_up(
                    template, already_done, verbose)

            if template in close_children_list and kinetics is not None:
                kinetics_list.append([kinetics, template])

        # See if we already have a rate rule for this exact template instead
        # and return it now that we have finished searching its children
        entry = self.get_rule(root_template)

        if entry is not None and entry.rank > 0:
            # We already have a rate rule for this exact template
            # If the entry has rank of zero, then we have so little faith
            # in it that we'd rather use an averaged value if possible
            # Since this entry does not have a rank of zero, we keep its
            # value
            already_done[root_label] = entry.data
            return entry.data

        if len(kinetics_list) > 0:

            if len(kinetics_list) > 1:
                # We found more than one result, so average them together
                kinetics = self._get_average_kinetics(
                    [k for k, t in kinetics_list])

                if verbose:
                    kinetics.comment = 'Average of [{0}]'.format(' + '.join(
                        k.comment if k.comment != '' else ';'.join(g.label
                                                                   for g in t)
                        for k, t in kinetics_list))

                else:
                    kinetics.comment = 'Average of [{0}]'.format(' + '.join(
                        ';'.join(g.label for g in t)
                        for k, t in kinetics_list))

            else:
                k, t = kinetics_list[0]
                kinetics = deepcopy(k)
                # Even though we are using just a single set of kinetics, it's still considered
                # an average.  It just happens that the other distance 1 children had no data.

                if verbose:
                    kinetics.comment = 'Average of [{0}]'.format(
                        k.comment if k.comment != '' else ';'.join(g.label
                                                                   for g in t))
                else:
                    kinetics.comment = 'Average of [{0}]'.format(';'.join(
                        g.label for g in t))

            entry = Entry(
                index=0,
                label=root_label,
                item=root_template,
                data=kinetics,
                rank=11,  # Indicates this is an averaged estimate
            )
            self.entries[entry.label] = [entry]
            already_done[root_label] = entry.data
            return entry.data

        already_done[root_label] = None
        return None
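
When more than one distance-1 child has data, the method above delegates the actual averaging to self._get_average_kinetics. A common way to average Arrhenius rules is to average ln A, n, and Ea of the children; the standalone sketch below illustrates that idea under this assumption and is not RMG-Py's implementation:

import math

def average_arrhenius(params):
    """Average (A, n, Ea) tuples by averaging ln(A), n, and Ea arithmetically."""
    log_a = sum(math.log(a) for a, _, _ in params) / len(params)
    n_avg = sum(n for _, n, _ in params) / len(params)
    ea_avg = sum(ea for _, _, ea in params) / len(params)
    return math.exp(log_a), n_avg, ea_avg

# Two hypothetical child rules: (A in s^-1, n, Ea in kJ/mol)
children = [(1.0e13, 0.0, 100.0), (4.0e12, 0.5, 120.0)]
print(average_arrhenius(children))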