def generate_group_additivity_values(self, training_set, kunits, method='Arrhenius'):
    """
    Generate the group additivity values using the given `training_set`,
    a list of 2-tuples of the form ``(template, kinetics)``. You must also
    specify the `kunits` for the family and the `method` to use when
    generating the group values.

    The `method` selects the fitting strategy:

    * ``'KineticsData'`` -- least-squares fit of log10(k) at a fixed grid of
      temperatures; fitted entries receive `KineticsData`, plus
      multiplicative uncertainties when all of them are finite. Entries
      that could not be fitted have their data reset to ``None``.
    * ``'Arrhenius'`` -- least-squares fit of (ln A, n, Ea) for every group
      against ln(k) sampled on the same temperature grid.
    * ``'Arrhenius2'`` -- least-squares fit directly against the
      (ln A, n, Ea) values of the training kinetics. Only `Arrhenius`
      entries, and `ArrheniusEP` entries whose ``alpha`` is zero,
      contribute rows.

    Any other `method` value performs no fit; only the final comparison
    against the previously stored data is carried out.

    Returns ``True`` if the group values have changed significantly since
    the last time they were fitted, or ``False`` otherwise. An entry counts
    as changed when it has no data, had no data before, or differs from its
    previous data by 1% or more in any compared quantity. ``False`` is
    also returned (after logging a warning) when the training set produces
    no rows to fit.

    Raises `TypeError` if a training kinetics object is not one of
    `Arrhenius`, `KineticsData` or `ArrheniusEP` (methods ``'KineticsData'``
    and ``'Arrhenius'`` only).
    """
    warnings.warn("Group additivity is no longer supported and may be"
                  " removed in version 2.3.", DeprecationWarning)

    def ancestor_combinations(template):
        # Every combination of each group (or one of its ancestors) in the template
        lineages = []
        for group in template:
            lineage = [group]
            lineage.extend(self.ancestors(group))
            lineages.append(lineage)
        return get_all_combinations(lineages)

    def sample_rates(kinetics, template, temperatures):
        # Evaluate k(T) of one training entry at each of the given temperatures
        if isinstance(kinetics, (Arrhenius, KineticsData)):
            return [kinetics.get_rate_coefficient(T) for T in temperatures]
        if isinstance(kinetics, ArrheniusEP):
            # The second argument (0) is passed through unchanged from the
            # original code -- presumably the reaction enthalpy; TODO confirm
            return [kinetics.get_rate_coefficient(T, 0) for T in temperatures]
        raise TypeError('Unexpected kinetics model of type {0} for template '
                        '{1}.'.format(kinetics.__class__, template))

    # keep track of previous values so we can detect if they change
    old_entries = dict()
    for label, entry in self.entries.items():
        if entry.data is not None:
            old_entries[label] = entry.data

    # Determine a complete list of the entries in the database, sorted as in the tree
    group_entries = self.top[:]
    for entry in self.top:
        group_entries.extend(self.descendants(entry))

    # Determine a unique list of the groups we will be able to fit parameters for
    group_list = []
    for template, kinetics in training_set:
        for group in template:
            if group not in self.top:
                group_list.append(group)
                # The last ancestor is dropped; the top-level contribution is
                # carried by the extra trailing column of the fit instead
                group_list.extend(self.ancestors(group)[:-1])
    group_list = list(set(group_list))
    group_list.sort(key=lambda x: x.index)

    if method == 'KineticsData':
        # Fit a discrete set of k(T) data points by training against k(T) data

        Tdata = np.array([300, 400, 500, 600, 800, 1000, 1500, 2000])

        # Initialize dictionaries of fitted group values and uncertainties
        group_values = {}
        group_uncertainties = {}
        group_counts = {}
        group_comments = {}
        for entry in group_entries:
            group_values[entry] = []
            group_uncertainties[entry] = []
            group_counts[entry] = []
            group_comments[entry] = set()

        # Generate least-squares matrix and vector
        A = []
        b = []

        kdata = []
        for template, kinetics in training_set:
            kd = sample_rates(kinetics, template, Tdata)
            kdata.append(kd)

            # Add a row to the matrix for each combination
            for groups in ancestor_combinations(template):
                Arow = [1 if group in groups else 0 for group in group_list]
                Arow.append(1)  # trailing column: top-level (family-wide) term
                brow = [math.log10(k) for k in kd]
                A.append(Arow)
                b.append(brow)

                for group in groups:
                    group_comments[group].add("{0!s}".format(template))

        if not A:
            logging.warning('Unable to fit kinetics groups for family "{0}"; '
                            'no valid data found.'.format(self.label))
            # Nothing was refitted, so report "unchanged" as a real boolean
            return False
        A = np.array(A)
        b = np.array(b)
        kdata = np.array(kdata)

        x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)

        # Imported once here rather than on every temperature iteration
        import scipy.stats

        for t, T in enumerate(Tdata):

            # Determine error in each group (on log scale)
            stdev = np.zeros(len(group_list) + 1, np.float64)
            # Builtin int: the np.int alias was removed in NumPy 1.24
            count = np.zeros(len(group_list) + 1, int)

            for index, (template, kinetics) in enumerate(training_set):
                kd = math.log10(kdata[index, t])
                km = x[-1, t] + sum([x[group_list.index(group), t]
                                     for group in template if group in group_list])
                variance = (km - kd) ** 2
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    for g in groups:
                        if g not in self.top:
                            ind = group_list.index(g)
                            stdev[ind] += variance
                            count[ind] += 1
                stdev[-1] += variance
                count[-1] += 1
            # Groups seen only once give count - 1 == 0 here; the resulting
            # non-finite values are filtered by the isnan check further down
            stdev = np.sqrt(stdev / (count - 1))
            ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

            # Update dictionaries of fitted group values and uncertainties
            for entry in group_entries:
                if entry == self.top[0]:
                    group_values[entry].append(10 ** x[-1, t])
                    group_uncertainties[entry].append(10 ** ci[-1])
                    group_counts[entry].append(count[-1])
                elif entry in group_list:
                    index = group_list.index(entry)
                    group_values[entry].append(10 ** x[index, t])
                    group_uncertainties[entry].append(10 ** ci[index])
                    group_counts[entry].append(count[index])
                else:
                    group_values[entry] = None
                    group_uncertainties[entry] = None
                    group_counts[entry] = None

        # Store the fitted group values and uncertainties on the associated entries
        for entry in group_entries:
            if group_values[entry] is not None:
                entry.data = KineticsData(Tdata=(Tdata, "K"), kdata=(group_values[entry], kunits))
                if not any(np.isnan(np.array(group_uncertainties[entry]))):
                    entry.data.kdata.uncertainties = np.array(group_uncertainties[entry])
                    entry.data.kdata.uncertainty_type = '*|/'
                entry.short_desc = "Group additive kinetics."
                entry.long_desc = "Fitted to {0} rates.\n".format(group_counts[entry])
                entry.long_desc += "\n".join(group_comments[entry])
            else:
                entry.data = None

    elif method == 'Arrhenius':
        # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

        Tdata = np.array([300, 400, 500, 600, 800, 1000, 1500, 2000])
        logTdata = np.log(Tdata)
        # Scaled by 1000 so that the fitted Ea comes out in kJ/mol (see below)
        Tinvdata = 1000. / (constants.R * Tdata)

        A = []
        b = []

        for template, kinetics in training_set:
            kd = sample_rates(kinetics, template, Tdata)
            combinations = ancestor_combinations(template)

            # Add a row to the matrix for each combination at each temperature
            for t, T in enumerate(Tdata):
                logT = logTdata[t]
                Tinv = Tinvdata[t]
                for groups in combinations:
                    Arow = []
                    for group in group_list:
                        if group in groups:
                            Arow.extend([1, logT, -Tinv])
                        else:
                            Arow.extend([0, 0, 0])
                    Arow.extend([1, logT, -Tinv])  # top-level term
                    brow = math.log(kd[t])
                    A.append(Arow)
                    b.append(brow)

        if not A:
            logging.warning('Unable to fit kinetics groups for family "{0}"; '
                            'no valid data found.'.format(self.label))
            return False
        A = np.array(A)
        b = np.array(b)

        x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)

        # Store the results: three consecutive unknowns (ln A, n, Ea) per
        # group, with the top-level triple last
        self.top[0].data = Arrhenius(
            A=(math.exp(x[-3]), kunits),
            n=x[-2],
            Ea=(x[-1], "kJ/mol"),
            T0=(1, "K"),
        )
        for i, group in enumerate(group_list):
            group.data = Arrhenius(
                A=(math.exp(x[3 * i]), kunits),
                n=x[3 * i + 1],
                Ea=(x[3 * i + 2], "kJ/mol"),
                T0=(1, "K"),
            )

    elif method == 'Arrhenius2':
        # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

        A = []
        b = []

        for template, kinetics in training_set:
            combinations = ancestor_combinations(template)

            # Add a row to the matrix for each parameter
            if (isinstance(kinetics, Arrhenius) or
                    (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0)):
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in group_list]
                    Arow.append(1)  # top-level term
                    Ea = kinetics.E0.value_si if isinstance(kinetics, ArrheniusEP) else kinetics.Ea.value_si
                    brow = [math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000.]
                    A.append(Arow)
                    b.append(brow)

        if not A:
            logging.warning('Unable to fit kinetics groups for family "{0}"; '
                            'no valid data found.'.format(self.label))
            return False
        A = np.array(A)
        b = np.array(b)

        x, residues, rank, s = np.linalg.lstsq(A, b, rcond=RCOND)

        # Store the results: one row per group (top-level last), with
        # columns (ln A, n, Ea)
        self.top[0].data = Arrhenius(
            A=(math.exp(x[-1, 0]), kunits),
            n=x[-1, 1],
            Ea=(x[-1, 2], "kJ/mol"),
            T0=(1, "K"),
        )
        for i, group in enumerate(group_list):
            group.data = Arrhenius(
                A=(math.exp(x[i, 0]), kunits),
                n=x[i, 1],
                Ea=(x[i, 2], "kJ/mol"),
                T0=(1, "K"),
            )

    # Decide whether any entry differs from what was stored before the fit
    changed = False
    for label, entry in self.entries.items():
        if entry.data is None or label not in old_entries:
            # Data disappeared, or there was nothing to compare against
            changed = True
            break
        previous = old_entries[label]
        if (isinstance(entry.data, KineticsData) and
                isinstance(previous, KineticsData) and
                len(entry.data.kdata.value_si) == len(previous.kdata.value_si) and
                all(abs(entry.data.kdata.value_si / previous.kdata.value_si - 1) < 0.01)):
            # New group values within 1% of old
            continue
        if (isinstance(entry.data, Arrhenius) and
                isinstance(previous, Arrhenius) and
                abs(entry.data.A.value_si / previous.A.value_si - 1) < 0.01 and
                abs(entry.data.n.value_si / previous.n.value_si - 1) < 0.01 and
                abs(entry.data.Ea.value_si / previous.Ea.value_si - 1) < 0.01 and
                abs(entry.data.T0.value_si / previous.T0.value_si - 1) < 0.01):
            # New group values within 1% of old
            continue
        changed = True
        break

    return changed
def fill_rules_by_averaging_up(self, root_template, already_done, verbose=False):
    """
    Fill in gaps in the kinetics rate rules by averaging child nodes.

    The method recurses through the child templates of `root_template`
    and returns the kinetics to use for it: the data of an existing rule
    with positive rank, otherwise an average over the nearest child
    templates that have kinetics, otherwise ``None``.

    `already_done` maps ';'-joined template labels to results computed so
    far (possibly ``None``); it is consulted as a cache and updated in
    place. When an averaged estimate is created, a new rank-11 entry is
    also stored in ``self.entries`` under the template label.

    If `verbose` is set to True, then exact sources of kinetics are saved
    in the kinetics comments (warning: this uses up a lot of memory due to
    the extensively long comments).
    """
    root_label = ';'.join(node.label for node in root_template)

    if root_label in already_done:
        return already_done[root_label]

    # Build the "distance 1" templates: keep every member of the root
    # template fixed except one, which is replaced by each of its children
    # in turn. For a node (A, B) this gives (A, B's children) and
    # (A's children, B); for (A, B, C) likewise with one tree expanded at a
    # time. Members without children contribute nothing.
    child_templates = []
    child_distances = []
    for position, parent in enumerate(root_template):
        if not parent.children:
            continue
        slots = [[member] for member in root_template]
        slots[position] = parent.children
        child_templates.extend(get_all_combinations(slots))
        child_distances.extend(child.nodal_distance for child in parent.children)

    # Only the child templates at the minimum nodal distance are averaged
    nearest_templates = []
    if child_distances:
        smallest = min(child_distances)
        for position, candidate in enumerate(child_templates):
            if child_distances[position] == smallest:
                nearest_templates.append(candidate)

    # Every child template is visited (so its own result gets filled in),
    # but only the nearest ones with kinetics feed the average
    contributions = []
    for template in child_templates:
        child_label = ';'.join(node.label for node in template)
        if child_label in already_done:
            child_kinetics = already_done[child_label]
        else:
            child_kinetics = self.fill_rules_by_averaging_up(
                template, already_done, verbose)
        if template in nearest_templates and child_kinetics is not None:
            contributions.append((child_kinetics, template))

    # Now that the children have been searched, prefer an existing rate
    # rule for this exact template. A rank of zero means we have so little
    # faith in the rule that an averaged value is used instead if possible.
    existing = self.get_rule(root_template)
    if existing is not None and existing.rank > 0:
        already_done[root_label] = existing.data
        return existing.data

    if not contributions:
        already_done[root_label] = None
        return None

    def describe(source_kinetics, source_template):
        # Text naming one contributor inside the 'Average of [...]' comment
        if verbose and source_kinetics.comment != '':
            return source_kinetics.comment
        return ';'.join(node.label for node in source_template)

    if len(contributions) > 1:
        # Several results were found: average them together
        kinetics = self._get_average_kinetics(
            [source for source, _ in contributions])
        summary = ' + '.join(
            describe(source, source_template)
            for source, source_template in contributions)
    else:
        # A single set of kinetics is still labelled an average; the other
        # distance 1 children simply had no data
        only_kinetics, only_template = contributions[0]
        kinetics = deepcopy(only_kinetics)
        summary = describe(only_kinetics, only_template)
    kinetics.comment = 'Average of [{0}]'.format(summary)

    entry = Entry(
        index=0,
        label=root_label,
        item=root_template,
        data=kinetics,
        rank=11,  # Indicates this is an averaged estimate
    )
    self.entries[entry.label] = [entry]
    already_done[root_label] = entry.data
    return entry.data