def fillRulesByAveragingUp(self, rootTemplate, alreadyDone, verbose=False):
    """
    Fill in gaps in the kinetics rate rules by averaging child nodes.

    `rootTemplate` is a sequence of group nodes (one per tree) and
    `alreadyDone` is a dict, keyed by the ';'-joined group labels, that
    memoises the kinetics (or ``None``) already determined for a template.
    The children are always visited first, so that their own rules get
    filled in, and only then is an existing rule for `rootTemplate` itself
    consulted.

    If `verbose` is true, the comment of the averaged kinetics quotes the
    comments of the kinetics it was built from instead of only their
    template labels (warning: this uses up a lot of memory because the
    comments become extremely long).

    Returns the kinetics stored for `rootTemplate`, or ``None`` if neither
    a trusted rule nor any child kinetics could be found.
    """
    def joinLabels(groups):
        # Canonical text form of a template, also used as the cache key
        return ';'.join([g.label for g in groups])

    def describeSource(source, groups):
        # How one averaged contribution is quoted in the final comment
        if verbose and source.comment != '':
            return source.comment
        return joinLabels(groups)

    rootLabel = joinLabels(rootTemplate)
    if rootLabel in alreadyDone:
        return alreadyDone[rootLabel]

    # Build the "distance 1" templates: keep every member of the root
    # template except one, and replace that one by each of its children.
    # For (A, B) this yields (A's children, B) and (A, B's children).
    # Members without children contribute nothing to the expansion.
    childTemplates = []
    for position, node in enumerate(rootTemplate):
        if not node.children:
            continue
        slots = [[member] for member in rootTemplate]
        slots[position] = node.children
        childTemplates.extend(getAllCombinations(slots))

    # Resolve (recursively, through the cache) the kinetics of every child
    found = []
    for childTemplate in childTemplates:
        childLabel = joinLabels(childTemplate)
        if childLabel in alreadyDone:
            childKinetics = alreadyDone[childLabel]
        else:
            childKinetics = self.fillRulesByAveragingUp(childTemplate, alreadyDone, verbose)
        if childKinetics is not None:
            found.append([childKinetics, childTemplate])

    # Now that the children are done, prefer an existing rule for this
    # exact template. A rank of zero means we trust the rule so little
    # that an averaged value is preferable, so such rules are ignored.
    existing = self.getRule(rootTemplate)
    if existing is not None and existing.rank > 0:
        alreadyDone[rootLabel] = existing.data
        return existing.data

    if len(found) == 0:
        alreadyDone[rootLabel] = None
        return None

    if len(found) > 1:
        averaged = self.__getAverageKinetics([source for source, groups in found])
    else:
        # A single contribution is still reported as an "average"; the
        # other distance 1 children simply had no data. Copy it so the
        # child's own comment is left untouched.
        averaged = deepcopy(found[0][0])
    averaged.comment = 'Average of [{0}]'.format(
        ' + '.join(describeSource(source, groups) for source, groups in found))

    entry = Entry(
        index = 0,
        label = rootLabel,
        item = rootTemplate,
        data = averaged,
        rank = 10, # Indicates this is an averaged estimate
    )
    self.entries[entry.label] = [entry]
    alreadyDone[rootLabel] = entry.data
    return entry.data
def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
    """
    Fill in gaps in the kinetics rate rules by averaging child nodes.

    `rootTemplate` is a sequence of group nodes and `alreadyDone` is a dict,
    keyed by the ';'-joined group labels, caching the kinetics (or ``None``)
    already determined for a template. An existing rule with a positive
    rank is returned as-is; otherwise the kinetics of all child templates
    are averaged and stored as a new rank-10 entry.

    Returns the kinetics for `rootTemplate`, or ``None`` if nothing was found.
    """
    rootLabel = ';'.join(node.label for node in rootTemplate)
    if rootLabel in alreadyDone:
        return alreadyDone[rootLabel]

    # An exact rule wins unless its rank is zero: a zero rank means we have
    # so little faith in it that an averaged value is preferred if possible.
    existing = self.getRule(rootTemplate)
    if existing is not None and existing.rank > 0:
        alreadyDone[rootLabel] = existing.data
        return existing.data

    # For each position offer the node's children, or the node itself when
    # it is a leaf, then take every combination across positions.
    choices = [
        list(node.children) if len(node.children) > 0 else [node]
        for node in rootTemplate
    ]

    found = []
    for childTemplate in getAllCombinations(choices):
        if childTemplate == rootTemplate:
            # All members were leaves; do not recurse onto ourselves
            continue
        childLabel = ';'.join(node.label for node in childTemplate)
        if childLabel in alreadyDone:
            childKinetics = alreadyDone[childLabel]
        else:
            childKinetics = self.fillRulesByAveragingUp(childTemplate, alreadyDone)
        if childKinetics is not None:
            found.append([childKinetics, childTemplate])

    if len(found) == 0:
        alreadyDone[rootLabel] = None
        return None

    # We found one or more results! Let's average them together
    averaged = self.__getAverageKinetics([source for source, groups in found])
    origins = [
        source.comment if source.comment != '' else ';'.join(node.label for node in groups)
        for source, groups in found
    ]
    if len(found) > 1:
        averaged.comment += 'Average of ({0})'.format(' + '.join(origins))
    else:
        # A single source is quoted directly, without the "Average of" wrapper
        averaged.comment += origins[0]

    entry = Entry(
        index=0,
        label=rootLabel,
        item=rootTemplate,
        data=averaged,
        rank=10,  # Indicates this is an averaged estimate
    )
    self.entries[entry.label] = [entry]
    alreadyDone[rootLabel] = entry.data
    return entry.data
def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
    """
    Fill in gaps in the kinetics rate rules by averaging child nodes.

    `alreadyDone` maps the ';'-joined labels of a template to the kinetics
    (or ``None``) previously determined for it, and is updated in place.
    If `rootTemplate` has a rule of positive rank, that rule's data is
    returned. Otherwise the kinetics of every child template are averaged,
    annotated with an "Average of (...)" comment and stored as a new
    rank-10 entry.

    Returns the resulting kinetics, or ``None`` if no data is available.
    """
    rootLabel = ';'.join(node.label for node in rootTemplate)
    if rootLabel in alreadyDone:
        return alreadyDone[rootLabel]

    # Keep an existing rule unless its rank is zero, in which case we have
    # so little faith in it that an averaged value is preferred if possible.
    knownRule = self.getRule(rootTemplate)
    if knownRule is not None and knownRule.rank > 0:
        alreadyDone[rootLabel] = knownRule.data
        return knownRule.data

    # Recursively descend to the child nodes: each position is expanded to
    # its children, while leaf nodes stand in for themselves.
    options = []
    for node in rootTemplate:
        if len(node.children) > 0:
            options.append(list(node.children))
        else:
            options.append([node])

    sources = []
    for candidate in getAllCombinations(options):
        if candidate == rootTemplate:
            continue
        candidateLabel = ';'.join(node.label for node in candidate)
        if candidateLabel in alreadyDone:
            candidateKinetics = alreadyDone[candidateLabel]
        else:
            candidateKinetics = self.fillRulesByAveragingUp(candidate, alreadyDone)
        if candidateKinetics is not None:
            sources.append([candidateKinetics, candidate])

    if len(sources) == 0:
        alreadyDone[rootLabel] = None
        return None

    # We found one or more results! Let's average them together
    averaged = self.__getAverageKinetics([source for source, groups in sources])
    pieces = []
    for source, groups in sources:
        if source.comment != '':
            pieces.append(source.comment)
        else:
            # No comment to quote, so fall back to the comma-joined labels
            pieces.append(','.join(node.label for node in groups))
    averaged.comment += 'Average of ({0}). '.format(' + '.join(pieces))

    entry = Entry(
        index = 0,
        label = rootLabel,
        item = rootTemplate,
        data = averaged,
        rank = 10, # Indicates this is an averaged estimate
    )
    self.entries[entry.label] = [entry]
    alreadyDone[rootLabel] = entry.data
    return entry.data
def fillRulesByAveragingUp(self, rootTemplate, alreadyDone, verbose=False):
    """
    Fill in gaps in the kinetics rate rules by averaging child nodes.

    `rootTemplate` is a sequence of group nodes (one per tree) and
    `alreadyDone` is a dict, keyed by the ';'-joined group labels, that
    memoises the kinetics (or ``None``) already determined for a template.
    Every distance 1 child template is visited recursively so that its own
    rule gets filled in, but only the children whose substituted node has
    the smallest ``nodalDistance`` contribute to the average.

    If `verbose` is set to True, then exact sources of kinetics are saved in
    the kinetics comments (warning: this uses up a lot of memory due to the
    extensively long comments).

    Returns the kinetics stored for `rootTemplate`, or ``None`` if neither
    a trusted rule nor any usable child kinetics could be found.
    """
    rootLabel = ';'.join([g.label for g in rootTemplate])

    if rootLabel in alreadyDone:
        return alreadyDone[rootLabel]

    # Generate the distance 1 pairings which must be averaged for this root template.
    # The distance 1 template is created by taking the parent node from one or more trees
    # and creating the combinations with children from a single remaining tree.
    # i.e. for some node (A,B), we want to fetch all combinations for the pairing of (A,B's children) and
    # (A's children, B). For node (A,B,C), we would retrieve all combinations of (A,B,C's children)
    # (A,B's children,C) etc...
    # If a particular node has no children, it is skipped from the children expansion altogether.
    childrenList = []
    distanceList = []
    for i, parent in enumerate(rootTemplate):
        # Start with the root template, and replace the ith member with its children
        if parent.children:
            childrenSet = [[group] for group in rootTemplate]
            childrenSet[i] = parent.children
            childrenList.extend(getAllCombinations(childrenSet))
            # One distance per generated template. As in the original code,
            # this assumes getAllCombinations yields one template per child,
            # in the order of parent.children.
            distanceList.extend([k.nodalDistance for k in parent.children])

    # Only the minimum-distance neighbours are averaged
    minDist = min(distanceList) if distanceList else None

    kineticsList = []
    # Pair each child template with its distance directly. This avoids the
    # Python-2-only xrange() index loop and the per-template list membership
    # test against a separately built list of "close" children.
    for template, distance in zip(childrenList, distanceList):
        label = ';'.join([g.label for g in template])

        # Recurse for every child, even the distant ones, so that their
        # own rules are filled in as a side effect.
        if label in alreadyDone:
            kinetics = alreadyDone[label]
        else:
            kinetics = self.fillRulesByAveragingUp(template, alreadyDone, verbose)

        if distance == minDist and kinetics is not None:
            kineticsList.append([kinetics, template])

    # See if we already have a rate rule for this exact template instead
    # and return it now that we have finished searching its children
    entry = self.getRule(rootTemplate)

    if entry is not None and entry.rank > 0:
        # We already have a rate rule for this exact template
        # If the entry has rank of zero, then we have so little faith
        # in it that we'd rather use an averaged value if possible
        # Since this entry does not have a rank of zero, we keep its
        # value
        alreadyDone[rootLabel] = entry.data
        return entry.data

    if len(kineticsList) > 0:

        if len(kineticsList) > 1:
            # We found more than one result! Let's average them together
            kinetics = self.__getAverageKinetics([k for k, t in kineticsList])

            if verbose:
                kinetics.comment = 'Average of [{0}]'.format(
                    ' + '.join(k.comment if k.comment != '' else ';'.join(g.label for g in t) for k, t in kineticsList))
            else:
                kinetics.comment = 'Average of [{0}]'.format(
                    ' + '.join(';'.join(g.label for g in t) for k, t in kineticsList))

        else:
            k, t = kineticsList[0]
            # Copy so that rewriting the comment does not alter the child's kinetics
            kinetics = deepcopy(k)
            # Even though we are using just a single set of kinetics, it's still considered
            # an average.  It just happens that the other distance 1 children had no data.

            if verbose:
                kinetics.comment = 'Average of [{0}]'.format(k.comment if k.comment != '' else ';'.join(g.label for g in t))
            else:
                kinetics.comment = 'Average of [{0}]'.format(';'.join(g.label for g in t))

        entry = Entry(
            index = 0,
            label = rootLabel,
            item = rootTemplate,
            data = kinetics,
            rank = 11, # Indicates this is an averaged estimate
        )
        self.entries[entry.label] = [entry]
        alreadyDone[rootLabel] = entry.data
        return entry.data

    alreadyDone[rootLabel] = None
    return None
def generateGroupAdditivityValues(self, trainingSet, kunits, method='Arrhenius'):
    """
    Generate the group additivity values using the given `trainingSet`,
    a list of 2-tuples of the form ``(template, kinetics)``. You must also
    specify the `kunits` for the family and the `method` to use when
    generating the group values:

    * ``'KineticsData'`` fits, by least squares on log10 k(T), one value per
      group at each of a fixed set of temperatures, and stores
      ``KineticsData`` (with 95% confidence factors when they are finite)
      on every fitted entry; entries that could not be fitted get
      ``data = None``.
    * ``'Arrhenius'`` fits (ln A, n, Ea) per group by least squares on
      ln k(T) sampled at the same temperatures.
    * ``'Arrhenius2'`` fits (ln A, n, Ea) per group directly against the
      parameters of the training kinetics; ``ArrheniusEP`` kinetics with a
      nonzero ``alpha`` are skipped.

    Any other `method` leaves the entries untouched.

    Returns ``True`` if the group values have changed significantly since
    the last time they were fitted, or ``False`` otherwise. If the training
    set produces no rows to fit, a warning is logged and ``None`` is
    returned without modifying any entry.

    Raises ``Exception`` if a training kinetics object is of an unexpected
    type (``'KineticsData'`` and ``'Arrhenius'`` methods only).
    """

    def _rateSamples(template, kinetics, Tdata):
        # Evaluate k(T) of one training kinetics at every temperature in Tdata
        if isinstance(kinetics, (Arrhenius, KineticsData)):
            return [kinetics.getRateCoefficient(T) for T in Tdata]
        elif isinstance(kinetics, ArrheniusEP):
            return [kinetics.getRateCoefficient(T, 0) for T in Tdata]
        raise Exception('Unexpected kinetics model of type {0} for template {1}.'.format(kinetics.__class__, template))

    def _ancestorCombinations(template):
        # Every combination of each group and its ancestors with each other
        return getAllCombinations([[group] + list(self.ancestors(group)) for group in template])

    def _warnNoData():
        logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(self.label))

    def _withinOnePercent(new, old):
        # Relative change below 1%. A plain ratio raises ZeroDivisionError
        # when the previous value is exactly zero (e.g. n = 0), so in that
        # case the value only counts as unchanged if it is still zero.
        if old == 0:
            return new == 0
        return abs(new / old - 1) < 0.01

    # keep track of previous values so we can detect if they change
    old_entries = dict()
    for label, entry in self.entries.items():
        if entry.data is not None:
            old_entries[label] = entry.data

    # Determine a complete list of the entries in the database, sorted as in the tree
    groupEntries = self.top[:]
    for entry in self.top:
        groupEntries.extend(self.descendants(entry))

    # Determine a unique list of the groups we will be able to fit parameters for
    groupList = []
    for template, kinetics in trainingSet:
        for group in template:
            if group not in self.top:
                groupList.append(group)
                # All ancestors except the last one returned by ancestors()
                groupList.extend(self.ancestors(group)[:-1])
    groupList = list(set(groupList))
    groupList.sort(key=lambda x: x.index)

    if method == 'KineticsData':
        # Fit a discrete set of k(T) data points by training against k(T) data

        Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])

        # Initialize dictionaries of fitted group values and uncertainties
        groupValues = {}
        groupUncertainties = {}
        groupCounts = {}
        groupComments = {}
        for entry in groupEntries:
            groupValues[entry] = []
            groupUncertainties[entry] = []
            groupCounts[entry] = []
            groupComments[entry] = set()

        # Generate least-squares matrix and vector
        A = []
        b = []

        kdata = []
        for template, kinetics in trainingSet:
            kd = _rateSamples(template, kinetics, Tdata)
            kdata.append(kd)

            # Add a row to the matrix for each combination
            for groups in _ancestorCombinations(template):
                Arow = [1 if group in groups else 0 for group in groupList]
                # Final column is the contribution shared by every reaction
                Arow.append(1)
                brow = [math.log10(k) for k in kd]
                A.append(Arow)
                b.append(brow)

                for group in groups:
                    groupComments[group].add("{0!s}".format(template))

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)
        kdata = numpy.array(kdata)

        # One column of x per temperature; its last row is the shared term
        x = numpy.linalg.lstsq(A, b)[0]

        # Imported once, and only when there is data to analyse
        import scipy.stats

        for t, T in enumerate(Tdata):

            # Determine error in each group (on log scale)
            stdev = numpy.zeros(len(groupList) + 1, numpy.float64)
            # Builtin int: the numpy.int alias was removed from recent NumPy
            count = numpy.zeros(len(groupList) + 1, int)

            for index in range(len(trainingSet)):
                template, kinetics = trainingSet[index]
                kd = math.log10(kdata[index, t])
                km = x[-1, t] + sum([x[groupList.index(group), t] for group in template if group in groupList])
                variance = (km - kd)**2
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    for g in groups:
                        if g not in self.top:
                            ind = groupList.index(g)
                            stdev[ind] += variance
                            count[ind] += 1
                stdev[-1] += variance
                count[-1] += 1
            # A count of 1 gives a non-finite value here; NaN uncertainties
            # are detected and left unset when the entries are stored below.
            stdev = numpy.sqrt(stdev / (count - 1))
            ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

            # Update dictionaries of fitted group values and uncertainties
            for entry in groupEntries:
                if entry == self.top[0]:
                    groupValues[entry].append(10**x[-1, t])
                    groupUncertainties[entry].append(10**ci[-1])
                    groupCounts[entry].append(count[-1])
                elif entry in groupList:
                    index = groupList.index(entry)
                    groupValues[entry].append(10**x[index, t])
                    groupUncertainties[entry].append(10**ci[index])
                    groupCounts[entry].append(count[index])
                else:
                    # Not fitted: mark so that its data is cleared below
                    groupValues[entry] = None
                    groupUncertainties[entry] = None
                    groupCounts[entry] = None

        # Store the fitted group values and uncertainties on the associated entries
        for entry in groupEntries:
            if groupValues[entry] is not None:
                entry.data = KineticsData(Tdata=(Tdata, "K"), kdata=(groupValues[entry], kunits))
                if not any(numpy.isnan(numpy.array(groupUncertainties[entry]))):
                    entry.data.kdata.uncertainties = numpy.array(groupUncertainties[entry])
                    entry.data.kdata.uncertaintyType = '*|/'
                entry.shortDesc = "Group additive kinetics."
                entry.longDesc = "Fitted to {0} rates.\n".format(groupCounts[entry])
                entry.longDesc += "\n".join(groupComments[entry])
            else:
                entry.data = None

    elif method == 'Arrhenius':
        # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

        Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])
        logTdata = numpy.log(Tdata)
        # NOTE(review): presumably constants.R is in J/(mol*K), making the
        # fitted Ea coefficient kJ/mol as labelled below -- confirm.
        Tinvdata = 1000. / (constants.R * Tdata)

        A = []
        b = []

        for template, kinetics in trainingSet:
            kd = _rateSamples(template, kinetics, Tdata)
            combinations = _ancestorCombinations(template)

            # Add a row to the matrix for each combination at each temperature
            for t, T in enumerate(Tdata):
                logT = logTdata[t]
                Tinv = Tinvdata[t]
                for groups in combinations:
                    Arow = []
                    for group in groupList:
                        if group in groups:
                            Arow.extend([1, logT, -Tinv])
                        else:
                            Arow.extend([0, 0, 0])
                    # Final three columns are the shared (ln A, n, Ea) term
                    Arow.extend([1, logT, -Tinv])
                    brow = math.log(kd[t])
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)

        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        self.top[0].data = Arrhenius(
            A = (math.exp(x[-3]), kunits),
            n = x[-2],
            Ea = (x[-1], "kJ/mol"),
            T0 = (1, "K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[3 * i]), kunits),
                n = x[3 * i + 1],
                Ea = (x[3 * i + 2], "kJ/mol"),
                T0 = (1, "K"),
            )

    elif method == 'Arrhenius2':
        # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

        A = []
        b = []

        for template, kinetics in trainingSet:
            combinations = _ancestorCombinations(template)

            # Add a row to the matrix for each parameter
            if isinstance(kinetics, Arrhenius) or (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0):
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)
                    Ea = kinetics.E0.value_si if isinstance(kinetics, ArrheniusEP) else kinetics.Ea.value_si
                    brow = [math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000.]
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)

        # Columns of x are (ln A, n, Ea); its last row is the shared term
        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        self.top[0].data = Arrhenius(
            A = (math.exp(x[-1, 0]), kunits),
            n = x[-1, 1],
            Ea = (x[-1, 2], "kJ/mol"),
            T0 = (1, "K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[i, 0]), kunits),
                n = x[i, 1],
                Ea = (x[i, 2], "kJ/mol"),
                T0 = (1, "K"),
            )

    # Report a change as soon as one entry differs from its previous data by
    # 1% or more. NOTE(review): an entry without data, or without previous
    # data, also counts as changed (behaviour kept from the original code).
    for label, entry in self.entries.items():
        new = entry.data
        if new is None or label not in old_entries:
            return True
        old = old_entries[label]
        if (isinstance(new, KineticsData) and
                isinstance(old, KineticsData) and
                len(new.kdata.value_si) == len(old.kdata.value_si) and
                all(abs(new.kdata.value_si / old.kdata.value_si - 1) < 0.01)):
            # New group values within 1% of old.
            continue
        if (isinstance(new, Arrhenius) and
                isinstance(old, Arrhenius) and
                _withinOnePercent(new.A.value_si, old.A.value_si) and
                _withinOnePercent(new.n.value_si, old.n.value_si) and
                _withinOnePercent(new.Ea.value_si, old.Ea.value_si) and
                _withinOnePercent(new.T0.value_si, old.T0.value_si)):
            # New group values within 1% of old.
            continue
        return True

    return False
def generateGroupAdditivityValues(self, trainingSet, kunits, method='Arrhenius'):
    """
    Generate the group additivity values using the given `trainingSet`,
    a list of 2-tuples of the form ``(template, kinetics)``. You must also
    specify the `kunits` for the family and the `method` to use when
    generating the group values:

    * ``'KineticsData'`` fits, by least squares on log10 k(T), one value per
      group at each of a fixed set of temperatures, and stores
      ``KineticsData`` (with 95% confidence factors when they are finite)
      on every fitted entry; entries that could not be fitted get
      ``data = None``.
    * ``'Arrhenius'`` fits (ln A, n, Ea) per group by least squares on
      ln k(T) sampled at the same temperatures.
    * ``'Arrhenius2'`` fits (ln A, n, Ea) per group directly against the
      parameters of the training kinetics; ``ArrheniusEP`` kinetics with a
      nonzero ``alpha`` are skipped.

    Any other `method` leaves the entries untouched.

    Returns ``True`` if the group values have changed significantly since
    the last time they were fitted, or ``False`` otherwise. If the training
    set produces no rows to fit, a warning is logged and ``None`` is
    returned without modifying any entry.

    Raises ``Exception`` if a training kinetics object is of an unexpected
    type (``'KineticsData'`` and ``'Arrhenius'`` methods only).
    """

    def _rateSamples(template, kinetics, Tdata):
        # Evaluate k(T) of one training kinetics at every temperature in Tdata
        if isinstance(kinetics, (Arrhenius, KineticsData)):
            return [kinetics.getRateCoefficient(T) for T in Tdata]
        elif isinstance(kinetics, ArrheniusEP):
            return [kinetics.getRateCoefficient(T, 0) for T in Tdata]
        raise Exception('Unexpected kinetics model of type {0} for template {1}.'.format(kinetics.__class__, template))

    def _ancestorCombinations(template):
        # Every combination of each group and its ancestors with each other
        return getAllCombinations([[group] + list(self.ancestors(group)) for group in template])

    def _warnNoData():
        logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(self.label))

    def _withinOnePercent(new, old):
        # Relative change below 1%. A plain ratio raises ZeroDivisionError
        # when the previous value is exactly zero (e.g. n = 0), so in that
        # case the value only counts as unchanged if it is still zero.
        if old == 0:
            return new == 0
        return abs(new / old - 1) < 0.01

    # keep track of previous values so we can detect if they change
    old_entries = dict()
    for label, entry in self.entries.items():
        if entry.data is not None:
            old_entries[label] = entry.data

    # Determine a complete list of the entries in the database, sorted as in the tree
    groupEntries = self.top[:]
    for entry in self.top:
        groupEntries.extend(self.descendants(entry))

    # Determine a unique list of the groups we will be able to fit parameters for
    groupList = []
    for template, kinetics in trainingSet:
        for group in template:
            if group not in self.top:
                groupList.append(group)
                # All ancestors except the last one returned by ancestors()
                groupList.extend(self.ancestors(group)[:-1])
    groupList = list(set(groupList))
    groupList.sort(key=lambda x: x.index)

    if method == 'KineticsData':
        # Fit a discrete set of k(T) data points by training against k(T) data

        Tdata = numpy.array([300,400,500,600,800,1000,1500,2000])

        # Initialize dictionaries of fitted group values and uncertainties
        groupValues = {}
        groupUncertainties = {}
        groupCounts = {}
        groupComments = {}
        for entry in groupEntries:
            groupValues[entry] = []
            groupUncertainties[entry] = []
            groupCounts[entry] = []
            groupComments[entry] = set()

        # Generate least-squares matrix and vector
        A = []
        b = []

        kdata = []
        for template, kinetics in trainingSet:
            kd = _rateSamples(template, kinetics, Tdata)
            kdata.append(kd)

            # Add a row to the matrix for each combination
            for groups in _ancestorCombinations(template):
                Arow = [1 if group in groups else 0 for group in groupList]
                # Final column is the contribution shared by every reaction
                Arow.append(1)
                brow = [math.log10(k) for k in kd]
                A.append(Arow)
                b.append(brow)

                for group in groups:
                    groupComments[group].add("{0!s}".format(template))

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)
        kdata = numpy.array(kdata)

        # One column of x per temperature; its last row is the shared term
        x = numpy.linalg.lstsq(A, b)[0]

        # Imported once, and only when there is data to analyse
        import scipy.stats

        for t, T in enumerate(Tdata):

            # Determine error in each group (on log scale)
            stdev = numpy.zeros(len(groupList)+1, numpy.float64)
            # Builtin int: the numpy.int alias was removed from recent NumPy
            count = numpy.zeros(len(groupList)+1, int)

            for index in range(len(trainingSet)):
                template, kinetics = trainingSet[index]
                kd = math.log10(kdata[index,t])
                km = x[-1,t] + sum([x[groupList.index(group),t] for group in template if group in groupList])
                variance = (km - kd)**2
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    for g in groups:
                        if g not in self.top:
                            ind = groupList.index(g)
                            stdev[ind] += variance
                            count[ind] += 1
                stdev[-1] += variance
                count[-1] += 1
            # A count of 1 gives a non-finite value here; NaN uncertainties
            # are detected and left unset when the entries are stored below.
            stdev = numpy.sqrt(stdev / (count - 1))
            ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

            # Update dictionaries of fitted group values and uncertainties
            for entry in groupEntries:
                if entry == self.top[0]:
                    groupValues[entry].append(10**x[-1,t])
                    groupUncertainties[entry].append(10**ci[-1])
                    groupCounts[entry].append(count[-1])
                elif entry in groupList:
                    index = groupList.index(entry)
                    groupValues[entry].append(10**x[index,t])
                    groupUncertainties[entry].append(10**ci[index])
                    groupCounts[entry].append(count[index])
                else:
                    # Not fitted: mark so that its data is cleared below
                    groupValues[entry] = None
                    groupUncertainties[entry] = None
                    groupCounts[entry] = None

        # Store the fitted group values and uncertainties on the associated entries
        for entry in groupEntries:
            if groupValues[entry] is not None:
                entry.data = KineticsData(Tdata=(Tdata,"K"), kdata=(groupValues[entry],kunits))
                if not any(numpy.isnan(numpy.array(groupUncertainties[entry]))):
                    entry.data.kdata.uncertainties = numpy.array(groupUncertainties[entry])
                    entry.data.kdata.uncertaintyType = '*|/'
                entry.shortDesc = "Group additive kinetics."
                entry.longDesc = "Fitted to {0} rates.\n".format(groupCounts[entry])
                entry.longDesc += "\n".join(groupComments[entry])
            else:
                entry.data = None

    elif method == 'Arrhenius':
        # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

        Tdata = numpy.array([300,400,500,600,800,1000,1500,2000])
        logTdata = numpy.log(Tdata)
        # NOTE(review): presumably constants.R is in J/(mol*K), making the
        # fitted Ea coefficient kJ/mol as labelled below -- confirm.
        Tinvdata = 1000. / (constants.R * Tdata)

        A = []
        b = []

        for template, kinetics in trainingSet:
            kd = _rateSamples(template, kinetics, Tdata)
            combinations = _ancestorCombinations(template)

            # Add a row to the matrix for each combination at each temperature
            for t, T in enumerate(Tdata):
                logT = logTdata[t]
                Tinv = Tinvdata[t]
                for groups in combinations:
                    Arow = []
                    for group in groupList:
                        if group in groups:
                            Arow.extend([1,logT,-Tinv])
                        else:
                            Arow.extend([0,0,0])
                    # Final three columns are the shared (ln A, n, Ea) term
                    Arow.extend([1,logT,-Tinv])
                    brow = math.log(kd[t])
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)

        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        self.top[0].data = Arrhenius(
            A = (math.exp(x[-3]),kunits),
            n = x[-2],
            Ea = (x[-1],"kJ/mol"),
            T0 = (1,"K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[3*i]),kunits),
                n = x[3*i+1],
                Ea = (x[3*i+2],"kJ/mol"),
                T0 = (1,"K"),
            )

    elif method == 'Arrhenius2':
        # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

        A = []
        b = []

        for template, kinetics in trainingSet:
            combinations = _ancestorCombinations(template)

            # Add a row to the matrix for each parameter
            if isinstance(kinetics, Arrhenius) or (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0):
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)
                    Ea = kinetics.E0.value_si if isinstance(kinetics, ArrheniusEP) else kinetics.Ea.value_si
                    brow = [math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000.]
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            _warnNoData()
            return
        A = numpy.array(A)
        b = numpy.array(b)

        # Columns of x are (ln A, n, Ea); its last row is the shared term
        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        self.top[0].data = Arrhenius(
            A = (math.exp(x[-1,0]),kunits),
            n = x[-1,1],
            Ea = (x[-1,2],"kJ/mol"),
            T0 = (1,"K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[i,0]),kunits),
                n = x[i,1],
                Ea = (x[i,2],"kJ/mol"),
                T0 = (1,"K"),
            )

    # Report a change as soon as one entry differs from its previous data by
    # 1% or more. NOTE(review): an entry without data, or without previous
    # data, also counts as changed (behaviour kept from the original code).
    for label, entry in self.entries.items():
        new = entry.data
        if new is None or label not in old_entries:
            return True
        old = old_entries[label]
        if (isinstance(new, KineticsData) and
                isinstance(old, KineticsData) and
                len(new.kdata.value_si) == len(old.kdata.value_si) and
                all(abs(new.kdata.value_si / old.kdata.value_si - 1) < 0.01)):
            # New group values within 1% of old.
            continue
        if (isinstance(new, Arrhenius) and
                isinstance(old, Arrhenius) and
                _withinOnePercent(new.A.value_si, old.A.value_si) and
                _withinOnePercent(new.n.value_si, old.n.value_si) and
                _withinOnePercent(new.Ea.value_si, old.Ea.value_si) and
                _withinOnePercent(new.T0.value_si, old.T0.value_si)):
            # New group values within 1% of old.
            continue
        return True

    return False
def _getTemplateCombinations(family, template):
    """
    Return every combination obtained by choosing, for each group in
    `template`, either the group itself or one of its ancestors in the
    groups tree of `family`.
    """
    candidates = []
    for group in template:
        groups = [group]
        groups.extend(family.groups.ancestors(group))
        candidates.append(groups)
    return getAllCombinations(candidates)


def _getDegeneracyScaledRates(reaction, Tdata):
    """
    Return a list with the rate coefficient of `reaction` at each temperature
    in `Tdata`, divided by ``reaction.degeneracy``. Raises an :class:`Exception`
    if the kinetics are not Arrhenius, KineticsData or ArrheniusEP.
    """
    kinetics = reaction.kinetics
    if isinstance(kinetics, (Arrhenius, KineticsData)):
        return [kinetics.getRateCoefficient(T) / reaction.degeneracy for T in Tdata]
    elif isinstance(kinetics, ArrheniusEP):
        # NOTE(review): the second argument is fixed at 0; presumably this is
        # the enthalpy of reaction of the Evans-Polanyi form -- confirm
        return [kinetics.getRateCoefficient(T, 0) / reaction.degeneracy for T in Tdata]
    else:
        raise Exception('Unexpected kinetics model of type {0} for reaction {1}.'.format(reaction.kinetics.__class__, reaction))


def _printArrheniusGroupValues(family, groupList):
    """
    Print a table of the Arrhenius parameters stored on the first top-level
    group of `family` and on every group in `groupList`.
    """
    ruler = '======================================= =========== =========== ==========='
    rowFormat = '%-39s %11.3f %11.3f %11.3f'
    print(ruler)
    # Build the header from the column widths so it lines up with the ruler
    print('%-39s %-11s %-11s %s' % ('Group', 'log A (SI)', 'n', 'Ea (kJ/mol)'))
    print(ruler)
    data = family.groups.top[0].data
    label = ', '.join(['%s' % (top.label) for top in family.groups.top])
    print(rowFormat % (label, math.log10(data.A.value), data.n.value, data.Ea.value / 1000.))
    print('--------------------------------------- ----------- ----------- -----------')
    for group in groupList:
        data = group.data
        print(rowFormat % (group.label, math.log10(data.A.value), data.n.value, data.Ea.value / 1000.))
    print(ruler)


def _isWithinRelativeTolerance(newValue, oldValue, tolerance=0.01):
    """
    Return ``True`` if `newValue` equals `oldValue` or differs from it by less
    than the fraction `tolerance` of ``abs(oldValue)``.

    This is the same test as ``abs(newValue / oldValue - 1) < tolerance`` for
    nonzero `oldValue`, but it does not divide, so an old value of exactly
    zero (e.g. ``n = 0`` or ``Ea = 0``) does not raise ZeroDivisionError.
    """
    return newValue == oldValue or abs(newValue - oldValue) < tolerance * abs(oldValue)


def generateKineticsGroupValues(family, database, trainingSetLabels, method):
    """
    Evaluate the kinetics group additivity values for the given reaction
    `family` using the specified lists of depository components
    `trainingSetLabels` as the training set. The already-loaded RMG database
    should be given as the `database` parameter (it is only passed on to
    ``createDataSet``).

    The `method` parameter selects how the group values are fitted:

    * ``'KineticsData'`` -- least-squares fit of log10 k(T) at a fixed list of
      temperatures; each fitted group gets a :class:`KineticsData` object,
      with 95% confidence intervals attached as multiplicative uncertainties
      when none of them is NaN. Groups that could not be fitted get their
      data reset to ``None``.
    * ``'Arrhenius'`` -- least-squares fit of (ln A, n, Ea) per group against
      ln k(T) sampled at the same fixed list of temperatures.
    * ``'Arrhenius2'`` -- least-squares fit of (ln A, n, Ea) per group directly
      against the parameters of training entries whose data is Arrhenius, or
      ArrheniusEP with ``alpha == 0``.

    Any other value of `method` fits nothing; a warning is logged.

    The fitted values are stored on the group entries of `family` and printed
    as a table. Rate coefficients are divided by the reaction degeneracy
    before fitting (first two methods).

    Returns ``None`` (after logging a warning) if the training set produced no
    rows to fit. Otherwise returns ``True`` if at least one entry that already
    had data now has data differing by 1% or more (or of a different kind),
    and ``False`` if not. Each such changed entry gets an event appended to
    its ``history``.
    """

    kunits = getRateCoefficientUnits(family)

    print('Categorizing reactions in training sets for {0}'.format(family.label))
    trainingSets = createDataSet(trainingSetLabels, family, database)
    # Flatten the per-depository data into one list of
    # (reaction, template, entry) tuples
    trainingSet = []
    for label, data in trainingSets:
        trainingSet.extend(data)

    print('Fitting new group additivity values for {0}...'.format(family.label))

    # keep track of previous values so we can detect if they change
    old_entries = {}
    for label, entry in family.groups.entries.items():
        if entry.data is not None:
            old_entries[label] = entry.data

    # Determine a complete list of the entries in the database, sorted as in the tree
    groupEntries = family.groups.top[:]
    for entry in family.groups.top:
        groupEntries.extend(family.groups.descendants(entry))

    # Determine a unique list of the groups we will be able to fit parameters for
    groupList = []
    for reaction, template, entry in trainingSet:
        for group in template:
            if group not in family.groups.top:
                groupList.append(group)
                # The last ancestor is dropped; the top-level contribution is
                # fitted separately as the extra final column/parameter set
                groupList.extend(family.groups.ancestors(group)[:-1])
    groupList = list(set(groupList))
    groupList.sort(key=lambda x: x.index)

    if method == 'KineticsData':
        # Fit a discrete set of k(T) data points by training against k(T) data

        Tdata = [300, 400, 500, 600, 800, 1000, 1500, 2000]

        # Initialize dictionaries of fitted group values and uncertainties
        groupValues = {}
        groupUncertainties = {}
        groupCounts = {}
        groupComments = {}
        for entry in groupEntries:
            groupValues[entry] = []
            groupUncertainties[entry] = []
            groupCounts[entry] = []
            groupComments[entry] = set()

        # Generate least-squares matrix and vector
        A = []
        b = []

        kdata = []
        for reaction, template, entry in trainingSet:

            kd = _getDegeneracyScaledRates(reaction, Tdata)
            kdata.append(kd)

            # Create every combination of each group and its ancestors with each other
            combinations = _getTemplateCombinations(family, template)

            # Add a row to the matrix for each combination
            for groups in combinations:
                Arow = [1 if group in groups else 0 for group in groupList]
                Arow.append(1)    # column of the top-level contribution
                brow = [math.log10(k) for k in kd]
                A.append(Arow)
                b.append(brow)

                for group in groups:
                    groupComments[group].add("{0!s}".format(template))

        if len(A) == 0:
            logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
            return
        A = numpy.array(A)
        b = numpy.array(b)
        kdata = numpy.array(kdata)

        # x[i,t] is the log10 contribution of groupList[i] at Tdata[t];
        # the last row of x is the top-level contribution
        x = numpy.linalg.lstsq(A, b)[0]

        for t, T in enumerate(Tdata):

            # Determine error in each group (on log scale)
            stdev = numpy.zeros(len(groupList) + 1, numpy.float64)
            count = numpy.zeros(len(groupList) + 1, int)

            for index, (reaction, template, entry) in enumerate(trainingSet):
                logkData = math.log10(kdata[index, t])
                logkModel = x[-1, t] + sum([x[groupList.index(group), t] for group in template if group in groupList])
                variance = (logkModel - logkData)**2
                for group in template:
                    groups = [group]
                    groups.extend(family.groups.ancestors(group))
                    for g in groups:
                        if g not in family.groups.top:
                            ind = groupList.index(g)
                            stdev[ind] += variance
                            count[ind] += 1
                stdev[-1] += variance
                count[-1] += 1
            # Groups with a count of 0 or 1 give NaN or inf here; entries with
            # NaN intervals are stored without uncertainties further below
            stdev = numpy.sqrt(stdev / (count - 1))
            ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

            # Update dictionaries of fitted group values and uncertainties
            for entry in groupEntries:
                if entry == family.groups.top[0]:
                    groupValues[entry].append(10**x[-1, t])
                    groupUncertainties[entry].append(10**ci[-1])
                    groupCounts[entry].append(count[-1])
                elif entry in groupList:
                    index = groupList.index(entry)
                    groupValues[entry].append(10**x[index, t])
                    groupUncertainties[entry].append(10**ci[index])
                    groupCounts[entry].append(count[index])
                else:
                    # Not constrained by the training set
                    groupValues[entry] = None
                    groupUncertainties[entry] = None
                    groupCounts[entry] = None

        # Store the fitted group values and uncertainties on the associated entries
        for entry in groupEntries:
            if groupValues[entry] is not None:
                entry.data = KineticsData(Tdata=(Tdata, "K"), kdata=(groupValues[entry], kunits))
                if not any(numpy.isnan(numpy.array(groupUncertainties[entry]))):
                    entry.data.kdata.uncertainties = numpy.array(groupUncertainties[entry])
                    entry.data.kdata.uncertaintyType = '*|/'
                entry.shortDesc = "Group additive kinetics."
                entry.longDesc = "Fitted to {0} rates.\n".format(groupCounts[entry])
                entry.longDesc += "\n".join(groupComments[entry])
            else:
                entry.data = None

        # Print the group values
        ruler = '=============================== =========== =========== =========== ======='
        rowFormat = '%-31s %-11g %-11.4e %-11.4e %-7i'
        print(ruler)
        # Build the header from the column widths so it lines up with the ruler
        print('%-31s %-11s %-11s %-11s %s' % ('Group', 'T (K)', 'k(T) (SI)', 'CI (95%)', 'Count'))
        print(ruler)
        entry = family.groups.top[0]
        for i in range(len(entry.data.Tdata.values)):
            label = ', '.join(['%s' % (top.label) for top in family.groups.top]) if i == 0 else ''
            T = Tdata[i]
            value = groupValues[entry][i]
            uncertainty = groupUncertainties[entry][i]
            count = groupCounts[entry][i]
            print(rowFormat % (label, T, value, uncertainty, count))
        print('------------------------------- ----------- ----------- ----------- -------')
        for entry in groupEntries:
            if entry.data is not None:
                for i in range(len(entry.data.Tdata.values)):
                    label = entry.label if i == 0 else ''
                    T = Tdata[i]
                    value = groupValues[entry][i]
                    uncertainty = groupUncertainties[entry][i]
                    count = groupCounts[entry][i]
                    print(rowFormat % (label, T, value, uncertainty, count))
        print(ruler)

    elif method == 'Arrhenius':
        # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

        Tdata = [300, 400, 500, 600, 800, 1000, 1500, 2000]

        A = []
        b = []

        for reaction, template, entry in trainingSet:

            kd = _getDegeneracyScaledRates(reaction, Tdata)

            # Create every combination of each group and its ancestors with each other
            combinations = _getTemplateCombinations(family, template)

            # Add a row to the matrix for each combination at each temperature
            for t, T in enumerate(Tdata):
                logT = math.log(T)
                # The factor of 1000 makes the fitted energy coefficient
                # 1000 times smaller; it is multiplied back when stored below
                Tinv = 1000.0 / (constants.R * T)
                for groups in combinations:
                    Arow = []
                    for group in groupList:
                        if group in groups:
                            Arow.extend([1, logT, -Tinv])
                        else:
                            Arow.extend([0, 0, 0])
                    Arow.extend([1, logT, -Tinv])    # top-level contribution
                    brow = math.log(kd[t])
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
            return
        A = numpy.array(A)
        b = numpy.array(b)

        # x holds (ln A, n, Ea/1000) triples: one per group in groupList,
        # followed by the triple of the top-level contribution
        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        family.groups.top[0].data = Arrhenius(
            A = (math.exp(x[-3]), kunits),
            n = x[-2],
            Ea = (x[-1] * 1000., "J/mol"),
            T0 = (1, "K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[3*i]), kunits),
                n = x[3*i+1],
                Ea = (x[3*i+2] * 1000., "J/mol"),
                T0 = (1, "K"),
            )

        # Print the results
        _printArrheniusGroupValues(family, groupList)

    elif method == 'Arrhenius2':
        # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

        A = []
        b = []

        for reaction, template, entry in trainingSet:

            # Create every combination of each group and its ancestors with each other
            combinations = _getTemplateCombinations(family, template)

            # Add a row to the matrix for each parameter
            if isinstance(entry.data, Arrhenius) or (isinstance(entry.data, ArrheniusEP) and entry.data.alpha.value == 0):
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)    # column of the top-level contribution
                    Ea = entry.data.E0.value if isinstance(entry.data, ArrheniusEP) else entry.data.Ea.value
                    brow = [math.log(entry.data.A.value), entry.data.n.value, Ea / 1000.]
                    A.append(Arow)
                    b.append(brow)

        if len(A) == 0:
            logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
            return
        A = numpy.array(A)
        b = numpy.array(b)

        # x[i] is the (ln A, n, Ea/1000) contribution of groupList[i];
        # the last row of x is the top-level contribution
        x = numpy.linalg.lstsq(A, b)[0]

        # Store the results
        family.groups.top[0].data = Arrhenius(
            A = (math.exp(x[-1, 0]), kunits),
            n = x[-1, 1],
            Ea = (x[-1, 2] * 1000., "J/mol"),
            T0 = (1, "K"),
        )
        for i, group in enumerate(groupList):
            group.data = Arrhenius(
                A = (math.exp(x[i, 0]), kunits),
                n = x[i, 1],
                Ea = (x[i, 2] * 1000., "J/mol"),
                T0 = (1, "K"),
            )

        # Print the results
        _printArrheniusGroupValues(family, groupList)

    else:
        # Previously an unrecognized method was skipped without any notice
        logging.warning('Unknown kinetics group fitting method "{0}"; no group values were fitted.'.format(method))

    # Add a note to the history of each changed item indicating that we've generated new group values
    changed = False
    # `user` is not defined in this function; it must exist at module level
    event = [time.asctime(), user, 'action', 'Generated new group additivity values for this entry.']
    for label, entry in family.groups.entries.items():
        # Only entries that had data before and still have data are compared
        if entry.data is None or label not in old_entries:
            continue
        new = entry.data
        old = old_entries[label]
        if (isinstance(new, KineticsData) and
                isinstance(old, KineticsData) and
                len(new.kdata.values) == len(old.kdata.values) and
                all(abs(new.kdata.values / old.kdata.values - 1) < 0.01)):
            # New group values within 1% of old
            continue
        if (isinstance(new, Arrhenius) and
                isinstance(old, Arrhenius) and
                _isWithinRelativeTolerance(new.A.value, old.A.value) and
                _isWithinRelativeTolerance(new.n.value, old.n.value) and
                _isWithinRelativeTolerance(new.Ea.value, old.Ea.value) and
                _isWithinRelativeTolerance(new.T0.value, old.T0.value)):
            # New group values within 1% of old
            continue
        changed = True
        # Append a copy so that the histories do not share one list object
        entry.history.append(list(event))

    return changed