def updateChiSquareIndependence(self, syncNumber, get):
    """Fold one event into the CountTable of a chiSquareIndependence baseline.

    The counts are kept in a nested PMML CountTable: one level of nodes
    per entry of ``self.fields``, with the count stored on the node
    created for the last field.

    Arguments:
        syncNumber: forwarded unchanged to every updator ``increment`` call.
        get: callable taking a field name and returning that field's
            value for the current event.

    Returns:
        True when the event was counted.  False when the first-event
        setup reports failure, or when any field value is INVALID or
        MISSING (nothing is counted in either case).
    """
    self.resetLoggerLevels()

    # One-time setup; remain in the "first" state until it succeeds.
    if self.first:
        if not self._updateChiSquareIndependence_first(get):
            return False
        self.first = False

    # Read every field up front and give up on the first unusable value.
    got = {}
    observed = []
    for fieldName in self.fields:
        fieldValue = get(fieldName)
        if fieldValue is INVALID or fieldValue is MISSING:
            self.logger.debug("updateChiSquareIndependence: returning False (INVALID or MISSING data)")
            return False
        got[fieldName] = fieldValue
        observed.append(fieldValue)
    key = tuple(observed)

    depth, node = self._updateChiSquareIndependence_findNode(0, got, self.countTable)

    # Extend the table from the deepest existing node down to the leaf
    # when this combination of values has no entry yet.
    leafIndex = len(self.fields) - 1
    for level in xrange(depth, len(self.fields)):
        fieldName = self.fields[level]
        if level != leafIndex:
            newNode = pmml.FieldValue(field=fieldName, value=got[fieldName])
        else:
            newNode = pmml.FieldValueCount(field=fieldName, value=got[fieldName], count=0.)
        node.children.append(newNode)
        node = newNode

    # One updator per distinct value combination, seeded from the count
    # already recorded on the node.
    updator = self.updators.get(key, None)
    if updator is None:
        updator = self.engine.producerUpdateScheme.updator(SUMX)
        updator.initialize({SUMX: node.attrib["count"]})
        self.updators[key] = updator

    # As for histograms: every bin sees this event, but only the
    # matching bin receives a non-zero increment.
    for other in self.updators.values():
        if other is updator:
            continue
        other.increment(syncNumber, 0.)

    updator.increment(syncNumber, 1.)
    binCount = updator.sum()
    node.attrib["count"] = binCount

    self.total_updator.increment(syncNumber, 1.)
    sampleSize = self.total_updator.sum()
    self.countTable.attrib["sample"] = sampleSize

    return True
def _updateChiSquareIndependence_newTableFromFields(self, got):
    """Replace the baseline's content with a fresh single-branch CountTable.

    A new CountTable with ``sample=0.`` is stored in ``self.countTable``
    and filled with one chain of nodes, one per entry of ``self.fields``:
    a FieldValue for every field but the last, and a FieldValueCount with
    ``count=0.`` for the last.  The table then becomes the only child of
    ``self.baseline``.

    Arguments:
        got: mapping from field name to the value to record for that
            field; it is indexed with every name in ``self.fields``.
    """
    table = pmml.CountTable(sample=0.)
    self.countTable = table

    leafIndex = len(self.fields) - 1
    parent = table
    for index, fieldName in enumerate(self.fields):
        if index != leafIndex:
            parent.children.append(pmml.FieldValue(field=fieldName, value=got[fieldName]))
            # Step down one level.  NOTE(review): presumably this returns
            # the node just appended, since the parent was created empty;
            # confirm against the semantics of pmml's child().
            parent = parent.child(pmml.nonExtension)
        else:
            parent.children.append(pmml.FieldValueCount(field=fieldName, value=got[fieldName], count=0.))

    self.baseline.children = [table]