def _selectMax(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Score every row with each segment whose predicate selects it and keep,
    row by row, the largest valid score seen so far.  Output fields produced
    by the sub-models are merged into C{dataTable.output} as a side effect.

    @param dataTable: The rows to score; must support C{len}, C{subTable} and expose an C{output} mapping.
    @param functionTable: Passed through unchanged to the predicates and sub-models.
    @param performanceTable: Timing recorder; the "Segmentation max" timer is paused while predicates and sub-models run.
    @param segmentation: The element whose C{Segment} children are evaluated in document order.
    @rtype: dict
    @return: C{{None: scores}} where C{scores} is a DataColumn of type C{self.scoreType}; rows that never received a valid score are masked with C{defs.MISSING}.
    @raise defs.PmmlValidationError: If a sub-model produces a score whose dataType is "string", "boolean" or "object".
    """

    timerName = "Segmentation max"
    performanceTable.begin(timerName)

    numRows = len(dataTable)
    boolType = NP.dtype(bool)

    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    filled = NP("zeros", numRows, dtype=boolType)
    unfilled = NP("ones", numRows, dtype=boolType)

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        performanceTable.pause(timerName)
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause(timerName)
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        score = subTable.score
        if score.fieldType.dataType in ("string", "boolean", "object"):
            raise defs.PmmlValidationError(
                "Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\""
                % score.fieldType.dataType)

        # Sub-table coordinates: one entry per selected row.  Boolean indexing
        # returns copies, so the in-place filtering below leaves "filled" and
        # "unfilled" themselves untouched.
        filled_selection = filled[selection]
        unfilled_selection = unfilled[selection]

        # ignore invalid in matches (like the built-in "min" Apply function)
        # The score mask has one entry per sub-table row, so it is applied in
        # sub-table coordinates rather than to the full-length selection.
        if score.mask is not None:
            valid = NP(score.mask == defs.VALID)
            NP("logical_and", filled_selection, valid, filled_selection)
            NP("logical_and", unfilled_selection, valid, unfilled_selection)

        # Full-table coordinates: scatter the sub-table flags back so that
        # selectionFilled/selectionUnfilled always pick exactly as many rows
        # as filled_selection/unfilled_selection do.
        selectionFilled = NP("zeros", numRows, dtype=boolType)
        selectionFilled[selection] = filled_selection
        selectionUnfilled = NP("zeros", numRows, dtype=boolType)
        selectionUnfilled[selection] = unfilled_selection

        # Rows that already have a score keep the larger of old and new;
        # rows seen for the first time simply take the new score.
        left, right = score.data[filled_selection], scoresData[selectionFilled]
        condition = NP(left > right)
        scoresData[selectionFilled] = NP("where", condition, left, right)
        scoresData[selectionUnfilled] = score.data[unfilled_selection]

        for fieldName, dataColumn in subTable.output.items():
            if fieldName not in dataTable.output:
                # First time this output field appears: build a full-length
                # column that is MISSING everywhere except the new rows.
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                if dataColumn.mask is None:
                    mask[selectionUnfilled] = defs.VALID
                else:
                    mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]

                # Follow the winning score for rows that were already filled.
                newDataColumn.data[selectionFilled] = NP(
                    "where", condition, dataColumn.data[filled_selection], newDataColumn.data[selectionFilled])
                newDataColumn.data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                if dataColumn.mask is None:
                    newDataColumn.mask[selectionUnfilled] = defs.VALID
                else:
                    newDataColumn.mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

        # Boolean "+" acts as a logical OR.  Boolean "-" is rejected by
        # current NumPy releases, so the rows are cleared with AND NOT.
        filled += selectionUnfilled
        NP("logical_and", unfilled, NP("logical_not", selectionUnfilled), unfilled)

    for newDataColumn in newOutputData:
        # An all-zero mask means every entry is valid, so the mask is dropped.
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if filled.all():
        scoresMask = None
    else:
        scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end(timerName)
    return {None: scores}
def _selectMax(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Score every row with each segment whose predicate selects it and keep,
    row by row, the largest valid score seen so far.  Output fields produced
    by the sub-models are merged into C{dataTable.output} as a side effect.

    @param dataTable: The rows to score; must support C{len}, C{subTable} and expose an C{output} mapping.
    @param functionTable: Passed through unchanged to the predicates and sub-models.
    @param performanceTable: Timing recorder; the "Segmentation max" timer is paused while predicates and sub-models run.
    @param segmentation: The element whose C{Segment} children are evaluated in document order.
    @rtype: dict
    @return: C{{None: scores}} where C{scores} is a DataColumn of type C{self.scoreType}; rows that never received a valid score are masked with C{defs.MISSING}.
    @raise defs.PmmlValidationError: If a sub-model produces a score whose dataType is "string", "boolean" or "object".
    """

    timerName = "Segmentation max"
    performanceTable.begin(timerName)

    numRows = len(dataTable)
    boolType = NP.dtype(bool)

    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    filled = NP("zeros", numRows, dtype=boolType)
    unfilled = NP("ones", numRows, dtype=boolType)

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        performanceTable.pause(timerName)
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause(timerName)
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        score = subTable.score
        if score.fieldType.dataType in ("string", "boolean", "object"):
            raise defs.PmmlValidationError(
                "Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\""
                % score.fieldType.dataType)

        # Sub-table coordinates: one entry per selected row.  Boolean indexing
        # returns copies, so the in-place filtering below leaves "filled" and
        # "unfilled" themselves untouched.
        filled_selection = filled[selection]
        unfilled_selection = unfilled[selection]

        # ignore invalid in matches (like the built-in "min" Apply function)
        # The score mask has one entry per sub-table row, so it is applied in
        # sub-table coordinates rather than to the full-length selection.
        if score.mask is not None:
            valid = NP(score.mask == defs.VALID)
            NP("logical_and", filled_selection, valid, filled_selection)
            NP("logical_and", unfilled_selection, valid, unfilled_selection)

        # Full-table coordinates: scatter the sub-table flags back so that
        # selectionFilled/selectionUnfilled always pick exactly as many rows
        # as filled_selection/unfilled_selection do.
        selectionFilled = NP("zeros", numRows, dtype=boolType)
        selectionFilled[selection] = filled_selection
        selectionUnfilled = NP("zeros", numRows, dtype=boolType)
        selectionUnfilled[selection] = unfilled_selection

        # Rows that already have a score keep the larger of old and new;
        # rows seen for the first time simply take the new score.
        left, right = score.data[filled_selection], scoresData[selectionFilled]
        condition = NP(left > right)
        scoresData[selectionFilled] = NP("where", condition, left, right)
        scoresData[selectionUnfilled] = score.data[unfilled_selection]

        for fieldName, dataColumn in subTable.output.items():
            if fieldName not in dataTable.output:
                # First time this output field appears: build a full-length
                # column that is MISSING everywhere except the new rows.
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                if dataColumn.mask is None:
                    mask[selectionUnfilled] = defs.VALID
                else:
                    mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]

                # Follow the winning score for rows that were already filled.
                newDataColumn.data[selectionFilled] = NP(
                    "where", condition, dataColumn.data[filled_selection], newDataColumn.data[selectionFilled])
                newDataColumn.data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                if dataColumn.mask is None:
                    newDataColumn.mask[selectionUnfilled] = defs.VALID
                else:
                    newDataColumn.mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

        # Boolean "+" acts as a logical OR.  Boolean "-" is rejected by
        # current NumPy releases, so the rows are cleared with AND NOT.
        filled += selectionUnfilled
        NP("logical_and", unfilled, NP("logical_not", selectionUnfilled), unfilled)

    for newDataColumn in newOutputData:
        # An all-zero mask means every entry is valid, so the mask is dropped.
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if filled.all():
        scoresMask = None
    else:
        scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end(timerName)
    return {None: scores}
def _selectFirst(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Score each row with the first segment (in document order) whose
    predicate selects it.  Output fields produced by the sub-models are
    merged into C{dataTable.output} as a side effect.

    @param dataTable: The rows to score; must support C{len}, C{subTable} and expose an C{output} mapping.
    @param functionTable: Passed through unchanged to the predicates and sub-models.
    @param performanceTable: Timing recorder; the "Segmentation selectFirst" timer is paused while predicates and sub-models run.
    @param segmentation: The element whose C{Segment} children are evaluated in document order.
    @rtype: dict
    @return: C{{None: scores}} when C{self.name} is None; otherwise the same dictionary with an extra C{"segment"} DataColumn holding the C{id} of the segment that scored each row (None where the segment has no C{id} or no segment matched).  Rows matched by no segment are masked with C{defs.MISSING} in C{scores}.
    """

    timerName = "Segmentation selectFirst"
    performanceTable.begin(timerName)

    numRows = len(dataTable)

    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    # Start with every row MISSING (the same default the output columns
    # below use) so that rows no segment selects are not reported as valid
    # scores backed by uninitialised data.
    scoresMask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
    unfilled = NP("ones", numRows, dtype=NP.dtype(bool))
    segments = NP("empty", numRows, dtype=NP.dtype(object))

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        performanceTable.pause(timerName)
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        # Only rows that no earlier segment has scored are still eligible.
        NP("logical_and", selection, unfilled, selection)
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause(timerName)
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        score = subTable.score
        scoresData[selection] = score.data
        if score.mask is not None:
            scoresMask[selection] = score.mask
        else:
            scoresMask[selection] = defs.VALID

        segmentName = segment.get("id")
        if segmentName is not None:
            segments[selection] = segmentName

        for fieldName, dataColumn in subTable.output.items():
            if fieldName not in dataTable.output:
                # First time this output field appears: build a full-length
                # column that is MISSING everywhere except the selected rows.
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selection] = dataColumn.data

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                if dataColumn.mask is None:
                    mask[selection] = defs.VALID
                else:
                    mask[selection] = dataColumn.mask

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]
                newDataColumn.data[selection] = dataColumn.data
                if dataColumn.mask is None:
                    newDataColumn.mask[selection] = defs.VALID
                else:
                    newDataColumn.mask[selection] = dataColumn.mask

        # Boolean "-" is rejected by current NumPy releases, so the scored
        # rows are cleared from "unfilled" with AND NOT instead.
        NP("logical_and", unfilled, NP("logical_not", selection), unfilled)
        if not unfilled.any():
            # Every row has been claimed; later segments cannot contribute.
            break

    for newDataColumn in newOutputData:
        # An all-zero mask means every entry is valid, so the mask is dropped.
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if not scoresMask.any():
        scoresMask = None

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end(timerName)
    if self.name is None:
        return {None: scores}
    return {None: scores, "segment": DataColumn(self.scoreTypeSegment, segments, None)}
def _selectFirst(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Score each row with the first segment (in document order) whose
    predicate selects it.  Output fields produced by the sub-models are
    merged into C{dataTable.output} as a side effect.

    @param dataTable: The rows to score; must support C{len}, C{subTable} and expose an C{output} mapping.
    @param functionTable: Passed through unchanged to the predicates and sub-models.
    @param performanceTable: Timing recorder; the "Segmentation selectFirst" timer is paused while predicates and sub-models run.
    @param segmentation: The element whose C{Segment} children are evaluated in document order.
    @rtype: dict
    @return: C{{None: scores}} when C{self.name} is None; otherwise the same dictionary with an extra C{"segment"} DataColumn holding the C{id} of the segment that scored each row (None where the segment has no C{id} or no segment matched).  Rows matched by no segment are masked with C{defs.MISSING} in C{scores}.
    """

    timerName = "Segmentation selectFirst"
    performanceTable.begin(timerName)

    numRows = len(dataTable)

    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    # Start with every row MISSING (the same default the output columns
    # below use) so that rows no segment selects are not reported as valid
    # scores backed by uninitialised data.
    scoresMask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
    unfilled = NP("ones", numRows, dtype=NP.dtype(bool))
    segments = NP("empty", numRows, dtype=NP.dtype(object))

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        performanceTable.pause(timerName)
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        # Only rows that no earlier segment has scored are still eligible.
        NP("logical_and", selection, unfilled, selection)
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause(timerName)
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause(timerName)

        score = subTable.score
        scoresData[selection] = score.data
        if score.mask is not None:
            scoresMask[selection] = score.mask
        else:
            scoresMask[selection] = defs.VALID

        segmentName = segment.get("id")
        if segmentName is not None:
            segments[selection] = segmentName

        for fieldName, dataColumn in subTable.output.items():
            if fieldName not in dataTable.output:
                # First time this output field appears: build a full-length
                # column that is MISSING everywhere except the selected rows.
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selection] = dataColumn.data

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                if dataColumn.mask is None:
                    mask[selection] = defs.VALID
                else:
                    mask[selection] = dataColumn.mask

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]
                newDataColumn.data[selection] = dataColumn.data
                if dataColumn.mask is None:
                    newDataColumn.mask[selection] = defs.VALID
                else:
                    newDataColumn.mask[selection] = dataColumn.mask

        # Boolean "-" is rejected by current NumPy releases, so the scored
        # rows are cleared from "unfilled" with AND NOT instead.
        NP("logical_and", unfilled, NP("logical_not", selection), unfilled)
        if not unfilled.any():
            # Every row has been claimed; later segments cannot contribute.
            break

    for newDataColumn in newOutputData:
        # An all-zero mask means every entry is valid, so the mask is dropped.
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if not scoresMask.any():
        scoresMask = None

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end(timerName)
    if self.name is None:
        return {None: scores}
    return {None: scores, "segment": DataColumn(self.scoreTypeSegment, segments, None)}