Python FieldCastMethods.applyMapMissingTo示例，augustus.core.FieldCastMethods.FieldCastMethods.applyMapMissingTo Python示例

示例#1

0

显示文件

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Build the NormDiscrete column: compare the input field against C{value}.

        The field named by this element's C{field} attribute is compared
        element-wise with the C{value} attribute (converted by the input
        field's own type), and the comparison result is stored with the
        dtype of C{self._fieldType}.

        @type dataTable: DataTable
        @param dataTable: The input DataTable; its C{fields} must contain the referenced field.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable (not used by this expression).
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable; the work is timed under "NormDiscrete".
        @rtype: DataColumn
        @return: The comparison result as a DataColumn of type C{self._fieldType}.
        """

        performanceTable.begin("NormDiscrete")

        inputColumn = dataTable.fields[self["field"]]
        target = inputColumn.fieldType.stringToValue(self["value"])

        # Element-wise equality, re-typed to this expression's output dtype.
        matches = NP(inputColumn.data == target)
        indicator = NP("array", matches, dtype=self._fieldType.dtype)

        # The input column's mask is carried over, subject to mapMissingTo.
        indicator, outputMask = FieldCastMethods.applyMapMissingTo(
            self._fieldType, indicator, inputColumn.mask, self.get("mapMissingTo"))

        performanceTable.end("NormDiscrete")
        return DataColumn(self._fieldType, indicator, outputMask)

示例#2

0

显示文件

文件： Apply.py 项目： Huskyeder/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Call the function named by this Apply element on its child expressions.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, passed through to the function being applied.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable in which the C{function} attribute is looked up.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable; the work is timed under "Apply".
        @rtype: DataColumn
        @return: The function's result after invalid-value treatment and mapMissingTo have been applied.
        @raise LookupError: If C{functionTable} has no entry for the C{function} attribute.
        """

        performanceTable.begin("Apply")

        functionName = self.get("function")
        target = functionTable.get(functionName)
        if target is None:
            raise LookupError("Apply references function \"%s\", but it does not exist" % functionName)

        childExpressions = self.childrenOfClass(PmmlExpression)

        # The "Apply" timer is paused while the function itself runs.
        performanceTable.pause("Apply")
        result = target.evaluate(dataTable, functionTable, performanceTable, childExpressions)
        performanceTable.unpause("Apply")

        resultType = result.fieldType
        treatedMask = FieldCastMethods.applyInvalidValueTreatment(result.mask, self.get("invalidValueTreatment"))
        values, finalMask = FieldCastMethods.applyMapMissingTo(resultType, result.data, treatedMask, self.get("mapMissingTo"))

        performanceTable.end("Apply")
        return DataColumn(resultType, values, finalMask)

示例#3

0

显示文件

    def evaluate(self, dataTable, functionTable, performanceTable, text=None):
        """Parse a formula and evaluate it, using a DataTable as input.

        Parsing is timed under "Formula parse" and evaluation under
        "Formula evaluate". Both timers are closed on every return path,
        including the early return taken when the result has no mask.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @type text: string or None
        @param text: If None, use the text of this Formula object; otherwise, use C{text} instead.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        """

        if text is None:
            text = self.text

        performanceTable.begin("Formula parse")
        parsed = Formula.parse(text)
        performanceTable.end("Formula parse")

        performanceTable.begin("Formula evaluate")
        dataColumn = parsed.evaluate(dataTable, functionTable, performanceTable)

        if dataColumn.mask is None:
            # No mask, so there is nothing for the invalid/missing treatments
            # to act on.  The timer opened above must still be closed here.
            performanceTable.end("Formula evaluate")
            return dataColumn

        data = dataColumn.data
        mask = FieldCastMethods.applyInvalidValueTreatment(dataColumn.mask, self.get("invalidValueTreatment"))
        data, mask = FieldCastMethods.applyMapMissingTo(dataColumn.fieldType, data, mask, self.get("mapMissingTo"))

        performanceTable.end("Formula evaluate")
        return DataColumn(dataColumn.fieldType, data, mask)

示例#4

0

显示文件

文件： NormContinuous.py 项目： Huskyeder/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the NormContinuous expression, using a DataTable as input.

        The LinearNorm children are ordered by their C{orig} attribute.  Each
        consecutive pair is handed to C{self.transformSelection} together with
        the rows whose input value lies in C{(lower.orig, upper.orig]}.  Rows
        at or below the first C{orig}, and rows above the last C{orig}, are
        handled according to the C{outliers} attribute: "asMissingValues"
        routes them through C{FieldCastMethods.outliersAsMissing},
        "asExtremeValues" assigns the first/last C{norm}, and anything else
        passes them to C{self.transformSelection} with the nearest pair.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If the input field's dataType is "object", "string", or "boolean".
        """

        performanceTable.begin("NormContinuous")

        dataColumn = dataTable.fields[self["field"]]
        if dataColumn.fieldType.dataType in ("object", "string", "boolean"):
            raise defs.PmmlValidationError("NormContinuous requires a numeric input field, but \"%s\" is" % dataColumn.fieldType.dataType)

        outliers = self.get("outliers")

        linearNorms = self.childrenOfTag("LinearNorm")
        for linearNorm in linearNorms:
            linearNorm.orig = float(linearNorm["orig"])
            linearNorm.norm = float(linearNorm["norm"])

        # technically, it's invalid if not already sorted
        # (key-based sort: same stable ordering as the old cmp comparator)
        linearNorms.sort(key=lambda linearNorm: linearNorm.orig)

        data = NP("empty", len(dataTable), self._fieldType.dtype)
        mask = dataColumn.mask

        # extrapolate before the first
        selection = NP(dataColumn.data <= linearNorms[0].orig)
        if outliers == "asMissingValues":
            mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask, selection)
        elif outliers == "asExtremeValues":
            data[selection] = linearNorms[0].norm
        else:
            self.transformSelection(linearNorms[0], linearNorms[1], dataColumn.data, data, selection)

        # interior segments: lower.orig < value <= upper.orig
        for lower, upper in zip(linearNorms[:-1], linearNorms[1:]):
            selection = NP(lower.orig < dataColumn.data)
            NP("logical_and", selection, NP(dataColumn.data <= upper.orig), selection)

            self.transformSelection(lower, upper, dataColumn.data, data, selection)

        # extrapolate after the last
        selection = NP(linearNorms[-1].orig < dataColumn.data)
        if outliers == "asMissingValues":
            mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask, selection)
        elif outliers == "asExtremeValues":
            data[selection] = linearNorms[-1].norm
        else:
            self.transformSelection(linearNorms[-2], linearNorms[-1], dataColumn.data, data, selection)

        data, mask = FieldCastMethods.applyMapMissingTo(self._fieldType, data, mask, self.get("mapMissingTo"))

        performanceTable.end("NormContinuous")
        return DataColumn(self._fieldType, data, mask)

示例#5

0

显示文件

文件： FieldRef.py 项目： Huskyeder/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Return the referenced field's column with mapMissingTo applied.

        @type dataTable: DataTable
        @param dataTable: The input DataTable; its C{fields} must contain the field named by the C{field} attribute.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable (not used by this expression).
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable; the work is timed under "FieldRef".
        @rtype: DataColumn
        @return: A DataColumn with the referenced field's type and the data/mask returned by applyMapMissingTo.
        """

        performanceTable.begin("FieldRef")

        source = dataTable.fields[self["field"]]
        sourceType = source.fieldType
        values, outputMask = FieldCastMethods.applyMapMissingTo(
            sourceType, source.data, source.mask, self.get("mapMissingTo"))

        performanceTable.end("FieldRef")
        return DataColumn(sourceType, values, outputMask)

示例#6

0

显示文件

文件： CastExpression.py 项目： Huskyeder/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the child expression and cast its result to this element's type.

        The child expression is evaluated before the "CastExpression" timer
        starts, so only the cast and the mask treatments are timed under
        that key.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, passed through to the child expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, passed through to the child expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The cast column after invalid-value treatment and mapMissingTo have been applied.
        """

        childExpression = self.childOfClass(PmmlExpression)
        evaluated = childExpression.evaluate(dataTable, functionTable, performanceTable)

        performanceTable.begin("CastExpression")

        castColumn = FieldCastMethods.cast(FieldType(self), evaluated)
        castType = castColumn.fieldType
        treatedMask = FieldCastMethods.applyInvalidValueTreatment(
            castColumn.mask, self.get("invalidValueTreatment"))
        values, finalMask = FieldCastMethods.applyMapMissingTo(
            castType, castColumn.data, treatedMask, self.get("mapMissingTo"))

        performanceTable.end("CastExpression")
        return DataColumn(castType, values, finalMask)

示例#7

0

显示文件

文件： Apply.py 项目： soedjais/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Resolve the C{function} attribute and apply it to the child expressions.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, handed on to the applied function.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable used to resolve the C{function} attribute.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable; the work is timed under "Apply".
        @rtype: DataColumn
        @return: The function's output with invalid-value treatment and mapMissingTo applied to it.
        @raise LookupError: If the named function is not present in C{functionTable}.
        """

        performanceTable.begin("Apply")

        name = self.get("function")
        applied = functionTable.get(name)
        if applied is None:
            raise LookupError("Apply references function \"%s\", but it does not exist" % name)

        operands = self.childrenOfClass(PmmlExpression)

        # Time spent inside the applied function is excluded from "Apply".
        performanceTable.pause("Apply")
        output = applied.evaluate(dataTable, functionTable, performanceTable, operands)
        performanceTable.unpause("Apply")

        outputType = output.fieldType
        mask = FieldCastMethods.applyInvalidValueTreatment(output.mask, self.get("invalidValueTreatment"))
        data, mask = FieldCastMethods.applyMapMissingTo(outputType, output.data, mask, self.get("mapMissingTo"))

        performanceTable.end("Apply")
        return DataColumn(outputType, data, mask)

示例#8

0

显示文件

文件： Discretize.py 项目： pradeep6kumar/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the Discretize expression, using a DataTable as input.

        Every row starts out as C{defaultValue} (or MISSING when no default is
        given).  For each DiscretizeBin child, rows whose input value falls in
        the bin's Interval, and whose input mask is VALID, are assigned the
        bin's C{binValue}.  MISSING and INVALID entries of the input mask are
        then copied onto the output mask before mapMissingTo is applied.

        An input column whose mask is C{None} is treated as having no masked
        rows, so the mask-based filtering and copying steps are skipped.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If the input field is not numeric, or a binValue or Interval margin cannot be converted.
        """

        performanceTable.begin("Discretize")

        dataColumn = dataTable.fields[self["field"]]
        if dataColumn.fieldType.dataType in ("object", "string", "boolean"):
            raise defs.PmmlValidationError("Discretize requires a numeric input field, but \"%s\" is" % dataColumn.fieldType.dataType)

        # A None mask means no row of the input is masked.
        inputMask = dataColumn.mask

        fieldType = FakeFieldType(self.get("dataType", "string"), self.get("optype", self._optype))
        fieldType._newValuesAllowed = True

        defaultValue = self.get("defaultValue")
        if defaultValue is not None:
            defaultValue = fieldType.stringToValue(defaultValue)

        data = NP("empty", len(dataTable), dtype=fieldType.dtype)
        mask = NP("empty", len(dataTable), dtype=defs.maskType)
        if defaultValue is None:
            mask[:] = defs.MISSING
        else:
            data[:] = defaultValue
            mask[:] = defs.VALID

        for discretizeBin in self.childrenOfTag("DiscretizeBin"):
            try:
                binValue = fieldType.stringToValue(discretizeBin["binValue"])
            except ValueError:
                raise defs.PmmlValidationError("Cannot cast DiscretizeBin binValue \"%s\" as %s %s" % (discretizeBin["binValue"], fieldType.optype, fieldType.dataType))

            fieldType.values.append(FakeFieldValue(value=binValue))

            interval = discretizeBin.childOfTag("Interval")

            closure = interval["closure"]
            leftMargin = interval.get("leftMargin")
            rightMargin = interval.get("rightMargin")
            selection = None

            if leftMargin is not None:
                try:
                    leftMargin = dataColumn.fieldType.stringToValue(leftMargin)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % leftMargin)

                # selection is always None here, so the left bound starts it
                if closure in ("openClosed", "openOpen"):
                    selection = NP(leftMargin < dataColumn.data)
                elif closure in ("closedOpen", "closedClosed"):
                    selection = NP(leftMargin <= dataColumn.data)

            if rightMargin is not None:
                try:
                    rightMargin = dataColumn.fieldType.stringToValue(rightMargin)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % rightMargin)

                if closure in ("openOpen", "closedOpen"):
                    withinRight = NP(dataColumn.data < rightMargin)
                elif closure in ("openClosed", "closedClosed"):
                    withinRight = NP(dataColumn.data <= rightMargin)
                else:
                    withinRight = None

                if withinRight is not None:
                    if selection is None:
                        selection = withinRight
                    else:
                        NP("logical_and", selection, withinRight, selection)

            if selection is not None:
                if inputMask is not None:
                    # only rows that are VALID in the input may receive a bin value
                    NP("logical_and", selection, NP(inputMask == defs.VALID), selection)
                data[selection] = binValue
                mask[selection] = defs.VALID

        if inputMask is not None:
            mask[NP(inputMask == defs.MISSING)] = defs.MISSING
            mask[NP(inputMask == defs.INVALID)] = defs.INVALID

        data, mask = FieldCastMethods.applyMapMissingTo(fieldType, data, mask, self.get("mapMissingTo"))

        performanceTable.end("Discretize")
        return DataColumn(fieldType, data, mask)

示例#9

0

显示文件

文件： MiningField.py 项目： Huskyeder/augustus

    def replaceField(self, dataTable, functionTable, performanceTable):
        """Replace a field in the DataTable for outlier removal,
        missing value handling, and invalid value treatment.

        If the DataTable has no field named C{self.name}, nothing is done.
        Otherwise the column is optionally re-cast to this MiningField's
        C{optype}, outliers below C{lowValue} / above C{highValue} are handled
        according to the C{outliers} attribute, the invalid-value treatment and
        C{missingValueReplacement} are applied, and the field is replaced in
        C{dataTable.fields}.

        With C{outliers="asExtremeValues"} both bounds are clamped: the data
        array is copied at most once, and the clamp is applied whether or not
        that copy was made by the current bound.

        @type dataTable: DataTable
        @param dataTable: The pre-built DataTable.
        @type functionTable: FunctionTable
        @param functionTable: A table of functions.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        """

        dataColumn = dataTable.fields.get(self.name)
        if dataColumn is None:
            return

        performanceTable.begin("MiningField")

        optype = self.get("optype", dataColumn.fieldType.optype)
        if optype != dataColumn.fieldType.optype:
            dataColumn = FieldCastMethods.cast(FakeFieldType(dataColumn.fieldType.dataType, optype), dataColumn)

        data = dataColumn.data
        mask = dataColumn.mask

        outliers = self.get("outliers")

        lowValue = self.get("lowValue")
        if lowValue is not None:
            lowValue = dataColumn.fieldType.stringToValue(lowValue)

            if outliers == "asMissingValues":
                selection = NP(dataColumn.data < lowValue)
                mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask, selection)

            elif outliers == "asExtremeValues":
                selection = NP(dataColumn.data < lowValue)
                if data is dataColumn.data:
                    # work on a writable copy so the column's own array is left untouched
                    data = NP("copy", data)
                    data.setflags(write=True)
                data[selection] = lowValue

        highValue = self.get("highValue")
        if highValue is not None:
            highValue = dataColumn.fieldType.stringToValue(highValue)

            if outliers == "asMissingValues":
                selection = NP(dataColumn.data > highValue)
                mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask, selection)

            elif outliers == "asExtremeValues":
                selection = NP(dataColumn.data > highValue)
                if data is dataColumn.data:
                    # no copy exists yet (the low bound made none); make it now
                    data = NP("copy", data)
                    data.setflags(write=True)
                # must run even when the low-bound branch already made the copy
                data[selection] = highValue

        mask = FieldCastMethods.applyInvalidValueTreatment(mask, self.get("invalidValueTreatment"))
        data, mask = FieldCastMethods.applyMapMissingTo(dataColumn.fieldType, data, mask, self.get("missingValueReplacement"))

        dataTable.fields.replaceField(self.name, DataColumn(dataColumn.fieldType, data, mask))
        performanceTable.end("MiningField")

示例#10

0

显示文件

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the Discretize expression, using a DataTable as input.

        Every row starts out as C{defaultValue} (or MISSING when no default is
        given).  For each DiscretizeBin child, rows whose input value falls in
        the bin's Interval, and whose input mask is VALID, are assigned the
        bin's C{binValue}.  MISSING and INVALID entries of the input mask are
        then copied onto the output mask before mapMissingTo is applied.

        An input column whose mask is C{None} is treated as having no masked
        rows, so the mask-based filtering and copying steps are skipped.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If the input field is not numeric, or a binValue or Interval margin cannot be converted.
        """

        performanceTable.begin("Discretize")

        dataColumn = dataTable.fields[self["field"]]
        if dataColumn.fieldType.dataType in ("object", "string", "boolean"):
            raise defs.PmmlValidationError("Discretize requires a numeric input field, but \"%s\" is" % dataColumn.fieldType.dataType)

        # A None mask means no row of the input is masked.
        inputMask = dataColumn.mask

        fieldType = FakeFieldType(self.get("dataType", "string"), self.get("optype", self._optype))
        fieldType._newValuesAllowed = True

        defaultValue = self.get("defaultValue")
        if defaultValue is not None:
            defaultValue = fieldType.stringToValue(defaultValue)

        data = NP("empty", len(dataTable), dtype=fieldType.dtype)
        mask = NP("empty", len(dataTable), dtype=defs.maskType)
        if defaultValue is None:
            mask[:] = defs.MISSING
        else:
            data[:] = defaultValue
            mask[:] = defs.VALID

        for discretizeBin in self.childrenOfTag("DiscretizeBin"):
            try:
                binValue = fieldType.stringToValue(discretizeBin["binValue"])
            except ValueError:
                raise defs.PmmlValidationError("Cannot cast DiscretizeBin binValue \"%s\" as %s %s" % (discretizeBin["binValue"], fieldType.optype, fieldType.dataType))

            fieldType.values.append(FakeFieldValue(value=binValue))

            interval = discretizeBin.childOfTag("Interval")

            closure = interval["closure"]
            leftMargin = interval.get("leftMargin")
            rightMargin = interval.get("rightMargin")
            selection = None

            if leftMargin is not None:
                try:
                    leftMargin = dataColumn.fieldType.stringToValue(leftMargin)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % leftMargin)

                # selection is always None here, so the left bound starts it
                if closure in ("openClosed", "openOpen"):
                    selection = NP(leftMargin < dataColumn.data)
                elif closure in ("closedOpen", "closedClosed"):
                    selection = NP(leftMargin <= dataColumn.data)

            if rightMargin is not None:
                try:
                    rightMargin = dataColumn.fieldType.stringToValue(rightMargin)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % rightMargin)

                if closure in ("openOpen", "closedOpen"):
                    withinRight = NP(dataColumn.data < rightMargin)
                elif closure in ("openClosed", "closedClosed"):
                    withinRight = NP(dataColumn.data <= rightMargin)
                else:
                    withinRight = None

                if withinRight is not None:
                    if selection is None:
                        selection = withinRight
                    else:
                        NP("logical_and", selection, withinRight, selection)

            if selection is not None:
                if inputMask is not None:
                    # only rows that are VALID in the input may receive a bin value
                    NP("logical_and", selection, NP(inputMask == defs.VALID), selection)
                data[selection] = binValue
                mask[selection] = defs.VALID

        if inputMask is not None:
            mask[NP(inputMask == defs.MISSING)] = defs.MISSING
            mask[NP(inputMask == defs.INVALID)] = defs.INVALID

        data, mask = FieldCastMethods.applyMapMissingTo(fieldType, data, mask, self.get("mapMissingTo"))

        performanceTable.end("Discretize")
        return DataColumn(fieldType, data, mask)

示例#11

0

显示文件

文件： MiningField.py 项目： soedjais/augustus

    def replaceField(self, dataTable, functionTable, performanceTable):
        """Replace a field in the DataTable for outlier removal,
        missing value handling, and invalid value treatment.

        If the DataTable has no field named C{self.name}, nothing is done.
        Otherwise the column is optionally re-cast to this MiningField's
        C{optype}, outliers below C{lowValue} / above C{highValue} are handled
        according to the C{outliers} attribute, the invalid-value treatment and
        C{missingValueReplacement} are applied, and the field is replaced in
        C{dataTable.fields}.

        With C{outliers="asExtremeValues"} both bounds are clamped: the data
        array is copied at most once, and the clamp is applied whether or not
        that copy was made by the current bound.

        @type dataTable: DataTable
        @param dataTable: The pre-built DataTable.
        @type functionTable: FunctionTable
        @param functionTable: A table of functions.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        """

        dataColumn = dataTable.fields.get(self.name)
        if dataColumn is None:
            return

        performanceTable.begin("MiningField")

        optype = self.get("optype", dataColumn.fieldType.optype)
        if optype != dataColumn.fieldType.optype:
            dataColumn = FieldCastMethods.cast(
                FakeFieldType(dataColumn.fieldType.dataType, optype),
                dataColumn)

        data = dataColumn.data
        mask = dataColumn.mask

        outliers = self.get("outliers")

        lowValue = self.get("lowValue")
        if lowValue is not None:
            lowValue = dataColumn.fieldType.stringToValue(lowValue)

            if outliers == "asMissingValues":
                selection = NP(dataColumn.data < lowValue)
                mask = FieldCastMethods.outliersAsMissing(
                    mask, dataColumn.mask, selection)

            elif outliers == "asExtremeValues":
                selection = NP(dataColumn.data < lowValue)
                if data is dataColumn.data:
                    # work on a writable copy so the column's own array is left untouched
                    data = NP("copy", data)
                    data.setflags(write=True)
                data[selection] = lowValue

        highValue = self.get("highValue")
        if highValue is not None:
            highValue = dataColumn.fieldType.stringToValue(highValue)

            if outliers == "asMissingValues":
                selection = NP(dataColumn.data > highValue)
                mask = FieldCastMethods.outliersAsMissing(
                    mask, dataColumn.mask, selection)

            elif outliers == "asExtremeValues":
                selection = NP(dataColumn.data > highValue)
                if data is dataColumn.data:
                    # no copy exists yet (the low bound made none); make it now
                    data = NP("copy", data)
                    data.setflags(write=True)
                # must run even when the low-bound branch already made the copy
                data[selection] = highValue

        mask = FieldCastMethods.applyInvalidValueTreatment(
            mask, self.get("invalidValueTreatment"))
        data, mask = FieldCastMethods.applyMapMissingTo(
            dataColumn.fieldType, data, mask,
            self.get("missingValueReplacement"))

        dataTable.fields.replaceField(
            self.name, DataColumn(dataColumn.fieldType, data, mask))
        performanceTable.end("MiningField")

示例#12

0

显示文件

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the NormContinuous expression, using a DataTable as input.

        The LinearNorm children are ordered by their C{orig} attribute.  Each
        consecutive pair is handed to C{self.transformSelection} together with
        the rows whose input value lies in C{(lower.orig, upper.orig]}.  Rows
        at or below the first C{orig}, and rows above the last C{orig}, are
        handled according to the C{outliers} attribute: "asMissingValues"
        routes them through C{FieldCastMethods.outliersAsMissing},
        "asExtremeValues" assigns the first/last C{norm}, and anything else
        passes them to C{self.transformSelection} with the nearest pair.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If the input field's dataType is "object", "string", or "boolean".
        """

        performanceTable.begin("NormContinuous")

        dataColumn = dataTable.fields[self["field"]]
        if dataColumn.fieldType.dataType in ("object", "string", "boolean"):
            raise defs.PmmlValidationError(
                "NormContinuous requires a numeric input field, but \"%s\" is"
                % dataColumn.fieldType.dataType)

        outliers = self.get("outliers")

        linearNorms = self.childrenOfTag("LinearNorm")
        for linearNorm in linearNorms:
            linearNorm.orig = float(linearNorm["orig"])
            linearNorm.norm = float(linearNorm["norm"])

        # technically, it's invalid if not already sorted
        # (key-based sort: same stable ordering as the old cmp comparator)
        linearNorms.sort(key=lambda linearNorm: linearNorm.orig)

        data = NP("empty", len(dataTable), self._fieldType.dtype)
        mask = dataColumn.mask

        # extrapolate before the first
        selection = NP(dataColumn.data <= linearNorms[0].orig)
        if outliers == "asMissingValues":
            mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask,
                                                      selection)
        elif outliers == "asExtremeValues":
            data[selection] = linearNorms[0].norm
        else:
            self.transformSelection(linearNorms[0], linearNorms[1],
                                    dataColumn.data, data, selection)

        # interior segments: lower.orig < value <= upper.orig
        for lower, upper in zip(linearNorms[:-1], linearNorms[1:]):
            selection = NP(lower.orig < dataColumn.data)
            NP("logical_and", selection,
               NP(dataColumn.data <= upper.orig), selection)

            self.transformSelection(lower, upper,
                                    dataColumn.data, data, selection)

        # extrapolate after the last
        selection = NP(linearNorms[-1].orig < dataColumn.data)
        if outliers == "asMissingValues":
            mask = FieldCastMethods.outliersAsMissing(mask, dataColumn.mask,
                                                      selection)
        elif outliers == "asExtremeValues":
            data[selection] = linearNorms[-1].norm
        else:
            self.transformSelection(linearNorms[-2], linearNorms[-1],
                                    dataColumn.data, data, selection)

        data, mask = FieldCastMethods.applyMapMissingTo(
            self._fieldType, data, mask, self.get("mapMissingTo"))

        performanceTable.end("NormContinuous")
        return DataColumn(self._fieldType, data, mask)

示例#13

0

显示文件

文件： MapValues.py 项目： Huskyeder/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the MapValues expression, using a DataTable as input.

        Each row of the embedded table maps a combination of input column
        values to an output value.  For every table row, the DataTable rows
        that match all of the row's mapped columns (and are VALID in those
        columns) are assigned the row's C{outputColumn} value.  Rows never
        matched keep C{defaultValue}, or are marked MISSING when no default
        is given.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If a table row has no entry for C{outputColumn}.
        """

        performanceTable.begin("MapValues")
        
        fieldType = FakeFieldType(self.get("dataType", "string"), self.get("optype", self._optype))
        fieldType._newValuesAllowed = True

        defaultValue = self.get("defaultValue")
        if defaultValue is not None:
            defaultValue = fieldType.stringToValue(defaultValue)

        data = NP("empty", len(dataTable), dtype=fieldType.dtype)
        if defaultValue is not None:
            data[:] = defaultValue

        # map each table column name to the DataTable column it is paired with
        outputColumn = self["outputColumn"]
        columnNameToField = {}
        for fieldColumnPair in self.childrenOfTag("FieldColumnPair"):
            dataColumn = dataTable.fields[fieldColumnPair["field"]]
            columnNameToField[fieldColumnPair["column"]] = dataColumn

        # cache partial selections because they'll be used over and over in intersecting sets
        dataSelections = {}
        missingSelections = {}
        coverage = NP("zeros", len(dataTable), dtype=NP.dtype(bool))

        for index, row in enumerate(self.childOfClass(TableInterface).iterate()):
            outputValue = row.get(outputColumn)
            if outputValue is None:
                raise defs.PmmlValidationError("MapValues has outputColumn \"%s\" but a column with that name does not appear in row %d of the table" % (outputColumn, index))
            del row[outputColumn]
            outputValue = fieldType.stringToValue(outputValue)

            # this is an intersection of all matching columns
            selection = NP("ones", len(dataTable), dtype=NP.dtype(bool))

            for columnName, columnValueString in row.items():
                dataColumn = columnNameToField.get(columnName)
                if dataColumn is not None:
                    columnValue = dataColumn.fieldType.stringToValue(columnValueString)

                    # one cached data array per column (name, value) pair
                    if (columnName, columnValueString) not in dataSelections:
                        selectData = NP(dataColumn.data == columnValue)
                        if dataColumn.mask is not None:
                            NP("logical_and", selectData, NP(dataColumn.mask == defs.VALID), selectData)
                        dataSelections[columnName, columnValueString] = selectData
                    NP("logical_and", selection, dataSelections[columnName, columnValueString], selection)

                    # one cached mask array per column name ("missing" has only one possible value, though I consider any non-VALID "missing")
                    if columnName not in missingSelections and dataColumn.mask is not None:
                        missingSelections[columnName] = NP(dataColumn.mask != defs.VALID)
                        
            # set the intersection to the output value
            data[selection] = outputValue
            NP("logical_or", coverage, selection, coverage)
        
        # union of the non-VALID rows of every column that took part in a match
        missing = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        for missingSelection in missingSelections.values():
            NP("logical_or", missing, missingSelection, missing)

        # Boolean "-" on NumPy arrays was element-wise XOR in old NumPy and
        # raises TypeError in NumPy >= 1.13; "^=" gives the same legacy result
        # on every version.
        # NOTE(review): XOR also switches coverage ON for rows that are missing
        # but were never matched -- confirm that is intended rather than AND-NOT.
        coverage ^= missing

        mask = missing * defs.MISSING

        data, mask = FieldCastMethods.applyMapMissingTo(fieldType, data, mask, self.get("mapMissingTo"))

        if defaultValue is None:
            # without a default, every row not flagged in coverage becomes MISSING
            NP("logical_not", coverage, coverage)
            if mask is None:
                mask = NP(coverage * defs.MISSING)
            else:
                mask[coverage] = defs.MISSING

        performanceTable.end("MapValues")
        return DataColumn(fieldType, data, mask)

示例#14

0

显示文件

文件： MapValues.py 项目： soedjais/augustus

    def evaluate(self, dataTable, functionTable, performanceTable):
        """Evaluate the expression, using a DataTable as input.

        Each row of the child table is compared against the input fields
        named by the FieldColumnPair children.  Records for which every
        paired column equals the row's value (and is VALID) receive that
        row's C{outputColumn} value.  If a C{defaultValue} attribute is
        present it pre-fills the output; otherwise records that end up
        outside the coverage selection are flagged MISSING.  The
        C{mapMissingTo} attribute is handed to
        C{FieldCastMethods.applyMapMissingTo}.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.  Not referenced by this method.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: The result of the calculation as a DataColumn.
        @raise PmmlValidationError: If a row of the table has no entry for the C{outputColumn}.
        """

        performanceTable.begin("MapValues")

        # The output type comes from the element's own attributes, falling
        # back to "string" and the instance's default optype.
        fieldType = FakeFieldType(self.get("dataType", "string"),
                                  self.get("optype", self._optype))
        fieldType._newValuesAllowed = True

        defaultValue = self.get("defaultValue")
        if defaultValue is not None:
            defaultValue = fieldType.stringToValue(defaultValue)

        # Start from an uninitialized array; only pre-fill it when there is
        # a default.  Without a default, unfilled entries are masked below.
        data = NP("empty", len(dataTable), dtype=fieldType.dtype)
        if defaultValue is not None:
            data[:] = defaultValue

        # Map each table column name to the input DataColumn it is paired with.
        outputColumn = self["outputColumn"]
        columnNameToField = {}
        for fieldColumnPair in self.childrenOfTag("FieldColumnPair"):
            dataColumn = dataTable.fields[fieldColumnPair["field"]]
            columnNameToField[fieldColumnPair["column"]] = dataColumn

        # cache partial selections because they'll be used over and over in intersecting sets
        dataSelections = {}
        missingSelections = {}
        coverage = NP("zeros", len(dataTable), dtype=NP.dtype(bool))

        for index, row in enumerate(
                self.childOfClass(TableInterface).iterate()):
            outputValue = row.get(outputColumn)
            if outputValue is None:
                raise defs.PmmlValidationError(
                    "MapValues has outputColumn \"%s\" but a column with that name does not appear in row %d of the table"
                    % (outputColumn, index))
            # Remove the output entry so that only candidate input columns
            # remain when iterating over the row below.
            del row[outputColumn]
            outputValue = fieldType.stringToValue(outputValue)

            # this is an intersection of all matching columns
            selection = NP("ones", len(dataTable), dtype=NP.dtype(bool))

            for columnName, columnValueString in row.items():
                dataColumn = columnNameToField.get(columnName)
                # Table columns with no FieldColumnPair are ignored.
                if dataColumn is not None:
                    columnValue = dataColumn.fieldType.stringToValue(
                        columnValueString)

                    # one cached data array per column (name, value) pair
                    if (columnName, columnValueString) not in dataSelections:
                        selectData = NP(dataColumn.data == columnValue)
                        if dataColumn.mask is not None:
                            # Only VALID entries may count as a match.
                            NP("logical_and", selectData,
                               NP(dataColumn.mask == defs.VALID), selectData)
                        dataSelections[columnName,
                                       columnValueString] = selectData
                    NP("logical_and", selection,
                       dataSelections[columnName,
                                      columnValueString], selection)

                    # one cached mask array per column name ("missing" has only one possible value, though I consider any non-VALID "missing")
                    if columnName not in missingSelections and dataColumn.mask is not None:
                        missingSelections[columnName] = NP(
                            dataColumn.mask != defs.VALID)

            # set the intersection to the output value
            data[selection] = outputValue
            NP("logical_or", coverage, selection, coverage)

        # A record is "missing" if any of the referenced input columns is
        # non-VALID for it.
        missing = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        for missingSelection in missingSelections.values():
            NP("logical_or", missing, missingSelection, missing)

        # The original "coverage -= missing" relied on legacy NumPy treating
        # boolean subtraction as element-wise XOR; NumPy >= 1.13 raises
        # TypeError for it.  "^=" yields the same values on every version.
        # NOTE(review): XOR also turns coverage *on* for uncovered records
        # with a missing input -- confirm this is intended rather than
        # "covered and not missing".
        coverage ^= missing

        # Boolean array times defs.MISSING: defs.MISSING where an input was
        # missing, 0 elsewhere (presumably defs.VALID -- confirm).
        mask = missing * defs.MISSING

        data, mask = FieldCastMethods.applyMapMissingTo(
            fieldType, data, mask, self.get("mapMissingTo"))

        if defaultValue is None:
            # Invert coverage in place and flag those records MISSING, since
            # no default value was written into them.
            NP("logical_not", coverage, coverage)
            if mask is None:
                mask = NP(coverage * defs.MISSING)
            else:
                mask[coverage] = defs.MISSING

        performanceTable.end("MapValues")
        return DataColumn(fieldType, data, mask)