示例#1
0
    def mapper(self, dataTable):
        dataTable = dataTable.subTable()  # ensure that the results of this calculation do not get propagated

        self.metadata["ClusteringModel"].calculate(dataTable, performanceTable=self.performanceTable)

        data = dataTable.score.data
        mask = dataTable.score.mask
        stringToValue = dataTable.score.fieldType.stringToValue
        for index, cluster in enumerate(self.clusters):
            clusterName = cluster.get("id", "%d" % (index + 1))
            value = stringToValue(clusterName)

            selection = NP(data == value)
            if mask is not None:
                NP("logical_and", selection, NP(mask == defs.VALID), selection)

            denominator = selection.sum()

            numer = dict((fieldName, 0.0) for fieldName in self.fieldNames)
            denom = dict((fieldName, 0.0) for fieldName in self.fieldNames)

            for fieldName in self.fieldNames:
                numer[fieldName] += dataTable.fields[fieldName].data[selection].sum()
                denom[fieldName] += denominator

            self.emit(clusterName, {"numer": numer, "denom": denom})
示例#2
0
    def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
        """Fit a smooth line through a set of given numeric points
        with a characteristic smoothing scale.

        This is a non-parametric locally linear fit, used to plot data
        as a smooth line.

        @type xarray: 1d Numpy array of numbers
        @param xarray: Array of x values.
        @type yarray: 1d Numpy array of numbers
        @param yarray: Array of y values.
        @type samples: 1d Numpy array of numbers
        @param samples: Locations at which to fit the C{xarray} and C{yarray} with best-fit positions and derivatives.
        @type smoothingScale: number
        @param smoothingScale: Standard deviation of the Gaussian kernel used to smooth the locally linear fit.
        @type loop: bool
        @param loop: If False, disconnect the end of the fitted curve from the beginning.
        @rtype: 4-tuple of 1d Numpy arrays
        @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
        """

        ylist = []
        dylist = []

        for sample in samples:
            weights = NP(
                NP(
                    NP(
                        "exp",
                        NP(
                            NP(-0.5 * NP("power", NP(xarray - sample), 2)) /
                            NP(smoothingScale * smoothingScale))) /
                    smoothingScale) / (math.sqrt(2.0 * math.pi)))
            sum1 = weights.sum()
            sumx = NP(weights * xarray).sum()
            sumxx = NP(weights * NP(xarray * xarray)).sum()
            sumy = NP(weights * yarray).sum()
            sumxy = NP(weights * NP(xarray * yarray)).sum()

            delta = (sum1 * sumxx) - (sumx * sumx)
            intercept = ((sumxx * sumy) - (sumx * sumxy)) / delta
            slope = ((sum1 * sumxy) - (sumx * sumy)) / delta

            ylist.append(intercept + (sample * slope))
            dylist.append(slope)

        xlist = samples
        ylist = NP("array", ylist, dtype=NP.dtype(float))
        dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
        dylist = NP("array", dylist, dtype=NP.dtype(float)) * dxlist
        if not loop:
            dxlist[0] = 0.0
            dxlist[-1] = 0.0
            dylist[0] = 0.0
            dylist[-1] = 0.0

        return xlist, ylist, dxlist, dylist
示例#3
0
    def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
        """Fit a smooth line through a set of given numeric points
        with a characteristic smoothing scale.

        This is a non-parametric locally linear fit, used to plot data
        as a smooth line.

        @type xarray: 1d Numpy array of numbers
        @param xarray: Array of x values.
        @type yarray: 1d Numpy array of numbers
        @param yarray: Array of y values.
        @type samples: 1d Numpy array of numbers
        @param samples: Locations at which to fit the C{xarray} and C{yarray} with best-fit positions and derivatives.
        @type smoothingScale: number
        @param smoothingScale: Standard deviation of the Gaussian kernel used to smooth the locally linear fit.
        @type loop: bool
        @param loop: If False, disconnect the end of the fitted curve from the beginning.
        @rtype: 4-tuple of 1d Numpy arrays
        @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
        """

        ylist = []
        dylist = []

        for sample in samples:
            weights = NP(NP(NP("exp", NP(NP(-0.5 * NP("power", NP(xarray - sample), 2)) / NP(smoothingScale * smoothingScale))) / smoothingScale) / (math.sqrt(2.0*math.pi)))
            sum1 = weights.sum()
            sumx = NP(weights * xarray).sum()
            sumxx = NP(weights * NP(xarray * xarray)).sum()
            sumy = NP(weights * yarray).sum()
            sumxy = NP(weights * NP(xarray * yarray)).sum()

            delta = (sum1 * sumxx) - (sumx * sumx)
            intercept = ((sumxx * sumy) - (sumx * sumxy)) / delta
            slope = ((sum1 * sumxy) - (sumx * sumy)) / delta

            ylist.append(intercept + (sample * slope))
            dylist.append(slope)

        xlist = samples
        ylist = NP("array", ylist, dtype=NP.dtype(float))
        dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
        dylist = NP("array", dylist, dtype=NP.dtype(float)) * dxlist
        if not loop:
            dxlist[0] = 0.0
            dxlist[-1] = 0.0
            dylist[0] = 0.0
            dylist[-1] = 0.0

        return xlist, ylist, dxlist, dylist