def mapper(self, dataTable):
    """Score a table with the clustering model and emit per-cluster sums.

    For every cluster in C{self.clusters}, the rows whose score equals
    that cluster's name (and whose score is marked C{defs.VALID}, when
    the score has a mask) are selected.  One record is emitted per
    cluster, keyed by the cluster name, holding two dictionaries indexed
    by field name: C{"numer"}, the sum of that field over the selected
    rows, and C{"denom"}, the result of C{selection.sum()} (presumably
    the number of selected rows -- confirm that the comparison yields a
    boolean array).

    @type dataTable: object providing C{subTable()}, C{score} and C{fields} (presumably a DataTable; confirm against the caller)
    @param dataTable: The input data; only a sub-table of it is scored.
    """

    # Score a sub-table to ensure that the results of this calculation
    # do not get propagated.
    workingTable = dataTable.subTable()
    self.metadata["ClusteringModel"].calculate(workingTable, performanceTable=self.performanceTable)

    score = workingTable.score
    scoreData = score.data
    scoreMask = score.mask
    toValue = score.fieldType.stringToValue

    for position, cluster in enumerate(self.clusters, 1):
        # Clusters without an explicit id are named by their 1-based position.
        clusterName = cluster.get("id", "%d" % position)

        members = NP(scoreData == toValue(clusterName))
        if scoreMask is not None:
            # In-place AND (third argument is the output array): drop
            # rows whose score is not VALID.
            NP("logical_and", members, NP(scoreMask == defs.VALID), members)

        memberTotal = members.sum()

        numer = dict.fromkeys(self.fieldNames, 0.0)
        denom = dict.fromkeys(self.fieldNames, 0.0)
        for fieldName in self.fieldNames:
            numer[fieldName] += workingTable.fields[fieldName].data[members].sum()
            denom[fieldName] += memberTotal

        self.emit(clusterName, {"numer": numer, "denom": denom})
def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
    """Draw a smooth curve through numeric points with a locally linear fit.

    At each sample location a straight line is fitted to all of the
    points by weighted least squares, where the weights are a Gaussian
    centered on the sample.  The fitted line supplies both the curve
    position and its slope at that location.

    @type xarray: 1d Numpy array of numbers
    @param xarray: The x values of the points.
    @type yarray: 1d Numpy array of numbers
    @param yarray: The y values of the points.
    @type samples: 1d Numpy array of numbers
    @param samples: The x locations at which the curve and its derivative are evaluated.
    @type smoothingScale: number
    @param smoothingScale: Standard deviation of the Gaussian weighting kernel.
    @type loop: bool
    @param loop: If False, the derivatives at the first and last samples are set to zero so that the end of the curve is not connected to its beginning.
    @rtype: 4-tuple of 1d Numpy arrays
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
    """

    gaussianNorm = math.sqrt(2.0 * math.pi)

    fittedValues = []
    fittedSlopes = []
    for center in samples:
        # Gaussian weight of every data point relative to this sample.
        squaredDistance = NP("power", NP(xarray - center), 2)
        exponent = NP(NP(-0.5 * squaredDistance) / NP(smoothingScale * smoothingScale))
        kernel = NP(NP(NP("exp", exponent) / smoothingScale) / gaussianNorm)

        # Weighted moments entering the least-squares normal equations.
        moment0 = kernel.sum()
        momentX = NP(kernel * xarray).sum()
        momentXX = NP(kernel * NP(xarray * xarray)).sum()
        momentY = NP(kernel * yarray).sum()
        momentXY = NP(kernel * NP(xarray * yarray)).sum()

        determinant = (moment0 * momentXX) - (momentX * momentX)
        intercept = ((momentXX * momentY) - (momentX * momentXY)) / determinant
        slope = ((moment0 * momentXY) - (momentX * momentY)) / determinant

        fittedValues.append(intercept + (center * slope))
        fittedSlopes.append(slope)

    xlist = samples
    ylist = NP("array", fittedValues, dtype=NP.dtype(float))

    # Half the distance between each sample's two neighbors (wrapping
    # around at the ends); the slope is scaled by it to give dy.
    dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
    dylist = NP("array", fittedSlopes, dtype=NP.dtype(float)) * dxlist

    if not loop:
        # The wrapped-around neighbors are meaningless for an open curve.
        for endpoint in (0, -1):
            dxlist[endpoint] = 0.0
            dylist[endpoint] = 0.0

    return xlist, ylist, dxlist, dylist
def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
    """Smooth a set of numeric points into a curve with positions and derivatives.

    This is a non-parametric, locally linear fit: for every sample
    location, a line is fitted to the points with Gaussian weights
    centered on that location, and the line's value and slope there
    become the curve's position and derivative.

    @type xarray: 1d Numpy array of numbers
    @param xarray: Array of x values of the points.
    @type yarray: 1d Numpy array of numbers
    @param yarray: Array of y values of the points.
    @type samples: 1d Numpy array of numbers
    @param samples: Locations at which the best-fit positions and derivatives are computed.
    @type smoothingScale: number
    @param smoothingScale: Standard deviation of the Gaussian kernel that weights the points.
    @type loop: bool
    @param loop: If False, zero the derivatives at both ends so that the end of the fitted curve is disconnected from the beginning.
    @rtype: 4-tuple of 1d Numpy arrays
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
    """

    sqrtTwoPi = math.sqrt(2.0 * math.pi)

    def fitAt(location):
        # Weighted least-squares line around one sample location;
        # returns the line's value at that location and its slope.
        offsets = NP(xarray - location)
        exponent = NP(NP(-0.5 * NP("power", offsets, 2)) / NP(smoothingScale * smoothingScale))
        w = NP(NP(NP("exp", exponent) / smoothingScale) / sqrtTwoPi)

        sumW = w.sum()
        sumWX = NP(w * xarray).sum()
        sumWXX = NP(w * NP(xarray * xarray)).sum()
        sumWY = NP(w * yarray).sum()
        sumWXY = NP(w * NP(xarray * yarray)).sum()

        denominator = (sumW * sumWXX) - (sumWX * sumWX)
        intercept = ((sumWXX * sumWY) - (sumWX * sumWXY)) / denominator
        slope = ((sumW * sumWXY) - (sumWX * sumWY)) / denominator
        return intercept + (location * slope), slope

    fits = [fitAt(location) for location in samples]

    xlist = samples
    ylist = NP("array", [value for value, slope in fits], dtype=NP.dtype(float))

    # Centered difference of neighboring sample locations, wrapping
    # around at the ends; slopes are multiplied by it to obtain dy.
    dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
    dylist = NP("array", [slope for value, slope in fits], dtype=NP.dtype(float)) * dxlist

    if not loop:
        # An open curve has no neighbor across the wrap-around.
        dxlist[0] = dxlist[-1] = 0.0
        dylist[0] = dylist[-1] = 0.0

    return xlist, ylist, dxlist, dylist