def testClusters(self, x):
    """
    Debug helper: print cluster intersection ratios for `x`.

    Prints, in order:
      * the number of entries in self.clusters,
      * core.clusterIntersectionRatio(x, c) for every stored cluster c,
      * the ratio for the cluster selected by self.model.predict(x),
      * the ratio for a cluster built directly from x via
        self.buildCluster(x).

    Parameters:
        x: the sample handed to self.model.predict, self.buildCluster and
           core.clusterIntersectionRatio (presumably a color histogram --
           confirm against callers).

    Returns:
        None; all results are written to stdout.
    """
    predicted = self.model.predict(x)

    # Each print receives ONE pre-formatted argument so the emitted text is
    # the same as the original `print a, b` statement form.
    print('%s %s' % ('Number of clusters: ', len(self.clusters)))

    # Index-based access is kept on purpose: self.clusters is also indexed
    # by the model prediction below, so it is only assumed to be indexable.
    for idx in range(len(self.clusters)):
        print(core.clusterIntersectionRatio(x, self.clusters[idx]))

    # Look the predicted cluster up before printing its header, so a bad
    # prediction fails before any misleading output appears.
    predictedCluster = self.clusters[predicted]
    print('This should be better:')
    print(core.clusterIntersectionRatio(x, predictedCluster))

    print('This should be even better:')
    freshCluster = self.buildCluster(x)
    print(core.clusterIntersectionRatio(x, freshCluster))
def test(data, recommender, fractionTrain=.8, highFactor=.1, verbose=False):
    """
    Measure how often `recommender` recovers a color removed from a histogram.

    Parameters:
        data: the data set handed to ml.splitData.
        recommender: object providing fit(histograms, colors) and
            predict(histogram). If it also provides cluster(histogram), the
            cluster intersection ratio is accumulated as a diagnostic;
            failures of that optional step are tolerated.
        fractionTrain: a float forwarded to ml.splitData (the fraction of
            the data to use as train).
        highFactor: forwarded unchanged to removeColors (described by the
            original author as the ratio of an element to the largest
            element of the histogram).
        verbose: when True, print per-sample details.

    Returns:
        The fraction of evaluated test samples for which the recommended
        color equals the removed color. Returns 0.0 when no test sample was
        evaluated (empty test split or every sample ignored).

    Algorithm:
        Splits the data, removes one color from every train histogram with
        removeColors and fits the recommender on the result. The same
        removal is applied to the test split, then for each test sample the
        recommender predicts the removed color from the remaining
        histogram. Samples are skipped when the removed color occurs more
        often than mean + one standard deviation of the per-color removal
        counts, or when the histogram value at the removed color exceeds
        0.4.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    n = xTest.shape[0]

    train_colors, _, train_histograms = removeColors(xTrain, highFactor=highFactor)
    recommender.fit(train_histograms, train_colors)
    if verbose:
        print('Done fitting')

    colors, quantities, histograms = removeColors(xTest, highFactor=highFactor)
    assert(colors.shape[0] == n)
    assert(histograms.shape[0] == n)

    # count[c] = number of test samples whose removed color is c.
    D = histograms.shape[1]
    count = np.zeros(D)
    for color in colors:
        count[color] += 1

    # Statistics over the colors that were removed at least once.
    tmp = count[np.where(count > 0)]
    color_mean = np.mean(tmp)
    color_stdev = np.std(tmp)
    print('%s %s %s' % ('Color mean and stdev', color_mean, color_stdev))

    # NOTE(review): entries of skipped samples stay 0 and are still passed
    # to colorError below -- confirm that this is intended.
    recommendedColors = np.zeros((n))
    numCorrect = 0
    ignored = 0
    intersectionRatio = 0.

    for i in range(n):
        if i % 100 == 1:
            # ignored <= i at this point, so the denominator is at least 1.
            print('Partial %d: %f' % (i, float(numCorrect) / (i - ignored + 1)))

        color, amount = colors[i], quantities[i]

        # Ignore colors that might bias us
        if count[color] > color_mean + color_stdev:
            ignored += 1
            continue

        hist = histograms[i]

        # Ignore colors that are basically the background
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # NOTE(review): `names` is not defined in this function;
            # presumably a module-level sequence of site names -- confirm.
            print('Testing site %s' % names[i])
            print('Amount remmoved %d' % amount)

        # The cluster diagnostic is best-effort: a failure here must not
        # abort the evaluation. Only Exception is caught so that
        # KeyboardInterrupt / SystemExit still stop a long run.
        try:
            cluster = recommender.cluster(hist)
            intersectionRatio += core.clusterIntersectionRatio(hist, cluster)
        except Exception as err:
            if verbose:
                print('Cluster intersection skipped: %s' % err)

        recommendedColor = recommender.predict(hist)

        r1, g1, b1 = image.binToRGB(color)
        r2, g2, b2 = image.binToRGB(recommendedColor)
        if verbose:
            print('Removed color %d %d %d. Recommended color %d %d %d.' % (r1, g1, b1, r2, g2, b2))
            print('Color distance: %d' % (image.binDistance(recommendedColor, color)))

        recommendedColors[i] = recommendedColor
        if verbose:
            print('Recommended color %d' % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    used = n - ignored
    print('Ignored: %d. Used: %d' % (ignored, used))

    # Guard the averages: with no evaluated sample both ratios are
    # undefined and previously raised ZeroDivisionError.
    if used > 0:
        meanIntersection = intersectionRatio / used
        percentCorrect = float(numCorrect) / used
    else:
        meanIntersection = 0.
        percentCorrect = 0.

    print('Mean cluster intersection ratio: %f' % meanIntersection)
    print(colorError(colors, recommendedColors))
    return percentCorrect