def show(data, recommender, fractionTrain=0.8, highFactor=0.1, verbose=False, plot=False):
    """Fit ``recommender`` and report how often it recovers a removed color.

    The data and the module-level ``names`` are split into train and test
    parts with ``ml.splitData``.  ``SAMPLE_SIZE`` rows of the test part are
    then drawn with ``np.random.choice`` (called with its defaults, i.e.
    sampling with replacement).  ``tester.removeColors`` removes one color
    from every histogram and the recommender is asked to predict that color
    from the remaining histogram.

    Parameters:
        data: array accepted by ``ml.splitData``; the test part must be 2-D
            (it is unpacked as ``n, D = xTest.shape``).
        recommender: object providing ``fit``, ``predict`` and ``cluster``.
            ``fitWithPlot`` is tried first and ``fit`` is the fallback;
            ``clusterNames`` is used on a best-effort basis.
        fractionTrain: training share handed to ``ml.splitData``.
        highFactor: forwarded to ``tester.removeColors``.  According to the
            original description it is the target ratio between the removed
            bin and the largest bin of the histogram.
        verbose: print per-sample details.
        plot: collect removed/recommended colors and site names and pass
            them to ``plotRecommend`` at the end.

    Returns:
        The fraction of non-ignored samples for which the predicted color
        equals the removed color, or 0.0 when every sample was ignored.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    train_names, test_names = ml.splitData(np.array(names), fractionTrain)
    assert len(train_names) == len(xTrain)
    assert len(test_names) == len(xTest)

    # Evaluate on a random sample of the test split only.
    n, D = xTest.shape
    indices = np.random.choice(n, SAMPLE_SIZE)
    xTest = xTest[indices, :]
    test_names = test_names[indices]
    assert xTest.shape == (SAMPLE_SIZE, D)
    assert test_names.shape == (SAMPLE_SIZE,)
    n = SAMPLE_SIZE

    train_colors, _, train_histograms = tester.removeColors(xTrain, highFactor=highFactor)
    try:
        recommender.fitWithPlot(train_histograms, train_colors, train_names)
    except Exception:
        # Fall back to the plain fit when the plotting variant fails.
        # ``Exception`` (not a bare except) lets Ctrl-C still abort the run.
        print(train_colors)
        recommender.fit(train_histograms, train_colors)
    if verbose:
        print("Done fitting")

    colors, quantities, histograms = tester.removeColors(xTest, highFactor=highFactor)
    assert colors.shape[0] == n
    assert histograms.shape[0] == n

    numCorrect = 0
    if plot:
        colorRecommend = []
        namesRecommend = []
        colorRemoved = []

    # How many sampled sites had each color bin removed.
    count = np.zeros(histograms.shape[1])
    for color in colors:
        count[color] += 1

    clusters = []
    # NOTE(review): slots of ignored samples stay 0 and are still passed to
    # tester.colorError below -- confirm that this is intended.
    recommendedColors = np.zeros((n))
    ignored = 0
    for i in range(n):
        if i % 100 == 1:
            # Running accuracy over the samples evaluated so far, excluding
            # ignored ones so that it matches the final metric.
            used = i - ignored
            partial = float(numCorrect) / used if used else 0.0
            print("Partial %d: %f" % (i, partial))

        color, amount = colors[i], quantities[i]
        # Ignore colors that might bias us
        if count[color] > 10:
            ignored += 1
            continue
        hist = histograms[i]
        # Ignore colors that are basically the background
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # test_names is aligned with the resampled test rows; the global
            # ``names`` list is not.
            print("Testing site %s" % test_names[i])
            print("Amount remmoved %d" % amount)

        try:
            clusters.append(recommender.clusterNames(hist))
        except Exception:
            # Best effort: presumably not every recommender can name its
            # clusters, in which case nothing is recorded for this sample.
            pass

        # Dump the 15 largest bins of the query and of every histogram in
        # the cluster returned for it.
        cluster = recommender.cluster(hist)
        print("Incoming x")
        for j in ml.maxArgs(hist, 15):
            print("%s %s" % (j, hist[j]))
        for x in cluster:
            print("Another x")
            for j in ml.maxArgs(x, 15):
                print("%s %s" % (j, x[j]))

        recommendedColor = recommender.predict(hist)
        print(recommendedColor)
        recommendedColors[i] = recommendedColor

        # for plotting purposes
        if plot:
            colorRemoved.append(color)
            colorRecommend.append(recommendedColor)
            namesRecommend.append(test_names[i])

        if verbose:
            r1, g1, b1 = image.binToRGB(color)
            r2, g2, b2 = image.binToRGB(recommendedColor)
            print("Removed color %d %d %d. Recommended color %d %d %d." % (r1, g1, b1, r2, g2, b2))
            print("Color distance: %d" % (image.binDistance(recommendedColor, color)))
            print("Recommended color %d" % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    print("Ignored: %d" % ignored)
    print(tester.colorError(colors, recommendedColors))
    if plot:
        plotRecommend(colorRemoved, colorRecommend, namesRecommend, clusters)

    used = n - ignored
    if not used:
        # Every sample was filtered out; there is no accuracy to report.
        return 0.0
    return float(numCorrect) / used
def test(data, recommender, fractionTrain=.8, highFactor=.1, verbose=False):
    """Fit ``recommender`` and measure how often it recovers a removed color.

    The data is split into train and test parts with ``ml.splitData``.
    ``removeColors`` removes one color from every histogram; the recommender
    is fitted on the training histograms and then asked to predict the
    removed color of every test histogram.  Samples whose removed color is
    unusually frequent in the test set, or that held more than 0.4 of the
    histogram, are skipped.

    Parameters:
        data: array accepted by ``ml.splitData``.
        recommender: object providing ``fit`` and ``predict``.  ``cluster``
            is used on a best-effort basis for the intersection statistic.
        fractionTrain: training share handed to ``ml.splitData``.
        highFactor: forwarded to ``removeColors``.  According to the original
            description it is the target ratio between the removed bin and
            the largest bin of the histogram.
        verbose: print per-sample details.

    Returns:
        The fraction of non-ignored test samples for which the predicted
        color equals the removed color, or 0.0 when no sample was evaluated.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    n = xTest.shape[0]

    train_colors, _, train_histograms = removeColors(xTrain, highFactor=highFactor)
    recommender.fit(train_histograms, train_colors)
    if verbose:
        print('Done fitting')

    colors, quantities, histograms = removeColors(xTest, highFactor=highFactor)
    assert colors.shape[0] == n
    assert histograms.shape[0] == n

    numCorrect = 0

    # How many test sites had each color bin removed, and the mean/stdev of
    # those counts over the bins that were removed at least once.
    count = np.zeros(histograms.shape[1])
    for color in colors:
        count[color] += 1
    tmp = count[np.where(count > 0)]
    color_mean = np.mean(tmp)
    color_stdev = np.std(tmp)
    print('Color mean and stdev %s %s' % (color_mean, color_stdev))

    # NOTE(review): slots of ignored samples stay 0 and are still passed to
    # colorError below -- confirm that this is intended.
    recommendedColors = np.zeros((n))
    ignored = 0
    intersectionRatio = 0.
    for i in range(n):
        if i % 100 == 1:
            # Running accuracy over the samples actually evaluated so far.
            used = i - ignored
            partial = float(numCorrect) / used if used else 0.0
            print('Partial %d: %f' % (i, partial))

        color, amount = colors[i], quantities[i]
        # Ignore colors that might bias us
        if count[color] > color_mean + color_stdev:
            ignored += 1
            continue
        hist = histograms[i]
        # Ignore colors that are basically the background
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # NOTE(review): ``names`` is a module-level list indexed here by
            # the position inside the test split -- confirm it lines up.
            print('Testing site %s' % names[i])
            print('Amount remmoved %d' % amount)

        try:
            cluster = recommender.cluster(hist)
            intersectionRatio += core.clusterIntersectionRatio(hist, cluster)
        except Exception:
            # Best effort: the statistic is skipped for this sample when the
            # recommender cannot produce a cluster.  ``Exception`` (not a
            # bare except) lets Ctrl-C still abort the run.
            pass

        recommendedColor = recommender.predict(hist)
        recommendedColors[i] = recommendedColor

        if verbose:
            r1, g1, b1 = image.binToRGB(color)
            r2, g2, b2 = image.binToRGB(recommendedColor)
            print('Removed color %d %d %d. Recommended color %d %d %d.' % (r1, g1, b1, r2, g2, b2))
            print('Color distance: %d' % (image.binDistance(recommendedColor, color)))
            print('Recommended color %d' % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    used = n - ignored
    print('Ignored: %d. Used: %d' % (ignored, used))
    # Guard both ratios: with no evaluated sample the divisions would raise.
    meanIntersection = intersectionRatio / used if used else 0.0
    print('Mean cluster intersection ratio: %f' % meanIntersection)
    print(colorError(colors, recommendedColors))

    if not used:
        return 0.0
    return float(numCorrect) / used