示例#1
0
def show(data, recommender, fractionTrain=0.8, highFactor=0.1, verbose=False, plot=False):
    """
    Fit `recommender` on a train split of `data`, then trace its predictions on a
    random sample of the test split and report how often it recovers the removed color.

    Relies on the module-level `names` (one entry per row of `data`) and `SAMPLE_SIZE`
    (number of test rows to sample).

    Parameters:
        data: 2-D array with one row per site, split by ml.splitData and handed to
            tester.removeColors (presumably color histograms -- confirm against caller)
        recommender: object providing fit(histograms, colors), predict(hist) and
            cluster(hist); fitWithPlot(...) and clusterNames(hist) are used when they work
        fractionTrain: a float specifying the fraction of data to use as train
        highFactor: passed through to tester.removeColors, which picks the color to remove
        verbose: if True, print per-site details
        plot: if True, call plotRecommend with the removed/recommended colors
    Returns:
        the fraction of evaluated (non-skipped) sampled test rows for which
        recommender.predict returned exactly the removed color; 0.0 if every row
        was skipped

    Algorithm:
        tester.removeColors strips one color from every histogram. The recommender is
        fitted on the stripped training histograms, then asked to predict the missing
        color for each sampled test histogram. Rows whose removed color is very common
        in the sample, or dominated the histogram, are skipped.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    train_names, test_names = ml.splitData(np.array(names), fractionTrain)
    assert len(train_names) == len(xTrain)
    assert len(test_names) == len(xTest)

    # Evaluate on a random sample of the test rows. np.random.choice draws with
    # replacement by default, so the same row may be sampled more than once.
    n, D = xTest.shape
    indices = np.random.choice(n, SAMPLE_SIZE)
    xTest = xTest[indices, :]
    test_names = test_names[indices]
    assert xTest.shape == (SAMPLE_SIZE, D)
    assert test_names.shape == (SAMPLE_SIZE,)
    n = SAMPLE_SIZE

    train_colors, _, train_histograms = tester.removeColors(xTrain, highFactor=highFactor)
    try:
        recommender.fitWithPlot(train_histograms, train_colors, train_names)
    except Exception:
        # Best effort: fall back to the plain fit when fitWithPlot is missing or fails.
        # Catching Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print(train_colors)
        recommender.fit(train_histograms, train_colors)
    if verbose:
        print("Done fitting")

    colors, quantities, histograms = tester.removeColors(xTest, highFactor=highFactor)
    assert colors.shape[0] == n
    assert histograms.shape[0] == n

    # Number of sampled rows from which each color bin was removed.
    count = np.zeros(histograms.shape[1])
    for color in colors:
        count[color] += 1

    # Only filled in when plot is True.
    colorRemoved = []
    colorRecommend = []
    namesRecommend = []

    clusters = []
    recommendedColors = np.zeros(n)
    # Marks the rows that were actually scored, so skipped rows (whose recommended
    # color stays at the placeholder 0) do not distort the error metric below.
    evaluated = np.zeros(n, dtype=bool)
    numCorrect = 0
    ignored = 0
    for i in range(n):
        if i % 100 == 1:
            # Accuracy over the rows evaluated so far; skipped rows are excluded.
            print("Partial %d: %f" % (i, float(numCorrect) / max(i - ignored, 1)))

        color, amount = colors[i], quantities[i]
        # Ignore colors that might bias us
        if count[color] > 10:
            ignored += 1
            continue

        hist = histograms[i]
        # Ignore colors that are basically the background
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # Rows were re-sampled above, so the matching name is test_names[i],
            # not the i-th entry of the unsplit module-level names list.
            print("Testing site %s" % test_names[i])
            print("Amount remmoved %d" % amount)

        try:
            clusters.append(recommender.clusterNames(hist))
        except Exception:
            # Not every recommender exposes clusterNames; the cluster list is optional.
            pass

        # Dump the 15 entries selected by ml.maxArgs for the query histogram and for
        # every member of the cluster returned by the recommender.
        cluster = recommender.cluster(hist)
        print("Incoming x")
        for j in ml.maxArgs(hist, 15):
            print("%s %s" % (j, hist[j]))

        for x in cluster:
            print("Another x")
            for j in ml.maxArgs(x, 15):
                print("%s %s" % (j, x[j]))

        recommendedColor = recommender.predict(hist)
        print(recommendedColor)
        recommendedColors[i] = recommendedColor
        evaluated[i] = True

        # for plotting purposes
        if plot:
            colorRemoved.append(color)
            colorRecommend.append(recommendedColor)
            namesRecommend.append(test_names[i])

        if verbose:
            r1, g1, b1 = image.binToRGB(color)
            r2, g2, b2 = image.binToRGB(recommendedColor)
            print("Removed color %d %d %d. Recommended color %d %d %d." % (r1, g1, b1, r2, g2, b2))
            print("Color distance: %d" % (image.binDistance(recommendedColor, color)))
            print("Recommended color %d" % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    print("Ignored: %d" % ignored)
    if evaluated.any():
        print(tester.colorError(colors[evaluated], recommendedColors[evaluated]))
    if plot:
        plotRecommend(colorRemoved, colorRecommend, namesRecommend, clusters)

    used = n - ignored
    if used == 0:
        # Every sampled row was skipped; avoid dividing by zero.
        return 0.0
    return float(numCorrect) / used
示例#2
0
def test(data, recommender, fractionTrain=.8, highFactor=.1, verbose=False):
    """
    Fit `recommender` on a train split of `data` and measure how often it recovers
    the color removed from each test histogram.

    Parameters:
        data: 2-D array with one row per site, split by ml.splitData and handed to
            removeColors (presumably color histograms -- confirm against caller)
        recommender: object providing fit(histograms, colors) and predict(hist);
            cluster(hist) is used for an extra statistic when it works
        fractionTrain: a float specifying the fraction of data to use as train
        highFactor: passed through to removeColors, which picks the color to remove
        verbose: if True, print per-site details
    Returns:
        the fraction of evaluated (non-skipped) test rows for which
        recommender.predict returned exactly the removed color; 0.0 if every row
        was skipped

    Algorithm:
        removeColors strips one color from every histogram. The recommender is fitted
        on the stripped training histograms, then asked to predict the missing color
        for each test histogram. Rows whose removed color occurs unusually often
        (more than mean + one standard deviation of the per-color counts), or whose
        removed color dominated the histogram, are skipped.
    """
    xTrain, xTest = ml.splitData(data, fractionTrain)
    n = xTest.shape[0]

    train_colors, _, train_histograms = removeColors(xTrain, highFactor=highFactor)
    recommender.fit(train_histograms, train_colors)
    if verbose:
        print('Done fitting')

    colors, quantities, histograms = removeColors(xTest, highFactor=highFactor)
    assert colors.shape[0] == n
    assert histograms.shape[0] == n

    # Number of test rows from which each color bin was removed.
    count = np.zeros(histograms.shape[1])
    for color in colors:
        count[color] += 1

    # Statistics over the colors that were removed at least once.
    tmp = count[count > 0]
    color_mean = np.mean(tmp)
    color_stdev = np.std(tmp)
    print('Color mean and stdev %s %s' % (color_mean, color_stdev))

    recommendedColors = np.zeros(n)
    # Marks the rows that were actually scored, so skipped rows (whose recommended
    # color stays at the placeholder 0) do not distort the error metric below.
    evaluated = np.zeros(n, dtype=bool)
    numCorrect = 0
    ignored = 0
    intersectionRatio = 0.
    for i in range(n):
        if i % 100 == 1:
            # Accuracy over the rows evaluated so far; skipped rows are excluded.
            print('Partial %d: %f' % (i, float(numCorrect) / max(i - ignored, 1)))

        color, amount = colors[i], quantities[i]
        # Ignore colors that might bias us
        if count[color] > color_mean + color_stdev:
            ignored += 1
            continue

        hist = histograms[i]
        # Ignore colors that are basically the background
        if hist[color] > 0.4:
            ignored += 1
            continue

        if verbose:
            # NOTE(review): i indexes the test split, while names is the unsplit
            # module-level list -- confirm these line up with ml.splitData's ordering.
            print('Testing site %s' % names[i])
            print('Amount remmoved %d' % amount)

        try:
            cluster = recommender.cluster(hist)
            intersectionRatio += core.clusterIntersectionRatio(hist, cluster)
        except Exception:
            # Best effort: recommenders without cluster support contribute nothing to
            # the intersection statistic. Ctrl-C / SystemExit still propagate.
            pass

        recommendedColor = recommender.predict(hist)
        recommendedColors[i] = recommendedColor
        evaluated[i] = True

        if verbose:
            r1, g1, b1 = image.binToRGB(color)
            r2, g2, b2 = image.binToRGB(recommendedColor)
            print('Removed color %d %d %d. Recommended color %d %d %d.' % (r1, g1, b1, r2, g2, b2))
            print('Color distance: %d' % (image.binDistance(recommendedColor, color)))
            print('Recommended color %d' % (recommendedColor))

        if recommendedColor == color:
            numCorrect += 1

    used = n - ignored
    print('Ignored: %d. Used: %d' % (ignored, used))
    if used == 0:
        # Every test row was skipped; there is nothing to average over.
        return 0.0

    # Rows where the cluster lookup failed count as zero in this mean.
    print('Mean cluster intersection ratio: %f' % (intersectionRatio / used))
    print(colorError(colors[evaluated], recommendedColors[evaluated]))
    return float(numCorrect) / used