示例#1
0
def cmd_hack(args):
    info('hack called with args %s' % str(args))
    warn('this is a warning')
    err('this is an error')
    info('this is merely info')
    print getTags()
    a, b = u.split([i for i in range(20)], 17)
    print a
    print b
    fail('now you\'ve done it')
示例#2
0
def cmd_train(args):
    # Train a GaussianNB classifier for a single tag and write an HTML
    # report of its predictions on the held-out test portion of the data.
    # args[0] must be one of the tags reported by u.getTags(); exits or
    # fail()s otherwise.
    if not args:
        err('error: not enough arguments.')
        err('usage: run <classify-tag>')
        sys.exit(1)
    tag = args[0]
    knownTags = u.getTags()
    if tag not in knownTags:
        fail('error: tag "%s" not found.\nKnown tags are: %s' %
             (tag, str(knownTags)))

    info('classifying for tag "%s"' % tag)
    files, data, labels, imageJson = u.loadInputs()
    # multi-label learning is a separate problem (see the links below);
    # for now slice() the label matrix down to the one tag of interest
    labels = u.slice(labels, knownTags.index(tag))
    NO_YES = ['NO ' + tag, 'YES ' + tag]
    verbose('data shape: %s labels shape: %s' % (data.shape, labels.shape))
    # first 326 samples train, the remainder test
    trainFiles, testFiles = u.split(files, 326)
    trainImages, testImages = u.split(data, 326)
    trainLabels, testLabels = u.split(labels, 326)
    # some classifiers can only handle a single target
    # http://stackoverflow.com/questions/31881138/predicting-outcome-of-multiple-targets-in-scikit-learn
    # try this? http://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html
    verbose('trained data shape: %s trained labels shape: %s' %
            (trainImages.shape, trainLabels.shape))
    # GaussianNB: 92.6% correct on training data set, wrong between
    # white <-> gray. Alternatives tried:
    #classifier = LogisticRegression() # 100% correct on training, empirically worse on test data tho
    #classifier = KNeighborsClassifier() # not as good as GaussianNB, particularly false positives/negatives on blue
    #classifier = ElasticNet()
    classifier = GaussianNB()
    classifier.fit(trainImages, trainLabels)
    predicts = classifier.predict(testImages)
    verbose('predicts.shape: %s' % str(predicts.shape))
    probs = classifier.predict_proba(testImages)
    verbose('probs shape: %s' % str(probs.shape))
    verbose('testLabels: %s' % str(testLabels))
    htmlFile = tag + '-gaussian-unclassified.html'
    predictNames = [NO_YES[int(p)] for p in predicts]
    actualNames = [NO_YES[int(i)] for i in testLabels]
    u.outputHtml(htmlFile, testFiles, predictNames, actualNames, None)
    info('saved test results: %s' % htmlFile)
示例#3
0
def cmd_hack(args):
    tset = u.loadInputs2(True, '28x28')
    print len(tset.images), 'image(s)'
    print tset.images[0]
    print tset.images[0].np_data.dtype
    sys.exit(1)
    info('hack called with args %s' % str(args))
    warn('this is a warning')
    err('this is an error')
    info('this is merely info')
    a, b = u.split([i for i in range(20)], 17)
    print a
    print b
    nda = np.array([0, 0, 1])
    print(nda)
    print(n2c(nda))
    fail('now you\'ve done it')
示例#4
0
def cmd_run(args):
    # Train classifiers for a single tag and write HTML prediction reports.
    #
    # args[0] must be one of the tags reported by u.getTags(); exits or
    # fail()s otherwise. Fits a small Keras model and a GaussianNB
    # classifier on the first 326 samples, then writes the NB predictions
    # for the test split to <tag>-unclassified.html and for the training
    # split to <tag>-classified.html.
    if len(args) == 0:
        err('error: not enough arguments.')
        err('usage: run <classify-tag>')
        sys.exit(1)
    tag = args[0]
    tags = u.getTags()
    if tag not in tags:
        fail('error: tag "%s" not found.\nKnown tags are: %s' %
             (tag, str(tags)))

    info('classifying for tag "%s"' % tag)
    imageFiles, imageData, labels, imgJson = u.loadInputs()
    # see multi-label learning comment below. for now, slice() to one label
    labels = u.slice(labels, tags.index(tag))
    NO_YES = ['NO ' + tag, 'YES ' + tag]
    verbose('data shape: %s labels shape: %s' %
            (imageData.shape, labels.shape))
    # first 326 samples train, the remainder test
    trainedFiles, testFiles = u.split(imageFiles, 326)
    trainedImages, testImages = u.split(imageData, 326)
    trainedLabels, testLabels = u.split(labels, 326)
    # some classifiers can only handle a single target
    # http://stackoverflow.com/questions/31881138/predicting-outcome-of-multiple-targets-in-scikit-learn
    # try this? http://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html
    verbose('trained data shape: %s trained labels shape: %s' %
            (trainedImages.shape, trainedLabels.shape))

    # FIX: Dense(None, ...) is invalid -- Keras requires a positive integer
    # unit count and raises when building the layer. A single sigmoid unit
    # matches the binary_crossentropy loss below (relu output is unbounded,
    # not a probability, so it was also the wrong activation for this loss).
    model = Sequential()
    model.add(Dense(1, input_dim=14400))
    model.add(Activation('sigmoid'))

    verbose('model summary:')
    model.summary()

    info('model built, compiling')
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Convert labels to categorical one-hot encoding
    # think I've done the equivalent of this in the slice() above (?)
    #binaryTrainedLabels = keras.utils.to_categorical(labels, num_classes=10)
    model.fit(trainedImages, trainedLabels, epochs=10, batch_size=326)

    # NOTE: the HTML reports below come from the NB classifier, not the
    # Keras model fit above -- the model is trained only for its metrics.
    classifier = GaussianNB(
    )  # 92.6% correct on training data set, wrong between white <-> gray
    #classifier = LogisticRegression() # 100% correct on training, empirically worse on test data tho
    #classifier = ElasticNet()
    classifier.fit(trainedImages, trainedLabels)
    predicts = classifier.predict(testImages)
    verbose('predicts.shape: %s' % str(predicts.shape))
    probs = classifier.predict_proba(testImages)
    verbose('probs shape: %s' % str(probs.shape))
    verbose('testLabels: %s' % str(testLabels))
    htmlFile = tag + '-unclassified.html'
    u.outputHtml(htmlFile, testFiles, [NO_YES[int(p)] for p in predicts],
                 [NO_YES[int(i)] for i in testLabels], None)
    info('saved test results: %s' % htmlFile)

    # re-predict on the training split so mistakes on seen data are visible
    predicts = classifier.predict(trainedImages)
    htmlFile = tag + '-classified.html'
    u.outputHtml(htmlFile, trainedFiles, [NO_YES[int(p)] for p in predicts],
                 [NO_YES[int(i)] for i in trainedLabels], None)
    # FIX: this report is built from the training split, not the test split
    info('saved training results: %s' % htmlFile)