示例#1
0
def predict(train_start_time, hdfs_input_path):
    """Classify the input images with a trained run's classifiers, then cluster each class.

    The pipeline, in order: compute features for the input, apply the
    photo classifier and split images/features by its predictions, apply
    the indoor/object/pr0n classifiers to the photo subset and split again,
    run the face finder on the photos and keep detections accepted by the
    face classifier, then whiten, sample and k-means cluster each of the
    resulting subsets.

    Args:
        train_start_time: Identifier of the training run whose
            'classifiers/*' paths are used (substituted into 'run-%s').
        hdfs_input_path: Path of the input images; passed to the feature
            job and to the photo/nonphoto thresholding jobs.

    Returns:
        The identifier of this run (the start time formatted with '%f'),
        i.e. the value embedded in the run's root path.
    """
    # NOTE(brandyn): This assumes that they all use the same feature
    train_root = '/user/brandyn/tp/image_cluster/run-%s/' % train_start_time
    start_time = time.time()
    # Format with '%f', like train() and train_predict(), so run directories
    # are named consistently ('%s' of a float is truncated on Python 2).
    root = '/user/brandyn/tp/image_cluster/run-%f/' % start_time
    # Predict photos
    cluster.run_image_feature(hdfs_input_path, root + 'feat/input', 'meta_gist_spatial_hist', 256)
    cluster.run_predict_classifier(root + 'feat/input', train_root + 'classifiers/photos', root + 'predict/photos')
    # Split images for photos/nonphotos
    cluster.run_thresh_predictions(root + 'predict/photos', hdfs_input_path, root + 'data/photos', 'photo', 0., 1)
    cluster.run_thresh_predictions(root + 'predict/photos', hdfs_input_path, root + 'data/nonphotos', 'photo', 0., -1)
    # Split features for photos
    cluster.run_thresh_predictions(root + 'predict/photos', root + 'feat/input', root + 'feat/photos', 'photo', 0., 1)
    # Predict photo subclasses
    cluster.run_predict_classifier(root + 'feat/photos', train_root + 'classifiers/indoors', root + 'predict/indoors')
    cluster.run_predict_classifier(root + 'feat/photos', train_root + 'classifiers/objects', root + 'predict/objects')
    cluster.run_predict_classifier(root + 'feat/photos', train_root + 'classifiers/pr0n', root + 'predict/pr0n')
    # Split images for photos subclasses
    cluster.run_thresh_predictions(root + 'predict/indoors', root + 'data/photos', root + 'data/indoors', 'indoor', 0., 1)
    cluster.run_thresh_predictions(root + 'predict/indoors', root + 'data/photos', root + 'data/outdoors', 'indoor', 0., -1)
    cluster.run_thresh_predictions(root + 'predict/objects', root + 'data/photos', root + 'data/objects', 'object', 0., 1)
    cluster.run_thresh_predictions(root + 'predict/pr0n', root + 'data/photos', root + 'data/pr0n', 'pr0n', 0., 1)
    # Split features for photos subclasses
    cluster.run_thresh_predictions(root + 'predict/indoors', root + 'feat/photos', root + 'feat/indoors', 'indoor', 0., 1)
    cluster.run_thresh_predictions(root + 'predict/indoors', root + 'feat/photos', root + 'feat/outdoors', 'indoor', 0., -1)
    cluster.run_thresh_predictions(root + 'predict/objects', root + 'feat/photos', root + 'feat/objects', 'object', 0., 1)
    cluster.run_thresh_predictions(root + 'predict/pr0n', root + 'feat/photos', root + 'feat/pr0n', 'pr0n', 0., 1)
    # Find faces and compute the eigenface feature
    cluster.run_face_finder(root + 'data/photos', root + 'data/detected_faces', image_length=64, boxes=False)
    cluster.run_image_feature(root + 'data/detected_faces', root + 'feat/detected_faces', 'meta_gist_spatial_hist', 256)
    cluster.run_predict_classifier(root + 'feat/detected_faces', train_root + 'classifiers/faces', root + 'predict/detected_faces')
    cluster.run_thresh_predictions(root + 'predict/detected_faces', root + 'data/detected_faces', root + 'data/faces', 'face', 0., 1)
    cluster.run_image_feature(root + 'data/faces', root + 'feat/faces', 'eigenface', 64)
    # Sample for initial clusters
    num_clusters = 10
    num_iters = 5
    num_output_samples = 10
    cluster_names = ['indoors', 'outdoors', 'objects', 'pr0n', 'faces']
    # Explicit loops instead of map(): map() is lazy on Python 3, so using it
    # only for side effects would silently skip every job.
    for name in cluster_names:
        cluster.run_whiten(root + 'feat/%s' % name, root + 'whiten/%s' % name)
    for name in cluster_names:
        cluster.run_sample(root + 'whiten/%s' % name, root + 'cluster/%s/clust0' % name, num_clusters)
    # Cluster indoors, outdoors, objects, pr0n, faces
    # NOTE(review): the greenlets are started but never joined here;
    # presumably hadoopy_flow waits for them elsewhere -- confirm.
    for name in cluster_names:
        hadoopy_flow.Greenlet(cluster.run_kmeans, root + 'whiten/%s' % name, root + 'cluster/%s/clust0' % name,
                              root + 'data/%s' % name, root + 'cluster/%s' % name,
                              num_clusters, num_iters, num_output_samples, 'l2sqr').start()
    return '%f' % start_time
示例#2
0
def train_predict(train_start_time='1308626598.185418'):
    """Featurize the test image sets and score them with a training run's classifiers.

    For each test set a 'meta_gist_spatial_hist' feature job is launched,
    writing under 'test_feat/<set>'. Once all feature jobs have been issued,
    each set is passed through the matching classifier from the training
    run, writing under 'test_predict/<set>'.

    Args:
        train_start_time: Identifier of the training run whose
            'classifiers/*' paths are read (substituted into 'run-%s').

    Returns:
        This run's start time formatted with '%f' (the value embedded in
        the run's root path).
    """
    run_started = time.time()
    classifier_root = '/user/brandyn/tp/image_cluster/run-%s/' % train_start_time
    out_root = '/user/brandyn/tp/image_cluster/run-%f/' % run_started

    # (test set name, input path, classifier applied to that set), in run order.
    # Each negative set is scored with the classifier of its positive counterpart.
    test_sets = [
        ('photos', test_photos_path, 'photos'),
        ('nonphotos', test_nonphotos_path, 'photos'),
        ('indoors', test_indoors_path, 'indoors'),
        ('outdoors', test_outdoors_path, 'indoors'),
        ('objects', test_objects_path, 'objects'),
        ('nonobjects', test_nonobjects_path, 'objects'),
        ('pr0n', test_pr0n_path, 'pr0n'),
        ('nonpr0n', test_nonpr0n_path, 'pr0n'),
        ('faces', test_faces_path, 'faces'),
        ('nonfaces', test_nonfaces_path, 'faces'),
    ]

    # Phase 1: compute features for every test set.
    for set_name, input_path, _ in test_sets:
        cluster.run_image_feature(input_path, out_root + 'test_feat/' + set_name,
                                  'meta_gist_spatial_hist', 256)

    # Phase 2: run the trained classifiers over those features.
    for set_name, _, classifier_name in test_sets:
        cluster.run_predict_classifier([out_root + 'test_feat/' + set_name],
                                       classifier_root + 'classifiers/' + classifier_name,
                                       out_root + 'test_predict/' + set_name)
    return '%f' % run_started
示例#3
0
def train():
    """Train the five binary image classifiers under a fresh run directory.

    Runs three phases in order: feature computation for every
    positive/negative training set, label generation per classifier, and
    classifier training per classifier.

    Returns:
        The run's start time formatted with '%f' (the value embedded in the
        run's root path), usable as the training-run identifier.
    """
    # HDFS paths for output
    run_started = time.time()
    out_root = '/user/brandyn/tp/image_cluster/run-%f/' % run_started
    feat_root = out_root + 'train_feat/'

    # One row per classifier:
    # (positive set, positive input path, negative set, negative input path, label name)
    tasks = [
        ('photos', train_photos_path, 'nonphotos', train_nonphotos_path, 'photo'),
        ('indoors', train_indoors_path, 'outdoors', train_outdoors_path, 'indoor'),
        ('objects', train_objects_path, 'nonobjects', train_nonobjects_path, 'object'),
        ('pr0n', train_pr0n_path, 'nonpr0n', train_nonpr0n_path, 'pr0n'),
        ('faces', train_faces_path, 'nonfaces', train_nonfaces_path, 'face'),
    ]

    # Compute features for classifier train (positive set, then its negative set)
    for pos_name, pos_path, neg_name, neg_path, _ in tasks:
        cluster.run_image_feature(pos_path, feat_root + pos_name, 'meta_gist_spatial_hist', 256)
        cluster.run_image_feature(neg_path, feat_root + neg_name, 'meta_gist_spatial_hist', 256)

    # Label images
    # TODO make one run of this per feature type as the training assumes the features are homogeneous
    for pos_name, _, neg_name, _, label_name in tasks:
        cluster.run_classifier_labels(feat_root + pos_name, feat_root + neg_name,
                                      out_root + 'labels/' + pos_name,
                                      label_name, '', 'tp_%s_labels.js' % pos_name, 'svmlinear')

    # Train classifiers
    for pos_name, _, neg_name, _, _ in tasks:
        cluster.run_train_classifier([feat_root + pos_name, feat_root + neg_name],
                                     out_root + 'classifiers/' + pos_name,
                                     'tp_%s_labels.js' % pos_name)
    return '%f' % run_started