示例#1
0
from hashmapd.load_config import LoadConfig, DefaultConfig
from hashmapd.tsne import TSNE


if __name__ == '__main__':
    # Script entry point: load the codes CSV named in the config file, seed a
    # TSNE instance with those codes and save its coordinates to the
    # configured coords file.
    #
    # Command line:
    #   -f / --file   path of the config file to use (default: "config")
    from optparse import OptionParser

    import numpy  # local import keeps this entry point self-contained

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="config", default="config",
        help="Path of the config file to use")
    options, _ = parser.parse_args()  # positional arguments are not used
    cfg = LoadConfig(options.config)

    codes_file = cfg.output.codes_file
    coords_file = cfg.output.coords_file

    # Parse the CSV as float32 and drop its first column before use.
    # NOTE(review): presumably the first column is a row id/label - confirm.
    codes = numpy.genfromtxt(codes_file, dtype=numpy.float32, delimiter=',')
    codes = codes[:,1:]

    tsne = TSNE(perplexity=cfg.tsne.perplexity, desired_dims=cfg.tsne.desired_dims)
    tsne.initialize_with_codes(codes)
    # No fit() call is made here; the original author's note says the model
    # "should already be fit" at this point.
    # NOTE(review): confirm that initialize_with_codes() alone yields usable
    # coordinates before relying on the saved file.
    #tsne.fit(iterations=cfg.tsne.initial_fit_iterations)
    tsne.save_coords_to_file(coords_file)

    # Disabled manual experiments, kept for reference:
    #tsne.load_from_file(coords_file,codes_file)
    #tsne.fit(iterations=2)

    #test_code = [0.1350030452,0.4128168225,0.0014129921,0.7547346354,0.0068102819,0.6216894388,0.9996289015,0.8628810048,0.0004052414,0.0012938380,0.9998107553,0.0000006208,0.2459984124,0.0001938931,0.0103854276,0.0001564398,0.0000000090,0.9995579720,0.9649902582,0.0000025402,0.9946812987,0.9264854193,0.9999329448,0.0095445570,0.0054685692,0.9955748916,0.9433483481,0.0002042586,0.0430774689,0.7664549351]
    #tsne.get_coord_for_code(test_code, iterations =2)
    
示例#2
0
    #coords = scale_to_interval(coords, max=100)
    
    print 'writing coordinates to csv'
    csv_writer = csv.writer(open(output_file, 'wb'), delimiter=',')
    for r in xrange(len(coords)):
        csv_writer.writerow(coords[r].astype('|S12')) # format with 10dp accuracy (but no '-e' format stuff)

if __name__ == '__main__':
    # Perplexity sweep over the bundled test data: for each perplexity in
    # 0..19, fit a 2-D TSNE on the test codes, save the coordinates and render
    # a density plot named after that perplexity.  A failure for one
    # perplexity is reported and the sweep moves on to the next value.

    codes_file = 'test/test_codes.csv'
    coords_file = 'out/test_coords.csv'
    labels_file = 'test/test_labels.csv'  # same for every run, so set once

    # Parse the CSV as float32 and drop its first column before use.
    # NOTE(review): presumably the first column is a row id/label - confirm.
    codes = numpy.genfromtxt(codes_file, dtype=numpy.float32, delimiter=',')
    codes = codes[:,1:]

    for perplexity in xrange(20):
        try:
            tsne = TSNE(perplexity=perplexity, desired_dims=2)
            tsne.initialize_with_codes(codes)
            tsne.fit(iterations=500)
            # coords_file is overwritten on every pass; only the density plot
            # is kept per perplexity.
            tsne.save_coords_to_file(coords_file)

            density_plot_file = 'out/test_map_%i.png'%perplexity

            render = Render(coords_file, labels_file)
            render.plot_density(density_plot_file)
        except Exception as err:
            # Catch Exception rather than using a bare except so Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the sweep, and
            # report why this perplexity failed.
            print('failed to compute with perplexity %i: %s' % (perplexity, err))
            
示例#3
0
    desired_dims = cfg.tsne.desired_dims
    target_iterations = cfg.tsne.initial_fit_iterations

    # FIXME2 slightly braindead way to work out epochs expected (just run through it)
    total_epochs = 0
    if target_perplex >= (len(codes) / 2):
        total_epochs = 1
    print len(codes)
    perplexity = len(codes) / 2
    while perplexity > target_perplex:
        perplexity = perplexity / 2
        total_epochs = total_epochs + 1

    iterations_per = target_iterations / total_epochs
    print "will run for %i total epochs at %i iterations each " % (total_epochs, iterations_per)

    # initialize
    tsne = TSNE(desired_dims=desired_dims)
    tsne.initialize_with_codes(codes)

    # fit with successively halved perplexity, stopping once it is no longer above the target perplexity
    perplexity = len(codes) / 2
    while perplexity > target_perplex:
        print "training with perplexity", perplexity
        tsne.perplexity = perplexity
        tsne.fit(iterations_per)
        perplexity = perplexity / 2

    print "done training"
    tsne.save_coords_to_file(coords_file)