import numpy

from hashmapd.load_config import LoadConfig, DefaultConfig
from hashmapd.tsne import TSNE

# Script entry point: read the codes CSV named in the config, load it into a
# TSNE instance and write that instance's coordinates to the configured
# coords file. No fitting is performed here (the fit call is disabled below).
if __name__ == '__main__':
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="config", default="config",
                      help="Path of the config file to use")
    (options, args) = parser.parse_args()

    cfg = LoadConfig(options.config)
    codes_file = cfg.output.codes_file
    coords_file = cfg.output.coords_file

    codes = numpy.genfromtxt(codes_file, dtype=numpy.float32, delimiter=',')
    # Drop the first CSV column (presumably an id/label column -- TODO confirm).
    codes = codes[:, 1:]

    tsne = TSNE(perplexity=cfg.tsne.perplexity,
                desired_dims=cfg.tsne.desired_dims)
    tsne.initialize_with_codes(codes)
    #tsne.fit(iterations=cfg.tsne.initial_fit_iterations)
    # should already be fit.
    tsne.save_coords_to_file(coords_file)

    # Disabled debugging path, kept for reference.
    #tsne.load_from_file(coords_file,codes_file)
    #tsne.fit(iterations=2)
    #test_code = [0.1350030452,0.4128168225,0.0014129921,0.7547346354,0.0068102819,0.6216894388,0.9996289015,0.8628810048,0.0004052414,0.0012938380,0.9998107553,0.0000006208,0.2459984124,0.0001938931,0.0103854276,0.0001564398,0.0000000090,0.9995579720,0.9649902582,0.0000025402,0.9946812987,0.9264854193,0.9999329448,0.0095445570,0.0054685692,0.9955748916,0.9433483481,0.0002042586,0.0430774689,0.7664549351]
    #tsne.get_coord_for_code(test_code, iterations =2)
#coords = scale_to_interval(coords, max=100) print 'writing coordinates to csv' csv_writer = csv.writer(open(output_file, 'wb'), delimiter=',') for r in xrange(len(coords)): csv_writer.writerow(coords[r].astype('|S12')) # format with 10dp accuracy (but no '-e' format stuff)
# NOTE(review): the line above is the tail of a definition whose header lies
# outside this chunk; it is kept verbatim rather than re-indented by guesswork.
# It opens the output file without ever closing it -- worth wrapping in a
# `with` block once the enclosing definition is restored.

# Smoke test: sweep perplexity over 0..19, fitting a 2-D map of the test codes
# and rendering a density plot for each value. A failing perplexity is
# reported and skipped so the rest of the sweep still runs.
if __name__ == '__main__':
    # Local import: only this entry point needs it.
    import traceback

    codes_file = 'test/test_codes.csv'
    coords_file = 'out/test_coords.csv'
    labels_file = 'test/test_labels.csv'

    codes = numpy.genfromtxt(codes_file, dtype=numpy.float32, delimiter=',')
    # Drop the first CSV column (presumably an id/label column -- TODO confirm).
    codes = codes[:, 1:]

    for perplexity in range(20):
        try:
            tsne = TSNE(perplexity=perplexity, desired_dims=2)
            tsne.initialize_with_codes(codes)
            tsne.fit(iterations=500)
            # The coords file is overwritten on every pass; only the plots
            # are kept per perplexity.
            tsne.save_coords_to_file(coords_file)

            density_plot_file = 'out/test_map_%i.png' % perplexity
            render = Render(coords_file, labels_file)
            render.plot_density(density_plot_file)
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C / SystemExit
            # still stop the sweep; the traceback shows why the value failed.
            print('failed to compute with perplexity %i' % perplexity)
            traceback.print_exc()
# NOTE(review): cfg, codes, target_perplex, coords_file and TSNE are not bound
# in this fragment; they are expected to be defined earlier in the script.
desired_dims = cfg.tsne.desired_dims
target_iterations = cfg.tsne.initial_fit_iterations

print(len(codes))

# Build the perplexity schedule once and use it both to size the epochs and to
# drive training. It starts at half the number of codes and halves each epoch
# while the value is still above the target, so the last epoch runs at the
# smallest halved value that is still greater than target_perplex.
start_perplexity = len(codes) // 2
schedule = []
perplexity = start_perplexity
# The `> 0` guard stops the halving from looping forever on a negative target.
while perplexity > target_perplex and perplexity > 0:
    schedule.append(perplexity)
    perplexity = perplexity // 2

if not schedule:
    # The target is at or above the starting perplexity. Still run the single
    # epoch that gets announced, at the starting value, instead of saving
    # coordinates that were never fitted.
    schedule = [start_perplexity]

total_epochs = len(schedule)
# Floor division: any remainder of the iteration budget is dropped.
iterations_per = target_iterations // total_epochs
print("will run for %i total epochs at %i iterations each " % (total_epochs, iterations_per))

# initialize
tsne = TSNE(desired_dims=desired_dims)
tsne.initialize_with_codes(codes)

# fit with reducing perplexity
for perplexity in schedule:
    print("training with perplexity %s" % perplexity)
    tsne.perplexity = perplexity
    tsne.fit(iterations_per)

print("done training")
tsne.save_coords_to_file(coords_file)