def largevis(edgelist_filename, outdim=2, alpha=-1):
    """
    Use LargeVis for embedding

    This function is Clustit's interface to the LargeVis model for embedding
    large-scale and high-dimensional data.

    :param edgelist_filename: The filename of the edgelist with the
        similarities or distances
    :type edgelist_filename: string
    :param outdim: The number of output dimensions, default is 2.
    :type outdim: int
    :param alpha: Forwarded unchanged to ``_run_largevis``, default is -1
        (presumably the LargeVis learning rate -- confirm against
        ``_run_largevis``).
    :type alpha: int or float
    :returns: The resulting embedding of the input data
    :rtype: pandas.DataFrame
    """
    import os
    import tempfile

    LargeVis.loadgraph(edgelist_filename)
    _run_largevis(outdim, alpha)

    # The result is handed over through a file written by LargeVis.save.
    # Use a unique, privately created temp file instead of a fixed path in
    # /tmp so concurrent runs cannot overwrite each other's output and the
    # path cannot be pre-created by another user.
    handle, temp_file = tempfile.mkstemp(prefix="largevis_", suffix=".txt")
    os.close(handle)  # LargeVis writes by file name; we only need the path
    try:
        LargeVis.save(temp_file)
        # The first column of the saved file becomes the DataFrame index.
        data_frame = pandas.read_csv(temp_file, sep=" ", index_col=0)
    finally:
        # Always remove the temp file, even if saving or parsing failed.
        delete_temp_file(temp_file)
    return data_frame
def largevis(edgelist_filename, outdim=2, alpha=-1.0):
    """
    Use LargeVis for embedding

    This function is Clustit's interface to the LargeVis model for embedding
    large-scale and high-dimensional data.

    :param edgelist_filename: The filename of the edgelist with the
        similarities or distances
    :type edgelist_filename: string
    :param outdim: The number of output dimensions, default is 2.
    :type outdim: int
    :param alpha: Forwarded unchanged to ``_run_largevis``, default is -1.0
        (presumably the LargeVis learning rate -- confirm against
        ``_run_largevis``).
    :type alpha: float
    :returns: The resulting embedding of the input data
    :rtype: pandas.DataFrame
    """
    import os
    import tempfile

    LargeVis.loadgraph(edgelist_filename)
    _run_largevis(outdim, alpha)

    # The result is handed over through a file written by LargeVis.save.
    # A unique temp file (rather than a fixed path in /tmp) keeps concurrent
    # runs from clobbering each other's output.
    handle, temp_file = tempfile.mkstemp(prefix="largevis_", suffix=".txt")
    os.close(handle)  # LargeVis writes by file name; we only need the path
    try:
        LargeVis.save(temp_file)
        names = get_column_names(outdim)
        # header=0 consumes the first line of the file as a header row and
        # replaces it with the explicit column names.
        data_frame = pandas.read_csv(temp_file, sep=" ", names=names,
                                     header=0)
    finally:
        # Cleanup used to be disabled here, leaving one stale file behind
        # per call. mkstemp has already created the file, so the removal
        # cannot fail with "file not found" and mask an earlier error.
        os.remove(temp_file)
    return data_frame
def largevisproc(i_file, o_file, sim):
    """Run LargeVis on ``i_file`` and save the result to ``o_file``.

    :param i_file: Input file name. It is loaded with ``LargeVis.loadgraph``
        when ``sim`` is truthy and with ``LargeVis.loadfile`` otherwise.
    :param o_file: File name passed to ``LargeVis.save``.
    :param sim: Selects which LargeVis loader reads ``i_file``.
    :returns: None
    """
    import LargeVis

    # Settings for LargeVis.run, listed in the positional order the call
    # expects. Only the output dimensionality and thread count are set
    # explicitly; everything else is -1 (presumably "use the LargeVis
    # default" -- confirm against the LargeVis documentation).
    settings = (
        ("outdim", 2),
        ("threads", 24),
        ("samples", -1),
        ("prop", -1),
        ("alpha", -1),
        ("trees", -1),
        ("neg", -1),
        ("neigh", -1),
        ("gamma", -1),
        ("perp", -1),
    )
    run_args = [value for _name, value in settings]

    load_input = LargeVis.loadgraph if sim else LargeVis.loadfile
    load_input(i_file)

    # The return value is not used; the result is written out by save().
    _embedding = LargeVis.run(*run_args)
    LargeVis.save(o_file)
import LargeVis
import argparse

# Command-line driver for LargeVis: parse the options, load the input,
# run the layout and save the result. Every numeric option defaults to -1
# and is passed to LargeVis.run as-is.
parser = argparse.ArgumentParser()
parser.add_argument('-fea', default=1, type=int,
                    help='whether to visualize high-dimensional feature '
                         'vectors or networks')
parser.add_argument('-input', default='', help='input file')
parser.add_argument('-output', default='', help='output file')
parser.add_argument('-outdim', default=-1, type=int,
                    help='output dimensionality')
parser.add_argument('-threads', default=-1, type=int,
                    help='number of training threads')
parser.add_argument('-samples', default=-1, type=int,
                    help='number of training mini-batches')
parser.add_argument('-prop', default=-1, type=int,
                    help='number of propagations')
parser.add_argument('-alpha', default=-1, type=float, help='learning rate')
parser.add_argument('-trees', default=-1, type=int,
                    help='number of rp-trees')
parser.add_argument('-neg', default=-1, type=int,
                    help='number of negative samples')
parser.add_argument('-neigh', default=-1, type=int,
                    help='number of neighbors in the NN-graph')
parser.add_argument('-gamma', default=-1, type=float,
                    help='weight assigned to negative edges')
parser.add_argument('-perp', default=-1, type=float,
                    help='perplexity for the NN-graph')

args = parser.parse_args()

# Fail fast with a usage message instead of handing an empty file name to
# LargeVis and only finding out after (or instead of) the computation.
if not args.input:
    parser.error('an input file is required (-input)')
if not args.output:
    parser.error('an output file is required (-output)')

# -fea 1 loads the input with loadfile; any other value uses loadgraph.
if args.fea == 1:
    LargeVis.loadfile(args.input)
else:
    LargeVis.loadgraph(args.input)

# Y is kept as a module-level name; the result is written out by save().
Y = LargeVis.run(args.outdim, args.threads, args.samples, args.prop,
                 args.alpha, args.trees, args.neg, args.neigh, args.gamma,
                 args.perp)

LargeVis.save(args.output)
#!/usr/bin/env python
import LargeVis
import argparse

# Command-line driver for LargeVis: parse the options, load the input,
# run the layout and save the result. Every numeric option defaults to -1
# and is passed to LargeVis.run as-is.
parser = argparse.ArgumentParser()
parser.add_argument('--fea', default=1, type=int,
                    help='whether to visualize high-dimensional feature '
                         'vectors or networks')
# required=True makes argparse reject a missing option, so a default value
# could never be used and is not declared.
parser.add_argument('--input', help='input file', required=True)
parser.add_argument('--output', help='output file', required=True)
parser.add_argument('--outdim', default=-1, type=int,
                    help='output dimensionality')
parser.add_argument('--threads', default=-1, type=int,
                    help='number of training threads')
parser.add_argument('--samples', default=-1, type=int,
                    help='number of training mini-batches')
parser.add_argument('--prop', default=-1, type=int,
                    help='number of propagations')
parser.add_argument('--alpha', default=-1, type=float, help='learning rate')
parser.add_argument('--trees', default=-1, type=int,
                    help='number of rp-trees')
parser.add_argument('--neg', default=-1, type=int,
                    help='number of negative samples')
parser.add_argument('--neigh', default=-1, type=int,
                    help='number of neighbors in the NN-graph')
parser.add_argument('--gamma', default=-1, type=float,
                    help='weight assigned to negative edges')
parser.add_argument('--perp', default=-1, type=float,
                    help='perplexity for the NN-graph')

args = parser.parse_args()

# --fea 1 loads the input with loadfile; any other value uses loadgraph.
if args.fea == 1:
    LargeVis.loadfile(args.input)
else:
    LargeVis.loadgraph(args.input)

# Y is kept as a module-level name; the result is written out by save().
Y = LargeVis.run(args.outdim, args.threads, args.samples, args.prop,
                 args.alpha, args.trees, args.neg, args.neigh, args.gamma,
                 args.perp)

LargeVis.save(args.output)
# NOTE(review): `out`, `features`, `tq`, `args` and `idx_filename` are all
# defined before this excerpt. If `out` comes from an enclosing
# `with open(...)` block, the two write statements below belong inside it
# and the file must be closed before LargeVis.loadfile reads it -- confirm.

# Write the feature matrix as text: a header line with the two values of
# features.shape, then one tab-separated line per row.
out.write("{}\t{}\n".format(*features.shape))
for row in tq(features):
    out.write('\t'.join(row.astype(str))+'\n')
del features  # drop the reference before running LargeVis

# now run Large Vis! (in 2D mode)
LargeVis.loadfile(args.temp+"lv_format.txt")

# samples only matters for graph layout
samples = -1
gamma = -1
Y = LargeVis.run(2, args.threads, samples, args.prop, args.alpha,
                 args.trees, args.neg, args.neigh, gamma, args.perp)

# The output name is the input name plus a suffix that records the sample
# size and whether sampling was done by year.
if args.sampling == 'by_year':
    filename = '.{}.year_lv_coords'.format(args.sample_size)
else:
    filename = '.{}.lv_coords'.format(args.sample_size)

LargeVis.save(args.input+filename)

# The banner literal is kept exactly as found in this excerpt.
donestring = """ -----PROCESSING COMPLETE----- 2D Embedding saved as: {} """.format(args.input+filename)

if args.sampling is not None:
    # Message wording fixed: it previously read "saves as as".
    donestring += "\nRandom indices saved as: {}".format(args.input+idx_filename)

print(donestring)