# NOTE(review): reconstructed formatting of a whitespace-mangled Python 2 script
# fragment (print statements, xrange). Experiment driver: measures running times
# over a range of seed-set sizes k. `parser`, `time`, `log`, `removeFile`,
# `LinkServerCP` are defined outside this fragment.
gc.enable()  # presumably to reclaim memory between large experiment iterations
parameters = parser.parse_args()
# Unpack all command-line parameters in one multi-assignment.
min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset, seq_samples_scale = parameters.min, \
parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.seq_samples_scale#,\
#[float(x) for x in parameters.eps.split(',')], parameters.title
# Map the integer -delim option onto an actual delimiter character.
delim_dict = {0 : '\t', 1 : ' ', 2 : ','}
delimeter = delim_dict[delim_option]  # sic: 'delimeter' spelling used throughout this script
results_dir = 'experiments/results/'
bUndirected = True if parameters.undirected == 1 else False
bfs_method = 'seq'
start_time = time()
print "creating link-server object"
if delimeter == "\t":
    print "delimeter is tab"
# Build the link server from the edge CSV (create_new=True forces a rebuild).
# prob/prob_method select the per-edge activation probabilities — semantics
# live in LinkServerCP; [0.1, 0.01] is the two-valued probability model here.
L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected)
n = L.getNumNodes()
#n = len(V)
print "Number of nodes: ", n
nBFS_samples = 1000
# Output file name encodes the experiment's parameter grid.
running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d-bfs_samples-%d-large'%(min_frac,max_frac,nSamples,nBFS_samples)
running_times_file_raw = running_times_file + "-raw"
removeFile(running_times_file)
f = open(running_times_file, 'w')
f_raw = open(running_times_file_raw,'w')
nBFS_samples = 1000  # NOTE(review): duplicate assignment — already set above
nBFS_samples_theoretic = n * log(n,2)  # theoretical sample count n*log2(n)
# Sweep seed-set size k from min_frac*n up to max_frac*n in steps of interval*n.
# (Fragment is cut off here; the loop body continues beyond this view.)
for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)):
    print "k = ", nSeeds
    seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp'
    runtimes_approx, runtimes_seq = [], []
# NOTE(review): reconstructed formatting of a whitespace-mangled Python 2 script
# fragment. Builds a link server and generates seed files for an
# approximation-error sweep. `parser`, `delim_dict`, `time`, `LoadNodesFromFile`,
# `generateSeedFiles` are defined outside this fragment.
parameters = parser.parse_args()
min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset = parameters.min, \
parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset
print "Input file: ", edges_csv
print "Output file prefix: ", output
delimiter = delim_dict[delim_option]  # delim_dict is defined outside this fragment
start_time = time()
bfs_method = 'seq'
print "creating link-server object"
# prob_method 0 selects a single uniform edge probability; any other value
# selects the two-valued probability model.
if parameters.prob_method == 0:
    edge_prob = 0.2
else:
    edge_prob = [0.1,0.01]
L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=edge_prob, delim=delimiter, undirected=parameters.undirected)
print "n = ", L.getNumNodes()
# Node set is re-read from the CSV; n is taken from this list, not from L.
V = LoadNodesFromFile(edges_csv, delimiter)
n = len(V)
print 'min_frac', min_frac
# Translate the fractional k range into absolute seed-set sizes.
k_min = int(n * min_frac)
k_max = int(n * max_frac) + 1
k_step = int(n * interval)
generateSeedFiles(k_min, k_max, k_step, V, nSamples, 'experiments/' + output + "-seeds-")
mean_errors, std_errors = [], []
# Fragment is cut off here: the loop body continues beyond this view.
for k in xrange(k_min, k_max,k_step):
# NOTE(review): reconstructed formatting of a whitespace-mangled Python 2 script
# fragment. Measures influence values for one seed-set size over a range of
# sample counts. `parser`, `time`, `removeFile`, `sample` (from random) and
# `LinkServerCP` are defined outside this fragment.
parameters = parser.parse_args()
min_samples, max_samples, samples_step, k_mode, k, edges_csv, delim_option, output, dataset, prob_method, cores= \
parameters.min_samples, parameters.max_samples, parameters.samples_step, parameters.k_mode, parameters.k,\
parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.prob_method, parameters.cores
print "Dataset: ", dataset
delim_dict = {0 : '\t', 1 : ' ', 2 : ','}
delimeter = delim_dict[delim_option]  # sic: 'delimeter' spelling used throughout this script
results_dir = 'experiments/results/influence_values/'
bUndirected = True if parameters.undirected == 1 else False
bfs_method = 'seq'
start_time = time()
print "creating link-server object"
if delimeter == "\t":
    print "delimeter is tab"
L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected)
n = L.getNumNodes()
# k_mode 0: k is a fraction of n; otherwise k is an absolute seed count.
if k_mode == 0:
    nSeeds = int(n * k)
else:
    nSeeds = int(k)
# Randomized temp name for perf output to avoid collisions between runs.
perf_csv_fname = "perf_out" + str(sample(range(1000),1)[0]) + ".csv"
print "Number of nodes: ", n
print "nSeeds = ", nSeeds
# Results file name encodes the sample range, k and probability model.
results_file = results_dir + output + '-influence_values_samples_min-%d-samples_max-%d-k-%.3f-prob_method-%d'%(min_samples,max_samples, k, prob_method)
removeFile(results_file)
seeds_fname = "%s-seeds-%d.cp"%(dataset,nSeeds)
#!/usr/bin/python from link_server import LinkServerCP import sys import argparse parser = argparse.ArgumentParser() parser.add_argument('-cp', type=str) if __name__ == "__main__": parameters = parser.parse_args() print "name of dataset file: ", parameters.cp L = LinkServerCP(parameters.cp)
# NOTE(review): reconstructed formatting of a whitespace-mangled Python 2 script
# fragment. Another running-time sweep; `pr_method`, `prob`, `min_frac`,
# `max_frac`, `interval`, `nSamples`, `delimeter`, `parameters`, `output`,
# `dataset` and `edges_csv` are all defined outside this view.
new = True  # passed to LinkServerCP as create_new
total_time_start = time()
iters = 0
# pr_method 2 selects the two-valued edge-probability model.
if pr_method == 2:
    prob = [0.01, 0.1]
print "min_frac = %.5f, max_frac = %.5f, interval = %.5f" % ( min_frac, max_frac, interval)
print "Number of cores to use: ", parameters.cores
print "CSV file to be used: ", edges_csv
print "creating link-server object"
if delimeter == "\t":
    print "delimeter is tab"
L = LinkServerCP(dataset, edges_csv, new, pr_method, prob, delim=delimeter, undirected=parameters.undirected)
# Node set is re-read from the CSV; n is taken from this list, not from L.
V = LoadNodesFromFile(edges_csv, delimeter)
n = len(V)
print "Number of nodes: ", n
running_times_file = output + '-running_times-large-samples-%d-k_frac-%.5f-%.5f' % ( nSamples, min_frac, max_frac)
removeFile(running_times_file)
dRuntimes = defaultdict(list)  # maps seed-set size -> list of measured runtimes
# Sweep seed-set size k; fragment is cut off here — the loop body continues
# beyond this view.
for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)):
    start_time_k = time()
# NOTE(review): this whitespace-mangled Python 2 fragment begins mid-statement —
# the first line below is the tail of a call whose beginning lies outside this
# view — and ends mid-statement as well.
n, parameters.graph_method)
# One running-times output file per epsilon value in eps_list.
running_times_files = [ output + '-running_times-n-%d-graph_method-%d-samples-prob_method-%d-%d-eps-%.5f' % (n, parameters.graph_method, nSamples, parameters.prob_method, eps) for eps in eps_list ]
for fname in running_times_files:
    removeFile(fname)
# Synthesize a random graph (kind chosen by graph_method; er_p / ws_beta / ws_k
# are the Erdos-Renyi / Watts-Strogatz parameters) and load it as a link server.
createRandomGraph(csv_fname, parameters.graph_method, n, parameters.er_p, parameters.ws_beta, parameters.ws_k)
L = LinkServerCP('input/datasets/' + dataset, csv_fname, create_new=True, prob_method=parameters.prob_method, prob=[0.1, 0.01], delim='\t', undirected=1)
# record loading time of link-server -- for interpolation
removeFile(perf_output_fname)
# Shell out to `perf stat` to count CPU cycles spent loading the link server;
# .stdout.read() blocks until the subprocess finishes.
subprocess.Popen("perf stat -x, -o %s python load_link_server.py -cp %s"%\
(perf_output_fname, "input/datasets/" + dataset), shell = True, stdout = subprocess.PIPE).stdout.read()
nCycles_link_server = getNumCycles(perf_output_fname)
removeFile(perf_output_fname)
#removeFile(csv_fname)
V = xrange(n)  # synthetic node ids are simply 0..n-1
# Fragment cut off mid-header: the xrange step argument and the loop body lie
# beyond this view.
for nSeeds in xrange(int(min_frac * n), int(max_frac * n),
def plot_k_vs_approx():
    """Plot approximation ratio versus k/n for several datasets.

    For each dataset, scans experiments/results/ for per-k approximation-error
    files, pairs each 'approx' file with its sequential-baseline file (the k
    embedded in the file name may be off by up to +/-30, hence the offset
    search), computes mean error and SEM via calculateMeanErrorAndSEM, and
    writes one combined plot plus one plot per dataset under
    experiments/figures/.
    """
    csv_files = [#'input/datasets/wiki-Vote-small.txt',\
    #'input/datasets/email-Enron.txt',\
    'input/datasets/Epinions1.txt',\
    'input/datasets/Wiki-Vote_stripped.txt']
    work_dir = 'experiments/results/'
    delims = ['\t', '\t']
    # n for each dataset, obtained by loading it into a throwaway link server.
    n_values = [ LinkServerCP('tmp', csv_fname, create_new=True, delim=delims[i]).getNumNodes() for i, csv_fname in enumerate(csv_files) ]
    datasets = ['epinions', 'wiki-vote']
    x_vals_lists, y_vals_lists, sem_lists = [], [], []
    for i, dataset in enumerate(datasets):
        print "dataset: ", dataset
        n = n_values[i]
        # Fixed k grid: 1% to 60% of n in 2% steps.
        min_frac, max_frac, step_frac = 0.01, 0.6, 0.02
        k_min, k_max, step = int(n * min_frac), int(n * max_frac) + 1, int( n * step_frac)
        approx_files = [ file for file in os.listdir(work_dir) if file.startswith(dataset + "-approx_errors-k_min-%d-k_max-%d" % (k_min, k_max)) ]
        # NOTE(review): seq_files is computed but never used below.
        seq_files = [ file for file in os.listdir(work_dir) if file.startswith(dataset + "-seq-approx-errors-k") ]
        x_vals, y_data, errs = [], [], []
        for k in range(k_min, k_max, step):
            print "k = ", k
            # The k recorded in a result file may differ slightly from the grid
            # value; probe every candidate name within +/-30 of k.
            offsets = range(-30, 30)
            candidate_fnames = [ dataset + "-approx_errors-k_min-%d-k_max-%d-k-%d-samples-10" % (k_min, k_max, k + offset) for offset in offsets ]
            files_exist = [ candidate in approx_files for candidate in candidate_fnames ]
            assert any( files_exist), "candidate fnames: " + str(candidate_fnames)
            offset_idx = files_exist.index(True)  # first matching offset wins
            approx_fname = candidate_fnames[offset_idx]
            # Baseline file for the same (possibly offset) k.
            seq_fname = dataset + "-seq-approx-errors-k-" + str( k + offsets[offset_idx]) + "-samples-10"
            print "approx_fname: ", approx_fname
            print "seq_fname: ", seq_fname
            # NOTE(review): rebinding the loop variable k here is intentional —
            # the file's own k value replaces the grid value for the x-axis.
            k, mean_err, err = calculateMeanErrorAndSEM( work_dir + approx_fname, work_dir + seq_fname)
            x_vals.append(1.0 * k / n)  # 1.0* forces float division (Python 2)
            y_data.append(mean_err)
            errs.append(err)
        x_vals_lists.append(x_vals)
        y_vals_lists.append(y_data)
        sem_lists.append(errs)
        assert len(x_vals) == len(y_data)
        assert len(x_vals) == len(errs)
    # Combined figure with one curve per dataset.
    plot2d(x_vals_lists, y_vals_lists, sem_lists, datasets, [r'$k/n$', 'Approximation ratio'], '',
    'experiments/figures/k_vs_approx_combined.pdf')
    print "Printing separate plots: "
    # One standalone figure per dataset.
    for i, dataset in enumerate(datasets):
        x_data = x_vals_lists[i]
        y_data = y_vals_lists[i]
        sem_list = sem_lists[i]
        plot2d(x_data, [y_data], [sem_list], [dataset], [r'$k/n$','Approximation ratio'], \
        '', 'experiments/figures/' + dataset + '-k_vs_approx.pdf')
# NOTE(review): reconstructed formatting of a whitespace-mangled Python 2 script
# fragment. Prepares a link server and seed files; `delim_dict`, `min_frac`,
# `max_frac`, `interval`, `nSamples`, `parameters` etc. are defined outside
# this view.
print "Input file: ", edges_csv
print "Output file prefix: ", output
delimiter = delim_dict[delim_option]
start_time = time()
bfs_method = 'seq'
print "creating link-server object"
# prob_method 0 selects a single uniform edge probability; any other value
# selects the two-valued probability model.
if parameters.prob_method == 0:
    edge_prob = 0.2
else:
    edge_prob = [0.1, 0.01]
L = LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=edge_prob, delim=delimiter, undirected=parameters.undirected)
print "n = ", L.getNumNodes()
# Node set is re-read from the CSV; n is taken from this list, not from L.
V = LoadNodesFromFile(edges_csv, delimiter)
n = len(V)
# Translate the fractional k range into absolute seed-set sizes.
k_min = int(n * min_frac)
k_max = int(n * max_frac) + 1
k_step = int(n * interval)
print "max_k = ", max_frac
print "Minimum k value: %d, maximum k value: %d" % (k_min, k_max)
# NOTE(review): this deletes the dataset file itself right after it was built
# with create_new=True above — confirm this is intentional.
removeFile(dataset)
generateSeedFiles(k_min, k_max, k_step, V, nSamples, 'experiments/' + output + "-seeds-")
parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset#,\ #[float(x) for x in parameters.eps.split(',')], parameters.title seed() perf_fname = "runtimes" + str(random()) delim_dict = {0: '\t', 1: ' ', 2: ','} delimeter = delim_dict[delim_option] results_dir = 'experiments/results/' start_time = time() bfs_method = 'seq' print "creating link-server object" if delimeter == "\t": print "delimeter is tab" L = LinkServerCP(dataset, edges_csv, create_new=True, prob_method=2, prob=[0.1, 0.01], delim=delimeter) V = LoadNodesFromFile(edges_csv, delimeter) n = len(V) print "Number of nodes: ", n running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d' % ( min_frac, max_frac, nSamples) running_times_file_raw = running_times_file + "-raw" removeFile(running_times_file) f = open(running_times_file, 'w') f_raw = open(running_times_file_raw, 'w') for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)): print "k = ", nSeeds seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp'
def print_out(text):
    """Emit `text` to stdout and/or append it to `output_file`.

    `output_mode` (module global): 0 = stdout only, 1 = file only, 2 = both.
    The file is opened and closed per call so output survives partial runs.
    """
    if output_mode in [0, 2]:
        print text
    if output_mode in [1, 2]:
        f_output = open(output_file, 'a')
        f_output.write(text + '\n')
        f_output.close()

if __name__ == "__main__":
    # NOTE(review): reconstructed formatting of a whitespace-mangled Python 2
    # fragment; it is cut off mid-call at the end of this view.
    parameters = parser.parse_args()
    dataset, seeds_file, results_file, output_mode, nSamples = parameters.dataset, parameters.seeds_file, parameters.results_file,\
    parameters.output_mode, parameters.nSamples
    L = LinkServerCP(dataset)
    f = open(seeds_file, 'r')
    # At most one stopping criterion may be active: a fixed sample cap or a
    # relative-standard-error target.
    assert (nSamples <= 0 or parameters.min_relative_standard_error <= 0)
    seeds_list = cp.load(f)  # cp is presumably cPickle/pickle — TODO confirm
    f.close()
    removeFile(parameters.reached_nodes_file)
    if output_mode in [1, 2]:
        f = open(results_file, 'w')
    for i, seed_set in enumerate(seeds_list):
        if output_mode in [0, 2]:
            print "Sample ", i
        if parameters.get_n_reached == 1:
            try:
                # Fragment cut off mid-call: the remaining keyword arguments and
                # the matching except clause lie beyond this view.
                avg, total_samples,l_n_reached = sequential_estimation(L, seed_set, max_samples_cap=nSamples, \
                nCores=parameters.cores,bReturnValues = True,\