def node_presence(graphs, path):
    # NOTE(review): only the signature of this function is present in the
    # reviewed source -- its body is missing.  The placeholder below merely
    # keeps the module syntactically valid; restore the real implementation.
    raise NotImplementedError('node_presence: body missing from reviewed source')


if __name__ == '__main__':
    # Command-line entry point: make_paths <filename> <npaths>
    #
    # Loads the training edge lists, converts them to graphs, and writes
    # <npaths> lines to <filename>.  Each line is the result of
    # random_shortest_path(graphs) joined with `delim`.
    #
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, so the script no longer depends on the print statement.
    if len(sys.argv) != 3:
        print('usage: make_paths <filename> <npaths>')
        sys.exit(-1)

    filename = sys.argv[1]
    npaths = int(sys.argv[2])  # raises ValueError if <npaths> is not an integer

    print('loading clean training files')
    edge_lists = loader.load_train_files(train_format, file_low, file_high)
    graphs = loader.get_graphs_from_edge_lists(edge_lists)

    print('making paths file %s' % filename)
    with open(filename, 'w') as outfile:
        # One path per output line.
        for _ in range(npaths):
            outfile.write(delim.join(random_shortest_path(graphs)))
            outfile.write('\n')
from collections import defaultdict

import numpy as np

import loader

# Training files are addressed as train_file_format % i; the low/high bounds
# are passed straight to loader.load_train_files.
train_file_format = 'train/train%d.txt'
train_file_low = 1
train_file_high = 15

# Number of columns in the prediction matrix (one row per path).
test_times = 5

paths_file = 'paths.txt'
# NOTE(review): submission_file is not used in the visible part of the
# script; presumably the output is written further down -- confirm.
submission_file = 'submission.csv'

if __name__ == '__main__':
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, so the script no longer depends on the print statement.
    print('loading data')
    paths = loader.load_paths_file(paths_file)
    # NOTE(review): other scripts pass the result of load_train_files through
    # loader.get_graphs_from_edge_lists before using it as `graphs`; here it
    # is used directly.  Confirm which form get_all_nodes_dict expects.
    graphs = loader.load_train_files(train_file_format,
                                     train_file_low,
                                     train_file_high)

    m = len(paths)
    n = test_times
    # Predictions start out as uniform random values in [0, 1); the
    # zero-initialised alternative is kept for reference.
    # pred = np.zeros((m,n))
    pred = np.random.rand(m, n)

    print('training node name decoder model')
    print('get all nodes')
    names = loader.get_all_nodes_dict(graphs, paths)
    print('make word lookup')
    word_lookup = loader.make_word_lookup(names)
    print('get name found')
    name_found, edits = loader.get_name_found(names, word_lookup)

    # Counters defaulting to 0 for keys that have not been seen yet.
    actual = defaultdict(int)
    corrected = defaultdict(int)
def write_analysis_matrix(data, pred, filename):
    """Write ``data`` and ``pred`` side by side to ``filename`` as text.

    Each output line holds one row: every column of ``data`` formatted with
    ``%d``, followed by every column of ``pred`` formatted with ``%f``.
    Every value -- including the last one on a line -- is followed by a
    comma, and each line ends with a newline.

    Args:
        data: 2-D array-like exposing ``.shape`` and ``[i, j]`` indexing.
        pred: 2-D array-like exposing ``.shape`` and ``[i, j]`` indexing.
        filename: Path of the file to create or overwrite.

    Note:
        The number of rows written is taken from ``pred``.  ``data`` is
        expected to have at least that many rows; extra rows of ``data``
        are not written.
    """
    # Only the column count of `data` is needed; the row count comes from
    # `pred`.  Distinct names avoid shadowing the conventional numpy alias
    # `np` and avoid silently overwriting the first row count.
    _, n_data_cols = data.shape
    n_rows, n_pred_cols = pred.shape

    with open(filename, 'w') as f:
        for i in range(n_rows):
            cells = ['%d,' % data[i, j] for j in range(n_data_cols)]
            cells.extend('%f,' % pred[i, j] for j in range(n_pred_cols))
            # One write per row instead of one per cell.
            f.write(''.join(cells) + '\n')


if __name__ == '__main__':
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, so the script no longer depends on the print statement.
    print('loading clean training files')
    edge_lists = loader.load_train_files(train_format, file_low, file_high)
    graphs = loader.get_graphs_from_edge_lists(edge_lists)

    print('loading paths file')
    paths = loader.load_paths_file(paths_file)

    print('loading shortest path files')
    scosts = loader.load_shortest_path_costs(costs_format, file_low, file_high)

    print('computing path costs')
    pcosts = loader.compute_path_costs(graphs, paths)

    print('getting shortest path matrix')
    data = loader.get_shortest_path_matrix(graphs, paths, scosts, pcosts)

    print('running logistic regression')