示例#1
0
# NOTE(review): only the signature of node_presence is visible in this
# excerpt; the function body is missing, so its behaviour cannot be
# documented from here. TODO: restore the body from the original source.
def node_presence(graphs, path):
    

if __name__ == '__main__':
    # Command-line entry point. Expects exactly two arguments:
    #   <filename>  output file to create or overwrite
    #   <npaths>    number of lines to write
    # print() is called with a single argument throughout, so the output is
    # identical under Python 2 and the code also parses under Python 3.
    if len(sys.argv) != 3:
        print('usage: make_paths <filename> <npaths>')
        sys.exit(-1)
    filename = sys.argv[1]
    # int() raises ValueError if <npaths> is not an integer literal.
    npaths = int(sys.argv[2])

    print('loading clean training files')
    edge_lists = loader.load_train_files(train_format, file_low, file_high)
    graphs = loader.get_graphs_from_edge_lists(edge_lists)

    print('making paths file %s' % filename)
    with open(filename, 'w') as outfile:
        for _ in range(npaths):
            # One call to random_shortest_path(graphs) per output line; its
            # items are joined with delim (assumes they are strings and that
            # delim is a str -- TODO confirm against their definitions).
            outfile.write(delim.join(random_shortest_path(graphs)) + '\n')
示例#2
0
import numpy as np
import loader
from collections import defaultdict

# Script settings. train_file_format, train_file_low and train_file_high are
# passed together to loader.load_train_files() in the __main__ block.

# Path pattern containing a single %d placeholder.
train_file_format = 'train/train%d.txt'
# Presumably the first and last training-file indices substituted into the
# pattern above -- TODO confirm inclusiveness against loader.load_train_files.
train_file_low = 1
train_file_high = 15
# Used as the number of columns (n) of the prediction matrix.
test_times = 5
# File read through loader.load_paths_file().
paths_file = 'paths.txt'
# NOTE(review): not referenced in the visible part of this script.
submission_file = 'submission.csv'

if __name__ == '__main__':
    # Script entry point: load the inputs, initialise the prediction matrix
    # and build the node-name lookup structures. print() is called with a
    # single argument throughout, so the output is identical under Python 2
    # and the code also parses under Python 3.

    print('loading data')
    paths = loader.load_paths_file(paths_file)
    graphs = loader.load_train_files(train_file_format,
                                     train_file_low, train_file_high)
    m = len(paths)
    n = test_times
    # Prediction matrix with one row per entry of `paths` and `test_times`
    # columns, filled with uniform random values in [0, 1).
    # Alternative all-zero initialisation left by the original author:
    # pred = np.zeros((m,n))
    pred = np.random.rand(m, n)

    print('training node name decoder model')
    print('get all nodes')
    names = loader.get_all_nodes_dict(graphs, paths)
    print('make word lookup')
    word_lookup = loader.make_word_lookup(names)
    print('get name found')
    name_found, edits = loader.get_name_found(names, word_lookup)

    # Counters whose missing keys default to 0.
    actual = defaultdict(int)
    corrected = defaultdict(int)
示例#3
0
def write_analysis_matrix(data, pred, filename):
    """Write ``data`` and ``pred`` side by side to ``filename`` as text rows.

    Output row ``i`` holds every entry of row ``i`` of ``data`` formatted
    with ``%d`` (non-integer values are truncated), followed by every entry
    of row ``i`` of ``pred`` formatted with ``%f``. Each value is followed by
    a comma, so every line ends with a trailing comma before the newline.

    Args:
        data: 2-D array-like exposing ``.shape`` and ``[i, j]`` indexing.
        pred: 2-D array-like with the same number of rows as ``data``.
        filename: Path of the file to create or overwrite.

    Raises:
        ValueError: If ``data`` and ``pred`` have different row counts.
    """
    n_rows, n_data_cols = data.shape
    n_pred_rows, n_pred_cols = pred.shape
    # Check before opening the file: a mismatch would otherwise either drop
    # rows silently or fail half-way through, leaving a partial file behind.
    if n_rows != n_pred_rows:
        raise ValueError(
            'data has %d rows but pred has %d rows' % (n_rows, n_pred_rows))

    with open(filename, 'w') as f:
        for i in range(n_rows):
            cells = ['%d,' % data[i, j] for j in range(n_data_cols)]
            cells.extend('%f,' % pred[i, j] for j in range(n_pred_cols))
            cells.append('\n')
            # One write per row instead of one per value.
            f.write(''.join(cells))

if __name__ == '__main__':
    # Script entry point: load the graphs, the paths and the shortest-path
    # costs, then derive the matrix handed to the regression step. print()
    # is called with a single argument throughout, so the output is
    # identical under Python 2 and the code also parses under Python 3.

    print('loading clean training files')
    edge_lists = loader.load_train_files(train_format, file_low, file_high)
    graphs = loader.get_graphs_from_edge_lists(edge_lists)

    print('loading paths file')
    paths = loader.load_paths_file(paths_file)

    print('loading shortest path files')
    scosts = loader.load_shortest_path_costs(costs_format, file_low, file_high)

    print('computing path costs')
    pcosts = loader.compute_path_costs(graphs, paths)

    print('getting shortest path matrix')
    data = loader.get_shortest_path_matrix(graphs, paths, scosts, pcosts)

    print('running logistic regression')