Пример #1
0
def make_clean_paper_author(confirmed_pairs, paper_author_pairs,
                            output_file_path):
    """Write the selected paper-author records to a CSV file.

    The file starts with the header ``PaperId,AuthorId,Name,Affiliation``
    followed by one row per entry of ``confirmed_pairs``, in iteration order.

    Args:
        confirmed_pairs: iterable of indices (or keys) into
            ``paper_author_pairs`` naming the records to write.
        paper_author_pairs: indexable collection of records; each record
            must provide ``'paper_id'`` and ``'author_id'`` (formatted with
            ``%d``) plus ``'author_name'`` and ``'author_affiliation'``.
        output_file_path: destination path; existing content is overwritten.
    """
    # Function-scope import: the module's import block is not visible here.
    import csv

    # Plain 'w' is enough; the file is never read back through this handle.
    with open(output_file_path, 'w') as f:
        # csv.writer quotes fields containing commas, quotes or newlines, so
        # free-text names/affiliations can no longer break the row layout.
        # lineterminator='\n' keeps the line endings the old print-based
        # writer produced.
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['PaperId', 'AuthorId', 'Name', 'Affiliation'])
        for idx in confirmed_pairs:
            pair = paper_author_pairs[idx]
            # Format each field exactly as the previous '%d,%d,%s,%s'
            # template did; only the quoting is new.
            writer.writerow([
                '%d' % (pair['paper_id'],),
                '%d' % (pair['author_id'],),
                '%s' % (pair['author_name'],),
                '%s' % (pair['author_affiliation'],),
            ])


if __name__ == '__main__':
    # Exactly five arguments must follow the script name (see usage text).
    if len(sys.argv) != 1 + 5:
        usage = "Usage : %s Train.csv PaperAuthor.csv Test.csv label output_file" % (
            sys.argv[0], )
        sys.stderr.write(usage + '\n')
        sys.exit(-1)

    (train_path, paper_author_path, test_path,
     label_path, output_path) = sys.argv[1:]

    # Load every input up front, in the order given on the command line.
    train_csv = loader.load_train_csv(train_path)
    train_csv_dict = make_train_csv_dict(train_csv)
    paper_author_csv = loader.load_paper_author_csv(paper_author_path)
    test_csv = loader.load_test_csv(test_path)
    labels = load_labels(label_path)
    sys.stdout.write('Data loaded!' + '\n')

    # Derive the confirmed pairs, map them to PaperAuthor row indices and
    # write the selected rows to the output file.
    train_authors = get_author_in_train(train_csv)
    confirmed_pairs = get_confirmed_pairs(train_authors, train_csv_dict,
                                          test_csv, labels)
    pair_idxs = filter_paper_author_csv(paper_author_csv, confirmed_pairs)
    make_clean_paper_author(pair_idxs, paper_author_csv, output_path)
    sys.exit(0)
Пример #2
0
def make_train_csv_dict(train_csv):
    """Index the training records by their ``'author_id'`` value.

    Args:
        train_csv: iterable of records, each subscriptable with
            ``'author_id'``.

    Returns:
        A dict mapping each ``author_id`` to its record. When several
        records share an id, the last one encountered is kept.
    """
    return {record['author_id']: record for record in train_csv}

def make_clean_paper_author(confirmed_pairs, paper_author_pairs, output_file_path):
    """Dump the chosen paper-author records as CSV.

    Output layout: a ``PaperId,AuthorId,Name,Affiliation`` header line, then
    one line per element of ``confirmed_pairs`` in the order it is iterated.

    Args:
        confirmed_pairs: iterable of indices (or keys) selecting records
            from ``paper_author_pairs``.
        paper_author_pairs: indexable collection of records exposing
            ``'paper_id'``, ``'author_id'`` (both rendered with ``%d``),
            ``'author_name'`` and ``'author_affiliation'``.
        output_file_path: file to create or overwrite.
    """
    # Imported locally because the file's import section is not visible here.
    import csv

    header = ['PaperId', 'AuthorId', 'Name', 'Affiliation']
    # Write-only access suffices; nothing reads from this handle.
    with open(output_file_path, 'w') as f:
        # The csv writer escapes commas/quotes inside names and affiliations,
        # which the former '%d,%d,%s,%s' template emitted verbatim and thereby
        # corrupted the row. '\n' matches the old print-based line endings.
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(header)
        for idx in confirmed_pairs:
            record = paper_author_pairs[idx]
            writer.writerow([
                '%d' % (record['paper_id'],),
                '%d' % (record['author_id'],),
                '%s' % (record['author_name'],),
                '%s' % (record['author_affiliation'],),
            ])

if __name__ == '__main__':
    # The script name must be followed by exactly five arguments.
    if len(sys.argv) != 1 + 5:
        usage = "Usage : %s Train.csv PaperAuthor.csv Test.csv label output_file" % (sys.argv[0], )
        sys.stderr.write(usage + '\n')
        sys.exit(-1)

    train_path, paper_author_path, test_path, label_path, output_path = sys.argv[1:]

    # Read all inputs before doing any processing.
    train_csv = loader.load_train_csv(train_path)
    train_csv_dict = make_train_csv_dict(train_csv)
    paper_author_csv = loader.load_paper_author_csv(paper_author_path)
    test_csv = loader.load_test_csv(test_path)
    labels = load_labels(label_path)
    sys.stdout.write('Data loaded!' + '\n')

    # Compute the confirmed pairs, resolve them to PaperAuthor row indices,
    # then write those rows out.
    train_authors = get_author_in_train(train_csv)
    confirmed_pairs = get_confirmed_pairs(train_authors, train_csv_dict, test_csv, labels)
    pair_idxs = filter_paper_author_csv(paper_author_csv, confirmed_pairs)
    make_clean_paper_author(pair_idxs, paper_author_csv, output_path)
    sys.exit(0)

    return authors

def make_test_file(authors, test_file_path):
    """Write an author-to-papers mapping as a two-column text file.

    The first line is the header ``AuthorId,PaperIds``. Each following line
    holds one key of ``authors``, a comma, and the items of
    ``authors[key]`` converted with ``str`` and separated by single spaces.

    Args:
        authors: iterable of author ids that can also be subscripted by
            those ids to obtain an iterable of paper ids (e.g. a dict).
        test_file_path: file to create or overwrite.
    """
    with open(test_file_path, 'w+') as out:
        out.write("AuthorId,PaperIds\n")
        for author_id in authors:
            paper_ids = ' '.join(map(str, authors[author_id]))
            out.write(str(author_id) + ',' + paper_ids + '\n')



if __name__ == '__main__':
    # The script name must be followed by exactly four arguments.
    if len(sys.argv) != 1 + 4:
        usage = "Usage : %s Author.csv Paper.csv PaperAuthor.csv OutputFile" % (sys.argv[0],)
        sys.stderr.write(usage + '\n')
        sys.exit(-1)

    author_path, paper_path, paper_author_path, output_path = sys.argv[1:]

    # Load the three input tables.
    authors = loader.load_author_csv(author_path)
    papers = loader.load_paper_csv(paper_path)
    paper_author_pairs = loader.load_paper_author_csv(paper_author_path)
    sys.stdout.write('Data loaded' + '\n')

    # Build the pairs, then persist them to the requested output file.
    sys.stdout.write('Generating' + '\n')
    test_pairs = generate_pairs(authors, papers, paper_author_pairs)
    sys.stdout.write('Saving' + '\n')
    make_test_file(test_pairs, output_path)