示例#1
0
def worker(proc_num, queue, out_pref, in_dir, word_list, displacement_base,
           thresh):
    while True:
        try:
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break

        print proc_num, "Loading matrices..."
        base = simple_create_representation(REP_TYPE,
                                            in_dir + str(year - 1) + ".bin",
                                            restricted_context=word_list[year -
                                                                         1],
                                            thresh=thresh)
        delta = simple_create_representation(
            REP_TYPE,
            in_dir + str(year) + ".bin",
            restricted_context=word_list[year],
            thresh=thresh)
        print proc_num, "Getting deltas..."
        year_vols = get_cosine_deltas(base, delta, word_list[year])
        year_disp = get_cosine_deltas(displacement_base, delta,
                                      word_list[year])
        print proc_num, "Writing results..."
        ioutils.write_pickle(year_vols, out_pref + str(year) + "-vols.pkl")
        ioutils.write_pickle(year_disp, out_pref + str(year) + "-disps.pkl")
示例#2
0
def worker(proc_num, queue, out_pref, in_dir, year_index_infos, knn, thresh):
    random.shuffle(years)
    print proc_num, "Start loop"
    while True:
        try:
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break
        print proc_num, "Making second orders for year", year
        old_embed = simple_create_representation(REP_TYPE,
                                                 in_dir + str(year) + ".bin",
                                                 thresh=thresh)
        old_embed = old_embed.get_subembed(year_index_infos[year]["list"])
        old_mat = old_embed.m.tocoo()
        row_d, col_d, data_d, keep_rows = make_secondorder_mat(
            old_mat, thresh=thresh, min_cooccurs=0, shrink_mat=False)
        second_mat = coo_matrix((data_d, (row_d, col_d)))
        if knn != None:
            row_d, col_d, data_d = make_knn_mat(second_mat, knn)
            second_mat = coo_matrix((data_d, (row_d, col_d)))
        year_stats = get_year_stats(second_mat,
                                    old_embed.wi,
                                    old_embed.iw,
                                    stats=STATS)
        print proc_num, "Writing stats for year", year
        ioutils.write_pickle(year_stats, out_pref + str(year) + "-tmp.pkl")
示例#3
0
def worker(proc_num, queue, out_pref, in_dir, word_list, displacement_base, thresh):
    while True:
        try: 
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break
       
        print proc_num, "Loading matrices..."
        base = simple_create_representation(REP_TYPE, in_dir + str(year-1) + ".bin", restricted_context=word_list[year-1], thresh=thresh)
        delta = simple_create_representation(REP_TYPE, in_dir + str(year) + ".bin", restricted_context=word_list[year], thresh=thresh)
        print proc_num, "Getting deltas..."
        year_vols = get_cosine_deltas(base, delta, word_list[year])
        year_disp = get_cosine_deltas(displacement_base, delta, word_list[year])
        print proc_num, "Writing results..."
        ioutils.write_pickle(year_vols, out_pref + str(year) + "-vols.pkl")
        ioutils.write_pickle(year_disp, out_pref + str(year) + "-disps.pkl")
def worker(proc_num, queue, out_pref, in_dir, year_index_infos, knn, thresh):
    random.shuffle(years)
    print proc_num, "Start loop"
    while True:
        try: 
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break
        print proc_num, "Making second orders for year", year
        old_embed = simple_create_representation(REP_TYPE, in_dir + str(year) + ".bin", thresh=thresh)
        old_embed = old_embed.get_subembed(year_index_infos[year]["list"])
        old_mat = old_embed.m.tocoo()
        row_d, col_d, data_d, keep_rows = make_secondorder_mat(old_mat, thresh=thresh, min_cooccurs=0, shrink_mat=False)
        second_mat = coo_matrix((data_d, (row_d, col_d)))
        if knn != None:
            row_d, col_d, data_d = make_knn_mat(second_mat, knn)
            second_mat = coo_matrix((data_d, (row_d, col_d)))
        year_stats = get_year_stats(second_mat, old_embed.wi, old_embed.iw, stats=STATS)
        print proc_num, "Writing stats for year", year
        ioutils.write_pickle(year_stats,  out_pref + str(year) + "-tmp.pkl")
示例#5
0
    for p in procs:
        p.join()
    print "Merging"
    full_word_set = set([])
    for year_words in word_list.itervalues():
        full_word_set = full_word_set.union(set(year_words))
    merge(out_pref, years, list(full_word_set))

if __name__ == '__main__':
    # Script entry point: parse the command line, load the per-year word
    # lists, build the displacement-base representation and hand everything
    # to run_parallel.
    parser = argparse.ArgumentParser(
        description="Computes semantic change statistics for words.")

    # Positional arguments.
    parser.add_argument(
        "dir", help="path to network data (also where output goes)")
    parser.add_argument("word_file", help="path to sorted word file")
    parser.add_argument(
        "num_procs", help="number of processes to spawn", type=int)

    # Optional arguments; -1 for --num-words means "do not truncate".
    parser.add_argument(
        "--num-words",
        default=-1,
        type=int,
        help="Number of words (of decreasing average frequency) to include")
    parser.add_argument(
        "--start-year",
        default=START_YEAR,
        type=int,
        help="start year (inclusive)")
    parser.add_argument(
        "--end-year",
        default=END_YEAR,
        type=int,
        help="end year (inclusive)")
    parser.add_argument(
        "--disp-year",
        default=END_YEAR,
        type=int,
        help="year to measure displacement from")
    parser.add_argument(
        "--thresh",
        default=THRESH,
        type=float,
        help="relevance threshold")
    args = parser.parse_args()

    years = range(args.start_year, args.end_year + 1)
    word_lists = ioutils.load_year_words(args.word_file, years)
    truncate_words = args.num_words != -1
    if truncate_words:
        # Keep only the first num_words entries of every year's list.
        for year in years:
            word_lists[year] = word_lists[year][:args.num_words]

    ioutils.mkdir(args.dir + "/volstats")

    # Output prefix: word-file name up to its first dot, then the threshold
    # and, when truncating, the word limit.
    word_file_stem = args.word_file.split("/")[-1].split(".")[0]
    outpref = "/volstats/" + word_file_stem + "-" + str(args.thresh)
    if truncate_words:
        outpref += "-top" + str(args.num_words)

    displacement_base = simple_create_representation(
        REP_TYPE,
        args.dir + "/" + str(args.disp_year) + ".bin",
        restricted_context=word_lists[args.end_year],
        thresh=args.thresh)

    # The first year is omitted from the work list (years[1:]).
    run_parallel(
        args.num_procs,
        args.dir + outpref,
        args.dir + "/",
        years[1:],
        word_lists,
        displacement_base,
        args.thresh)
示例#6
0
    parser.add_argument("--end-year",
                        type=int,
                        help="end year (inclusive)",
                        default=END_YEAR)
    parser.add_argument("--disp-year",
                        type=int,
                        help="year to measure displacement from",
                        default=END_YEAR)
    parser.add_argument("--thresh",
                        type=float,
                        help="relevance threshold",
                        default=THRESH)
    args = parser.parse_args()
    years = range(args.start_year, args.end_year + 1)
    word_lists = ioutils.load_year_words(args.word_file, years)
    if args.num_words != -1:
        for year in years:
            word_lists[year] = word_lists[year][:args.num_words]
    ioutils.mkdir(args.dir + "/volstats")
    outpref = "/volstats/" + args.word_file.split("/")[-1].split(
        ".")[0] + "-" + str(args.thresh)
    if args.num_words != -1:
        outpref += "-top" + str(args.num_words)
    displacement_base = simple_create_representation(
        REP_TYPE,
        args.dir + "/" + str(args.disp_year) + ".bin",
        restricted_context=word_lists[args.end_year],
        thresh=args.thresh)
    run_parallel(args.num_procs, args.dir + outpref, args.dir + "/", years[1:],
                 word_lists, displacement_base, args.thresh)