def load_matrix(f, thresh=None):
    """Load a sparse matrix from disk and return it in CSR format.

    Two on-disk formats are handled, chosen by the file name:

    * ``*.bin`` -- read through ``matstore`` as a COO matrix (with
      ``min_size=250000``) and converted with ``.tocsr()``. When ``thresh``
      is given, ``matstore.retrieve_mat_as_coo_thresh`` is used instead of
      ``matstore.retrieve_mat_as_coo``.
    * anything else -- treated as a NumPy ``.npz`` archive holding the
      arrays ``data``, ``indices``, ``indptr`` and ``shape``. The ``.npz``
      suffix is appended when it is missing. ``thresh`` is ignored here.

    Args:
        f: Path of the matrix file (with or without the ``.npz`` suffix).
        thresh: Optional threshold forwarded to ``matstore`` for ``.bin``
            files; ``None`` means no thresholding.

    Returns:
        The matrix as a ``csr_matrix``.
    """
    if f.endswith('.bin'):
        # Identity test: ``== None`` would be evaluated element-wise if an
        # array-like threshold were ever passed.
        if thresh is None:
            return matstore.retrieve_mat_as_coo(f, min_size=250000).tocsr()
        return matstore.retrieve_mat_as_coo_thresh(
            f, thresh, min_size=250000).tocsr()

    if not f.endswith('.npz'):
        f += '.npz'
    # np.load on an .npz archive keeps the file open until closed; the
    # arrays are read when indexed, so build the matrix inside the block.
    with np.load(f) as loader:
        return csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=loader['shape'])
# NOTE(review): a function with this exact name and signature is defined
# earlier in this file as well; the later definition is the one that stays
# bound at import time. Consider keeping only one copy.
def load_matrix(f, thresh=None):
    """Read a sparse matrix file and return it as a CSR matrix.

    Files ending in ``.bin`` are loaded via ``matstore`` as COO matrices
    (``min_size=250000``) and converted to CSR; ``thresh``, when not
    ``None``, selects ``matstore.retrieve_mat_as_coo_thresh``. Every other
    path is read as an ``.npz`` archive (the suffix is added if absent)
    containing ``data``, ``indices``, ``indptr`` and ``shape``; ``thresh``
    is not used on that path.

    Args:
        f: Matrix file path.
        thresh: Optional threshold for ``.bin`` files, or ``None``.

    Returns:
        A ``csr_matrix`` with the stored contents.
    """
    if f.endswith('.bin'):
        if thresh is None:
            coo = matstore.retrieve_mat_as_coo(f, min_size=250000)
        else:
            coo = matstore.retrieve_mat_as_coo_thresh(
                f, thresh, min_size=250000)
        return coo.tocsr()

    if not f.endswith('.npz'):
        f += '.npz'
    # Close the archive deterministically instead of leaking its file
    # handle; the member arrays are materialised before the block exits.
    with np.load(f) as loader:
        triple = (loader['data'], loader['indices'], loader['indptr'])
        return csr_matrix(triple, shape=loader['shape'])
def main(proc_num, lock, out_pref, tmp_dir, in_dir, years, word_infos, thresh):
    """Worker loop: claim unprocessed years and write their graph statistics.

    Each iteration takes ``lock``, looks for the first year in ``years``
    whose marker file ``<year>-tmp.pkl`` is not yet in ``tmp_dir``, creates
    that file empty to claim the year, and releases the lock. The claimed
    year's matrix is then loaded from ``in_dir + str(year) + ".bin"``, turned
    into a graph, and two pickles are written: the statistics of the graph
    (overwriting the marker file) and the statistics of a rewired copy
    (``"rewire" + <year>-tmp.pkl``). The loop ends when no year is left.

    Args:
        proc_num: Identifier of this worker; only used in log output.
        lock: Lock-like object with ``acquire()`` / ``release()`` guarding
            the claim step.
        out_pref: Not used by this function.
        tmp_dir: Directory for marker/result files. Paths are built by plain
            string concatenation, so it must end with a path separator.
        in_dir: Prefix for the input ``.bin`` files (also concatenated).
        years: List of years to process. Shuffled IN PLACE.
        word_infos: ``None``, or a mapping where ``word_infos[year][1]`` holds
            word indices (assumed to be a NumPy array, since it is
            mask-indexed below -- TODO confirm against caller).
        thresh: ``None``, or a threshold passed to
            ``matstore.retrieve_mat_as_coo_thresh``.
    """
    # Log lines use single-argument print(...) so that the output is
    # identical whether print is a statement (Python 2) or a function.
    random.shuffle(years)
    print("%s Start loop" % (proc_num,))
    while True:
        work_left = False
        lock.acquire()
        try:
            # The directory cannot gain marker files while the lock is held,
            # so one listing per claim attempt is enough.
            existing_files = set(os.listdir(tmp_dir))
            for year in years:
                fname = str(year) + "-tmp.pkl"
                if fname in existing_files:
                    continue
                work_left = True
                print("%s year %s" % (proc_num, year))
                # Empty marker file claims this year for this worker.
                with open(tmp_dir + fname, "w") as fp:
                    fp.write("")
                break
        finally:
            # Always release, even if listing or creating the marker fails;
            # otherwise every other worker would block forever.
            lock.release()

        if not work_left:
            print("%s Finished" % (proc_num,))
            break

        print("%s Retrieving mat for year %s" % (proc_num, year))
        bin_path = in_dir + str(year) + ".bin"
        if thresh is not None:
            mat = matstore.retrieve_mat_as_coo_thresh(bin_path, thresh)
        else:
            mat = matstore.retrieve_mat_as_coo(bin_path)
        # Zero the diagonal (presumably to drop self-loops -- confirm).
        mat.setdiag(0)

        if word_infos is not None:
            word_indices = word_infos[year][1]
            # Keep only indices that fall inside both matrix dimensions.
            indices = word_indices[
                word_indices < min(mat.shape[1], mat.shape[0])]
        else:
            indices = np.arange(mat.shape[0])

        year_graph = make_snap_graph(indices, mat)
        print("%s Getting statistics for year %s" % (proc_num, year))
        year_stats = compute_graph_stats(year_graph)
        rewire_year_stats = compute_graph_stats(
            snap.GenRewire(year_graph, REWIRE_EDGE_SWITCHES))
        ioutils.write_pickle(year_stats, tmp_dir + fname)
        ioutils.write_pickle(rewire_year_stats, tmp_dir + "rewire" + fname)