def create_lang_vec(filename, lv, cluster_sizes, N=N, k=k):
    """Accumulate one text vector per cluster size into a single vector.

    For every entry of ``cluster_sizes`` this calls
    ``random_idx.generate_RI_text_fast`` on ``filename`` and adds the
    result into a running total that starts as a ``(1, N)`` array of zeros.

    Args:
        filename: Text file argument forwarded to
            ``random_idx.generate_RI_text_fast``.
        lv: Letter-vector argument forwarded to
            ``random_idx.generate_RI_text_fast``.
        cluster_sizes: Iterable of cluster sizes; one vector is generated
            and summed for each entry.
        N: Length of the second axis of the accumulator; also forwarded to
            ``random_idx.generate_RI_text_fast``.
        k: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The ``(1, N)`` accumulator after all generated vectors have been
        added. If ``cluster_sizes`` is empty, the zeros array is returned
        unchanged.
    """
    total_lang = np.zeros((1, N))

    for cz in cluster_sizes:
        # Single-argument parenthesized print: emits the same text as the
        # old `print "...", cz` statement, and is valid on Python 2 and 3.
        print("generating language vector of cluster size %s" % (cz,))

        # `ordered` and `alphabet` are not parameters; they are read from
        # the enclosing (module) scope.
        lang_vector = random_idx.generate_RI_text_fast(
            N, lv, cz, ordered, filename, alphabet)

        # In-place add keeps the accumulator's shape and dtype.
        total_lang += lang_vector

    return total_lang
def log_generate_RI_text_fast(N, RI_letters, cluster_sz, ordered, text_name, alph=alphabet):
    """Return the base-2 logarithm of a generated text vector.

    All arguments are forwarded positionally, in the order given, to
    ``random_idx.generate_RI_text_fast``; ``np.log2`` is then applied to
    whatever that call returns.

    NOTE(review): if the generated vector can hold zero or negative
    entries, ``np.log2`` produces ``-inf`` / ``nan`` there -- confirm
    this is intended.
    """
    raw_vector = random_idx.generate_RI_text_fast(
        N, RI_letters, cluster_sz, ordered, text_name, alph)
    return np.log2(raw_vector)