def generate_words(cluster_sz,
                   english_vector=english_vector,
                   normed_eng=normed_eng,
                   length=30):
    """Greedily grow a string whose RI vector best matches ``normed_eng``.

    Starting from one randomly chosen element of the module-level ``alph``,
    the function performs ``length`` extension steps. At each step every
    candidate cluster is appended to the current string, the candidate's RI
    vector is computed with ``random_idx.generate_RI_str``, normalised to
    unit length, and scored by the absolute value of its dot product with
    ``normed_eng``. The best-scoring cluster is appended permanently and a
    progress line (string length, score, string) is printed.

    NOTE(review): another definition of ``generate_words`` appears later in
    this file; the later one is what ends up bound at import time.

    Args:
        cluster_sz: cluster size forwarded to
            ``utils.generate_ordered_clusters`` and
            ``random_idx.generate_RI_str``.
        english_vector: not used by this function; kept so existing callers
            that pass it keep working.
        normed_eng: reference vector each candidate is scored against
            (presumably already unit-normalised -- TODO confirm).
        length: number of greedy extension steps to perform (default 30,
            the value that was previously hard-coded).

    Returns:
        The generated string (the random seed element followed by the
        ``length`` selected clusters).
    """
    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function.
    print("generating english vector of cluster size %s" % (cluster_sz,))

    alphy = utils.generate_ordered_clusters(alph, cluster_sz=cluster_sz)
    # Seed the string with one randomly chosen element of the alphabet.
    gstr = alph[np.random.randint(len(alph))]

    for _ in range(length):
        max_idx = 0
        maxabs = 0
        for j, cluster in enumerate(alphy):
            temp_id = random_idx.generate_RI_str(N, RI_letters, cluster_sz,
                                                 ordered, gstr + cluster)
            temp_id /= np.linalg.norm(temp_id)
            absy = np.abs(temp_id.dot(normed_eng.T))
            # Strict '>' keeps the first candidate when scores tie.
            if absy > maxabs:
                max_idx = j
                maxabs = absy
        gstr += alphy[max_idx]
        print("%s %s %s" % (len(gstr), maxabs, gstr))

    return gstr
def generate_words(cluster_sz, english_vector=english_vector, normed_eng=normed_eng,
                   length=30):
    """Build a string greedily so that its RI vector aligns with ``normed_eng``.

    A random element of the module-level ``alph`` seeds the string. Then,
    ``length`` times, each candidate cluster is tried as a suffix: the RI
    vector of the extended string is obtained from
    ``random_idx.generate_RI_str``, scaled to unit norm, and scored by the
    absolute dot product with ``normed_eng``. The highest-scoring cluster is
    kept, and the current length, score and string are printed.

    NOTE(review): another definition of ``generate_words`` appears earlier
    in this file; this later one replaces it at import time.

    Args:
        cluster_sz: cluster size passed on to
            ``utils.generate_ordered_clusters`` and
            ``random_idx.generate_RI_str``.
        english_vector: unused here; retained for backward compatibility
            with existing call sites.
        normed_eng: reference vector used for scoring (presumably already
            unit-normalised -- TODO confirm).
        length: how many clusters to append (default 30, formerly a
            hard-coded constant).

    Returns:
        The generated string.
    """
    # print(<single expression>) gives identical output whether print is a
    # statement (Python 2) or a function (Python 3).
    print("generating english vector of cluster size %s" % (cluster_sz,))

    alphy = utils.generate_ordered_clusters(alph, cluster_sz=cluster_sz)
    gstr = alph[np.random.randint(len(alph))]  # random seed element

    for _ in range(length):
        max_idx = 0
        maxabs = 0
        for j, cluster in enumerate(alphy):
            temp_id = random_idx.generate_RI_str(
                N, RI_letters, cluster_sz, ordered, gstr + cluster)
            temp_id /= np.linalg.norm(temp_id)
            absy = np.abs(temp_id.dot(normed_eng.T))
            # Only a strictly better score replaces the current best, so
            # the earliest candidate wins ties.
            if absy > maxabs:
                max_idx = j
                maxabs = absy
        gstr += alphy[max_idx]
        print("%s %s %s" % (len(gstr), maxabs, gstr))

    return gstr
def log_generate_RI_str(N, RI_letters, cluster_sz, ordered, string, alph=alphabet):
    """Return the element-wise base-2 logarithm of the RI vector of ``string``.

    All arguments are forwarded unchanged to ``random_idx.generate_RI_str``;
    its result is passed through ``np.log2`` and returned.

    NOTE(review): another definition of this function appears later in this
    file; the later one is what ends up bound at import time.
    """
    ri_vector = random_idx.generate_RI_str(
        N, RI_letters, cluster_sz, ordered, string, alph)
    return np.log2(ri_vector)
def log_generate_RI_str(N, RI_letters, cluster_sz, ordered, string, alph=alphabet):
    """Compute the RI vector for ``string`` and take its base-2 logarithm.

    The arguments are handed straight to ``random_idx.generate_RI_str``;
    ``np.log2`` is then applied element-wise to the vector it returns.

    NOTE(review): another definition of this function appears earlier in
    this file; this later one replaces it at import time.
    """
    return np.log2(
        random_idx.generate_RI_str(N, RI_letters, cluster_sz, ordered,
                                   string, alph)
    )