# Create p_stemmer of class PorterStemmer
p_stemmer = PorterStemmer()

# Tunable settings used by code outside this chunk.
num_topics = 50
passes = 20
topn = 10

# N and k are forwarded (in that order) to
# random_idx.generate_letter_id_vectors below.
k = 5000
N = 10000

# cluster_sizes is mapping to n-gram size
# cluster_sz in random_idx referring to specific element (int) in
# cluster_sizes, array
cluster_sizes = [1, 2, 3, 4, 5, 6, 7, 8]
ordered = 1

# assuming this is the alphabet bc of precedent in generate_text.py
# alph = 'abc'
# NOTE(review): string.lowercase exists only on Python 2 and is
# locale-dependent; string.ascii_lowercase is the portable spelling if this
# module is ever ported.
alphabet = string.lowercase + ' '
RI_letters = random_idx.generate_letter_id_vectors(N, k, alphabet)


def create_doc_set(path, files):
    """Read each file named in *files* and return the contents as a list.

    Args:
        path: Prefix that is string-concatenated with each filename, so it
            must already end with a path separator when it names a directory.
        files: Iterable of filenames, read in iteration order.

    Returns:
        A list with one string per file, in the same order as *files*.
    """
    doc_set = []
    for filename in files:
        # The context manager closes the handle even if read() raises.
        with open(path + filename, "r") as f:
            doc_set.append(f.read())
    return doc_set


def tokenize(doc_set):
    # list for tokenized documents in loop
    texts = []
    # loop through document list
# Create p_stemmer of class PorterStemmer
p_stemmer = PorterStemmer()

# Tunable settings used by code outside this chunk.
num_topics = 50
passes = 20
topn = 10

# N and k are forwarded (in that order) to
# random_idx.generate_letter_id_vectors below.
k = 5000
N = 10000

# cluster_sizes is mapping to n-gram size
# cluster_sz in random_idx referring to specific element (int) in
# cluster_sizes, array
cluster_sizes = [1, 2, 3, 4, 5, 6, 7, 8]
ordered = 1

# assuming this is the alphabet bc of precedent in generate_text.py
# alph = 'abc'
# NOTE(review): string.lowercase exists only on Python 2 and is
# locale-dependent; string.ascii_lowercase is the portable spelling if this
# module is ever ported.
alphabet = string.lowercase + " "
RI_letters = random_idx.generate_letter_id_vectors(N, k, alphabet)


def create_doc_set(path, files):
    """Load the text of every file in *files* into a list.

    Args:
        path: Prefix joined to each filename by plain string concatenation;
            include the trailing separator when it is a directory.
        files: Iterable of filenames, processed in iteration order.

    Returns:
        A list of file contents (one string per file), ordered like *files*.
    """
    doc_set = []
    for filename in files:
        # "with" guarantees the file is closed even when read() fails.
        with open(path + filename, "r") as f:
            doc_set.append(f.read())
    return doc_set


def tokenize(doc_set):
    # list for tokenized documents in loop
    texts = []
    # loop through document list
return trellis """ The interface that you should interact with """ def wrapper_garden(s, n, training = 0): return garden_path(s, postprocessed_array1, postprocessed_array2, n, "", training) try: postprocessed_array1 = np.load('../output/log/preprocessedngrams_100k.p') lv1 = np.load('../output/log/lv_100k.p') except Exception as e: lv1 = ri.generate_letter_id_vectors(N,N/2); lv1.dump('../output/log/lv_100k.p') postprocessed_array1 = np.array(llv.load_ngram_vector(text, lv1, 6)) #creates the array up to 5 grams postprocessed_array1.dump('../output/log/preprocessedngrams_100k.p'); print "finished generating" try: postprocessed_array2 = np.load('../output/log/preprocessedngrams_2_100k.p') lv2 = np.load('../output/log/lv_2_100k.p') except Exception as e: lv2 = ri.generate_letter_id_vectors(N,N/2); lv2.dump('../output/log/lv_2_100k.p') postprocessed_array2 = np.array(llv.load_ngram_vector(text, lv2, 6)) #creates the array up to 5 grams postprocessed_array2.dump('../output/log/preprocessedngrams_2_100k.p'); print "finished generating"