def seed():
    """Seed the word-discovery pipeline.

    Builds the lookup structures from file, runs dictionary explain-away
    and hard EM over the text stream, records the results, and persists
    the lookup structures for later runs.

    NOTE(review): this file defines ``seed()`` more than once; the last
    definition shadows this one at import time — confirm which variant
    is intended.
    """
    filepath = "preprocessed_texts/english/alice-only-spaced.txt"
    # lvu.initialize()
    lv, lang_vectors, n_gram_frequencies = lvu.initialize_from_file()
    vocab_vec, max_word_length = lvu.vocab_vector(lv, lang_vectors)
    vocab = lvu.vocab(max_word_length)

    filepath = "preprocessed_texts/english/alice-only-stream.txt"  # a_christmas_carol.txt"
    text = read_file(filepath)
    processed_indices = dict_explain_away(vocab, max_word_length, text)
    processed = tuples_to_text(processed_indices, text)

    # Context manager closes the file even if write() raises (the original
    # manual open/close leaked the handle on error).
    with open("intermediate/processed_dict_explain_away_results.txt", "w") as fwrite:
        fwrite.write(processed)

    # now for the em
    # not necessary in seeding phase.
    discovered_words = hard_em_discover_words(processed_indices, text, vocab, lv)
    record_results(text, processed_indices, discovered_words, "output/explain_away_results.txt")

    # save data to file
    lvu.write_data_structures(
        [lv, lang_vectors, n_gram_frequencies, vocab_vec, vocab],
        [
            "intermediate/lookup_lv",
            "intermediate/lookup_lang_vectors",
            "intermediate/lookup_n_gram_frequencies",
            "intermediate/lookup_vocab_vec",
            "intermediate/lookup_vocab",
        ],
    )
def seed():
    """Seeding pass (older ``vocab_dict`` variant).

    Builds the lookup structures from file, runs dictionary explain-away
    directly on the stream file, and persists the lookup structures.
    The EM step is intentionally skipped in the seeding phase.

    NOTE(review): this file defines ``seed()`` more than once; the last
    definition shadows this one at import time — confirm which variant
    is intended.
    """
    filepath = "preprocessed_texts/english/alice-only-spaced.txt"
    # lvu.initialize()
    lv, lang_vectors, n_gram_frequencies = lvu.initialize_from_file()
    vocab_vec, max_word_length = lvu.vocab_vector(lv, lang_vectors)
    vocab_dict = lvu.vocab_dict(max_word_length)

    filepath = "preprocessed_texts/english/alice-only-stream.txt"  # a_christmas_carol.txt"
    # Result is kept but not written out in this variant (the write and the
    # EM step were disabled); removed the commented-out dead code.
    aea = dict_explain_away(vocab_dict, max_word_length, filepath)

    # save data to file
    lvu.write_data_structures(
        [lv, lang_vectors, n_gram_frequencies, vocab_vec, vocab_dict],
        [
            "intermediate/lookup_lv",
            "intermediate/lookup_lang_vectors",
            "intermediate/lookup_n_gram_frequencies",
            "intermediate/lookup_vocab_vec",
            "intermediate/lookup_vocab_dict",
        ],
    )
def seed():
    """Seed the word-discovery pipeline.

    Builds the lookup structures from file, runs dictionary explain-away
    and hard EM over the text stream, records the results, and persists
    the lookup structures for later runs.

    NOTE(review): this file defines ``seed()`` more than once; this last
    definition is the one that wins at import time — confirm the earlier
    duplicates can be removed.
    """
    filepath = "preprocessed_texts/english/alice-only-spaced.txt"
    # lvu.initialize()
    lv, lang_vectors, n_gram_frequencies = lvu.initialize_from_file()
    vocab_vec, max_word_length = lvu.vocab_vector(lv, lang_vectors)
    vocab = lvu.vocab(max_word_length)

    filepath = "preprocessed_texts/english/alice-only-stream.txt"  # a_christmas_carol.txt"
    text = read_file(filepath)
    processed_indices = dict_explain_away(vocab, max_word_length, text)
    processed = tuples_to_text(processed_indices, text)

    # Context manager closes the file even if write() raises (the original
    # manual open/close leaked the handle on error).
    with open("intermediate/processed_dict_explain_away_results.txt", "w") as fwrite:
        fwrite.write(processed)

    # now for the em
    # not necessary in seeding phase.
    discovered_words = hard_em_discover_words(processed_indices, text, vocab, lv)
    record_results(text, processed_indices, discovered_words, "output/explain_away_results.txt")

    # save data to file (parenthesized call instead of backslash continuations)
    lvu.write_data_structures(
        [lv, lang_vectors, n_gram_frequencies, vocab_vec, vocab],
        [
            "intermediate/lookup_lv",
            "intermediate/lookup_lang_vectors",
            "intermediate/lookup_n_gram_frequencies",
            "intermediate/lookup_vocab_vec",
            "intermediate/lookup_vocab",
        ],
    )