def run_test():
    """Run the project's full unit-test suite against the notebook functions.

    Imports the course-provided ``problem_unittests`` module and feeds each
    notebook-defined function to its matching test hook, in the original
    order. Each test call prints/raises on its own; nothing is returned.
    """
    import problem_unittests as unit_tests

    unit_tests.test_create_lookup_tables(create_lookup_tables)
    unit_tests.test_get_batches(get_batches)
    unit_tests.test_tokenize(token_lookup)
    unit_tests.test_get_inputs(get_inputs)
    unit_tests.test_get_init_cell(get_init_cell)
    unit_tests.test_get_embed(get_embed)
    unit_tests.test_build_rnn(build_rnn)
    unit_tests.test_build_nn(build_nn)
    unit_tests.test_get_tensors(get_tensors)
    unit_tests.test_pick_word(pick_word)
# NOTE(review): this span is a whitespace-mangled fragment containing three
# pieces: (1) the TAIL of create_lookup_tables — its `def` line and the
# opening `"""` of its docstring are truncated before this view, so the
# definition is incomplete here; (2) a module-level call
# `tests.test_create_lookup_tables(...)` (note it uses a `tests` alias,
# while the run_test helper elsewhere in this file imports the module as
# `t` — presumably both refer to problem_unittests; verify); and (3) the
# HEAD of token_lookup — its remaining token entries and `return` statement
# are truncated after this view. Left byte-identical: the missing code must
# be restored from the original notebook before this span can be reformatted.
Create lookup tables for vocabulary :param text: The text of tv scripts split into words :return: A tuple of dicts (vocab_to_int, int_to_vocab) """ word_counts = Counter(text) # sorting the words from most to least frequent in text occurrence sorted_vocab = sorted(word_counts, key=word_counts.get, reverse=True) # create int_to_vocab dictionaries int_to_vocab = {ii: word for ii, word in enumerate(sorted_vocab)} vocab_to_int = {word: ii for ii, word in int_to_vocab.items()} # return tuple return (vocab_to_int, int_to_vocab) tests.test_create_lookup_tables(create_lookup_tables) def token_lookup(): """ Generate a dict to turn punctuation into a token. :return: Tokenized dictionary where the key is the punctuation and the value is the token """ tokens = dict() tokens['.'] = '<PERIOD>' tokens[','] = '<COMMA>' tokens['"'] = '<QUOTATION_MARK>' tokens[';'] = '<SEMICOLON>' tokens['!'] = '<EXCLAMATION_MARK>' tokens['?'] = '<QUESTION_MARK>' tokens['('] = '<LEFT_PAREN>' tokens[')'] = '<RIGHT_PAREN>'
def test_lookup_tables(self):
    """Run the lookup-table unit test against the `create_maps` function."""
    test_create_lookup_tables(create_lookup_tables=create_maps)