# interactive mode: generate text based on a seed typed by the user
if 'interactive' in config:
    print('Type a seed word or sentence:')
    seed = raw_input()
    print('Start generating text for the seed "{0}"...'.format(seed))

    # write seed to temporary file
    out = io.open('tmp', 'w')
    out.write(u'{0}'.format(seed))
    out.close()

    config['predict_next'] = 'tmp'

    if 'char' in config:
        data = lm_data.charSentenceDataRescore(config, eval_config, TRAIN, VALID, TEST)
    else:
        data = lm_data.wordSentenceDataRescore(config, eval_config, TRAIN, VALID, TEST)
    all_data, vocab_size, _ = data.get_data()

# character-level training, in batches (across sentence boundaries)
elif 'char' in config:
    print('Character-level data')

    if 'per_sentence' in config:
        print('Sentence per sentence')
        data = lm_data.charSentenceData(config, eval_config, TRAIN, VALID, TEST)
        all_data, vocab_size, total_length, seq_lengths = data.get_data()
        config['num_steps'] = total_length

        # write maximum sentence length to file
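        # sketch of the write that presumably follows here (the listing breaks
        # off after the comment above): the 'rescore' path reads one line back
        # from '{trained_model}max_length' with readlines()[0].strip(), so a
        # single line holding the total (padded) length is assumed
        max_length_f = open('{0}max_length'.format(config['save_path']), 'w')
        max_length_f.write('{0}\n'.format(total_length))
        max_length_f.close()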

# character-level training, in batches (across sentence boundaries)
if 'char' in config:
    data = lm_data.charData(config, eval_config)
    all_data, vocab_size, _ = data.get_data()

# word-level training, on sentence level (sentences are padded until maximum sentence length)
elif 'per_sentence' in config:
    if 'rescore' in config:
        max_length = int(open('{0}max_length'.format(
            config['trained_model'])).readlines()[0].strip())

        # set num_steps = total length of each (padded) sentence
        config['num_steps'] = max_length

        data = lm_data.wordSentenceDataRescore(config, eval_config)
        all_data, vocab_size, _ = data.get_data()
    else:
        data = lm_data.wordSentenceData(config, eval_config)
        all_data, vocab_size, total_length, seq_lengths = data.get_data()

        # set num_steps = total length of each (padded) sentence
        config['num_steps'] = total_length

        print('Write max length of sentence to {0}max_length'.format(
            config['save_path']))

        # write maximum sentence length to file
        max_length_f = open('{0}max_length'.format(config['save_path']), 'w')
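        # the listing stops after opening the file; assumed continuation,
        # mirroring the 'rescore' branch above that reads the value back
        # with int(open(...).readlines()[0].strip()):
        max_length_f.write('{0}\n'.format(total_length))
        max_length_f.close()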