def get_text_generator(max_string_length, alphabet=None):
    """Yield an endless stream of ``[('characters', text)]`` lists.

    Every ``text`` comes from
    ``essential_generators.DocumentGenerator().sentence()``. Characters
    not contained in ``alphabet`` are dropped, and the result is then
    sliced with ``[:max_string_length]``.

    Args:
        max_string_length: Slice bound applied to each filtered sentence.
        alphabet: Container of permitted characters. ``None`` keeps every
            character of the sentence.

    Yields:
        A one-element list holding the tuple ``('characters', text)``.
    """
    document_generator = essential_generators.DocumentGenerator()
    while True:
        characters = document_generator.sentence()
        if alphabet is not None:
            # Keep only the characters the caller allows.
            characters = (ch for ch in characters if ch in alphabet)
        text = ''.join(characters)
        yield [('characters', text[:max_string_length])]
def get_text_generator(alphabet=None, lowercase=False, max_string_length=None):
    """Yield an endless stream of sentences restricted to ``alphabet``.

    Each sentence comes from
    ``essential_generators.DocumentGenerator().sentence()`` and is
    processed in this order: optional lowercasing, removal of characters
    not contained in ``alphabet``, optional truncation.

    Args:
        alphabet: Container of permitted characters. ``None`` disables
            the filtering step.
        lowercase: Whether to lowercase the sentence before filtering.
        max_string_length: When not ``None``, the filtered sentence is
            sliced with ``[:max_string_length]``.

    Yields:
        The processed sentence as a string.
    """
    document_generator = essential_generators.DocumentGenerator()
    while True:
        raw = document_generator.sentence()
        cased = raw.lower() if lowercase else raw
        # Lowercasing happens first, so the filter sees the final casing.
        permitted = (
            cased if alphabet is None
            else (ch for ch in cased if ch in alphabet)
        )
        text = ''.join(permitted)
        yield text if max_string_length is None else text[:max_string_length]