Example #1
0
def write_story():
    """Generate the text of the 'Wandering Rocks' chapter.

    For each section, trains a Markov generator on that section plus its two
    neighbors (wrapping around the chapter's 19 sections) and the mixin
    corpus, then generates text matching the original section's sentence and
    paragraph statistics as read from the stats file.

    Returns:
        str: the generated sections, joined by a centered separator.

    Raises:
        IndexError: if the stats file lists a section number out of order.
    """
    output_text = []

    # First, set up table of filenames
    section_filenames = []
    for which_section in range(1, 1 + sections_in_chapter):
        section_filenames.append(
            '%s/%02d.txt' % (wandering_rocks_sections_path, which_section))

    log_it("INFO: filenames table set up")
    log_it("  length is %d" % len(section_filenames), 2)
    log_it("\n    and the filenames table is:\n" + pformat(section_filenames))

    # Use a context manager so the stats file is closed even if parsing
    # raises (the original opened it and never closed it).
    with open(wandering_rocks_stats_file) as stats_file:
        the_line = stats_file.readline()  # Read and ignore the header line

        log_it(
            "INFO: header read from stats file, about to parse stats file and start generating text"
        )

        for which_section in range(1, 1 + sections_in_chapter):
            the_line = stats_file.readline()  # Read another line from the stats file
            log_it("INFO: Parsing the line '%s'." % the_line.split(), 2)
            sec, pars, sents, words = map(int, the_line.split(','))
            log_it(
                "    sec: %d; pars: %d; sents: %d; words: %d" %
                (sec, pars, sents, words), 2)
            if sec != which_section:  # elementary sanity check
                raise IndexError(
                    "The stats file for Wandering Rocks is corrupt: section number %d encountered out of order."
                    % sec)
            log_it(
                "    generating based on sections %d, %d, %d." %
                (1 + (which_section + 17) % 19, which_section,
                 (which_section + 1) % 19), 2)
            log_it(
                "      asking for %d sentences with paragraph break probability of %f."
                % (sents, pars / sents))

            # The current section plus its predecessor and successor, using
            # modular arithmetic to wrap around the chapter's 19 sections.
            which_rocks_sections = [
                section_filenames[1 + (which_section + 17) % 19 - 1],
                section_filenames[which_section - 1],
                section_filenames[(which_section + 1) % 19 - 1]
            ]

            section_genny = tg.TextGenerator(
                name="Wandering Rocks generator for section %d" % which_section)
            train_with_mixins(section_genny, chain_length, which_rocks_sections,
                              glob.glob('%s/*txt' % mixin_texts_dir))
            output_text.append(
                section_genny.gen_text(sentences_desired=sents,
                                       paragraph_break_probability=(pars / sents)))

    return '\n<center>*   *   *</center>\n'.join(output_text)
Example #2
0
def write_generic_story(
        chain_length,
        chapter_length,  # In sentences
        sentences_per_paragraph,  # On average
        joyce_text_path,  # A list
        mixin_texts_dir,  # Full path
        joyce_ratio=1.2):
    """Train a generator on one Joyce text plus mixin texts, then emit a
    chapter of the requested length."""
    mixin_files = glob.glob('%s/*txt' % mixin_texts_dir)
    generator = tg.TextGenerator()
    train_with_mixins(generator, chain_length, [joyce_text_path], mixin_files,
                      joyce_ratio)
    break_probability = 1 / sentences_per_paragraph
    return generator.gen_text(sentences_desired=chapter_length,
                              paragraph_break_probability=break_probability)
Example #3
0
import os, glob, sys
sys.path.append('/UlyssesRedux/scripts/')
from directory_structure import *           # Gets us the listing of file and directory locations.

sys.path.append(markov_generator_path)
import text_generator as tg

import patrick_logger    # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Build both Markov-chain generators once, at script startup.
headlines_genny = tg.TextGenerator(name="Aeolus headlines generator")
headlines_genny.train(the_files=[aeolus_headlines_path], markov_length=headline_chain_length)

# Repeat the Joyce source text enough times in the training list that its
# weight relative to the mixin articles approximates joyce_ratio.
joyce_text_length = os.stat(aeolus_nonheadlines_path).st_size
articles_files = glob.glob('%s/07/*txt' % current_run_corpus_directory)
mixin_texts_length = sum(os.stat(f).st_size for f in articles_files)
ratio = int(round((mixin_texts_length / joyce_text_length) * joyce_ratio))
articles_files = [aeolus_nonheadlines_path] * ratio + articles_files
articles_genny = tg.TextGenerator(name="Aeolus articles generator")
articles_genny.train(the_files=articles_files, markov_length=nonheadline_chain_length)

log_it("INFO: trained generators for both headlines and non-headlines files, moving on", 2)
def getParagraph(genny, num_sents, num_words):
Example #4
0
 def output_to_terminal(self, group_parser):
     """ Print each symptom burst and its details to the terminal,
         at a level of detail governed by the selected verbosity flag. """
     writer = text_generator.TextGenerator(group_parser, self.flags.verbosity)
     writer.write_output()
Example #5
0
# Load the current post tags, one tag per line, into a comma-separated string.
with open('/lovecraft/current-tags') as tagfile:
    tag_list = [line.strip() for line in tagfile.readlines()]
the_tags = ', '.join(tag_list)

story_length = random.choice(list(range(25, 71)))
the_content = ''


# Utility functions
def print_usage():    # Currently unused: nothing calls this.
    """Write the module docstring to stdout as a usage message."""
    patrick_logger.log_it("INFO: print_usage() was called")
    print(__doc__)

patrick_logger.log_it("INFO: tags and sentence lengths set up ...", 2)

genny = tg.TextGenerator(name='Lovecraft Generator')
genny.chains.read_chains(chains_file)

patrick_logger.log_it("INFO: chains read, starting run ...", 2)

# Next, pick out a title between 10 and 70 characters
the_length = 300    # Sentinel value: guarantees at least one loop iteration
patrick_logger.log_it("INFO: getting a story title ...", 2)
while not 10 <= the_length <= 70:
    the_title = genny.gen_text().strip()
    the_length = len(the_title)
    patrick_logger.log_it("INFO: The story title generated was '%s'" % the_title, 2)
    patrick_logger.log_it("INFO:    And the length of that title is: " + str(the_length), 2)
    # Use a context manager: the original's open(...).read() leaked a file
    # handle on every iteration of the loop.
    with open('/lovecraft/titles.txt') as titles_file:
        title_already_used = the_title in titles_file.read()
    if title_already_used:
        patrick_logger.log_it("That title's been used! Trying again ...\n\n\n")
        the_length = 300                                                            # Force the loop to grind through again
Example #6
0
def write_story():
    """Generate the text of the 'Circe' chapter.

    Trains one generator per corpus file found under circe_corpora_path, then
    walks the chapter's stats file -- a pipe-separated map of the paragraphs
    of the original chapter -- generating a structurally matching paragraph
    for each line.

    Returns:
        str: the generated paragraphs, joined with newlines.
    """
    corpora = {}  # maps corpus name -> its trained TextGenerator

    log_it("INFO: about to start processing corpora.")

    for which_corpus in sorted(glob.glob(circe_corpora_path + '*txt')):
        log_it('  INFO: processing "%s".' % which_corpus, 2)
        corpus_name = os.path.basename(which_corpus)[:-4]  # drop the ".txt"
        genny = tg.TextGenerator(name="%s generator" % corpus_name)
        train_with_mixins(genny, chain_length, [which_corpus],
                          glob.glob('%s/*txt' % mixin_texts_dir))
        corpora[corpus_name] = genny

    log_it("DEBUGGING: Corpora are: \n" + pprint.pformat(corpora), 3)

    the_chapter = []

    def get_speaker_text(speaker_name, num_sentences):
        # Speakers without a corpus of their own fall back to the
        # 'MINOR CHARACTERS' corpus; 'STAGE' maps to the stage directions.
        if speaker_name in corpora:
            which_index = speaker_name
        elif speaker_name == 'STAGE':
            which_index = 'STAGE DIRECTIONS'
        else:
            which_index = 'MINOR CHARACTERS'
        return corpora[which_index].gen_text(sentences_desired=num_sentences,
                                             paragraph_break_probability=0)

    log_it("INFO: About to process stats file.")

    with open(circe_stats_path) as circe_stats_file:
        for the_encoded_paragraph in circe_stats_file:
            # Process each line, using it as a map of the corresponding paragraph in 'Circe'.
            # Structure of these lines is defined in /UlyssesRedux/scripts/utility_scripts/analyze-chapter-15.py.
            # But here's a quick reminder:
            # Two parts: first, the name of a speaker (or "STAGE" if it's a paragraph of stage directions)
            # Then, a series of codes for "chunks" of the paragraph.
            # A "chunk" is a number of sentences. If the number is preceded by opening parens, it's an intraparagraph stage direction.
            # Parts of the line, and chunk descriptions, are separated by vertical bars (pipe characters), hence the .psv extension.
            log_it(
                'INFO: Processing coded line "%s".' %
                the_encoded_paragraph.strip(), 2)
            code_to_process = the_encoded_paragraph.split('|')
            speaker_name = code_to_process.pop(0)
            log_it('  speaker name is "%s".' % speaker_name, 2)
            if speaker_name != 'STAGE':  # Unless the name is 'STAGE', add it to the beginning of this paragraph
                this_paragraph = '%s: ' % speaker_name
            else:  # In which case, begin with an opening parenthesis.
                this_paragraph = '('
            while len(code_to_process) > 0:
                chunk_descriptor = code_to_process.pop(0)
                log_it('    processing chunk "%s".' % chunk_descriptor.strip(),
                       2)
                if chunk_descriptor[0] == '(':
                    # "(N": N sentences of intraparagraph stage direction,
                    # wrapped in parentheses.
                    this_paragraph = this_paragraph + '(%s) ' % get_speaker_text(
                        'STAGE', int(chunk_descriptor[1:])).strip()
                else:
                    this_paragraph = this_paragraph + '%s ' % (
                        get_speaker_text(speaker_name, int(chunk_descriptor)))
                log_it(
                    '      current paragraph length is now %d.' %
                    len(this_paragraph), 3)
            if speaker_name == 'STAGE':
                this_paragraph = this_paragraph.strip() + ')'
            log_it(
                '        done with this paragraph; total length is %d.' %
                len(this_paragraph), 2)
            the_chapter.append(this_paragraph)

    return '\n'.join(the_chapter)
Example #7
0
import text_generator as tg

import patrick_logger                 # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

# First, set up constants
questions_chain_length = 1    # Markov chain order for the questions generator
answers_chain_length = 2    # Markov chain order for the answers generator
# NOTE(review): current_run_corpus_directory is not defined in this file's
# visible imports; presumably supplied by a star import -- confirm.
mixin_texts_dir = '%s17' % current_run_corpus_directory

patrick_logger.verbosity_level = 0    # suppress all but unconditional log messages
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run

# Questions train on the questions corpus alone, with a shorter chain length.
questions_genny = tg.TextGenerator(name="Ithaca questions generator")
questions_genny.train([ithaca_questions_path], markov_length=questions_chain_length)

# Answers mix the answers corpus with the chapter-17 mixin texts.
# NOTE(review): train_with_mixins and the ithaca_* paths are likewise not
# defined in the visible imports -- presumably star-imported; confirm.
answers_genny = tg.TextGenerator(name="Ithaca answers generator")
train_with_mixins(answers_genny, joyce_text_list=[ithaca_answers_path], mixin_texts_list=glob.glob('%s/*txt' %
                  mixin_texts_dir), chain_length=answers_chain_length)




log_it("INFO: trained generators for both questions and answers; moving on ...", 2)

# Unlike the 'Aeolus' script, this script makes no effort to enforce sticking within word-limit boundaries.
# You can see that in the next two routines, which just call sentence_generator.gen_text() directly.

def getQuestion(num_sents, num_words):