示例#1
0
def check_ping_config():
    """Verify that the user-supplied PING configuration results in a useful executable
    command.

    Builds a one-packet ping command from the module-level ``ping_exec``,
    ``ping_count_flag`` and ``ping_target`` settings, runs it through
    ``subprocess.getstatusoutput`` and logs the outcome. A non-zero exit
    status or an exception while building/running the command is reported as
    a warning via ``log_it``; nothing is returned.
    """
    def report_ping_error(exit_code=0, error=None, ping_transcript=""):
        """Log a warning describing why the ping check failed."""
        log_it("WARNING: unable to validate ping configuration.", 0)
        if exit_code:
            log_it("       The return code was: %s" % exit_code, 0)
        if error:
            log_it("       The system complained: %s" % error, 0)
        log_it(
            "       Please check the ping configuration at the top of reporter.py",
            0)
        if ping_transcript:
            log_it(
                "\n\n       Transcript of interaction with PING executable:\n\n%s"
                % ping_transcript, 2)

    log_it("INFO: testing ping configuration", 3)
    try:
        ping_command = "%s %s %s" % (ping_exec, ping_count_flag % 1,
                                     ping_target)
        status, result = subprocess.getstatusoutput(ping_command)
    except Exception as e:  # Let KeyboardInterrupt/SystemExit propagate.
        report_ping_error(error=e)
        return

    if status:
        # The command ran but signalled failure: surface the exit code and
        # whatever the executable printed instead of claiming success.
        report_ping_error(exit_code=status, ping_transcript=result)
    else:
        log_it(
            "INFO: successfully ran the test ping command `%s`" % ping_command,
            3)
def gen_text(the_mapping, starts, markov_length=1, sentences_desired=1, is_html=False, paragraph_break_probability = 0.25):
    """Generate SENTENCES_DESIRED sentences and return them as a single string.

    Each sentence comes from genSentence(markov_length, the_mapping, starts).
    After every sentence a paragraph break is inserted when random.random()
    is less than or equal to PARAGRAPH_BREAK_PROBABILITY. If IS_HTML is true
    the text is wrapped in <p>...</p> tags, one pair per paragraph; otherwise
    paragraphs are separated by a blank line. Every (old, new) pair in the
    module-level final_substitutions is applied to the finished text.

    In HTML mode, a paragraph break drawn after the final sentence does not
    produce a trailing empty <p></p> element.
    """
    log_it("gen_text() called.", 4)
    log_it("  Markov length is %d; requesting %d sentences." % (markov_length, sentences_desired), 4)
    log_it("  Legitimate starts: %s" % starts, 5)
    log_it("  Probability data: %s" % the_mapping, 5)
    if is_html:
        log_it("  -- and we're generating an HTML fragment.", 3)
        the_text = "<p>"
    else:
        the_text = ""
    for _ in range(sentences_desired):
        # Separate sentences with a space unless we are at the very start of
        # the text or at the start of a new paragraph.
        if the_text and not the_text.endswith(("\n", "<p>")):
            the_text += " "
        the_text += genSentence(markov_length, the_mapping, starts)
        if random.random() <= paragraph_break_probability:
            if is_html:
                the_text = the_text.strip() + "</p>\n\n<p>"
            else:
                the_text = the_text.strip() + "\n\n"
    if is_html:
        dangling_opener = "\n\n<p>"
        if the_text.endswith("</p>" + dangling_opener):
            # The last sentence was followed by a paragraph break: drop the
            # opener for a paragraph that will never receive any content.
            the_text = the_text[:-len(dangling_opener)]
        else:
            the_text += "</p>"
    for which_replacement in final_substitutions:
        the_text = the_text.replace(which_replacement[0], which_replacement[1])
    return the_text
示例#3
0
 def _gen_sentence(self):
     """Generate one sentence from the trained chains and return it as a string.

     Starts from a randomly chosen entry of self.chains.the_starts and keeps
     asking self.next() for the following token until a token found in
     sentence_ending_punct is produced. Unless
     self.allow_single_character_sentences is set, a sentence whose only
     content is a single character other than "I" is discarded and a new one
     is generated recursively. The result is passed through th.capitalize().
     """
     assert self.is_trained(), (
         "ERROR: the model %s needs to be trained before it can generate text" % self)
     log_it("      _gen_sentence() called.", 4)
     log_it("        markov_length = %d." % self.chains.markov_length, 5)
     log_it("        the_mapping = %s." % self.chains.the_mapping, 5)
     log_it("        the_starts = %s." % self.chains.the_starts, 5)
     log_it("        allow_single_character_sentences = %s."
            % self.allow_single_character_sentences, 5)

     token = random.choice(self.chains.the_starts)
     sentence = token
     window = [token]  # The most recent tokens, handed to self.next() as context.
     while token not in sentence_ending_punct:
         token = self.next(window, self.chains.the_mapping)
         window.append(token)
         # Keep the context window no longer than the chain length.
         while len(window) > self.chains.markov_length:
             del window[0]
         # A space goes before the token only for word-level tokens, and only
         # when neither the token nor its predecessor forbids one.
         needs_space = (
             not self.chains.character_tokens
             and token not in punct_with_no_space_before
             and (len(window) < 2
                  or window[-2] not in punct_with_no_space_after))
         if needs_space:
             sentence += " "
         sentence += token

     if not self.allow_single_character_sentences:
         core = sentence.strip().strip(sentence_ending_punct).strip()
         if len(core) == 1 and core.upper() != "I":
             sentence = self._gen_sentence()  # Retry, recursively.
     return th.capitalize(sentence)
示例#4
0
def out_of_content_warning():
    """Log that the project has run out of content, then exit with status 2."""
    messages = (
        "WARNING: There's work to be done! You have to reset the blog state on ulyssesredux.tumblr.com to get it working again! A full Ulysses project is done and needs to be cleared!",
        "    REMINDER: make this a more prominent warning!",  # FIXME
    )
    for message in messages:
        log_it(message)
    sys.exit(2)
示例#5
0
def get_data_store(which_store=None, second_try=False):
    """Private function to get an entire stored data dictionary. If the data
    storage dictionary cannot be read, create a new dictionary with default
    values. If WHICH_STORE is None, then it returns data from the current data
    store; otherwise, it returns the data from the store specified by WHICH_STORE.

    SECOND_TRY is set by the function itself when it retries after calling
    create_data_store(); if the read fails again, the process exits with
    status 1 instead of retrying forever.

    Returns a dictionary containing all of the stored data from the specified
    data store.
    """
    import sys  # Local import: only needed on the fatal path below.

    if which_store is None:
        which_store = current_data_store_name()
    try:
        with open(which_store, 'rb') as the_data_file:
            return pickle.load(the_data_file)
    except Exception as e:
        if second_try:
            log_it(
                "FATAL ERROR: unable to create a readable data store; quitting ...",
                0)
            sys.exit(1)  # os.exit() does not exist; sys.exit() is the real call.
        log_it(
            'WARNING: Data store does not exist or cannot be read (the system said: %s); creating new data store ...'
            % e)
        create_data_store()
        # NOTE(review): the retry reads the *current* store rather than
        # WHICH_STORE, as the original did -- confirm that is intended.
        return get_data_store(second_try=True)
示例#6
0
def load_proper_noun_data() -> set:
    """Reads all known dictionaries; aggregates capitalized words; returns the
    aggregate as a set of strings. Makes lots of assumptions, including:
      * all files are in utf-8 encoding and end with the extension ".utf-8";
      * a word is capitalized in a dictionary iff it should always be capitalized;
      * dictionaries are all in the DICTIONARIES_LOC directory;
      * all dictionaries have one word per line.

    A capitalized word is dropped when its lowercase form also appears in the
    same dictionary file.
    """
    log_it("Loading known proper nouns ...", 2)
    ret = set()
    for path in fu.get_files_list(dictionaries_loc):
        if not (os.path.isfile(path) and path.endswith('utf-8')):
            continue
        # The files are documented as UTF-8, so say so instead of relying on
        # the platform's default encoding.
        with open(path, encoding='utf-8') as dict_file:
            entries = frozenset(line.strip() for line in dict_file)
        capitalized = {word for word in entries if th.is_capitalized(word)}
        # Keep only words that never appear in lowercase in this dictionary.
        ret |= {word for word in capitalized if word.lower() not in entries}
    log_it("  ... done.", 2)
    return ret
示例#7
0
 def gen_html_frag(self,
                   sentences_desired=1,
                   paragraph_break_probability=0.25):
     """Return the paragraphs from _produce_text(), each stripped and wrapped in
     <p></p> tags, joined by blank lines.
     """
     log_it("We're generating an HTML fragment.", 3)
     wrapped = []
     for paragraph in self._produce_text(sentences_desired,
                                         paragraph_break_probability):
         wrapped.append('<p>%s</p>' % paragraph.strip())
     return '\n\n'.join(wrapped)
def read_chains(filename):
    """Read the pickled chain-based data from a chains file.

    Returns a (markov_length, the_starts, the_mapping) tuple taken from the
    dictionary stored in FILENAME.

    If the file cannot be opened or unpickled, the error is logged and then
    re-raised, so callers see the real IOError/pickle.PickleError.

    NOTE: pickle.load() can execute arbitrary code; only read chains files
    from sources you trust.
    """
    try:
        with open(filename, 'rb') as the_chains_file:
            chains_dictionary = pickle.load(the_chains_file)
    except IOError as e:
        log_it("ERROR: Can't read chains from %s; the system said '%s'." % (filename, str(e)), 0)
        raise
    except pickle.PickleError as e:
        log_it("ERROR: Can't read chains from %s because a pickling error occurred; the system said '%s'." % (filename, str(e)), 0)
        raise
    return chains_dictionary['markov_length'], chains_dictionary['the_starts'], chains_dictionary['the_mapping']
示例#9
0
 def report_ping_error(exit_code=0, error=None, ping_transcript=""):
     """Log a warning that the ping configuration could not be validated.

     EXIT_CODE, ERROR and PING_TRANSCRIPT are each included in the log output
     only when they are truthy.
     """
     log_it("WARNING: unable to validate ping configuration.", 0)
     if exit_code:
         log_it("       The return code was: %s" % exit_code, 0)
     if error:
         log_it("       The system complained: %s" % error, 0)
     log_it(
         "       Please check the ping configuration at the top of reporter.py",
         0)
     if not ping_transcript:
         return
     transcript_message = (
         "\n\n       Transcript of interaction with PING executable:\n\n%s"
         % ping_transcript)
     log_it(transcript_message, 2)
def store_chains(markov_length, the_starts, the_mapping, filename):
    """Shove the relevant chain-based data into a dictionary, then pickle it and store
    it in the designated file.

    The dictionary has the keys 'markov_length', 'the_starts' and
    'the_mapping'. IOError and pickle.PickleError are logged rather than
    raised. Returns None.
    """
    chains_dictionary = { 'markov_length': markov_length, 'the_starts': the_starts, 'the_mapping': the_mapping }
    try:
        # The context manager closes the file even if pickling fails part-way.
        with open(filename, 'wb') as the_chains_file:
            # protocol=-1: use the most efficient protocol possible, even if
            # not readable by earlier Pythons.
            pickle.dump(chains_dictionary, the_chains_file, protocol=-1)
    except IOError as e:
        log_it("ERROR: Can't write chains to %s; the system said '%s'." % (filename, str(e)), 0)
    except pickle.PickleError as e:
        log_it("ERROR: Can't write chains to %s because a pickling error occurred; the system said '%s'." % (filename, str(e)), 0)
示例#11
0
def getParagraph(genny, num_sents, num_words):
    """Generic text-generation routine that all other text-generation routines call internally.

    Repeatedly asks GENNY.gen_text() for NUM_SENTS sentences (with no
    paragraph breaks) until the result, split on single spaces, has a word
    count within length_tolerance of NUM_WORDS, then returns that text.

    At least one text is always generated; the loop has no retry limit, so it
    does not terminate if no generated text ever fits the range.
    """
    minl = (1 - length_tolerance) * num_words
    maxl = (1 + length_tolerance) * num_words
    log_it("      getParagraph() called", 2)
    log_it("        num_sents: %d\n        num_words: %d\n        chain_length: %s" % (num_sents, num_words, genny.chains.markov_length), 3)
    log_it("        looking for a paragraph of %d to %d words" % (minl, maxl), 3)
    while True:
        # Generate before testing: "".split(' ') has length 1, so testing an
        # empty placeholder first would wrongly accept it whenever the
        # acceptable range includes 1.
        ret = genny.gen_text(sentences_desired=num_sents, paragraph_break_probability=0)
        word_count = len(ret.split(' '))
        log_it("          length of generated text is %d words / %d characters" % (word_count, len(ret)), 3)
        log_it("            generated sentence was '%s'." % ret, 4)
        if minl <= word_count <= maxl:   # Within acceptable length params
            return ret
示例#12
0
def getParagraph(num_sents, num_words, chain_length, mapping, starts):
    """Generic text-generation routine that all other text-generation routines call internally.

    Repeatedly calls gen_text() with MAPPING, STARTS and CHAIN_LENGTH for
    NUM_SENTS sentences (with no paragraph breaks) until the result, split on
    single spaces, has a word count within length_tolerance of NUM_WORDS,
    then returns that text.

    At least one text is always generated; the loop has no retry limit, so it
    does not terminate if no generated text ever fits the range.
    """
    minl = (1 - length_tolerance) * num_words
    maxl = (1 + length_tolerance) * num_words
    log_it("      getParagraph() called", 2)
    log_it("        num_sents: %d\n        num_words: %d\n        chain_length: %s" % (num_sents, num_words, chain_length), 3)
    log_it("        looking for a paragraph of %d to %d words" % (minl, maxl), 3)
    while True:
        # Generate before testing: "".split(' ') has length 1, so testing an
        # empty placeholder first would wrongly accept it whenever the
        # acceptable range includes 1.
        ret = gen_text(mapping, starts, markov_length=chain_length, sentences_desired=num_sents, paragraph_break_probability=0)
        word_count = len(ret.split(' '))
        log_it("          length of generated text is %d words / %d characters" % (word_count, len(ret)), 3)
        log_it("            generated sentence was '%s'." % ret, 4)
        if minl <= word_count <= maxl:   # Within acceptable length params
            return ret
示例#13
0
 def _printer(self, what, columns=-1):
     """Print WHAT, wrapped according to COLUMNS. Override to change the behavior.

     COLUMNS == 0 prints WHAT exactly as given. COLUMNS == -1 prints each
     paragraph through th.print_indented() with no side padding. Any other
     value pads each side with half of the difference between the terminal
     width and COLUMNS (never less than zero).
     """
     if columns == 0:  # Wrapping is totally disabled. Print exactly as generated.
         log_it("INFO: COLUMNS is zero; not wrapping text at all", 2)
         print(what)
         return

     if columns == -1:  # Wrap to best guess for terminal width
         log_it(
             "INFO: COLUMNS is -1; wrapping text to best-guess column width",
             2)
         padding = 0
     else:  # Wrap to the requested width; integer division may leave us off by one.
         padding = max((th.terminal_width() - columns) // 2, 0)
         log_it(
             "INFO: COLUMNS is %s; padding text with %s spaces on each side"
             % (columns, padding), 2)
         log_it("NOTE: terminal width is %s" % th.terminal_width(), 2)

     # Last chance to postprocess text is right here
     collapsed = th.multi_replace(what, [['\n\n', '\n']])
     non_empty_paragraphs = [p for p in collapsed.split('\n') if p]
     for paragraph in non_empty_paragraphs:
         th.print_indented(paragraph, each_side=padding)
         print()
示例#14
0
 def _produce_text(self,
                   sentences_desired=1,
                   paragraph_break_probability=0.25):
     """Actually generate some text. This is a generator function that produces (yields)
     one paragraph at a time. If you just need all the text at once, you might want
     to use the convenience wrapper gen_text() instead.

     Sentences come from self._gen_sentence(). After each sentence a paragraph
     is emitted when random.random() is less than or equal to
     PARAGRAPH_BREAK_PROBABILITY, and always after the last sentence. Each
     yielded paragraph has self.final_substitutions applied, is stripped, and
     ends with a single newline.
     """
     log_it("_produce_text() called.", 4)
     log_it(
         "  Markov length is %d; requesting %d sentences." %
         (self.chains.markov_length, sentences_desired), 4)
     log_it("  Legitimate starts: %s" % self.chains.the_starts, 5)
     log_it("  Probability data: %s" % self.chains.the_mapping, 5)
     the_text = ""
     for which_sentence in range(sentences_desired):
         # Add a space after the previous sentence unless we're at the very
         # beginning of a paragraph.
         if the_text and not the_text.endswith("\n"):
             the_text += " "
         the_text += self._gen_sentence()
         is_last_sentence = which_sentence == sentences_desired - 1
         if random.random() <= paragraph_break_probability or is_last_sentence:
             the_text = th.multi_replace(the_text, self.final_substitutions)
             yield the_text.strip() + "\n"
             the_text = ""
     # Falling off the end finishes the generator. An explicit
     # `raise StopIteration` here is turned into RuntimeError by PEP 479.
def process_file(filename):
    """Load FILENAME and correct the capitalization of every line in it.

    Each line is stripped, split into sentences by tokenizer.tokenize(), and
    every sentence is passed through correct_sentence_capitalization(). The
    corrected sentences are re-joined with single spaces, so the original
    spacing is not necessarily preserved.

    Returns the list of corrected lines that SHOULD BE written back to disk.
    This routine DOES NOT SAVE the file; save_files() does that.
    """
    print("Opening: %s ..." % os.path.basename(filename), end=" ")

    with open(filename, 'r') as f:
        the_lines = f.readlines()

    print("successfully read %d lines. Processing..." % len(the_lines))

    # Go through the text, paragraph by paragraph.
    for index, original_line in enumerate(the_lines):
        if patrick_logger.verbosity_level > 0:
            patrick_logger.log_it("\nProcessing line %d" % (index + 1), 1)
            patrick_logger.log_it('THE ORIGINAL LINE IS:\t%s' % original_line, 1)
            patrick_logger.log_it('', 1)

        # Stripping is one source of changes from one valid form to another.
        paragraph = original_line.strip()
        corrected_sentences = [
            correct_sentence_capitalization(sentence)
            for sentence in tokenizer.tokenize(paragraph)
        ]
        corrected_line = ' '.join(corrected_sentences) + '\n'

        patrick_logger.log_it('\nTHE CORRECTED LINE IS:\t%s' % corrected_line)

        the_lines[index] = corrected_line

    return the_lines
示例#16
0
def problem_log(data):
    """Returns a markdown fragment detailing the usability events logged in DATA.

    DATA is a mapping; its 'usability_events' entry, when present, maps
    timestamps to event dictionaries carrying a 'worst_problem' level and,
    normally, a 'tests_failed' list. The fragment starts with per-level counts
    for levels 2-5 and, when 'usability_events' is present, ends with an HTML
    list of every event and its failed tests. A missing 'usability_events'
    entry is treated as "no events" rather than raising KeyError.
    """
    events = data.get('usability_events', {})
    counts = {
        p_level: sum(1 for event in events.values()
                     if event['worst_problem'] == p_level)
        for p_level in range(2, 6)
    }

    ret = """There were %d network usability events:

* %d events at level 2
* %d events at level 3
* %d events at level 4
* %d events at level 5

### Entire log

Here follows a list of all logged problems. Note that failures to log are not reported; currently,
there are several known reasons why logging fails occasionally. Even worse, the only way to detect these problems at
present is to inspect the raw (binary) log files by reading them with the <code>pickle</code> module in Python 3.5+.
Too, logging often begins and ends abruptly because development is still occurring. This also means that the exact
data format written to the raw files still changes occasionally.

All of this is to say that this log file is still documenting an experimental system; part of the aim of this
particular log file that you are reading right now is to help increase the stability of that system. The above
disclaimers will gradually disappear or be rewritten as the system approaches a more finalized form.
""" % (len(events), counts[2], counts[3], counts[4], counts[5])
    if 'usability_events' in data:
        ret += "\n<ul>\n"
        for timestamp, event_data in events.items():
            ret += "<li><strong>%s</strong> (problem level %d):\n <ul>\n" % (
                timestamp, event_data['worst_problem'])
            try:
                for test in event_data['tests_failed']:
                    ret += "  <li>Failed test: %s (%s)</li>\n" % (
                        test['test_failed'], "; ".join([
                            "%s=%s" % (label, value)
                            for label, value in test['relevant_data'].items()
                        ]))
            except Exception:  # Malformed/missing test data; don't swallow Ctrl-C.
                log_it(
                    "WARNING: apparently, no tests were failed here. What's going on?",
                    1)
            ret += " </ul>\n</li>\n"
        ret += "</ul>"
    return ret
def do_open_dialog(**kwargs):
    """Shows a dialog asking the user which file to open, or comes as close as
    possible to doing so. Any keyword arguments passed in are piped to the
    underlying function tkinter.filedialog.askopenfilename

    If tkinter cannot be imported or the dialog fails, the user is asked to
    type the path instead.

    Returns a path to the file that the user wants to open.

    Adapted from more complex code in Zombie Apocalypse.
    """
    patrick_logger.log_it("DEBUGGING: simple_standard_file.do_open_dialog() called", 2)
    try:            # Use TKinter if possible
        import tkinter
        import tkinter.filedialog
        tkinter.Tk().withdraw()     # No root window
        filename = tkinter.filedialog.askopenfilename(**kwargs)
    except Exception:   # If all else fails, ask the user to type it. (Ctrl-C still propagates.)
        filename = input('What file would you like to open? ')
    patrick_logger.log_it('    Selected file is %s' % filename, 2)
    return filename
示例#18
0
def monitor():
    """Loop forever, scheduling one ping test per cycle on a background thread.

    Each cycle picks a moment in the next window (current time plus
    interval_between_pings minutes, with the minute rounded down to a multiple
    of interval_between_pings, plus a random number of seconds), starts a
    thread running schedule_test() with the delay until that moment, and then
    sleeps one second longer than that delay.
    """
    log_it("INFO: beginning monitoring", 3)
    while True:
        now = datetime.datetime.now()
        # Add interval_between_pings to the current time ...
        target = now + datetime.timedelta(minutes=interval_between_pings)
        # ... and round down to the start of that interval.
        rounded_minute = (target.minute // interval_between_pings
                          * interval_between_pings)
        target = datetime.datetime(target.year, target.month, target.day,
                                   target.hour, rounded_minute, 0)
        jitter_seconds = random.randint(
            0, 60 * interval_between_pings - number_of_packets)
        target += datetime.timedelta(seconds=+jitter_seconds)
        log_it(
            "INFO: it's %s; scheduling next ping test for %s" %
            (now.isoformat(sep=' '), target.isoformat(sep=' ')), 3)
        sleep_time = (target - now).total_seconds()
        _thread.start_new_thread(schedule_test, (sleep_time, ))
        time.sleep(1 + sleep_time)
def do_save_dialog(**kwargs):
    """Shows a dialog asking the user where to save a file, or comes as close as
    possible to doing so. Any keyword arguments passed in are piped to the
    underlying function tkinter.filedialog.asksaveasfilename

    If tkinter cannot be imported or the dialog fails, the user is asked to
    type a filename instead.

    Returns a path to the file that the user wants to create.

    Adapted from more complex code in Zombie Apocalypse.
    """
    patrick_logger.log_it(
        "DEBUGGING: simple_standard_file.do_save_dialog() called", 2)
    try:  # Use TKinter if possible
        import tkinter
        import tkinter.filedialog
        tkinter.Tk().withdraw()  # No root window
        # Forward the caller's options, as the docstring promises.
        filename = tkinter.filedialog.asksaveasfilename(**kwargs)
    except Exception:  # If all else fails, ask the user to type a filename. (Ctrl-C still propagates.)
        filename = input('Under what name would you like to save the file? ')
    patrick_logger.log_it('    Selected file is %s' % filename, 2)
    return filename
示例#20
0
def do_open_dialog(**kwargs):
    """Shows a dialog asking the user which file to open, or comes as close as
    possible to doing so. Any keyword arguments passed in are piped to the
    underlying function tkinter.filedialog.askopenfilename

    tkinter is imported inside the function so that it does not become a
    dependency for every project that uses this module; if it cannot be
    imported or the dialog fails, the user is asked to type the path instead.

    Returns a path to the file that the user wants to open, or None when the
    dialog reports an empty tuple (treated here as "cancelled").

    Adapted from more complex code in Zombie Apocalypse.
    """
    patrick_logger.log_it(
        "DEBUGGING: simple_standard_file.do_open_dialog() called", 2)
    try:  # Use TKinter if possible
        # All tkinter imports stay inside the try: an import outside it would
        # raise before the typed-input fallback could run.
        import tkinter
        import tkinter.filedialog
        tkinter.Tk().withdraw()  # No root window
        filename = tkinter.filedialog.askopenfilename(**kwargs)
    except Exception:  # If all else fails, ask the user to type it. (Ctrl-C still propagates.)
        filename = input('What file would you like to open? ')
    # NOTE(review): only an empty tuple is treated as a cancel; an empty
    # string is returned unchanged -- confirm what callers expect.
    if filename == ():
        patrick_logger.log_it(
            '    INFO: simple_standard_file: do_open_dialog() cancelled', 2)
        filename = None
    else:
        patrick_logger.log_it(
            '    INFO: simple_standard_file: Selected file is "%s"' % filename,
            2)
    return filename
示例#21
0
def menu_choice(choice_menu, prompt):
    """Takes a menu description, passed in as CHOICE_MENU (see below for format),
     and asks the user to make a choice between the options. It then passes back
     the user's choice to the caller.

    :param choice_menu: an OrderedDict that maps a list of options to be typed
                        (short strings, each of which is ideally a single
                        letter) to a full description of what that option means
                        (a longer string). For example:

                        OrderedDict([
                                     ('a', 'always capitalize'),
                                     ('y', 'yes'),
                                     ('n', 'never')
                                    ])

                        as a special case, if both parts of an entry in the
                        OrderedDict are two hyphens, that entry is not a valid menu
                        choice; it is printed as-is, as a visual separator, but is
                        not a selectable option.
    :param prompt:      a direct request for input; printed after all of the
                        menu options have been displayed. After a wrong answer
                        the list of legal options is appended to it (once).
    :return:            a string: the response the user typed that was
                        validated as an allowed choice. Matching is
                        case-insensitive; the response is returned as typed,
                        with surrounding whitespace stripped.
    """
    max_menu_item_width = max(len(x) for x in choice_menu)
    menu_column_width = max_menu_item_width + len("  [ ") + len(" ]")
    spacing_column_width = 3
    options_column_width = text_handling.terminal_width() - (menu_column_width + spacing_column_width + 1)

    # OK, let's print this menu, remembering every entry so the log below can
    # describe the whole menu rather than just its last entry.
    menu_entries = []
    print()
    for option, text in choice_menu.items():
        if (option == '--') and (text == '--'):
            current_line = '  --  ' + ' ' * (max_menu_item_width - len('--')) + ' ' * spacing_column_width + '-----'
        else:
            current_line = '[ %s ]%s%s' % (option, ' ' * (max_menu_item_width - len(option)), ' ' * spacing_column_width)
            text_lines = text_handling._get_wrapped_lines(text, enclosing_width=options_column_width)
            # The first line finishes the option's own line; each further line
            # starts on a new line, indented to the description column.
            left_padding = '\n' + (' ' * (menu_column_width + spacing_column_width))
            current_line = current_line + left_padding.join(text_lines)
        print(current_line)
        menu_entries.append(current_line)
    print()
    menu_text = '\n'.join(menu_entries)
    patrick_logger.log_it("INFO: multi_choice_menu.py: menu laid out in %d lines." % len(menu_text.split('\n')), 2)
    patrick_logger.log_it("INFO: multi_choice_menu.py: menu contents are: %s" % menu_text, 4)

    # Now, get the user's choice
    legal_options = [ k.lower() for k, v in choice_menu.items() if ((k != '--') or (v != '--')) ]
    patrick_logger.log_it("INFO: multi_choice_menu.py: Legal options for this menu are %s" % legal_options, 2)
    first_prompt = prompt.strip() + " "
    # Built once, so repeated wrong answers don't keep appending the options list.
    retry_prompt = prompt.strip() + " [ %s ] " % ('/'.join(legal_options))
    choice = input(first_prompt).strip()
    while choice.lower() not in legal_options:
        choice = input(retry_prompt).strip()
    return choice
示例#22
0
 def read_chains(self, filename):
     """Read the pickled chain-based data from FILENAME.

     Sets self.markov_length, self.the_starts, self.the_mapping and
     self.character_tokens from the stored dictionary; keys missing from older
     files are filled in from defaults via apply_defaults().

     If the file cannot be opened or unpickled, the error is logged and then
     re-raised, so callers see the real IOError/pickle.PickleError.

     NOTE: pickle.load() can execute arbitrary code; only read chains files
     from sources you trust.
     """
     default_chains = {
         'character_tokens':
         False,  # We need only assign defaults for keys added in v2.0 and later.
     }  # the_starts, the_mapping, and markov_length have been around since 1.0.
     try:
         with open(filename, 'rb') as the_chains_file:
             chains_dictionary = pickle.load(the_chains_file)
     except IOError as e:
         log_it(
             "ERROR: Can't read chains from %s; the system said '%s'." %
             (filename, str(e)), 0)
         raise  # Nothing was loaded; continuing would hit an unbound name.
     except pickle.PickleError as e:
         log_it(
             "ERROR: Can't read chains from %s because a pickling error occurred; the system said '%s'."
             % (filename, str(e)), 0)
         raise
     chains_dictionary = apply_defaults(defaultargs=default_chains,
                                        args=chains_dictionary)
     self.markov_length = chains_dictionary['markov_length']
     self.the_starts = chains_dictionary['the_starts']
     self.the_mapping = chains_dictionary['the_mapping']
     self.character_tokens = chains_dictionary['character_tokens']
示例#23
0
def write_story():
    """Build the chapter text from the stats file at aeolus_stats_path.

    Every line of the file is passed to get_appropriate_paragraph(); the
    resulting paragraphs are joined with newlines and returned as one string.
    """
    log_it("INFO: about to start reading and processing the stats file", 2)
    paragraphs = []
    with open(aeolus_stats_path) as stats:     # Each line is a coded structure line
        log_it("INFO: successfully opened stats file %s." % aeolus_stats_path, 3)
        for coded_line in stats:
            log_it("  processing line '%s'." % coded_line.rstrip())
            paragraphs += [get_appropriate_paragraph(coded_line)]
    story = '\n'.join(paragraphs)
    return story
示例#24
0
def write_story():
    """Build the chapter text from the stats file at aeolus_stats_path.

    Every line of the file is passed to get_appropriate_paragraph(); the
    resulting paragraphs are joined with newlines and returned as one string.
    """
    log_it("INFO: about to start reading and processing the stats file", 2)
    paragraphs = []
    with open(aeolus_stats_path) as stats:     # Each line is a coded structure line
        log_it("INFO: successfully opened stats file %s." % aeolus_stats_path, 3)
        for coded_line in stats:
            log_it("  processing line '%s'." % coded_line.rstrip())
            paragraphs += [get_appropriate_paragraph(coded_line)]
    story = '\n'.join(paragraphs)
    return story
示例#25
0
 def store_chains(self, filename):
     """Pickle this object's chain data into FILENAME.

     The stored dictionary holds the_starts, markov_length, the_mapping and
     character_tokens. IOError and pickle.PickleError are logged rather than
     raised.
     """
     payload = dict(
         the_starts=self.the_starts,
         markov_length=self.markov_length,
         the_mapping=self.the_mapping,
         character_tokens=self.character_tokens,
     )
     try:
         with open(filename, 'wb') as out_file:
             # protocol=-1: use the most efficient protocol possible
             pickle.dump(payload, out_file, protocol=-1)
     except IOError as err:
         log_it(
             "ERROR: Can't write chains to %s; the system said '%s'." %
             (filename, str(err)), 0)
     except pickle.PickleError as err:
         log_it(
             "ERROR: Can't write chains to %s because a pickling error occurred; the system said '%s'."
             % (filename, str(err)), 0)
示例#26
0
def schedule_test(delay=0):
    """Sleep for DELAY seconds, then run one ping test and hand its timestamp and
    transcript to record_and_interpret().
    """
    announcement = "INFO: scheduling next ping test for %.3f seconds from now" % delay
    log_it(announcement, 3)
    time.sleep(delay)

    log_it("INFO: beginning ping test ...", 1)
    started_at = current_timestamp()  # Taken just before the test is run.
    ping_output = ping_test()

    log_it("INFO: ping test complete, interpreting ...", 2)
    record_and_interpret(started_at, ping_output)
def menu_choice(choice_menu, prompt):
    """Takes a menu description, passed in as CHOICE_MENU (see below for format),
     and asks the user to make a choice between the options. It then passes back
     the user's choice to the caller.

    :param choice_menu: an OrderedDict that maps a list of options to be typed
                        (short strings, each of which is ideally a single
                        letter) to a full description of what that option means
                        (a longer string). For example:

                        OrderedDict([
                                     ('a', 'always capitalize'),
                                     ('y', 'yes'),
                                     ('n', 'never')
                                    ])
    :param prompt:      a direct request for input printed after all of the
                        menu options have been displayed. After a wrong answer
                        the list of options is appended to it (once).
    :return:            a string: the response the user typed that was
                        validated as an allowed choice. Matching is
                        case-insensitive; the response is returned as typed,
                        with surrounding whitespace stripped.
    """
    max_menu_item_width = max(len(x) for x in choice_menu)
    menu_column_width = max_menu_item_width + len("  [ ") + len(" ]")
    spacing_column_width = 3
    options_column_width = text_handling.terminal_width() - (menu_column_width + spacing_column_width + 1)

    # OK, let's print this menu, remembering every entry so the log below can
    # describe the whole menu rather than just its last entry.
    menu_entries = []
    print()
    for option, text in choice_menu.items():
        current_line = '[ %s ]%s%s' % (option, ' ' * (max_menu_item_width - len(option)), ' ' * spacing_column_width)
        text_lines = text_handling._get_wrapped_lines(text, enclosing_width=options_column_width)
        # The first line finishes the option's own line; EVERY further line,
        # including the second, starts on a new line indented to the
        # description column.
        left_padding = '\n' + (' ' * (menu_column_width + spacing_column_width))
        current_line = current_line + left_padding.join(text_lines)
        print(current_line)
        menu_entries.append(current_line)
    print()
    menu_text = '\n'.join(menu_entries)
    patrick_logger.log_it("INFO: multi_choice_menu.py: menu laid out in %d lines." % len(menu_text.split('\n')), 2)
    patrick_logger.log_it("INFO: multi_choice_menu.py: menu contents are: %s" % menu_text, 4)

    # Now, get the user's choice
    legal_options = [ l.lower() for l in choice_menu ]
    patrick_logger.log_it("INFO: multi_choice_menu.py: Legal options for this menu are %s" % legal_options, 2)
    first_prompt = prompt.strip() + " "
    # Built once, so repeated wrong answers don't keep appending the options list.
    retry_prompt = prompt.strip() + " [ %s ] " % ('/'.join(choice_menu))
    choice = input(first_prompt).strip()
    while choice.lower() not in legal_options:
        choice = input(retry_prompt).strip()
    return choice
def genSentence(markov_length, the_mapping, starts):
    """Build and return one sentence as a string.

    A starting word is picked at random from STARTS and capitalized; further
    words are then requested from next() -- called here with the running
    history list and THE_MAPPING, so presumably the project's own chain-walking
    helper rather than the builtin -- until a word found in
    sentence_ending_punct turns up. The history handed to next() is capped at
    MARKOV_LENGTH entries.
    """
    log_it("      genSentence() called.", 4)
    log_it("        markov_length = %d." % markov_length, 5)
    log_it("        the_mapping = %s." % the_mapping, 5)
    log_it("        starts = %s." % starts, 5)
    word = random.choice(starts)
    history = [word]
    pieces = [word.capitalize()]
    # Keep going until the most recent word is sentence-ending punctuation.
    while word not in sentence_ending_punct:
        word = next(history, the_mapping)
        history.append(word)
        # Drop the oldest entry once the history outgrows the chain length.
        if len(history) > markov_length:
            del history[0]
        # A space goes before the new word unless it is punctuation that hugs the
        # preceding word, or the preceding word is punctuation that hugs what follows.
        if not (word in punct_with_no_space_before or (len(history) >= 2 and history[-2] in punct_with_no_space_after)):
            pieces.append(" ")
        pieces.append(word)
    return "".join(pieces)
def correct_sentence_capitalization(s):
    """Return S with its capitalization corrected, word by word.

    The sentence is split on whitespace and part-of-speech tagged; each word is
    then checked against the always-capitalize list, the sentence-initial rule
    (first word only) and the "unexplained capital" rule (all other words).
    The possibly modified words are reassembled and stripped before returning.
    """
    tagged = nltk.tag.pos_tag(s.split())    # A list of tuples: [(word, POS), (word, POS) ...]
    for position, (word, pos) in enumerate(tagged):
        # Rule that applies anywhere in the sentence: words that must always be capitalized.
        if comparative_form(word) in always_capitalize_list and not word[0].isupper():
            patrick_logger.log_it('DEBUGGING: found non-capitalized word "%s" on the always-capitalize list' % comparative_form(word), 2)
            tagged[position] = (text_handling.capitalize(tagged[position][0]), pos)

        if position == 0:
            # Rules for the first word of the sentence only.
            if word[0].isupper():
                continue
            patrick_logger.log_it('DEBUGGING: found non-capitalized word "%s" at the beginning of a sentence' % comparative_form(word), 2)
            if always_capitalize_sentence_beginnings or check_word_capitalization(tagged, position):
                # Tuples are immutable, so replace the whole (word, POS) entry,
                # keeping the POS tag for any further checking.
                tagged[position] = (text_handling.capitalize(tagged[position][0]), pos)
            continue

        # Rules for every other word: is there a capital that nothing explains?
        if not word[0].isupper():
            continue
        if pos.upper() == 'NNP':                # Proper nouns are allowed their capitals.
            continue
        if comparative_form(word) in allowed_capitalized_words:
            continue
        patrick_logger.log_it('DEBUGGING: the word "%s" may be inappropriately capitalized' % comparative_form(word), 2)
        if check_word_capitalization(tagged, position, allow_always_correct=True):
            tagged[position] = (tagged[position][0].lower(), pos)

    return reassemble_sentence(tagged).strip()
示例#30
0
def write_story():
    """Generate the chapter text and return it as a single newline-joined string.

    First, one set of Markov chains is built for every corpus file matching
    circe_corpora_path + '*txt' (each mixed with the texts in mixin_texts_dir).
    Then the stats file at circe_stats_path is read line by line; each line is
    a pipe-separated record describing one paragraph to generate.
    """
    log_it("INFO: about to start processing corpora.")

    corpora = {}        # corpus name (file name minus its last four characters) -> [starts, mapping]
    for corpus_path in glob.glob(circe_corpora_path + '*txt'):
        log_it('  INFO: processing "%s".' % corpus_path, 2)
        starts, the_mapping = buildMapping_withMixins(chain_length, [corpus_path], glob.glob('%s/*txt' % mixin_texts_dir))
        corpora[os.path.basename(corpus_path)[:-4]] = [starts, the_mapping]

    log_it("DEBUGGING: Corpora are: \n" + pprint.pformat(corpora), 6)           # pprint.pformat() for the WHOLE DICTIONARY takes FOREVER

    def get_speaker_text(speaker_name, num_sentences):
        """Generate NUM_SENTENCES sentences from the chains appropriate to SPEAKER_NAME."""
        if speaker_name in corpora:
            corpus_key = speaker_name
        else:
            corpus_key = 'STAGE DIRECTIONS' if speaker_name == 'STAGE' else 'MINOR CHARACTERS'
        starts, the_mapping = corpora[corpus_key]
        return gen_text(the_mapping, starts, markov_length=chain_length, sentences_desired=num_sentences, paragraph_break_probability = 0)

    log_it("INFO: About to process stats file.")

    paragraphs = []
    with open(circe_stats_path) as circe_stats_file:
        for coded_line in circe_stats_file:
            # Each line maps one paragraph of 'Circe'. The format is defined in
            # /UlyssesRedux/code/utility_scripts/analyze-chapter-15.py; in brief:
            #   * fields are separated by vertical bars (hence the .psv extension);
            #   * the first field names the speaker, or is "STAGE" for a paragraph of stage directions;
            #   * every later field is a "chunk": a number of sentences, preceded by an opening
            #     parenthesis if the chunk is an intraparagraph stage direction.
            log_it('INFO: Processing coded line "%s".' % coded_line.strip(), 2)
            speaker_name, *chunk_codes = coded_line.split('|')
            log_it('  speaker name is "%s".' % speaker_name, 2)
            is_stage_direction = speaker_name == 'STAGE'
            # Stage directions open with a parenthesis; speech opens with the speaker's name.
            paragraph = '(' if is_stage_direction else '%s: ' % speaker_name
            for chunk in chunk_codes:
                log_it('    processing chunk "%s".' % chunk.strip(), 2)
                if chunk[0] == '(':
                    paragraph += '(%s) ' % (get_speaker_text('STAGE', int(chunk[1:])))
                else:
                    paragraph += '%s ' % (get_speaker_text(speaker_name, int(chunk)))
                log_it('      current paragraph length is now %d.' % len(paragraph), 3)
            if is_stage_direction:
                paragraph = paragraph.strip() + ')'
            log_it('        done with this paragraph; total length is %d.' % len(paragraph), 2)
            paragraphs.append(paragraph)

    return '\n'.join(paragraphs)
示例#31
0
def getHeadline(num_sents, num_words):
    """Generate a headline with getParagraph() and return it in upper case.

    NUM_SENTS and NUM_WORDS are forwarded unchanged, followed by the module's
    headline chain length, mapping and starts.
    """
    log_it("    getHeadline() called", 2)
    headline = getParagraph(num_sents, num_words, headline_chain_length, headlines_mapping, headlines_starts)
    return headline.upper()
# Set up default values: the logger's verbosity level for this script's run.
patrick_logger.verbosity_level = 2


# Functions

def print_usage():
    """Log that usage help was requested, then print the module docstring to stdout."""
    patrick_logger.log_it("INFO: print_usage() was called")
    usage_text = __doc__
    print(usage_text)


# Metadata for the post that this run creates.
the_title = "Matthew Arnold's Guest Lecture of " + datetime.date.today().strftime("%A, %d %B %Y")
the_blog_name = "AutoIrishLitDiscourses"
the_content_path = "/150/extras.txt"
the_tags = [
    'Matthew Arnold',
    'Celtic Literature',
    'guest lecture',
    'Irish literature',
    'automatically generated text',
    'Patrick Mooney',
    'dadadodo',
]
the_content = ''

patrick_logger.log_it('INFO: Constants and variables set up; generating content', 2)

# Ask dadadodo for a randomly chosen number of sentences (80 through 119) and capture its output.
story_length = random.choice(range(80, 120))
the_content = subprocess.check_output(
    ["dadadodo -c " + str(story_length) + " -l sources/m.arnold/CelticLiterature.dat -w 10000"],
    shell=True,
).decode()

# Blank-line-separated chunks of the output each become one HTML paragraph.
the_lines = [
    "<p>" + the_line.strip() + "</p>"
    for the_line in the_content.split('\n\n')
]
the_content = "\n\n".join(the_lines)
patrick_logger.log_it('INFO: Attempting to post the content', 2)
patrick_logger.log_it("the_content: \n\n" + the_content)

the_status = social_media.tumblr_text_post(Irish_lit_discourses_client, the_tags, the_title, the_content)

patrick_logger.log_it('INFO: We\'re done', 2)
示例#33
0
        'git commit -m "@DATE@: archiving new data"',
        'git push',
        'git gc --aggressive --prune=now',
    ],
    '/home/patrick/Documents/programming/python_projects/IF utils': [
        'git add specific_games/ATD/working/progress.json',
        'git add specific_games/NBM/beta\ 1.62/explored_paths_Africa.json',
        'git add specific_games/NBM/beta\ 1.62/successful_paths_Africa.txt',
        'git commit -m "@DATE@: archiving new data"',
        'git push',
        'git gc --aggressive --prune=now',
    ]
}

if __name__ == "__main__":
    log_it("INFO: We're starting a run of git_committer.py\n\n", 1)
    olddir = os.getcwd()
    try:
        for dir, acts in task_list.items():
            log_it("INFO: changing directory to '%s'" % dir, 2)
            os.chdir(dir)
            log_it('\n> cd %s' % dir, 0)
            for act in acts:
                try:
                    act = act.replace(
                        '@DATE@',
                        datetime.datetime.now().strftime('%d %b %Y'))
                    log_it('\n> %s\n\n' % act, 0)
                    subprocess.call(act, shell=True)
                except BaseException as e:
                    log_it(
示例#34
0
def getNonQuoteParagraph(num_sents, num_words):
    """Generate a paragraph from the articles generator via getParagraph() and return it."""
    log_it("    getNonQuoteParagraph() called", 2)
    paragraph = getParagraph(articles_genny, num_sents=num_sents, num_words=num_words)
    return paragraph
def out_of_content_warning():
    """Log that the blog has run out of content, then exit with status 2."""
    warning = "WARNING: There's work to be done! You have to reset the blog state on ulyssesredux.tumblr.com to get it working again! A full Ulysses project is done and needs to be cleared!"
    log_it(warning)
    log_it("    REMINDER: make this a more prominent warning!", 2)  # For now
    raise SystemExit(2)     # Exactly what sys.exit(2) raises
示例#36
0
# Module-level set-up: tuning constants, imports, and the two text generators.
# Statement order matters here, because the star import below supplies names
# (markov_generator_path and the various paths) that later lines rely on.
nonheadline_chain_length = 2
length_tolerance = 0.4      # e.g., 0.3 means the generated text can be up to 30% over or under the length of the requested text.
joyce_ratio = 1.2           # Goal ratio of Joyce to non-Joyce text in the resulting chains.

import os, glob, sys
sys.path.append('/UlyssesRedux/scripts/')
from directory_structure import *           # Gets us the listing of file and directory locations.

sys.path.append(markov_generator_path)
import text_generator as tg

import patrick_logger    # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run
# NOTE(review): headline_chain_length is not assigned in this stretch of the file;
# presumably it is defined earlier or arrives via the star import -- confirm.
headlines_genny = tg.TextGenerator(name="Aeolus headlines generator")
headlines_genny.train(the_files=[aeolus_headlines_path], markov_length=headline_chain_length)

# Weight the Joyce text against the mix-in texts by file size: the Joyce file is
# listed RATIO times in the training list, where RATIO is the size of all the
# mix-in files relative to the Joyce file, scaled by joyce_ratio and rounded.
joyce_text_length = os.stat(aeolus_nonheadlines_path).st_size
mixin_texts_length = 0
articles_files = glob.glob('%s/07/*txt' % current_run_corpus_directory)
for which_file in articles_files:
    mixin_texts_length += os.stat(which_file).st_size
# NOTE(review): if the mix-in texts are small relative to the Joyce text, this
# rounds to 0 and the Joyce file is left out of the training list entirely; an
# empty Joyce file would raise ZeroDivisionError. Confirm whether either can happen.
ratio = int(round( (mixin_texts_length / joyce_text_length) * joyce_ratio ))
articles_files = [aeolus_nonheadlines_path] * ratio + articles_files
articles_genny = tg.TextGenerator(name="Aeolus articles generator")
articles_genny.train(the_files=articles_files, markov_length=nonheadline_chain_length)
示例#37
0
def getAnswer(num_sents, num_words):
    """Generate NUM_SENTS sentences from the answers chains and return the text.

    NUM_WORDS is only logged; it is not passed on to gen_text().
    """
    log_it("    getAnswer() called", 2)
    log_it("      num_sents: %d; num_words: %d" % (num_sents, num_words), 3)
    answer = gen_text(
        answers_mapping,
        answers_starts,
        markov_length=answers_chain_length,
        sentences_desired=num_sents,
        paragraph_break_probability=0,
    )
    return answer
示例#38
0
from directory_structure import *           # Gets us the listing of file and directory locations.
from chapter_scripts.generic_chapter import buildMapping_withMixins

sys.path.append(markov_generator_path)
from sentence_generator import *

import patrick_logger                 # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

# First, set up constants
questions_chain_length = 1      # Markov chain length used when building and generating from the questions text
answers_chain_length = 2        # Markov chain length used when building and generating from the answers text
mixin_texts_dir = '%s17' % current_run_corpus_directory    # "17" is appended directly to the corpus directory path

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run
# Questions are built from the questions file alone; answers are built from the
# answers file together with every file matching *txt in mixin_texts_dir.

questions_starts, questions_mapping = buildMapping(word_list(ithaca_questions_path), markov_length=questions_chain_length)
answers_starts, answers_mapping = buildMapping_withMixins(answers_chain_length, [ithaca_answers_path], glob.glob('%s/*txt' %mixin_texts_dir))

log_it("INFO: built mappings from both question and answer files, moving on", 2)

# Unlike the 'Aeolus' script, this script makes no effort to enforce sticking within word-limit boundaries.
# You can see that in the next two routines, which just call sentence_generator.gen_text() directly.

def getQuestion(num_sents, num_words):
    """Generate NUM_SENTS sentences from the questions chains and return the text.

    NUM_WORDS is only logged; it is not passed on to gen_text().
    """
    log_it("    getQuestion() called", 2)
    log_it("      num_sents: %d; num_words: %d" % (num_sents, num_words), 3)
    question = gen_text(
        questions_mapping,
        questions_starts,
        markov_length=questions_chain_length,
        sentences_desired=num_sents,
        paragraph_break_probability=0,
    )
    return question
def main():
    """Parse the command line, obtain Markov chains, and print generated text.

    Chains come either from a previously compiled chains file (-l/--load) or
    from one or more text files (-i/--input), optionally with a chain length
    given by -m/--markov-length; freshly built chains are saved if -o/--output
    names a file. The generated text is printed to stdout.

    When stdout is not a terminal and the verbosity level is below 1, the
    script assumes it is running on a web server: it prints an HTML page
    containing the module docstring and exits with status 0.

    Exits with status 2 on bad command-line arguments, on conflicting -l/-m
    options, or when no chains could be loaded or built.
    """
    # Set up variables for this run
    if (not sys.stdout.isatty()) and (patrick_logger.verbosity_level < 1): # Assume we're running on a web server.
        print('Content-type: text/html\n\n')
        print("""<!doctype html><html><head><title>Patrick Mooney's Markov chain–based text generator</title><link rel="profile" href="http://gmpg.org/xfn/11" /></head><body><h1>Patrick Mooney's Markov chain–based text generator</h1><p>Code is available <a rel="muse" href="https://github.com/patrick-brian-mooney/markov-sentence-generator">here</a>.</p><pre>%s</pre></body></html>"""% __doc__)
        sys.exit(0)
    markov_length = 1
    chains_file = ''
    starts = None
    the_mapping = None
    sentences_desired = 1
    inputs = []
    is_html = False
    # Next, parse command-line options, if there are any
    if len(sys.argv) > 1: # The first option in argv, of course, is the name of the program itself.
        try:
            opts, _ = getopt.getopt(sys.argv[1:], 'vhqo:l:c:w:p:i:m:',
                    ['verbose', 'help', 'quiet', 'output=', 'load=', 'count=',
                    'columns=', 'pause=', 'html', 'input=', 'markov-length='])
            log_it('INFO: options returned from getopt.getopt() are: ' + pprint.pformat(opts), 2)
        except getopt.GetoptError:
            log_it('ERROR: Bad command-line arguments; exiting to shell')
            print_usage()
            sys.exit(2)
        log_it('INFO: detected number of command-line arguments is %d.' % len(sys.argv), 2)
        for opt, arg in opts:
            log_it('Processing option %s.' % opt, 2)
            if opt in ('-h', '--help'):
                log_it('INFO: %s invoked, printing usage message.' % opt)
                print_usage()
                sys.exit()
            elif opt in ('-v', '--verbose'):
                patrick_logger.verbosity_level += 1
                log_it('INFO: %s invoked, added one to verbosity level\n     Verbosity level is now %d.' % (opt, patrick_logger.verbosity_level))
            elif opt in ('-q', '--quiet'):
                log_it('INFO: %s invoked, decreasing verbosity level by one\n     Verbosity level is about to drop to %d.' % (opt, patrick_logger.verbosity_level-1))
                patrick_logger.verbosity_level -= 1
            elif opt in ('-m', '--markov-length'):      # Length of Markov chains generated. Incompatible with -l.
                log_it("INFO: -m invoked, argument is %s." % arg, 1)
                if starts is None and the_mapping is None:
                    markov_length = int(arg)
                else:
                    log_it("ERROR: If you load previously generated chains with -l, that file specifies the\nMarkov chain length. It cannot be overriden with -m or --markov-length.")
                    sys.exit(2)
            elif opt in ('-o', '--output'):
                chains_file = arg           # Specify output file for compiled chains.
            elif opt in ('-l', '--load'):   # Load compiled chains.
                # A chain length other than the default of 1 means that -m was already processed.
                if markov_length == 1:
                    # The loaded starts must land in `starts`, the name everything
                    # below reads; otherwise the loaded chains are silently discarded.
                    markov_length, starts, the_mapping = read_chains(arg)
                else:
                    log_it("ERROR: you cannot both specify a chains file with -m and also load a chains file\nwith -l. If you specify a file with -l, that file contains the chain length.")
                    sys.exit(2)
            elif opt in ('-c', '--count'):
                sentences_desired = int(arg)    # How many sentences to generate (0 is "keep working until interrupted").
            elif opt in ('-i', '--input'):
                log_it("  -i specified with argument %s." % arg)
                inputs.append(arg)
            elif opt in ('-w', '--columns'):
                pass    # How many columns wide the output should be. Currently unimplemented.
            elif opt in ('-p', '--pause'):
                pass    # How many seconds to pause between paragraphs. Currently unimplemented.
            elif opt == '--html':
                is_html = True    # Wrap paragraphs of text that are output in <p> ... </p>.
    else:
        log_it('DEBUGGING: No command-line parameters', 2)
        print_usage()
    log_it('DEBUGGING: verbosity_level after parsing command line is %d.' % patrick_logger.verbosity_level, 2)
    if starts is None or the_mapping is None:     # then no chains file was loaded.
        log_it("INFO: No chains file specified; parsing text files specified.", 1)
        log_it("  ... input files specified are %s." % inputs, 1)
        all_words = []
        for the_file in inputs:
            log_it("    ... processing file %s." % the_file, 2)
            all_words += word_list(the_file)
            log_it("       ... all_words now contains %d words." % len(all_words), 2)
        if all_words:
            starts, the_mapping = buildMapping(all_words, markov_length)
            if chains_file:
                store_chains(markov_length, starts, the_mapping, chains_file)
    if starts is None or the_mapping is None:     # Ridiculous! We must have SOMETHING to work with.
        log_it("ERROR: You must specify a chains file with -l, or else at least one text file with -i.")
        sys.exit(2)
    print(gen_text(the_mapping, starts, markov_length, sentences_desired, is_html))
示例#40
0
def record_and_interpret(timestamp, ping_transcript):
    """Read through the transcript. Break it down and record it, and then evaluate it.
    If necessary, log a usability event.

    TIMESTAMP identifies the ping run; it is logged and passed on unchanged to
    add_data_entry() and interpret(). PING_TRANSCRIPT is the text output of the
    PING command. The transcript is parsed into a dictionary, which is stored
    with add_data_entry() under 'ping_transcripts' and then handed to
    interpret(). If parsing fails, the dictionary holds the raw transcript
    (key 'transcript') and the error text (key 'err') instead.

    Note that this is currently heavily dependent on the format of the output produced
    by the PING command. For reference, this script was developed with Linux ping
    included with iputils-s20121221.
    """
    data = dict()
    log_it("INFO: we're recording a ping transcript from %s" % timestamp, 2)
    log_it("      transcript follows:\n\n%s\n\n" % ping_transcript, 3)
    t = ping_transcript.lower()
    if "failure in name" in t or "net unreachable" in t:
        # Recognized failure message: store the raw transcript without trying to parse it.
        data['transcript'] = ping_transcript
    elif len(t.split()) < 1:
        # Empty or whitespace-only transcript: an empty dictionary gets recorded.
        pass
    else:
        try:
            lines = ping_transcript.strip().split('\n')
            # NOTE(review): str.rstrip() treats its argument as a SET of characters,
            # not a suffix. It works for the header format shown below because the
            # character before " bytes of data." is ")", which is not in the set.
            header = lines.pop(0).rstrip(
                'bytes of data.')  # Read the first line in the ping_transcript
            # Current format is: PING google.com (172.217.1.206) 56(84) bytes of data.
            # Exactly four space-separated fields are expected; any other count raises
            # ValueError, which the outer handler below turns into a raw-transcript record.
            data['executable_name'], data['hostname'], data['host_IP'], data[
                'bytes'] = header.strip().split(' ')
            # Next, gather data from the last line from the file
            # NOTE(review): str.lstrip('rtt') likewise strips any leading 'r'/'t' characters.
            trailer = lines.pop().lstrip('rtt').strip()
            label, trailer = trailer.split('=')
            if ',' in trailer:
                stats, data['pipe_num'] = trailer.split(',')
            else:
                stats = trailer.strip()
            # Pair the slash-separated labels left of the '=' with the slash-separated
            # values to its right, and store each pair as its own entry.
            data.update(
                dict(zip(label.strip().split('/'),
                         stats.strip().split('/'))))
            # Now, gather data from the last remaining line that hasn't already been processed
            trailer = lines.pop().strip()
            errors_pos = trailer.find("errors")
            if errors_pos != -1:
                log_it(
                    "WARNING: network errors detected; we may be about to crash"
                )
            # Peel the summary line apart field by field, left to right.
            data['packets_transmitted'], trailer = trailer.split(
                'packets transmitted,')
            data['received'], trailer = trailer.split(' received,')
            if trailer.strip().startswith('+') and "errors" in trailer:
                errors, trailer = trailer.split('errors, ')
                data['errors'] = errors.strip().lstrip('+').strip()
            data['packet_loss'], trailer = trailer.split('packet loss,')
            data['time'] = trailer.split('time')[1]
            # The last remaining line is purely cosmetic: pop it and ignore it
            _ = lines.pop()
            # Streamline the rest of the ping_transcript, then process it line by line
            lines = [
                l.strip() + '\n' for l in lines
                if len(l.strip()) > 0 and not l.strip().startswith('---')
            ]
            data['log'] = [][:]
            for l in lines:
                event = dict([])
                the_line = l[:]
                if "net unreachable" in the_line:
                    # Unparseable-by-design line: record it verbatim, numbered by position.
                    event = {
                        'icmp_seq': 1 + len(data['log']),
                        'transcript': the_line
                    }
                else:
                    try:
                        # Packet size, host ID and TTL are stored on the transcript-level
                        # dictionary (overwritten by each successive line); only the
                        # sequence number and time go into the per-line event.
                        data['return_packet_size'], the_line = the_line.strip(
                        ).split('bytes from')
                        data[
                            'host ID (reverse DNS?)'], the_line = the_line.split(
                                '(')
                        _, the_line = the_line.split(
                            ':'
                        )  # Drop the IP address, which we've already seen anyway.
                        icmp, ttl, time, _ = the_line.strip().split(' ')
                        event['icmp_seq'] = icmp.split('=')[1].strip()
                        data['ttl'] = ttl.split('=')[1].strip()
                        event['time'] = time.split('=')[1].strip()
                    # NOTE(review): BaseException also covers KeyboardInterrupt and
                    # SystemExit, so those are swallowed here too -- confirm that is intended.
                    except BaseException as e:
                        log_it(
                            "WARNING: unable to parse the ping transcript line: %s; halting instead of trying to parse the phrase: %s"
                            % (l, the_line))
                        event = {
                            'icmp_seq': 1 + len(data['log']),
                            'transcript': the_line
                        }
                data['log'] += [event]
        except Exception as err:
            # Any parsing failure above: fall back to recording the raw transcript and
            # the error text. Entries stored before the failure remain in the dictionary.
            data['transcript'] = ping_transcript
            data['err'] = str(err)
    for k in data:  # Clean up leading and trailing whitespace in the data.
        if type(data[k]) == type('string'):
            data[k] = data[k].strip()
    add_data_entry('ping_transcripts', timestamp, data)
    interpret(data, timestamp)
# Set up default values
# The tags are stored as comma-separated strings; normal_tags ends with a comma,
# presumably so that temporary_tags can be appended directly -- confirm at the point of use.
normal_tags = 'H.P. Lovecraft, automatically generated text, Patrick Mooney, Python, Markov chains,'
temporary_tags = 'Dagon, 1917, Dagon week'
story_length = random.choice(list(range(30, 70)))    # A random whole number from 30 through 69
the_content = ''

patrick_logger.verbosity_level = 2
chains_file = '/lovecraft/chains.dat'    # Path of the compiled chains file

# Utility functions
def print_usage():    # Note that, currently, nothing calls this.
    """Log that usage help was requested, then print the module docstring to stdout."""
    patrick_logger.log_it("INFO: print_usage() was called")
    usage_text = __doc__
    print(usage_text)

patrick_logger.log_it("INFO: tags and sentence lengths set up ...", 2)

the_markov_length, the_starts, the_mapping = read_chains(chains_file)

patrick_logger.log_it("INFO: chains read, starting run ...", 2)

# Next, pick out a title between 10 and 70 characters that has not been used before.
# the_length starts at (and is reset to) an out-of-range value so that the loop
# keeps running until an acceptable title has been generated.
the_length = 300
patrick_logger.log_it("INFO: getting a story title ...", 2)
while not 10 <= the_length <= 70:
    the_title = gen_text(the_mapping, the_starts, markov_length=the_markov_length, sentences_desired=1, paragraph_break_probability=0).strip()
    the_length = len(the_title)
    patrick_logger.log_it("INFO: The story title generated was '" + the_title + ".'", 2)
    patrick_logger.log_it("INFO:    And the length of that title is: " + str(the_length), 2)
    # Incidentally, reading the whole file is a really bad idea if the log of titles ever gets very big.
    # The context manager makes sure the file is closed again on every pass through the loop.
    with open('/lovecraft/titles.txt') as titles_file:
        title_already_used = the_title in titles_file.read()
    if title_already_used:
        patrick_logger.log_it("That title's been used! Trying again ...\n\n\n")
        the_length = 300    # Out of range on purpose: forces another pass, as the message promises.
def print_usage():
    """Log that usage help was requested, then print the module docstring to stdout."""
    patrick_logger.log_it("INFO: print_usage() was called")
    usage_text = __doc__
    print(usage_text)
    underlying function tkinter.filedialog.askopenfilename

    Returns a path to the file that the user wants to open.

    Adapted from more complex code in Zombie Apocalypse.
    """
    patrick_logger.log_it(
        "DEBUGGING: simple_standard_file.do_open_dialog() called", 2)
    try:  # Use TKinter if possible
        import tkinter
        import tkinter.filedialog
        tkinter.Tk().withdraw()  # No root window
        filename = tkinter.filedialog.askopenfilename(**kwargs)
    except:  # If all else fails, ask the user to type it.
        filename = input('What file would you like to open? ')
    if filename == tuple([]):
        patrick_logger.log_it(
            '    INFO: simple_standard_file: do_open_dialog() cancelled', 2)
        filename = None
    else:
        patrick_logger.log_it(
            '    INFO: simple_standard_file: Selected file is "%s"' % filename,
            2)
    return filename


if __name__ == "__main__":
    # This module is a library: when run directly, all it does is log that it cannot be run.
    patrick_logger.log_it("ERROR: %s is not a program you can run. It is a collection of software to be used by other software." % (sys.argv[0],))
示例#44
0
def getQuoteParagraph(num_sents, num_words):
    """Return a paragraph from getNonQuoteParagraph() with a leading quotation dash."""
    log_it("    getQuoteParagraph() called", 2)
    paragraph = getNonQuoteParagraph(num_sents, num_words)
    return "―" + paragraph
def print_usage():
    """Log the call, then print blank lines followed by the module docstring."""
    patrick_logger.log_it("INFO: print_usage() was called", 2)
    for chunk in ('\n\n', __doc__):
        print(chunk)
def sort_archive():
    """Sort the tweet archive. There's no obvious benefit to doing so. Call the script
    with the --sort-archive flag to do this. Currently, this does not ever happen
    automatically, but that might change in the future.

    The file at tweet_archive_path is read in full, its lines are sorted, and
    they are written back in place, each stripped of surrounding whitespace
    and terminated by a single newline.

    Exits with status 3 if the archive cannot be opened. An IOError while
    reading or writing is logged and otherwise ignored; the file is closed in
    either case.
    """
    patrick_logger.log_it("INFO: sort_archive() was called")
    try:
        tweet_archive = open(tweet_archive_path, 'r+')
    except IOError:
        patrick_logger.log_it("ERROR: can't open tweet archive file.", 0)
        sys.exit(3)
    try:
        # The context manager closes the file even if one of the operations below
        # raises, so a failure partway through no longer leaks the open handle.
        with tweet_archive:
            all_tweets = tweet_archive.readlines() # we now have a list of strings
            patrick_logger.log_it("DEBUGGING: Tweets archive successfully opened", 2)
            # Having read to the end, the file position equals the size of the archive.
            patrick_logger.log_it("INFO:   Current size of tweets archive is " + str(tweet_archive.tell()) + " bytes.")
            patrick_logger.log_it("INFO:   And it is currently " + str(datetime.datetime.now()))
            patrick_logger.log_it("INFO: About to sort", 2)
            all_tweets.sort()
            tweet_archive.seek(0)
            patrick_logger.log_it("DEBUGGING: About to start writing.", 2)
            tweet_archive.writelines(a_tweet.strip() + "\n" for a_tweet in all_tweets)
            patrick_logger.log_it("DEBUGGING: Wrote all the tweets back to the archive.", 2)
            tweet_archive.truncate() # This is probably unnecessary: unless leading/trailing whitespace has crept into the tweets, the new file should be the same size as the old one. Still, better safe than sorry. But this is why a high debug level is needed to see this message.
            patrick_logger.log_it("DEBUGGING: Truncated the tweet archive file.", 4)
        patrick_logger.log_it("DEBUGGING: Closed the tweet archive file.", 2)
    except IOError:
        patrick_logger.log_it("ERROR: Trouble operating on tweet archive file.", 0)
示例#47
0
def getHeadline(num_sents, num_words):
    """Generate a headline from the headlines generator and return it in upper case."""
    log_it("    getHeadline() called", 2)
    headline = getParagraph(headlines_genny, num_sents=num_sents, num_words=num_words)
    return headline.upper()
示例#48
0
def preprocess_dir(pathname):
    """Get the Google Photos album directory PATHNAME ready for upload.

    As of the time of writing, such a directory seems to contain:
        * `metadata.json`, which describes the album itself;
        * for each photo, the graphic or movie file (.png, .jpg, .mov, .mp4,
          .gif, others?) plus a .json file holding that file's metadata.

    "Ready for upload" currently means just two things:
        1. "degenerate" extensions have been renamed to "proper" ones,
           e.g. a.jso -> a.json;
        2. every per-photo metadata .json file has a corresponding image; a
           missing image is downloaded from the URL in its metadata, if possible.

    The working directory is changed to PATHNAME for the duration of the call
    and restored afterwards. Errors are logged rather than raised.
    """
    patrick_logger.log_it('INFO: preprocessing directory: %s' % pathname)
    degenerate_extensions = {
        'json': ['js', 'jso'],
        'jpg': ['j', 'jpg_', 'JPG'],
    }
    olddir = os.getcwd()
    os.chdir(pathname)
    try:
        # Step 1: rename files whose names end in a degenerate extension.
        for proper_ext, bad_exts in degenerate_extensions.items():
            for bad_ext in bad_exts:
                for found in glob.glob('*' + bad_ext):
                    patrick_logger.log_it("INFO: renaming degenerate-extension file %s" % found, 2)
                    stem = os.path.splitext(found)[0]
                    os.rename(found, stem + '.%s' % proper_ext)
                    try:  # Also try to rename any metadata for the photo
                        os.rename(found + ".json", stem + '.%s.json' % proper_ext)
                    except Exception:
                        pass  # Oh well.

        # Step 2: make sure each per-photo metadata file has its graphic file.
        for metadata_name in glob.glob('*json'):
            if metadata_name.strip() == 'metadata.json':
                continue    # That one describes the album, not a photo.
            # Dropping the .json still leaves us with something ending in .jpg, etc.
            image_name = os.path.splitext(metadata_name)[0]
            if os.path.isfile(image_name):
                continue
            patrick_logger.log_it('INFO: image file %s is missing; downloading ...' % image_name)
            with open(metadata_name) as data_file:
                data = json.load(data_file)
            try:
                with urllib.request.urlopen(data['url']) as response, open(image_name, 'wb') as out_file:
                    shutil.copyfileobj(response, out_file)
            except Exception as e:
                patrick_logger.log_it('WARNING: cannot download image for metadata file %s; the system said: \n%s' % (metadata_name, e), 1)
    except Exception as e:
        patrick_logger.log_it("ERROR: %s" % e, 0)
    finally:
        os.chdir(olddir)
    patrick_logger.log_it('    INFO: done preprocessing directory: %s' % pathname, 2)
示例#49
0
def getQuoteParagraph(num_sents, num_words):
    """Return a paragraph of quoted speech.

    The paragraph itself comes from getNonQuoteParagraph(), called with the
    same num_sents and num_words; the result is prefixed with a leading "―"
    character.
    """
    log_it("    getQuoteParagraph() called", 2)
    paragraph = getNonQuoteParagraph(num_sents, num_words)
    return "―" + paragraph
        import tkinter.filedialog
        tkinter.Tk().withdraw()     # No root window
        filename = tkinter.filedialog.asksaveasfilename()
    except:         # If all else fails, ask the user to type a filename.
        filename = input('Under what name would you like to save the file? ')
    patrick_logger.log_it('    Selected file is %s' % filename, 2)
    return filename

def do_open_dialog(**kwargs):
    """Shows a dialog asking the user which file to open, or comes as close as
    possible to doing so. Any keyword arguments passed in are piped to the
    underlying function tkinter.filedialog.askopenfilename

    If tkinter cannot be imported or the dialog cannot be shown, the user is
    asked to type a filename at the console instead.

    Returns a path to the file that the user wants to open.

    Adapted from more complex code in Zombie Apocalypse.
    """
    patrick_logger.log_it("DEBUGGING: simple_standard_file.do_open_dialog() called", 2)
    try:            # Use TKinter if possible
        import tkinter
        import tkinter.filedialog
        tkinter.Tk().withdraw()     # No root window
        filename = tkinter.filedialog.askopenfilename(**kwargs)
    except Exception:
        # If all else fails, ask the user to type it. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt and SystemExit
        # propagate instead of being turned into a console prompt.
        filename = input('What file would you like to open? ')
    patrick_logger.log_it('    Selected file is %s' % filename, 2)
    return filename

if __name__ == "__main__":
    # Running this file directly does nothing except log an error that names the
    # script (sys.argv[0]) and explains that it is meant to be used by other software.
    patrick_logger.log_it("ERROR: %s is not a program you can run. It is a collection of software to be used by other software." % sys.argv[0])
示例#51
0
nonheadline_chain_length = 2    # Markov chain length used when building the non-headline mapping below.
length_tolerance = 0.4      # e.g., 0.3 means the generated text can be up to 30% over or under the length of the requested text.
joyce_ratio = 1.4           # Goal ratio of Joyce to non-Joyce text in the resulting chains.

import os, glob, sys
sys.path.append('/UlyssesRedux/code/')
from directory_structure import *           # Gets us the listing of file and directory locations.

sys.path.append(markov_generator_path)
from sentence_generator import *

import patrick_logger    # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run
headlines_starts, headlines_mapping = buildMapping(word_list(aeolus_headlines_path), markov_length=headline_chain_length)

# The non-headline chains are trained on the Joyce text plus every mixin text for
# this chapter. File sizes (in bytes) are used to decide how many copies of the
# Joyce text are needed to approximate joyce_ratio.
joyce_text_length = os.stat(aeolus_nonheadlines_path).st_size
mixin_files = glob.glob('%s/07/*txt' % current_run_corpus_directory)    # Glob once so both loops see the same files.
mixin_texts_length = 0
for which_file in mixin_files:
    mixin_texts_length += os.stat(which_file).st_size

# Never let the repeat count round down to zero: with a small (or absent) mixin
# corpus that would silently drop the Joyce text from the training data entirely.
joyce_copies = max(1, int(round((mixin_texts_length / joyce_text_length) * joyce_ratio)))
the_word_list = word_list(aeolus_nonheadlines_path) * joyce_copies
for the_file in mixin_files:
    the_word_list += word_list(the_file)
nonheadlines_starts, nonheadlines_mapping = buildMapping(the_word_list, markov_length=nonheadline_chain_length)

log_it("INFO: built mappings from both headlines and non-headlines files, moving on", 2)
示例#52
0
def write_story():
    """Generate the text of the chapter, one section at a time.

    Per-section statistics are read from wandering_rocks_stats_file: a header
    line (ignored) followed by one comma-separated line per section giving the
    section number, paragraph count, sentence count, and word count. For each
    of the sections_in_chapter sections, Markov chains are built from three
    section files (the previous, current, and next section, wrapping modulo
    19) plus every '*txt' file in mixin_texts_dir, and the section's sentence
    count is requested from gen_text().

    Returns the generated sections joined by a '*   *   *' separator line.

    Raises IndexError if the section numbers in the stats file are out of
    order.
    """
    output_text = []

    # First, set up table of filenames
    section_filenames = ['%s/%02d.txt' % (wandering_rocks_sections_path, which_section)
                         for which_section in range(1, 1 + sections_in_chapter)]

    log_it("INFO: filenames table set up")
    log_it("  length is %d" % len(section_filenames), 2)
    log_it("\n    and the filenames table is:\n" + pformat(section_filenames))

    # The context manager guarantees the stats file is closed, even if a line
    # fails to parse or text generation raises.
    with open(wandering_rocks_stats_file) as stats_file:
        stats_file.readline()                  # Read and ignore the header line

        log_it("INFO: header read from stats file, about to parse stats file and start generating text")

        for which_section in range(1, 1 + sections_in_chapter):
            the_line = stats_file.readline()        # Read another line from the stats file
            log_it("INFO: Parsing the line '%s'." % the_line.split(), 2)
            sec, pars, sents, words = map(int, the_line.split(','))
            log_it("    sec: %d; pars: %d; sents: %d; words: %d" % (sec, pars, sents, words), 2)
            if sec != which_section:        # elementary sanity check
                raise IndexError("The stats file for Wandering Rocks is corrupt: section number %d encountered out of order." % sec)
            log_it("    generating based on sections %d, %d, %d." % (1 + (which_section + 17) % 19, which_section, (which_section + 1) % 19), 2)
            log_it("      asking for %d sentences with paragraph break probability of %f." % (sents, pars/sents))

            # Section numbers are 1-based, list indices 0-based, hence the "- 1".
            # For the next-to-last section the third index evaluates to -1, which
            # selects the last file.
            which_rocks_sections = [
                                     section_filenames[1 + (which_section + 17) % 19 - 1],
                                     section_filenames[which_section - 1],
                                     section_filenames[(which_section + 1) % 19 - 1]
                                    ]
            starts, the_mapping = buildMapping_withMixins(chain_length, which_rocks_sections, glob.glob('%s/*txt' % mixin_texts_dir))

            output_text.append(gen_text(the_mapping, starts, markov_length=chain_length, sentences_desired=sents,
                    paragraph_break_probability=(pars/sents)))

    return '\n*   *   *\n'.join(output_text)
def get_a_tweet():
    """Find a tweet. Keep trying until we find one that's an acceptable length
    (more than 45 and fewer than 141 characters). This function doesn't check to
    see if the tweet has been tweeted before; it just finds a tweet that's in
    acceptable length parameters.

    Normally this procedure asks gen_text() for a single-sentence chunk of text,
    but if extra_material_archive_path is set, it asks for a random number of
    sentences between one and three. Chunks generated from more than one
    sentence are more likely to be too long, which means that material
    accumulates in the archive faster: every rejected candidate is appended to
    the archive file, and a paragraph break is appended once a tweet is found.

    Returns the accepted tweet, stripped of surrounding whitespace.
    """
    patrick_logger.log_it("INFO: finding a tweet ...")
    the_length = 160        # Deliberately out of range so that the loop body runs at least once.
    the_tweet = ''
    sentences_requested = 1
    while not 45 < the_length < 141:
        if extra_material_archive_path:
            sentences_requested = random.choice(range(1, 4))
            patrick_logger.log_it("\nINFO: We're asking for " + str(sentences_requested) + " sentences.", 2)
        if the_tweet and extra_material_archive_path:
            # A non-empty the_tweet here is the candidate rejected on the previous pass.
            try:
                with open(extra_material_archive_path, 'a') as extra_material_archive_path_file:
                    extra_material_archive_path_file.write(the_tweet + ' ')
                patrick_logger.log_it("INFO: Wrote tweet to extra material archive", 2)
            except IOError: # and others?
                patrick_logger.log_it("ERROR: Could not write extra material to archive", 0)
        the_tweet = gen_text(the_mapping, the_starts, markov_length=the_markov_length, sentences_desired=sentences_requested, paragraph_break_probability=0)
        the_tweet = the_tweet.strip()
        the_length = len(the_tweet)
        patrick_logger.log_it("\nINFO:  The tweet generated was: " + the_tweet + "\nINFO:     and the length of that tweet is: " + str(the_length))
    patrick_logger.log_it("OK, that's it, we found one")
    if extra_material_archive_path:     # End the paragraph that we've been accumulating during this run.
        try:
            with open(extra_material_archive_path, 'a') as extra_material_archive_path_file:
                extra_material_archive_path_file.write('\n\n') # Start a new paragraph in the extra material archive.
        except IOError: # and others?
            patrick_logger.log_it("Couldn't start new paragraph in extra material archive", 0)
    return the_tweet
    patrick_logger.log_it("INFO: print_usage() was called")
    print(__doc__)

def weighted_probability(the_length):
    """Return the posting threshold for accumulated text of length the_length.

    Computes 1 - e ** (-2.5e-05 * (the_length - 3000)). The result is 0 at a
    length of 3000, negative below that, and climbs toward 1 as more text is
    built up, so posting becomes more likely the longer the text gets.
    """
    excess_length = the_length - 3000
    decay = math.e ** (-2.5e-05 * excess_length)
    return 1 - decay


# Settings for this run: the post title carries today's date.
the_title = "Discourse of " + datetime.date.today().strftime("%A, %d %B %Y")
the_blog_name = "AutoIrishLitDiscourses"
the_content_path = "/150/extras.txt"
normal_tags = ['Irish literature', 'automatically generated text', 'Patrick Mooney', 'dadadodo']
temporary_tags = []
the_content = ''

patrick_logger.log_it('INFO: Constants and variables set up; trying to read content', 2)

# Read the accumulated content. The context manager closes the file even if the
# read itself fails; any I/O failure is fatal for this script.
try:
    with open(the_content_path) as the_file:
        the_content = the_file.read()
except IOError:
    patrick_logger.log_it("ERROR: Couldn't open, or couldn't read, or couldn't close, the content file", 0)
    sys.exit(2)

# Post only when a uniform random roll falls below the length-dependent threshold.
the_maximum_roll = weighted_probability(len(the_content))
the_dice_roll = random.random()
patrick_logger.log_it('INFO: Length of content is ' + str(len(the_content)) + '\n   and the dice roll was ' + str(the_dice_roll) + '\n   And the maximum score to post at that length is ' + str(the_maximum_roll), 2)
if the_dice_roll < the_maximum_roll:
    # Make the request
    patrick_logger.log_it('INFO: Attempting to post the content', 2)
示例#55
0
def getNonQuoteParagraph(num_sents, num_words):
    """Return a paragraph of ordinary (non-quoted, non-headline) text.

    Delegates to getParagraph(), passing num_sents and num_words along with the
    module-level non-headline chain length, mapping, and sentence starts.
    """
    log_it("    getNonQuoteParagraph() called", 2)
    paragraph = getParagraph(
        num_sents,
        num_words,
        nonheadline_chain_length,
        nonheadlines_mapping,
        nonheadlines_starts,
    )
    return paragraph
示例#56
0
    record_and_interpret(timestamp, transcript)


def monitor():
    """Loop forever, scheduling one ping test at a random moment in each interval.

    On every pass: take the current time, move ahead by interval_between_pings
    minutes, round that down to a multiple of interval_between_pings minutes
    past the hour, then add a random offset of up to
    (60 * interval_between_pings - number_of_packets) seconds. schedule_test is
    started on a new thread with the resulting delay, and this loop sleeps until
    one second after the scheduled moment before planning the next test.
    """
    log_it("INFO: beginning monitoring", 3)
    while True:
        now = datetime.datetime.now()

        # Step into the next interval, then snap back to the start of it.
        window_start = now + datetime.timedelta(minutes=interval_between_pings)
        snapped_minute = window_start.minute // interval_between_pings * interval_between_pings
        window_start = window_start.replace(minute=snapped_minute, second=0, microsecond=0)

        # Choose a random second inside the interval for the test to begin.
        # NOTE(review): subtracting number_of_packets presumably leaves room for
        # the test to finish inside the interval -- confirm.
        latest_offset = 60 * interval_between_pings - number_of_packets
        scheduled = window_start + datetime.timedelta(seconds=random.randint(0, latest_offset))

        timestamps = (now.isoformat(sep=' '), scheduled.isoformat(sep=' '))
        log_it("INFO: it's %s; scheduling next ping test for %s" % timestamps, 3)

        delay = (scheduled - now).total_seconds()
        _thread.start_new_thread(schedule_test, (delay, ))
        time.sleep(1 + delay)


if __name__ == "__main__":
    # Script entry point: run the startup tasks, then hand control to the
    # monitoring loop.
    log_it("INFO: beginning run ...", 3)
    startup()
    monitor()
    # Only reached if monitor() returns.
    log_it("INFO: program reached normal termination", 5)
示例#57
0
def upload_photos(dir):
    """Upload all of the photos (and videos) in the directory DIR to Flickr.

    If DIR contains a readable metadata.json, its contents override the default
    album metadata (whose title is DIR's base name), and the folder is renamed
    to the album title when the two differ. Each media file may have a sidecar
    metadata file named <filename>.json; its 'title' and 'description' are
    passed to Flickr, and a file whose name differs from its metadata title is
    renamed (along with its sidecar) to that title before uploading. Every
    upload is tagged only with the quoted album title. After a successful
    upload, the media file and its sidecar (if any) are deleted.

    Problems are logged rather than raised, and the working directory in effect
    when the function was called is always restored.
    """
    olddir = os.getcwd()
    try:
        os.chdir(dir)
        patrick_logger.log_it('INFO: about to upload photos in directory: %s' %
                              dir)
        default_folder_metadata = {'title': os.path.basename(dir)}
        folder_metadata = default_folder_metadata.copy()
        try:  # First, try to rename the folder to the album name
            with open(os.path.join(dir, 'metadata.json')) as json_file:
                data = json.load(json_file)
            folder_metadata.update(data)
            patrick_logger.log_it('    INFO: successfully read album metadata',
                                  3)
            album_title = folder_metadata['title'].strip()
            if os.path.basename(dir).strip() != album_title:
                os.rename(dir, os.path.join(os.path.dirname(dir), album_title))
        except Exception:  # Best effort: fall back to the defaults.
            patrick_logger.log_it(
                '    INFO: cannot read album metadata; using defaults', 2)

        default_data_fields = {
            'description': '',
            'tags': [],
            'title': ''
        }  # Fields that must appear in image file metadata
        image_file_extensions = [
            '*jpg', '*png', '*gif', '*avi', "*m4v", '*MOV'
        ]
        images = []
        for ext in image_file_extensions:
            images.extend(glob.glob(ext))
        for image in sorted(set(images)):  # De-duplicate in case patterns overlap
            patrick_logger.log_it("    INFO: about to upload image %s" % image)
            try:  # First, get any available metadata
                with open(image + '.json') as json_file:
                    file_data = json.load(json_file)
                json_data = default_data_fields.copy()
                json_data.update(file_data)
                new_name = json_data['title'].strip()
                if image.strip() != new_name:
                    # The filename doesn't match what the metadata says it should be:
                    # rename the file and its metadata file, and track the new name.
                    os.rename(image, new_name)
                    os.rename(image + '.json', new_name + '.json')
                    image = new_name
                patrick_logger.log_it(
                    '    INFO: successfully read photo metadata', 3)
            except Exception:
                json_data = default_data_fields.copy()
                json_data.update({'title': image})
                patrick_logger.log_it(
                    '    INFO: failed to read photo metadata; using defaults',
                    2)
            try:
                # Yes, just the single quoted string. Dump any other tags.
                json_data['tags'] = '"%s"' % folder_metadata['title']
                flickr.upload(filename=image,
                              title=json_data['title'],
                              description=json_data['description'],
                              tags=json_data['tags'])
                patrick_logger.log_it("    INFO: successfully uploaded file",
                                      4)
                os.remove(image)
                try:
                    os.remove(image + '.json')
                except FileNotFoundError:
                    # A missing metadata file is fine. os.remove() signals that
                    # with FileNotFoundError; catching anything narrower would
                    # let it fall through to the handler below and report a
                    # successful upload as a failure.
                    pass
            except Exception as e:
                patrick_logger.log_it(
                    "    INFO: unable to upload or delete file %s; the system said %s"
                    % (image, e))
    except Exception as e:
        patrick_logger.log_it('ERROR: system said %s' % e)
    finally:
        os.chdir(olddir)
import subprocess, pprint, getopt, sys, datetime, random

import patrick_logger # From https://github.com/patrick-brian-mooney/personal-library
import social_media
from social_media_auth import IrishLitTweets_client

from sentence_generator import *

# Set up default values
# patrick_logger.verbosity_level = 4    # uncomment this to set the starting verbosity level
chains_file = '/150/2chains.dat'        # The location of the compiled textual data.
extra_material_archive_path = ''        # Full path to a file. An empty string means don't archive (i.e., do discard) material that's too long.
tweet_archive_path = '/150/tweets.txt'  # Default location of the tweet archive. If you don't like it, use -a on the command line

# Announce startup at level 0, reporting the verbosity level currently in effect.
patrick_logger.log_it("INFO: WE'RE STARTING, and the verbosity level is " + str(patrick_logger.verbosity_level), 0)

# Load the chain length, sentence starts, and mapping from the compiled chains file, once, at startup.
the_markov_length, the_starts, the_mapping = read_chains(chains_file)

# Functions
def print_usage():
    """Log that usage help was requested, then print the module docstring to stdout."""
    patrick_logger.log_it("INFO: print_usage() was called")
    usage_text = __doc__
    print(usage_text)

def sort_archive():
    """Sort the tweet archive. There's no obvious benefit to doing so. Call the script
    with the --sort-archive flag to do this. Currently, this does not ever happen
    automatically, but that might change in the future.
    """
    patrick_logger.log_it("INFO: sort_archive() was called")
    # NOTE(review): only the logging call is visible here; no sorting code
    # follows it -- confirm whether the rest of the body is missing.
def print_usage():
    """Write this module's docstring to stdout as a usage message.

    The call is logged first.
    """
    patrick_logger.log_it("INFO: print_usage() was called")
    usage_message = __doc__
    print(usage_message)
示例#60
0
def startup():
    """Run the one-time startup tasks.

    Checks the ping configuration, then starts a new thread running
    schedule_daily_report_creation with the current data store name as its only
    argument.
    """
    check_ping_config()
    report_args = (current_data_store_name(), )
    _thread.start_new_thread(schedule_daily_report_creation, report_args)
    log_it("INFO: startup tasks complete", 3)