示例#1
0
 def __init__(self):
   """Store a default-constructed CustomDictionary on self.custom_dictionary."""
   self.custom_dictionary = CustomDictionary()
示例#2
0
    elif o == "--input":
      input_file = a
    else:
      print "Unhandled Option\n"
      logger.critical("Unhandled Option")
      usage()
      sys.exit(2)
  if not input_file:
    print "No input file set, use --input option."
    usage()
    logger.critical("No input file set")
    sys.exit(2)
      
  try:
    raw_tex = open(input_file, "r").read()
  except IOError as e:
    print "Can't read input file:" + str(e) + "\n"
    logger.critical("Can't read input file : " + str(e))
    sys.exit(2)
  
  custom_dictionary = CustomDictionary(no_dictionary_update=no_dictionary_update)
  haiku_list = find_haiku_in_tex(raw_tex, custom_dictionary) 
  custom_dictionary.save_dict()
  logger.info("Found the following haiku: " + str(haiku_list))
  if len(haiku_list)==0:
    print "Found no Haiku, sorry :("
  else:
    print "Found the following Haiku:"
    for haiku in haiku_list:
      print haiku
示例#3
0
class HaikuFinder(object):
  def __init__(self):
    """Create a finder backed by a default-constructed CustomDictionary.

    The dictionary is consulted by number_of_syllables() for words missing
    from cmudictionary and is written out by save_dict().
    """
    self.custom_dictionary = CustomDictionary()

  def save_dict(self):
    self.custom_dictionary.save_dict();

  def find_haiku_in_text(self, raw_text):
    """Return a list of every haiku found in raw_text.

    The text is cut into paragraphs at double line breaks. Each paragraph
    is cleaned with nonalphanumeric_pattern, split into clauses, and the
    clauses are scanned for consecutive 5/7/5 syllable runs. Paragraphs
    with fewer than three clauses cannot hold a haiku and are skipped.
    """
    found = []

    for raw_paragraph in raw_text.split("\n\n"):
      # Whatever nonalphanumeric_pattern matches (single line breaks, tabs,
      # other unwanted characters) is replaced by a space.
      cleaned = nonalphanumeric_pattern.sub(' ', raw_paragraph).strip()

      clauses = split_at_punctuation(cleaned)
      if len(clauses) >= 3:
        counted = self.count_syllables_in_clauses(clauses)
        found.extend(self.find_haiku_in_clauses(counted))

    return found

  def find_haiku_in_clauses(self, clauses):
    haiku_found = []
    for i in range(0, len(clauses)-2):
      if [clauses[i][2], clauses[i+1][2], clauses[i+2][2]] == [5,7,5]:
        haiku_found.append(''.join(clauses[i][0] + clauses[i][1] + " " \
          + clauses[i+1][0] + clauses[i+1][1] + " " \
          + clauses[i+2][0] + clauses[i+2][1]))
    return haiku_found

  #Returns None if word isn't recognised
  def count_syllables_in_clause(self, clause):
    return reduce(sum_syllables, [self.number_of_syllables(word) for word in clause.split()])

  def count_syllables_in_clauses(self, clauses_with_punctuation):
    return [(clause, ending_punctuation, self.count_syllables_in_clause(clause)) 
                      for (clause, ending_punctuation) in clauses_with_punctuation]

  #returns None if word isn't recognised
  def number_of_syllables(self, word):  
    if word == "": 
      return 0

    #If the word is hypenated then use the sum of the word on each side of the dash  
    if "-" in word:
      return reduce(sum_syllables, [self.number_of_syllables(w) for w in word.split("-")])
    
    is_vowel_sound = lambda syllable: isdigit(syllable[-1])
    try:    
      #cmudict actually returns a list of phonetics, so by default choose first length
      return len([1 for syllable in cmudictionary[word.lower()][0] if is_vowel_sound(syllable) ])
                
    except KeyError as e:
      num = self.custom_dictionary.number_of_syllables(word)
      if num is None:
        return None
      return num