def main():
    """
    Small manual demo of add_morph on one hard-coded Finnish sentence.

    Tags the sentence, fetches substitute words for the theme u'koira' from
    the graph database, replaces the words tagged as nouns or verbs and
    prints whatever add_morph returns for the result.
    """
    graph = './graphs/verkko.db'
    text = u"Porsas kiiruhti kissan kanssa kotiin."
    tag_list = ['POS=NOUN', 'POS=VERB']

    tagged = tag_fi.pos_tag(text)
    # theme_based_words returns a pair; only its first element (the words)
    # is needed here.
    substitutes = generate_poem.theme_based_words(u'koira', graph, 7)[0]
    replaced = generate_poem.replace_all('fi', tagged, substitutes, tag_list,
                                         '', '_')
    morph = add_morph(tagged, replaced)
    # Call form of print: same output as the print statement for a single
    # argument, and consistent with the print(...) calls used elsewhere.
    print(morph)
def main():
    """
    Small manual demo of the Finnish replacement pipeline.

    Takes a chunk of the poetry corpus, tags it, fetches substitute words
    for the theme u'koira' from the graph database, replaces the words
    tagged as nouns or verbs and prints the output of
    finnish_specific.add_morph.
    """
    corpus = './runoutta/runoutta_aakkosellinen.txt'
    graph = './graphs/verkko.db'
    tag_list = ['POS=NOUN', 'POS=VERB']

    chunk = get_chunk_of_corpus(corpus)
    tagged = tag_fi.pos_tag(chunk)
    # theme_based_words returns a pair; only its first element (the words)
    # is needed here.
    substitutes = theme_based_words(u'koira', graph, 7)[0]
    replaced = replace_all('fi', tagged, substitutes, tag_list, '', '_')
    generable = finnish_specific.add_morph(tagged, replaced)
    # Call form of print: same output as the print statement for a single
    # argument, and consistent with the print(...) calls in generate_poem.
    print(generable)
def main():
    """
    Small manual demo of the Finnish replacement pipeline.

    Takes a chunk of the poetry corpus, tags it, fetches substitute words
    for the theme u'koira' from the graph database, replaces the words
    tagged as nouns or verbs and prints the output of
    finnish_specific.add_morph.
    """
    corpus = './runoutta/runoutta_aakkosellinen.txt'
    graph = './graphs/verkko.db'
    tag_list = ['POS=NOUN', 'POS=VERB']

    chunk = get_chunk_of_corpus(corpus)
    tagged = tag_fi.pos_tag(chunk)
    # theme_based_words returns a pair; only its first element (the words)
    # is needed here.
    substitutes = theme_based_words(u'koira', graph, 7)[0]
    replaced = replace_all('fi', tagged, substitutes, tag_list, '', '_')
    generable = finnish_specific.add_morph(tagged, replaced)
    # Call form of print: same output as the print statement for a single
    # argument, and consistent with the print(...) calls in generate_poem.
    print(generable)
def generate_poem(language, corpus, db, tag_list, theme='', tag_separator='_',
                  newline='&'):
    """
    Generates a poem.

    A chunk of the corpus is POS-tagged, the words whose tag is in tag_list
    are replaced with words found around the theme in the graph, and the
    result is turned back into text. As side effects the poem is written to
    'runo.php' and generate_metadata is called for 'runo_metadata.txt'.

    :param language: language id; the code has branches for en, fr and fi
    :param corpus: poetry corpus
    :param db: graph database
    :param tag_list: list of POS-tags of the words that should be replaced
    :param theme: theme of the poem; when empty or whitespace a random
        theme is chosen from the graph
    :param tag_separator: character that separates a word and a tag in the
        graph
    :param newline: the character that marks the place of the newline
        character
    :return: tuple (theme, orig_poem, new_poem, replacing_words, message);
        on failure ('', '', error message, [], '')
    """
    # Minimum proportion of new words in the poem
    # NOTE(review): `0 % 25` evaluates to 0, so no minimum is enforced for
    # any language. Kept as is; if 25 percent was intended, change it
    # deliberately.
    min_replaced = 0 % 25
    if language == 'fi':
        min_replaced = 0

    # Function to use to tag English.
    # Options:
    #   Stanford tagger (recommended): pos_tag_stanford
    tagger = 'pos_tag_stanford'

    # Function to use to tag French.
    # Options:
    #   MElt tagger (recommended): pos_tag_melt
    #   Stanford tagger (doesn't tag proper nouns): pos_tag_stanford
    tagger_fr = 'pos_tag_melt'

    # Error messages to show to the user
    error_message = ('ERROR\n'
                     'Not enough words found around the theme \'%s\'. '
                     'Try with another theme.' % theme)
    error_message2 = ('ERROR\n'
                      'Sorry, something went wrong. Try again!')
    error = ('', '', error_message, [], '')
    error2 = ('', '', error_message2, [], '')

    theme = unicode(theme)

    while True:
        for _ in range(5):
            ready = False
            random_theme = False
            if theme == '' or theme.isspace():
                theme = choose_random_theme(db, 1)
                random_theme = True
            # Try to solve the POS of the theme if the user hasn't provided
            # it
            elif theme.find(tag_separator) == -1:
                theme = theme.lower()
                tagged_theme = None
                if language == 'en':
                    tagged_theme = tag_en.quick_pos_tag(
                        theme, tagger=tagger)[0]
                elif language == 'fr':
                    tagged_theme = tag_fr.quick_pos_tag(
                        theme, output='tuple', tagger=tagger_fr)[0]
                # No quick tagger is wired in for other languages (e.g. fi).
                # The lowercased theme is then looked up as it is instead of
                # failing on an unbound token/pos_tag.
                if tagged_theme is not None:
                    (token, pos_tag) = tagged_theme
                    # Correspondences for the English LSA graph
                    tag_dict = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'NNP': 'n'}
                    if tag_separator == '\\' and pos_tag in tag_dict:
                        theme = token + tag_separator + tag_dict[pos_tag]
                    else:
                        theme = token + tag_separator + pos_tag

            sys.stdout.write('Looking for theme words...')
            try:
                (words, goodness) = theme_based_words(theme, db, 20)
                print(' done.')
            except Exception:
                # Exception instead of a bare except, so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                if random_theme:
                    theme = ''
                    print('\nNot enough theme words found.')
                    continue
                else:
                    return error

            # Try to find a poem with enough replaceable words
            # orig_poem is a string of raw text
            for _ in range(5):
                # A new chunk is fetched on every attempt; tagging the same
                # chunk again could never change the outcome.
                orig_poem = get_chunk_of_corpus(corpus)
                orig_poem = helpers.capitalize_after_dot(orig_poem)
                if language == 'en':
                    tagged = tag_en.pos_tag(orig_poem, tagger=tagger)
                elif language == 'fr':
                    tagged = tag_fr.pos_tag(orig_poem, tagger=tagger_fr)
                elif language == 'fi':
                    tagged = tag_fi.pos_tag(orig_poem)

                number_of_tagged = len(tagged)
                tag_counter = sum(1 for t in tagged if t[1] in tag_list)
                # An empty tagging result counts as 0 percent replaceable
                # rather than raising ZeroDivisionError.
                if number_of_tagged == 0:
                    percent_replaceable = 0
                else:
                    percent_replaceable = (float(tag_counter)
                                           / float(number_of_tagged) * 100)
                if percent_replaceable > min_replaced:
                    break
                print('Not enough replaceable words.')

            sys.stdout.write('Replacing words...')
            try:
                replaced = replace_all(language, tagged, words, tag_list,
                                       tagger, tag_separator)
                ready = True
                print(' done.')
                break
            except Exception:
                if random_theme:
                    theme = ''
                    continue
                else:
                    return error

        # All five attempts failed without producing a replacement.
        if not ready:
            if random_theme:
                return error2
            else:
                return error

        sys.stdout.write('Analysing the new words...')
        if language == 'en':
            (analysed, replacing_words) = english_specific.add_morph(
                tagged, replaced)
        elif language == 'fr':
            (analysed, replacing_words) = french_specific.add_morph(
                tagged, replaced)
        elif language == 'fi':
            (analysed, replacing_words) = finnish_specific.add_morph(
                tagged, replaced)
        print(' done.')

        number_of_replaced = len(replacing_words)
        number_of_words = len(analysed)
        if number_of_words == 0:
            percent_replaced = 0
        else:
            percent_replaced = (float(number_of_replaced)
                                / float(number_of_words) * 100)
        if percent_replaced > min_replaced:
            message = (str(round(percent_replaced, 0))
                       + ' percent of the words replaced.')
            break
        else:
            print(percent_replaced)
            print('Not enough words replaced.')
            # A random theme is dropped so that the next round picks a new
            # one; a theme given by the user is kept.
            if random_theme:
                theme = ''

    sys.stdout.write('Generating word forms...')
    if language == 'en':
        analysed = english_specific.correct_english(analysed)
        new_poem = morphg_en.generate_word_form(analysed)
    elif language == 'fr':
        analysed = french_specific.place_adjectives(analysed)
        analysed = french_specific.number_agreement(analysed)
        analysed = french_specific.gender_agreement(analysed)
        new_poem = morphg_fr.generate_word_form(analysed)
    elif language == 'fi':
        # The generated text has to be bound to new_poem like in the other
        # branches; otherwise new_poem is undefined below.
        new_poem = morphg_fi.generate_word_form(analysed)
    print(' done.')

    new_poem = new_poem.rstrip()
    # Only the word part of the theme is shown to the user.
    theme = theme.split(tag_separator)[0]

    # Generate metadata.
    generate_metadata(replaced, goodness, 'runo_metadata.txt', newline)

    # Generate php file.
    # NOTE(review): theme and poem text are written into a .php file without
    # escaping; confirm that they can never contain markup or PHP tags.
    lines = new_poem.split('\n')
    paragraph = ('<p>'
                 + ''.join(line + '<br />\n' for line in lines)
                 + '</p>')
    # The with block closes the file also when a write fails.
    with codecs.open('runo.php', 'w', 'utf-8') as f:
        f.write('<h2>' + theme.capitalize() + '</h2>\n')
        f.write(paragraph)

    return (theme, orig_poem, new_poem, replacing_words, message)
def generate_poem(language, corpus, db, tag_list, theme='', tag_separator='_',
                  newline='&'):
    """
    Generates a poem.

    A chunk of the corpus is POS-tagged, the words whose tag is in tag_list
    are replaced with words found around the theme in the graph, and the
    result is turned back into text. As side effects the poem is written to
    'runo.php' and generate_metadata is called for 'runo_metadata.txt'.

    :param language: language id; the code has branches for en, fr and fi
    :param corpus: poetry corpus
    :param db: graph database
    :param tag_list: list of POS-tags of the words that should be replaced
    :param theme: theme of the poem; when empty or whitespace a random
        theme is chosen from the graph
    :param tag_separator: character that separates a word and a tag in the
        graph
    :param newline: the character that marks the place of the newline
        character
    :return: tuple (theme, orig_poem, new_poem, replacing_words, message);
        on failure ('', '', error message, [], '')
    """
    # Minimum proportion of new words in the poem
    # NOTE(review): `0 % 25` evaluates to 0, so no minimum is enforced for
    # any language. Kept as is; if 25 percent was intended, change it
    # deliberately.
    min_replaced = 0 % 25
    if language == 'fi':
        min_replaced = 0

    # Function to use to tag English.
    # Options:
    #   Stanford tagger (recommended): pos_tag_stanford
    tagger = 'pos_tag_stanford'

    # Function to use to tag French.
    # Options:
    #   MElt tagger (recommended): pos_tag_melt
    #   Stanford tagger (doesn't tag proper nouns): pos_tag_stanford
    tagger_fr = 'pos_tag_melt'

    # Error messages to show to the user
    error_message = ('ERROR\n'
                     'Not enough words found around the theme \'%s\'. '
                     'Try with another theme.' % theme)
    error_message2 = ('ERROR\n'
                      'Sorry, something went wrong. Try again!')
    error = ('', '', error_message, [], '')
    error2 = ('', '', error_message2, [], '')

    theme = unicode(theme)

    while True:
        for _ in range(5):
            ready = False
            random_theme = False
            if theme == '' or theme.isspace():
                theme = choose_random_theme(db, 1)
                random_theme = True
            # Try to solve the POS of the theme if the user hasn't provided
            # it
            elif theme.find(tag_separator) == -1:
                theme = theme.lower()
                tagged_theme = None
                if language == 'en':
                    tagged_theme = tag_en.quick_pos_tag(
                        theme, tagger=tagger)[0]
                elif language == 'fr':
                    tagged_theme = tag_fr.quick_pos_tag(
                        theme, output='tuple', tagger=tagger_fr)[0]
                # No quick tagger is wired in for other languages (e.g. fi).
                # The lowercased theme is then looked up as it is instead of
                # failing on an unbound token/pos_tag.
                if tagged_theme is not None:
                    (token, pos_tag) = tagged_theme
                    # Correspondences for the English LSA graph
                    tag_dict = {'NN': 'n', 'VB': 'v', 'JJ': 'a', 'NNP': 'n'}
                    if tag_separator == '\\' and pos_tag in tag_dict:
                        theme = token + tag_separator + tag_dict[pos_tag]
                    else:
                        theme = token + tag_separator + pos_tag

            sys.stdout.write('Looking for theme words...')
            try:
                (words, goodness) = theme_based_words(theme, db, 20)
                print(' done.')
            except Exception:
                # Exception instead of a bare except, so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                if random_theme:
                    theme = ''
                    print('\nNot enough theme words found.')
                    continue
                else:
                    return error

            # Try to find a poem with enough replaceable words
            # orig_poem is a string of raw text
            for _ in range(5):
                # A new chunk is fetched on every attempt; tagging the same
                # chunk again could never change the outcome.
                orig_poem = get_chunk_of_corpus(corpus)
                orig_poem = helpers.capitalize_after_dot(orig_poem)
                if language == 'en':
                    tagged = tag_en.pos_tag(orig_poem, tagger=tagger)
                elif language == 'fr':
                    tagged = tag_fr.pos_tag(orig_poem, tagger=tagger_fr)
                elif language == 'fi':
                    tagged = tag_fi.pos_tag(orig_poem)

                number_of_tagged = len(tagged)
                tag_counter = sum(1 for t in tagged if t[1] in tag_list)
                # An empty tagging result counts as 0 percent replaceable
                # rather than raising ZeroDivisionError.
                if number_of_tagged == 0:
                    percent_replaceable = 0
                else:
                    percent_replaceable = (float(tag_counter)
                                           / float(number_of_tagged) * 100)
                if percent_replaceable > min_replaced:
                    break
                print('Not enough replaceable words.')

            sys.stdout.write('Replacing words...')
            try:
                replaced = replace_all(language, tagged, words, tag_list,
                                       tagger, tag_separator)
                ready = True
                print(' done.')
                break
            except Exception:
                if random_theme:
                    theme = ''
                    continue
                else:
                    return error

        # All five attempts failed without producing a replacement.
        if not ready:
            if random_theme:
                return error2
            else:
                return error

        sys.stdout.write('Analysing the new words...')
        if language == 'en':
            (analysed, replacing_words) = english_specific.add_morph(
                tagged, replaced)
        elif language == 'fr':
            (analysed, replacing_words) = french_specific.add_morph(
                tagged, replaced)
        elif language == 'fi':
            (analysed, replacing_words) = finnish_specific.add_morph(
                tagged, replaced)
        print(' done.')

        number_of_replaced = len(replacing_words)
        number_of_words = len(analysed)
        if number_of_words == 0:
            percent_replaced = 0
        else:
            percent_replaced = (float(number_of_replaced)
                                / float(number_of_words) * 100)
        if percent_replaced > min_replaced:
            message = (str(round(percent_replaced, 0))
                       + ' percent of the words replaced.')
            break
        else:
            print(percent_replaced)
            print('Not enough words replaced.')
            # A random theme is dropped so that the next round picks a new
            # one; a theme given by the user is kept.
            if random_theme:
                theme = ''

    sys.stdout.write('Generating word forms...')
    if language == 'en':
        analysed = english_specific.correct_english(analysed)
        new_poem = morphg_en.generate_word_form(analysed)
    elif language == 'fr':
        analysed = french_specific.place_adjectives(analysed)
        analysed = french_specific.number_agreement(analysed)
        analysed = french_specific.gender_agreement(analysed)
        new_poem = morphg_fr.generate_word_form(analysed)
    elif language == 'fi':
        # The generated text has to be bound to new_poem like in the other
        # branches; otherwise new_poem is undefined below.
        new_poem = morphg_fi.generate_word_form(analysed)
    print(' done.')

    new_poem = new_poem.rstrip()
    # Only the word part of the theme is shown to the user.
    theme = theme.split(tag_separator)[0]

    # Generate metadata.
    generate_metadata(replaced, goodness, 'runo_metadata.txt', newline)

    # Generate php file.
    # NOTE(review): theme and poem text are written into a .php file without
    # escaping; confirm that they can never contain markup or PHP tags.
    lines = new_poem.split('\n')
    paragraph = ('<p>'
                 + ''.join(line + '<br />\n' for line in lines)
                 + '</p>')
    # The with block closes the file also when a write fails.
    with codecs.open('runo.php', 'w', 'utf-8') as f:
        f.write('<h2>' + theme.capitalize() + '</h2>\n')
        f.write(paragraph)

    return (theme, orig_poem, new_poem, replacing_words, message)