def process(novel):
    """Collect per-speaker word and conversation statistics for *novel*.

    Walks the novel through ``novel.for_each`` with chapter, chapter-done and
    chunk callbacks. Every non-empty word of every chunk is recorded with
    ``data.add_word``. When a chapter is done, each non-narrator speech is
    credited, via ``data.add_talked_to``, to the speakers returned by
    ``get_surrounding_speakers``. Finally the collected data is saved to
    ``args.input + '.stats'`` (``args`` is read from module scope).

    Args:
        novel: Object exposing ``for_each(chapter=..., chapter_done=...,
            chunk=...)``.
    """
    # NOTE(review): edb is loaded but not used by any callback below; it is
    # kept because load() may have side effects -- confirm before removing.
    edb = EntityDatabase()
    edb.load(ENTITIES_FILENAME)
    data = Data()

    # (speaker, words) tuples of the chapter currently being walked. The
    # callbacks share this list through the closure; it is only ever mutated
    # in place, so no ``global``/``nonlocal`` declaration is needed.
    dialogue_stats = []

    def process_chapter(chapter):
        # Start every chapter with an empty list, emptied in place so all
        # callbacks keep referring to the same object.
        del dialogue_stats[:]

    def process_chapter_done(chapter):
        for i, (speaker, words) in enumerate(dialogue_stats):
            if speaker == Data.NARRATOR:
                continue

            before, after = get_surrounding_speakers(i, dialogue_stats)
            score = len(words)
            if before and after and before != after:
                # With different persons before and after this speech,
                # assume half of it was meant for the last person and half
                # of it for the next.
                score //= 2

            if before:
                data.add_talked_to(speaker, before, score)
            if after:
                data.add_talked_to(speaker, after, score)

    def process_chunk(chunk):
        if chunk.is_direct():
            # Direct speech without an identified speaker is attributed to
            # the UNKNOWN placeholder.
            speaker = chunk.speaker or Data.UNKNOWN
        else:
            speaker = Data.NARRATOR

        words = []
        for word in WORD_SPLIT.split(chunk.get_data()):
            word = word.lower().strip()
            if not word:
                continue
            words.append(word)
            data.add_word(word, speaker)

        dialogue_stats.append((speaker, words))

    novel.for_each(
        chapter=process_chapter,
        chapter_done=process_chapter_done,
        chunk=process_chunk,
    )
    data.save(args.input + '.stats')
def process(novel):
    """Run speaker identification on every chapter of *novel*.

    Loads the entity database from ``ENTITIES_FILENAME`` and, through
    ``novel.for_each``, calls ``chapter.identify_speakers`` with that
    database each time a chapter is done.
    """
    entity_db = EntityDatabase()
    entity_db.load(ENTITIES_FILENAME)

    def on_chapter_done(chapter):
        # The entity database is handed to the chapter for speaker lookup.
        chapter.identify_speakers(entity_db)

    callbacks = {"chapter_done": on_chapter_done}
    novel.for_each(**callbacks)
def process(novel):
    """Feed every sentence and finished chapter of *novel* to a ``Context``.

    Loads the entity database from ``ENTITIES_FILENAME``, creates one
    ``Context`` and forwards the ``sentence`` and ``chapter_done`` events of
    ``novel.for_each`` to it.
    """
    entity_db = EntityDatabase()
    entity_db.load(ENTITIES_FILENAME)
    ctx = Context()

    def on_sentence(sentence):
        # Sentences are processed together with the loaded entity database.
        ctx.process_sentence(sentence, entity_db)

    def on_chapter_done(chapter):
        ctx.process_chapter_done(chapter)

    novel.for_each(
        chapter_done=on_chapter_done,
        sentence=on_sentence,
    )
def process(novel):
    """Rebuild the entity database from the sentences of *novel*.

    Loads the database from ``ENTITIES_FILENAME``, calls ``clear_unknown()``
    on it, adds every entity that ``enumerate_entities`` yields for each
    sentence's words, saves the result under
    ``"generated." + ENTITIES_FILENAME`` and prints ``get_stats()``.
    """
    database = EntityDatabase()
    database.load(ENTITIES_FILENAME)
    database.clear_unknown()
    # database.clear_aliases() is left disabled here.

    def on_sentence(sentence):
        sentence_words = sentence.get_words()
        for found in database.enumerate_entities(sentence_words):
            database.add(found)

    novel.for_each(sentence=on_sentence)

    output_filename = "generated." + ENTITIES_FILENAME
    database.save(output_filename)

    stats = database.get_stats()
    print(stats)