def __load_chapter_text_into_db(self, chapter_paths: List[dict]):
    """
    Load chapters text from the `stories` files into the chapters table. Uses Windows 1252 if UTF-8 fails.

    Reads every row of the original `chapters` table, truncates the `chapters` table in the Open Doors
    working database, then inserts one row for each original chapter that has a matching entry in
    `chapter_paths`. Original chapters without a matching entry are not inserted, but still count towards
    the progress display.

    :param chapter_paths: List of chapter metadata including path, author id and chapter id. Only the
        'chap_id' key (matched against str(chapid)) and the 'path' key are read here.
    :return: The result of `SELECT * FROM chapters;` on the Open Doors working database
    """
    decode_warnings = []
    self.logger.info("...loading data from chapters table...")
    old_chapters, current, total = self.sql.read_table_with_total(
        self.working_original, "chapters")

    self.logger.info("...removing rows from existing chapters table...")
    self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")

    self.logger.info("...loading text from chapter files...")
    # Index the paths once instead of rescanning the whole list for every chapter.
    # setdefault keeps the first entry per id, the same one a first-match scan would pick.
    paths_by_chap_id = {}
    for chapter_path in chapter_paths:
        paths_by_chap_id.setdefault(chapter_path['chap_id'], chapter_path)

    query = """
        INSERT INTO chapters (id, position, title, text, story_id, notes)
        VALUES (%s, %s, %s, %s, %s, %s);
    """

    for old_chapter in old_chapters:
        chapid = old_chapter['chapid']
        chapter = paths_by_chap_id.get(str(chapid))
        if chapter is not None:
            path = chapter['path']
            try:
                # Be explicit about UTF-8: the default encoding is locale-dependent, so without it the
                # Windows 1252 fallback (and its warning) would depend on the machine running the script.
                with open(path, 'r', encoding='utf-8') as f:
                    raw = f.read()
            except UnicodeDecodeError:
                decode_warnings.append(
                    f"Chapter with id {chapid} contains non-ASCII characters which are not valid "
                    f"UTF-8. Trying Windows 1252...")
                with open(path, 'r', encoding='cp1252') as f:
                    raw = f.read()

            text = normalize(raw)
            # End notes have no column of their own here; append them below a horizontal rule
            if old_chapter['endnotes']:
                text = text + f"\n\n\n<hr>\n{old_chapter['endnotes']}"

            self.sql.execute(
                self.working_open_doors, query,
                (chapid, old_chapter['inorder'], old_chapter['title'], text,
                 old_chapter['sid'], old_chapter['notes']))
        current = print_progress(current, total, "chapters converted")

    # If there were any errors, display a warning for the user to check the affected chapters
    if decode_warnings:
        self.logger.warning("\n".join(decode_warnings))
        self.logger.error(
            make_banner(
                '-',
                "There were warnings; check the affected chapters listed above to make sure curly quotes "
                "and accented characters are correctly displayed."))
    return self.sql.execute_and_fetchall(self.working_open_doors,
                                         "SELECT * FROM chapters;")
def convert_stories(self, language_code):
    """
    Convert eFiction stories to the Open Doors format.

    Note that we leave all the tag columns (rating, relationships, tags, categories etc) empty because they
    are all in the `tags` table and will be populated with AO3 tags when this archive is processed in the
    ODAP.

    For every row of the original `stories` table this inserts one row into the Open Doors `stories` table,
    then creates the tag joins and the author joins (main author plus any co-authors).

    :param language_code: Value written to the `language_code` column of every converted story
    :return: The result of `SELECT * FROM stories` on the Open Doors working database
    """
    self.logger.info("Converting stories...")
    old_stories, current, total = self.sql.read_table_with_total(
        self.working_original, "stories")

    # Placeholders let the database driver do the quoting. Interpolating a Python tuple into the
    # statement is not valid SQL escaping and breaks on None values or quotes in the text.
    query = """
        INSERT INTO stories (id, title, summary, notes, date, updated, language_code)
        VALUES (%s, %s, %s, %s, %s, %s, %s);
    """

    for old_story in old_stories:
        new_story = {
            'id': old_story['sid'],
            'title': key_find('title', old_story, '').strip(),
            'summary': normalize(old_story['summary']),
            'notes': key_find('storynotes', old_story, '').strip(),
            'date': str(old_story['date']),
            'updated': str(old_story['updated']),
            'language_code': language_code,
        }

        self.logger.debug(
            f"Converting story metadata for '{new_story['title']}'")

        self.sql.execute(
            self.working_open_doors, query,
            (new_story['id'], new_story['title'], new_story['summary'],
             new_story['notes'], new_story['date'], new_story['updated'],
             new_story['language_code']))

        self.logger.debug(" tags...")
        tags = self._convert_story_tags(old_story)
        self._convert_tags_join(new_story, tags)

        self.logger.debug(" authors...")
        self._convert_author_join(new_story, old_story['uid'])

        # Find if there are any coauthors for the work
        for coauthor in self.fetch_coauthors(new_story):
            self._convert_author_join(new_story, coauthor)

        current = print_progress(current, total, "stories converted")

    return self.sql.execute_and_fetchall(self.working_open_doors,
                                         "SELECT * FROM stories")
def convert_stories(self):
    """
    Convert eFiction stories to the Open Doors format.

    For every row of the original `stories` table this inserts one row into the Open Doors `stories` table,
    then creates the tag joins and the author joins. Co-authors are read from the story's `coauthors`
    value, which is parsed as a comma-separated list of author ids.

    :return: The result of `SELECT * FROM stories` on the Open Doors working database
    """
    self.logger.info("Converting stories...")
    old_stories, current, total = self.sql.read_table_with_total(
        self.working_original, "stories")

    # Placeholders let the database driver do the quoting. Interpolating a Python tuple into the
    # statement is not valid SQL escaping and breaks on None values or quotes in the text.
    query = """
        INSERT INTO stories (id, title, summary, notes, date, updated)
        VALUES (%s, %s, %s, %s, %s, %s);
    """

    for old_story in old_stories:
        new_story = {
            'id': old_story['sid'],
            'title': (old_story['title'] or '').strip(),
            'summary': normalize(old_story['summary']),
            'notes': (old_story['storynotes'] or '').strip(),
            'date': str(old_story['date']),
            'updated': str(old_story['updated']),
        }

        self.logger.debug(
            f"Converting story metadata for '{new_story['title']}'")

        self.sql.execute(
            self.working_open_doors, query,
            (new_story['id'], new_story['title'], new_story['summary'],
             new_story['notes'], new_story['date'], new_story['updated']))

        self.logger.debug(" tags...")
        tags = self._convert_story_tags(old_story)
        self._convert_tags_join(new_story, tags)

        self.logger.debug(" authors...")
        self._convert_author_join(new_story, old_story['uid'])

        # `coauthors` may be None or empty. Blank entries (e.g. from a trailing comma) are skipped
        # so that no author join is created for an empty id.
        raw_coauthors = old_story['coauthors'] or ''
        for authorid in raw_coauthors.split(","):
            coauthor = authorid.strip()
            if coauthor:
                self._convert_author_join(new_story, coauthor)

        current = print_progress(current, total, "stories converted")

    return self.sql.execute_and_fetchall(self.working_open_doors,
                                         "SELECT * FROM stories")