def __load_chapter_text_into_db(self, chapter_paths: List[dict]):
    """
    Load chapters text from the `stories` files into the chapters table. Each file is read as UTF-8 first;
    Windows 1252 is used if UTF-8 decoding fails.

    :param chapter_paths: List of chapter metadata including path, author id and chapter id. Only the `chap_id`
        (compared against the string form of the chapter id) and `path` keys are read here.
    :return: the result of selecting every row from the rebuilt chapters table
    """
    warnings = []
    self.logger.info("...loading data from chapters table...")
    old_chapters, current, total = self.sql.read_table_with_total(
        self.working_original, "chapters")

    self.logger.info("...removing rows from existing chapters table...")
    self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")

    # Index the chapter files once instead of rescanning the whole list for every chapter row.
    # setdefault keeps the first entry when an id occurs more than once.
    chapter_by_id = {}
    for chapter_path in chapter_paths:
        chapter_by_id.setdefault(chapter_path['chap_id'], chapter_path)

    self.logger.info("...loading text from chapter files...")
    for old_chapter in old_chapters:
        chapid = old_chapter['chapid']
        chapter = chapter_by_id.get(str(chapid))
        if chapter is not None:
            chapter_file = chapter['path']
            try:
                # Ask for UTF-8 explicitly: without `encoding`, open() decodes with the locale's preferred
                # encoding, so the fallback below would depend on the machine running the step.
                with open(chapter_file, 'r', encoding='utf-8') as f:
                    raw = f.read()
            except UnicodeDecodeError:
                warnings.append(
                    f"Chapter with id {chapid} contains non-ASCII characters which are not valid "
                    f"UTF-8. Trying Windows 1252...")
                with open(chapter_file, 'r', encoding='cp1252') as f:
                    raw = f.read()

            text = normalize(raw)
            # End notes are appended to the chapter text, separated by a horizontal rule
            if old_chapter['endnotes']:
                text = text + f"\n\n\n<hr>\n{old_chapter['endnotes']}"

            query = """
                INSERT INTO chapters (id, position, title, text, story_id, notes)
                VALUES (%s, %s, %s, %s, %s, %s);
            """
            self.sql.execute(
                self.working_open_doors, query,
                (chapid, old_chapter['inorder'], old_chapter['title'], text,
                 old_chapter['sid'], old_chapter['notes']))
        # Progress advances once per chapter row, whether or not a file was found for it
        current = print_progress(current, total, "chapters converted")

    # If there were any errors, display a warning for the user to check the affected chapters
    if warnings:
        self.logger.warning("\n".join(warnings))
        self.logger.error(
            make_banner(
                '-',
                "There were warnings; check the affected chapters listed above to make sure curly quotes "
                "and accented characters are correctly displayed."))
    return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM chapters;")
def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb, step_info: StepInfo):
    """
    Store the collaborators for this step, create its working sub-directory and log the step banner.

    :param config: archive configuration; the `code_name` entry of its `Archive` section is read here
    :param logger: logger used for the banner announcing the step
    :param sql: database helper kept on the instance
    :param step_info: supplies `next_step`, `step_number` and `step_description`
    """
    self.next_step = step_info.next_step
    self.sql, self.logger, self.config = sql, logger, config

    archive_settings = config['Archive']
    self.code_name = archive_settings['code_name']

    step_number = step_info.step_number
    self.step = step_number

    # Called only once the attributes above are set (presumably it reads some of them - confirm)
    self.step_path = self.create_working_sub_dir()

    heading = f' Running Step {step_number}: {step_info.step_description} '
    self.logger.info(make_banner('-', heading))
def run(self):
    """
    Load original database or skip this step.

    When the config check says not to continue, the step is only marked as finished, because it has to be
    performed manually. Any exception raised while running is logged and reported as a failure.

    :return: True if this step was successful and can move on to step 02, False if an error occurred
    """
    step_banner = make_banner('-', ' Running Step 01 ')
    try:
        if not self.__check_config_and_continue():
            self.logger.info(
                "This step needs to be performed manually for archives that are not eFiction or Automated Archive")
            self.finish()
            return True

        self.logger.info(step_banner)
        # Copy the original database file into the working directory and then process it;
        # the edit is only attempted when the copy succeeded
        database_ready = self.__copy_to_working_sub_dir() and self.__edit_db_copy()
        if not database_ready:
            return False
        self.finish()
        return True
    except Exception as error:
        self.logger.error(error)
        return False
@atexit.register
def save_config_and_exit():
    """
    Save the archive config when the interpreter exits.

    The function is registered with atexit, so it also runs when the module-level `config` was never created
    (the module was imported rather than run, or the script stopped before the config was built). In that case
    there is nothing to save and the function returns quietly instead of raising NameError.
    """
    current_config = globals().get("config")
    if current_config is None:
        return
    print("Saving config...")
    current_config.save()


if __name__ == "__main__":
    # The archive code name is the first command-line argument, or is asked for interactively
    if len(sys.argv) > 1:
        code_name = sys.argv[1]
    else:
        code_name = input(
            ">> Please provide a short, lowercase code name with no spaces or punctuation for the archive "
            "you are processing (and make a note of it as you'll need it in future!):"
        )

    banner_text = f"""Starting processing for archive "{code_name}"..."""
    banner = make_banner('=', banner_text)

    # The working directory is the optional second command-line argument
    working_dir = sys.argv[2] if len(sys.argv) > 2 else create_or_set_working_dir(code_name)

    logger = Logging(working_dir, code_name).logger()
    logger.info(banner)

    # `config` must stay a module-level name: save_config_and_exit looks it up at interpreter exit
    config = ArchiveConfig(logger, code_name, working_dir)
    archive_config = config.config

    progress.continue_from_last(archive_config, logger)
def test_make_banner_with_padding(self):
    """Expect make_banner to pad the text with the requested number of spaces on each side."""
    banner1 = make_banner('-', "TEXT", 3)
    # The borders are 10 characters wide: "TEXT" (4) plus 3 spaces of padding on each side
    self.assertEqual(
        "\n----------\n   TEXT   \n----------",
        banner1,
        "text should be padded by the specified number of spaces")
def test_make_banner_with_default_padding(self):
    """Expect make_banner to pad the text with two spaces on each side when no padding is given."""
    banner2 = make_banner('-', "TEXT")
    # The borders are 8 characters wide: "TEXT" (4) plus 2 spaces of padding on each side
    self.assertEqual(
        "\n--------\n  TEXT  \n--------",
        banner2,
        "text should be padded by two spaces when no padding is specified")