Пример #1
0
    def _convert_story_tags(self, old_story):
        """
        Translate the comma-separated original tag ids stored on a story row into the ids of the
        already-converted tags held on this instance.
        :param old_story: Story row; its `rid`, `catid`, `classes` and `charid` fields are read through `key_find`
        with an empty-string default and split on commas.
        :return: Dict with the keys `rating`, `categories`, `classes` and `characters`, each holding the list of
        converted tag ids whose `original_tagid` appears in the matching field of the story.
        """
        def original_ids(field):
            # A set gives constant-time membership tests while scanning the converted tags below.
            return set(key_find(field, old_story, '').split(','))

        def matching_ids(converted_tags, wanted):
            # The story fields are strings, so the original tag id is compared in its string form.
            return [tag['id'] for tag in converted_tags if str(tag['original_tagid']) in wanted]

        return {
            'rating': matching_ids(self.ratings, original_ids('rid')),
            'categories': matching_ids(self.categories, original_ids('catid')),
            'classes': matching_ids(self.classes, original_ids('classes')),
            # NOTE(review): this lookup previously scanned self.classes, so character ids were matched against
            # class tags. Assumes self.characters is populated the same way as the other tag lists - confirm.
            'characters': matching_ids(self.characters, original_ids('charid'))
        }
Пример #2
0
    def convert_stories(self, language_code):
        """
        Convert eFiction stories to the Open Doors format. Note that we leave all the tag columns (rating, relationships,
        tags, categories etc) empty because they are all in the `tags` table and will be populated with AO3 tags when
        this archive is processed in the ODAP.
        :param language_code: Value written to the `language_code` column of every converted story.
        :return: All rows of the Open Doors `stories` table, as returned by `execute_and_fetchall`.
        """
        self.logger.info("Converting stories...")
        old_stories, current, total = self.sql.read_table_with_total(
            self.working_original, "stories")

        # Placeholders let the database driver quote the values; interpolating a Python tuple repr into the
        # statement breaks on None and on awkward quoting, and allows SQL injection through story text.
        insert_story = """
            INSERT INTO stories (id, title, summary, notes, date, updated, language_code)
            VALUES (%s, %s, %s, %s, %s, %s, %s);
        """

        for old_story in old_stories:
            new_story = {
                'id': old_story['sid'],
                'title': key_find('title', old_story, '').strip(),
                'summary': normalize(old_story['summary']),
                'notes': key_find('storynotes', old_story, '').strip(),
                'date': str(old_story['date']),
                'updated': str(old_story['updated']),
                'language_code': language_code
            }

            self.logger.debug(
                f"Converting story metadata for '{new_story['title']}'")
            self.sql.execute(
                self.working_open_doors, insert_story,
                (new_story['id'], new_story['title'], new_story['summary'],
                 new_story['notes'], new_story['date'], new_story['updated'],
                 new_story['language_code']))

            self.logger.debug("  tags...")
            tags = self._convert_story_tags(old_story)
            self._convert_tags_join(new_story, tags)

            self.logger.debug("  authors...")
            # The story's own author first, then any coauthors found for the work.
            self._convert_author_join(new_story, old_story['uid'])
            for coauthor in self.fetch_coauthors(new_story):
                self._convert_author_join(new_story, coauthor)

            current = print_progress(current, total, "stories converted")
        return self.sql.execute_and_fetchall(self.working_open_doors,
                                             "SELECT * FROM stories")
Пример #3
0
    def convert_stories(self, language_code):
        """
        Convert eFiction stories to the Open Doors format.
        :param language_code: Value written to the `language_code` column of every converted story.
        :return: All rows of the Open Doors `stories` table, as returned by `execute_and_fetchall`.
        """
        self.logger.info("Converting stories...")
        old_stories, current, total = self.sql.read_table_with_total(self.working_original, "stories")

        # Placeholders let the database driver quote the values; interpolating a Python tuple repr into the
        # statement breaks on None and on awkward quoting, and allows SQL injection through story text.
        insert_story = """
            INSERT INTO stories (id, title, summary, notes, date, updated, language_code)
            VALUES (%s, %s, %s, %s, %s, %s, %s);
        """

        for old_story in old_stories:
            new_story = {
                'id': old_story['sid'],
                'title': key_find('title', old_story, '').strip(),
                'summary': normalize(old_story['summary']),
                'notes': key_find('storynotes', old_story, '').strip(),
                'date': str(old_story['date']),
                'updated': str(old_story['updated']),
                'language_code': language_code
            }

            self.logger.debug(f"Converting story metadata for '{new_story['title']}'")
            self.sql.execute(
                self.working_open_doors, insert_story,
                (new_story['id'], new_story['title'], new_story['summary'],
                 new_story['notes'], new_story['date'], new_story['updated'],
                 new_story['language_code']))

            self.logger.debug("  tags...")
            tags = self._convert_story_tags(old_story)
            self._convert_tags_join(new_story, tags)

            self.logger.debug("  authors...")
            self._convert_author_join(new_story, old_story['uid'])
            # The `coauthors` field, when present and truthy, is read as a comma-separated list of author ids.
            coauthors = []
            if key_find('coauthors', old_story):
                coauthors = [authorid.strip() for authorid in old_story['coauthors'].split(",")]
            for coauthor in coauthors:
                self._convert_author_join(new_story, coauthor)

            current = print_progress(current, total, "stories converted")
        return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM stories")
Пример #4
0
    def __load_chapter_text_into_db(self, chapter_paths: List[dict]):
        """
        Load chapters text from the `stories` files into the chapters table. Tries UTF-8 first, then Windows 1252,
        then Latin 1, recording a warning each time a decoding attempt fails.
        :param chapter_paths: List of chapter metadata dicts; the `chap_id` and `path` keys are read here.
        :return: All rows of the Open Doors `chapters` table, as returned by `execute_and_fetchall`.
        """
        warnings = []
        self.logger.info("...loading data from chapters table...")
        old_chapters, current, total = self.sql.read_table_with_total(
            self.working_original, "chapters")

        self.logger.info("...removing rows from existing chapters table...")
        self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")

        # Index the chapter files once instead of rescanning the whole list for every chapter row.
        # setdefault keeps the first entry for a given id, matching a first-match list scan.
        paths_by_chapter_id = {}
        for chapter_path in chapter_paths:
            paths_by_chapter_id.setdefault(chapter_path['chap_id'], chapter_path)

        insert_chapter = """
            INSERT INTO chapters (id, position, title, text, story_id, notes)
            VALUES (%s, %s, %s, %s, %s, %s);
        """

        self.logger.info("...loading text from chapter files...")
        for old_chapter in old_chapters:
            chapid = old_chapter['chapid']
            # Ids in chapter_paths are compared against the string form of the database id.
            chapter = paths_by_chapter_id.get(str(chapid))
            if chapter is not None:
                file = chapter['path']
                try:
                    with open(file, 'r', encoding="utf-8") as f:
                        raw = f.read()
                except UnicodeDecodeError:
                    warnings.append(
                        f"Chapter with id {chapid} contains non-ASCII characters which are not valid "
                        f"UTF-8. Trying Windows 1252...")
                    try:
                        with open(file, 'r', encoding='cp1252') as f:
                            raw = f.read()
                    except UnicodeDecodeError:
                        warnings.append(
                            f"Chapter with id {chapid} contains non-ASCII characters which are not valid "
                            f"Windows 1252. Trying Latin 1...")
                        # Latin 1 maps every byte value to a character, so this last attempt cannot
                        # raise UnicodeDecodeError.
                        with open(file, 'r', encoding='latin-1') as f:
                            raw = f.read()

                text = normalize(raw)
                if key_find('endnotes', old_chapter):
                    # End notes are appended to the chapter text after a horizontal rule.
                    text = text + f"\n\n\n<hr>\n{old_chapter['endnotes']}"

                self.sql.execute(
                    self.working_open_doors, insert_chapter,
                    (chapid, old_chapter['inorder'], old_chapter['title'],
                     text, old_chapter['sid'], old_chapter['notes']))
            # Progress advances for every chapter row, including those without a matching file.
            current = print_progress(current, total, "chapters converted")

        # If there were any errors, display a warning for the user to check the affected chapters
        if warnings:
            self.logger.warning("\n".join(warnings))
            self.logger.error(
                make_banner(
                    '-',
                    "There were warnings; check the affected chapters listed above to make sure curly quotes "
                    "and accented characters are correctly displayed."))
        return self.sql.execute_and_fetchall(self.working_open_doors,
                                             "SELECT * FROM chapters;")