示例#1
0
    def run(cls):
        """Build the Pokemon table from saved detail pages and write it as CSV.

        Every ``html/detail_page/*.html`` file below the base directory
        (``os.path.dirname`` applied twice to ``cls.FILE_PATH``) is parsed
        into one ``Pokemon``. The number and name come from the file name,
        which must look like ``<digits>_<name>.html``; files that do not
        follow this convention are reported and skipped. The remaining
        attributes are extracted from the parsed page by the class helpers.

        After all pages are processed, the ``同一種族値`` column is filled
        with the result of ``Utils.is_same_status`` for each row and the table
        is written to ``output/pokemon_db.csv`` under the base directory.
        """
        print("### スクレイピング開始 ###")

        base_dir = os.path.dirname(os.path.dirname(cls.FILE_PATH))
        pokemon_df = cls.__init_pokemon_df(Settings.COLUMNS)
        detail_page_html_paths = sorted(
            glob(os.path.join(base_dir, "html/detail_page/*.html")))

        # Matched against the base name only, so the result does not depend on
        # the platform's path separator or on how parent directories are named.
        file_name_re = re.compile(r"([0-9]+)_(.+)\.html")

        rows = []
        for detail_page_html_path in detail_page_html_paths:
            matched = file_name_re.fullmatch(
                os.path.basename(detail_page_html_path))
            if matched is None:
                # A stray file must not abort the whole run.
                print("SKIP: {} はファイル名が <No>_<名前>.html の形式ではありません。".format(
                    detail_page_html_path))
                continue

            pokemon = Pokemon()
            html_str = Utils.read_html(detail_page_html_path)
            detail_page_bs = BeautifulSoup(html_str, "html.parser")

            pokemon.no, pokemon.name = matched.groups()
            pokemon.types = cls.__get_types(detail_page_bs)
            pokemon.abilities = cls.__get_abilities(detail_page_bs)
            pokemon.status = cls.__get_status(detail_page_bs)
            pokemon.is_final = cls.__get_is_final(detail_page_bs)
            pokemon.region = cls.__get_region(pokemon.no, pokemon.name)
            pokemon.is_mega_evolution = cls.__get_is_mega_evolution(
                pokemon.name)

            print(pokemon)

            rows.append(pokemon.reshape_to_list())

        if rows:
            # DataFrame.append was removed in pandas 2.0 and copied the whole
            # frame on every call; build all new rows at once instead.
            # dtype=object keeps the values exactly as produced, like the
            # per-row append of mixed-type Series did.
            rows_df = pd.DataFrame(rows,
                                   columns=Settings.COLUMNS,
                                   dtype=object)
            pokemon_df = pd.concat([pokemon_df, rows_df], ignore_index=True)

        stat_columns = ["HP", "こうげき", "ぼうぎょ", "とくこう", "とくぼう", "すばやさ"]
        stat_db = pokemon_df.loc[:, stat_columns].values.tolist()
        pokemon_df["同一種族値"] = [
            Utils.is_same_status(i, stat, stat_db)
            for i, stat in enumerate(stat_db)
        ]

        csv_dir = Utils.make_dir(base_dir, "output")
        csv_file_path = os.path.join(csv_dir, "pokemon_db.csv")
        pokemon_df.to_csv(csv_file_path, index=False, encoding="utf-8")
        print("FILE: {} を出力しました。".format(csv_file_path))

        print("### スクレイピング終了 ###")