def test_append():
    """Appending one parser's results into another merges the timestamps of both."""
    daikoku = Parser("daikoku").parse_html(Config.test_resource("daikoku1.html"))
    isogo = Parser("isogo").parse_html(Config.test_resource("isogo1.html"))
    daikoku.append(isogo)
    stamps = daikoku.get_timestamps()
    print(stamps)
    assert stamps['choka']
    assert stamps['newsline']
def test_cleansing_comment():
    """clensing_summary_comment yields a non-empty result for both comment fixtures.

    Fix: use `with open(...)` context managers instead of manual open/close,
    and fold the two copy-pasted fixture checks into one loop.
    NOTE(review): "clensing" is the project API's own spelling — left as-is.
    """
    for resource in ("comment1.txt", "comment2.txt"):
        with open(Config.test_resource(resource), encoding='utf-8') as f:
            raw = f.read()
        cleaned = Converter.clensing_summary_comment(raw)
        assert cleaned
        print(cleaned)
def test_html_no_data():
    """check_html_no_data marks a not-found page, leaving page_found falsy.

    Fix: read the fixture via a `with open(...)` context manager instead of
    manual open/close, guaranteeing the handle is released even on error.
    """
    html_path = Config.test_resource("not_found1.html")
    # euc_jp with errors='ignore' mirrors how the production download decodes pages
    with open(html_path, encoding='euc_jp', errors='ignore') as f:
        html = f.read()
    download = Download().load_config().check_html_no_data(html)
    assert not download.page_found
def test_honmoku_html_parseer():
    """Parsing the honmoku fixture yields both choka and newsline timestamps."""
    # NOTE(review): "parseer" looks like a typo for "parser"; renaming would
    # change the test ID, so the name is kept as-is.
    stamps = Parser("honmoku").parse_html(
        Config.test_resource("honmoku1.html")).get_timestamps()
    print(stamps)
    assert stamps['choka']
    assert stamps['newsline']
def test_get_path():
    """Every Config path/URL helper returns a truthy (non-empty) value."""
    datastore_path = Config.get_datastore_path("choka.csv")
    download_path = Config.get_download_path("choka_daikoku_2021_04_001.html")
    resource_path = Config.test_resource("daikoku1.html")
    url = Config.get_url("daikoku")
    download_file = Config.get_download_file("daikoku", 2021, 4)
    db_path = Config.get_db_path()
    config_path = Config.get_config_path("config.toml")
    assert datastore_path
    assert download_path
    assert resource_path
    assert url
    assert download_file
    assert db_path
    assert config_path
def test_append_load():
    """CSV rows accumulate in the datastore across successive parse/export/import cycles."""
    store = Datastore(TEST_DB).reset_database()

    def load(spot, resource):
        # Parse one fixture, export it to CSV, and pull it into the datastore.
        Parser(spot).parse_html(Config.test_resource(resource)).export('csv')
        store.csv_import()

    load("daikoku", "daikoku1.html")
    load("isogo", "isogo1.html")
    assert store.load_counts['choka.csv'] == 19
    load("honmoku", "honmoku1.html")
    assert store.load_counts['choka.csv'] == 54
def test_daikoku_html_parser():
    """The daikoku fixture parses into choka/comment/newsline frames with timestamps."""
    parser = Parser("daikoku").parse_html(Config.test_resource("daikoku1.html"))
    for frame in (parser.choka, parser.comment, parser.newsline):
        print(frame.columns)
    stamps = parser.get_timestamps()
    assert stamps['choka']
    assert stamps['newsline']
def test_initial_export():
    """Importing the daikoku fixture into a fresh database loads the expected row counts."""
    store = Datastore(TEST_DB).reset_database()
    Parser("daikoku").parse_html(
        Config.test_resource("daikoku1.html")).export('csv')
    store.csv_import()
    expected = {'choka.csv': 12, 'comment.csv': 1, 'newsline.csv': 9}
    assert store.load_counts == expected
def test_not_found():
    """parse_html yields a falsy result for a page that contains no data."""
    result = Parser("daikoku").parse_html(
        Config.test_resource("not_found1.html"))
    assert not result
def test_export():
    """Exporting a parsed daikoku fixture to CSV completes without raising."""
    Parser("daikoku").parse_html(
        Config.test_resource("daikoku1.html")).export('csv')
def test_daikoku_only_newsline_parser():
    """A newsline-only fixture yields a newsline timestamp but no choka timestamp."""
    stamps = Parser("daikoku").parse_html(
        Config.test_resource("daikoku1_newsline.html")).get_timestamps()
    assert not stamps['choka']
    assert stamps['newsline']