def __init__(self):
    """Create the crawler and the region-code → URL-segment lookup table."""
    self.crawler = Crawler()
    # Short codes used in request paths, mapped to the segments the
    # upstream schedule site expects.
    self.region_url = dict(
        hololive="hololive",
        en="english",
        id="indonesia",
    )
class WebService(FlaskView):
    """HTTP API exposing hololive stream schedules scraped by `Crawler`.

    Routes (Flask-Classful):
      GET /health                          -- liveness probe
      GET /schedules                       -- all schedules
      GET /<region>/schedules              -- schedules for one region
      GET /schedules/today                 -- today's schedules
      GET /<region>/schedules/today        -- today's schedules for one region
    """

    def __init__(self):
        self.crawler = Crawler()
        # Short region codes accepted in URLs, mapped to the path segments
        # the crawler expects.
        self.region_url = {
            "hololive": "hololive",
            "en": "english",
            "id": "indonesia",
        }

    @route("/health", methods=["GET"])
    def health_check(self):
        """Liveness probe: always 200."""
        return jsonify({"status": "OK"}), 200

    @route("/schedules", methods=["GET"])
    @route("<string:region_code>/schedules", methods=["GET"])
    def get_schedules(self, region_code=None):
        """Return all schedules, optionally restricted to one region.

        Fix vs. original: `== None` replaced with `is None`; unknown region
        codes previously raised KeyError (HTTP 500) on the dict lookup and
        otherwise fell through returning None — now an explicit 404.
        """
        if region_code is None:
            return jsonify(self.crawler.get_schedules()), 200
        region = self.region_url.get(region_code)
        if region is not None:
            return jsonify(self.crawler.get_schedules(region)), 200
        return jsonify({"error": "unknown region code"}), 404

    @route("/schedules/today", methods=["GET"])
    @route("<string:region_code>/schedules/today", methods=["GET"])
    def get_today_schedules(self, region_code=None):
        """Return today's schedules, optionally restricted to one region.

        Same fixes as `get_schedules`; a leftover debug print was removed.
        """
        if region_code is None:
            return jsonify(self.crawler.get_today_schedules()), 200
        region = self.region_url.get(region_code)
        if region is not None:
            return jsonify(self.crawler.get_today_schedules(region)), 200
        return jsonify({"error": "unknown region code"}), 404
class TestCrawler:
    """Unit tests for Crawler's schedule-parsing helpers.

    `container_soup` / `schedule_soup` are pytest fixtures supplied elsewhere.
    """

    def setup_class(self):
        # One shared crawler instance for every test in the class.
        self.crawler = Crawler()

    def teardown_method(self):
        pass

    def test_get_date_schedules(self, container_soup):
        days = self.crawler.get_date_schedules(container_soup)
        assert len(days) == 3
        assert [day.date for day in days] == ["08/26", "08/27", "08/28"]
        first = days[0].schedules[0]
        assert first.member == "天音かなた"
        assert first.time == "00:00"
        assert days[2].schedules[0].member == "猫又おかゆ"

    def test_get_date_tags(self, container_soup):
        tags = self.crawler.get_date_tags(container_soup)
        # Each tag pairs a positional offset with a date label.
        assert (tags[0][0], tags[0][1]) == (0, "08/26")
        assert (tags[1][0], tags[1][1]) == (4, "08/27")
        assert (tags[2][0], tags[2][1]) == (16, "08/28")

    def test_generate_schedule(self, schedule_soup):
        sched = self.crawler.generate_schedule(schedule_soup)
        assert sched.member == "獅白ぼたん"
        assert sched.time == "01:59"
        assert sched.youtube_url == "https://www.youtube.com/watch?v=vgX_7SD8Qts"
#!/usr/bin/env python3
# coding: utf8
"""CLI entry point: run the schedule crawler once."""

from service.crawler import Crawler

if __name__ == '__main__':
    Crawler().run()
# Fetch the full historical dataset (latest=0 asks the API for every snapshot,
# not just the most recent one).
data = requests.get('https://lab.isaaclin.cn/nCoV/api/area?latest=0')
data = data.json()
# Fix: the original passed the count as a second print() argument, so the
# "{0}" placeholder was never substituted.
print("一共有{0}条记录。".format(len(data['results'])))
res = data['results']
df = pd.DataFrame(res)

# Normalize column 16 (updateTime) to an "MM-DD" string.
# Fix: the original read df["updateTime"][0] on every iteration, stamping
# row 0's timestamp onto all rows — presumably a typo for [i]; verify
# against the data. The two per-row loops are merged into one.
for i in range(len(df)):
    df.iloc[i, 16] = time_c(df["updateTime"][i])[5:10]

# De-duplication: for each day keep only the last record per province.
# NOTE(review): `date` is defined elsewhere in this file — assumed to be an
# iterable of "MM-DD" strings; confirm its length covers index 41.
tem = df[df['updateTime'] == '03-02']
tem = tem.drop_duplicates(['provinceShortName'], keep='last')
for day in date[1:41]:
    daily = df[df['updateTime'] == day]
    daily = daily.drop_duplicates(['provinceName'], keep='last')
    # Fix: DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported equivalent.
    tem = pd.concat([tem, daily])
tem = tem.reset_index(drop=True)

timeline_bar().render_notebook()

if __name__ == '__main__':
    # NOTE(review): `crawler` is created but unused before main() — confirm
    # whether main() relies on a module-level side effect of Crawler().
    crawler = Crawler()
    main()
def lambda_handler(event, context):
    """AWS Lambda entry point: perform one crawl pass and report success.

    `event` and `context` are required by the Lambda calling convention but
    are not used here.
    """
    Crawler().run()
    body = json.dumps('Crawled successfully')
    return {'statusCode': 200, 'body': body}
def setup_class(self):
    """Pytest class-level hook: build one shared Crawler for all tests."""
    self.crawler = Crawler()