def main():
    """Archive game abstracts taken from the Redis WebSocket message queue.

    Loops forever: pops a message from the ``game-abstract-list`` queue,
    updates the ``archiver.heartbeat`` timestamp, parses the response and
    stores every game abstract not seen before in the S3 bucket.

    Raises:
        RuntimeError: If a popped message is not an outbound one.
    """
    redis = redis_.Redis(module_name='game_abstract_crawler')
    # UUID of every archived game -> its `start_time`, used to skip
    # duplicates and to expire old entries.
    archived = {}
    s3_bucket = s3_.Bucket(module_name='game_abstract_crawler')
    retention = datetime.timedelta(days=1)

    while True:
        ws_message = redis.blpop_websocket_message('game-abstract-list')
        redis.set_timestamp('archiver.heartbeat')

        if ws_message['request_direction'] != 'outbound':
            raise RuntimeError(
                'An outbound WebSocket message is expected,'
                ' but got an inbound one.')

        for game_abstract in _parse(ws_message['response']):
            uuid = game_abstract['uuid']
            if uuid not in archived:
                s3_bucket.put_game_abstract(game_abstract)
                logging.info(f'Archived the abstract of the game {uuid}.')
                archived[uuid] = game_abstract['start_time']

        if len(archived) <= 20000:
            continue

        # The bookkeeping dict has grown large: forget games that started
        # more than a day ago.
        now = datetime.datetime.now(tz=datetime.timezone.utc)
        expired = [
            key for key, started_at in archived.items()
            if now - started_at > retention
        ]
        for key in expired:
            del archived[key]
def __init__(self, *, module_name: str, service_name: str):
    """Set up the instance from the service's ``logging.redis`` config.

    Args:
        module_name: Name passed to ``config_.get`` and ``redis_.Redis``.
        service_name: Key of the service section inside the module config.

    Raises:
        KeyError: If an expected configuration key is missing.
    """
    # Walk down to config[service_name]['logging']['redis'] in one go.
    redis_config = config_.get(module_name)[service_name]['logging']['redis']

    super().__init__()

    self.__redis = redis_.Redis(module_name=module_name)
    self.__key = redis_config['key']
    self.__max_entries = redis_config['max_entries']
def main():
    """Validate game details from the Redis queue and archive them to S3.

    Loops forever: pops a message from the ``game-detail-list`` queue,
    updates the ``archiver.heartbeat`` timestamp, validates the response
    and stores it in the S3 bucket, then logs the elapsed time.

    Raises:
        RuntimeError: If a popped message is not an outbound one.
        game_detail_.ValidationError: Propagated unchanged from
            ``game_detail_.validate``.
    """
    redis = redis_.Redis(module_name='game_detail_crawler')
    s3_bucket = s3_.Bucket(module_name='game_detail_crawler')

    while True:
        message = redis.blpop_websocket_message('game-detail-list')
        redis.set_timestamp('archiver.heartbeat')
        fetch_time = datetime.datetime.now(tz=datetime.timezone.utc)

        if message['request_direction'] != 'outbound':
            raise RuntimeError(
                'An outbound WebSocket message is'
                ' expected, but got an inbound one.')

        message = message['response']

        # No handler here on purpose: a validation failure must abort the
        # archiver, and a handler that only re-raises adds nothing.
        game_detail_.validate(message)

        s3_bucket.put_game_detail(message)

        # NOTE(review): the interval below also covers the S3 upload, not
        # only the validation the log message mentions -- confirm intent.
        now = datetime.datetime.now(tz=datetime.timezone.utc)
        elapsed_time = now - fetch_time
        logging.info(f'Elapsed time to validate the message: {elapsed_time}')
def main(driver: WebDriver) -> None:
    """Log in to Mahjong Soul through the browser and keep the session alive.

    Opens the game page, performs the e-mail/verification-code login by
    clicking fixed regions of the game canvas, then repeatedly calls
    ``_after_login``. Whenever that raises ``RefreshRequest`` the
    ``archiver.heartbeat`` timestamp in Redis is inspected and the page is
    refreshed before trying again.

    Args:
        driver: The WebDriver used to drive the browser.

    Raises:
        RuntimeError: If no heartbeat has been recorded after three
            refresh requests, or if the recorded heartbeat is more than
            five minutes old.
    """
    fetch_time = datetime.datetime.now(tz=datetime.timezone.utc)

    driver.get('https://game.mahjongsoul.com/')
    canvas = _wait_for_page_to_present(driver)
    _get_screenshot(driver, '00-ページ読み込み.png')

    yostar_login = YostarLogin(module_name='game_abstract_crawler')

    # Click the "Log in" button.
    # NOTE(review): the four numbers presumably describe a rectangle on
    # the canvas (x, y, width, height) -- confirm against
    # `click_canvas_within`.
    click_canvas_within(driver, canvas, 540, 177, 167, 38)
    time.sleep(1)
    _get_screenshot(driver, '01-ログインボタンクリック.png')

    # Click the "E-mail address" form to move the focus to it.
    click_canvas_within(driver, canvas, 145, 154, 291, 30)
    time.sleep(1)

    email_address = yostar_login.get_email_address()

    # Type the e-mail address into the "E-mail address" form.
    ActionChains(driver).send_keys(email_address).perform()
    time.sleep(1)
    _get_screenshot(driver, '02-メールアドレス入力.png')

    # Click the "Get code" button. The time is recorded first and later
    # passed to `get_auth_code` as `start_time`.
    start_time = datetime.datetime.now(tz=datetime.timezone.utc)
    click_canvas_within(driver, canvas, 351, 206, 86, 36)
    time.sleep(1)
    _get_screenshot(driver, '03-コードを受け取るボタンクリック.png')

    # Click the "Confirm" button.
    click_canvas_within(driver, canvas, 378, 273, 60, 23)
    time.sleep(1)
    _get_screenshot(driver, '04-確認ボタンクリック.png')

    # Click the "Verification code" form to move the focus to it.
    click_canvas_within(driver, canvas, 144, 211, 196, 30)
    time.sleep(1)

    auth_code = yostar_login.get_auth_code(
        start_time=start_time, timeout=datetime.timedelta(minutes=1))

    # Type the verification code into the "Verification code" form.
    ActionChains(driver).send_keys(auth_code).perform()
    time.sleep(1)
    _get_screenshot(driver, '05-認証コード入力.png')

    # Click the "Log in" button.
    click_canvas_within(driver, canvas, 209, 293, 163, 37)

    redis = redis_.Redis(module_name='game_abstract_crawler')
    # Drop any existing heartbeat so that a timestamp found later was
    # written after this point.
    redis.delete('archiver.heartbeat')

    failure_count = 0
    while True:
        try:
            _after_login(fetch_time, canvas, redis)
        except RefreshRequest:
            failure_count += 1

            timestamp = redis.get_timestamp('archiver.heartbeat')
            if timestamp is None:
                if failure_count >= 3:
                    # Going from login to the spectating screen was
                    # attempted three times, yet the `archiver` service
                    # never ran at all. That is, cases such as:
                    #
                    # - the `archiver` service is stopped,
                    # - the login did not succeed, or
                    # - the transition to the spectating screen has
                    #   become impossible.
                    raise RuntimeError(
                        '`archiver` service does not seem to run.')
            else:
                # The transition to the spectating screen has succeeded
                # at least once.
                failure_count = 0

            # If the `archiver` service has not been running for five
            # minutes or more, raise an exception and terminate.
            now = datetime.datetime.now(tz=datetime.timezone.utc)
            threshold = datetime.timedelta(minutes=5)
            if timestamp is not None and now - timestamp > threshold:
                raise RuntimeError(
                    '`archiver` service seems stuck for 5 minutes.')

            # Keep asking the driver to refresh until it succeeds,
            # sleeping one minute after each timeout.
            while True:
                fetch_time = datetime.datetime.now(
                    tz=datetime.timezone.utc)
                try:
                    logging.warning(
                        'Requesting the driver to refresh the page...')
                    driver.refresh()
                    logging.info('The driver has refreshed the page.')
                    break
                except TimeoutException:
                    logging.warning(
                        'Failed to refresh the page. Trying again\
 requesting the driver to refresh the page after 1-minute sleep...')
                    time.sleep(60)

            canvas = _wait_for_page_to_present(driver)
            continue
import flask import mahjongsoul_sniffer.redis as redis_ app = flask.Flask(__name__) _HTML_PREFIX = pathlib.Path( '/srv/mahjongsoul-sniffer/game-detail-crawler') _LOG_PREFIX = pathlib.Path( '/var/log/mahjongsoul-sniffer/game-detail-crawler') _redis = redis_.Redis(module_name='game_detail_crawler') @app.route('/') def top_page(): return flask.send_from_directory( _HTML_PREFIX, 'index.html', mimetype='text/html') @app.route('/js/<path:filename>') def js(filename): return flask.send_from_directory( _HTML_PREFIX / 'js', filename, mimetype='text/javascript') @app.route('/screenshots.json')
def __init__(self, *, module_name: str, config: dict):
    """Create the Redis client and store the validated configuration.

    Args:
        module_name: Name passed to ``redis_.Redis``.
        config: Configuration checked against ``_CONFIG_SCHEMA``.

    Raises:
        jsonschema.ValidationError: If ``config`` does not match the schema.
    """
    self.__redis = redis_.Redis(module_name=module_name)

    # Reject a malformed configuration before keeping a reference to it.
    jsonschema.validate(config, _CONFIG_SCHEMA)

    self.__websocket_message_queue = dict()
    self.__config = config
import flask import mahjongsoul_sniffer.redis as redis_ app = flask.Flask(__name__) _HTML_PREFIX = pathlib.Path( '/srv/mahjongsoul-sniffer/game-abstract-crawler') _LOG_PREFIX = pathlib.Path( '/var/log/mahjongsoul-sniffer/game-abstract-crawler') _redis = redis_.Redis(module_name='game_abstract_crawler') @app.route('/') def top_page(): return flask.send_from_directory( _HTML_PREFIX, 'index.html', mimetype='text/html') @app.route('/js/<path:filename>') def js(filename): return flask.send_from_directory( _HTML_PREFIX / 'js', filename, mimetype='text/javascript') @app.route('/screenshots.json')