async def download_one(cc, base_url, semaphore, verbose):
    """Download the flag for country code ``cc`` and save it as a GIF.

    ``semaphore`` is used as an async context manager around the
    ``get_flag`` call, so the number of concurrent requests is capped by
    however the caller initialised it (for example
    ``asyncio.Semaphore(concur_req)``).

    Returns ``Result(status, cc)`` where ``status`` is ``HTTPStatus.ok`` or
    ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    failure other than ``web.HTTPNotFound`` while fetching.
    """
    try:
        # Entering the block acquires the semaphore and leaving it releases
        # it; only this coroutine waits when the limit has been reached.
        async with semaphore:
            image = await get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()
        # Run the blocking save in the loop's default executor (None) and
        # wait for it, so a failed write surfaces here instead of being
        # lost in a future nobody inspects.
        await loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
async def download_one(
    client: httpx.AsyncClient,
    cc: str,
    base_url: str,
    semaphore: asyncio.Semaphore,
    verbose: bool,
):
    """Fetch the flag for ``cc`` through ``client`` and write it to disk.

    The ``get_flag`` call runs while ``semaphore`` is held. A response
    whose status equals ``HTTPStatus.NOT_FOUND`` is reported as
    ``DownloadStatus.not_found``; every other ``httpx.HTTPStatusError``
    is re-raised unchanged.

    Returns ``Result(status, cc)``.
    """
    try:
        async with semaphore:
            flag_bytes = await get_flag(client, base_url, cc)
    except httpx.HTTPStatusError as http_err:
        response = http_err.response
        if response.status_code != HTTPStatus.NOT_FOUND:
            raise
        outcome = DownloadStatus.not_found
        note = f"not found {response.url}"
    else:
        # save_flag is run in a worker thread and awaited before reporting OK.
        await asyncio.to_thread(save_flag, flag_bytes, f"{cc}.gif")
        outcome = DownloadStatus.ok
        note = "OK"

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, verbose=False):
    """Download and save the flag for ``cc``, then report the outcome.

    An HTTP 404 from ``get_flag`` is recorded as ``HTTPStatus.not_found``;
    any other ``requests.exceptions.HTTPError`` is re-raised and propagates
    to the caller.

    ``verbose`` corresponds to the ``-v/--verbose`` command-line option
    (off by default): when set, the country code and status message are
    printed so progress can be followed.

    Returns the ``Result`` namedtuple ``Result(status, cc)`` whose
    ``status`` is ``HTTPStatus.not_found`` or ``HTTPStatus.ok``.
    """
    try:
        image = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as exc:
        res = exc.response
        if res.status_code == 404:
            # Per the original note, HTTPStatus is an Enum imported from
            # flags2_common, not the standard-library http.HTTPStatus.
            status = HTTPStatus.not_found
            msg = 'not found'
        else:
            # Any HTTPError other than 404 goes back up to the caller.
            raise
    else:
        save_flag(image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine that downloads and saves one flag.

    ``semaphore`` is expected to be an ``asyncio.Semaphore`` instance, the
    synchronisation mechanism used to limit how many requests run
    concurrently.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained with ``raise X from Y`` (PEP 3134)
    to the original exception, for any fetch failure other than
    ``web.HTTPNotFound``.
    """
    try:
        # Using the semaphore as a context manager inside a ``yield from``
        # expression keeps the system as a whole from blocking: only this
        # coroutine waits while the semaphore is at its limit. Leaving the
        # block releases it for other coroutines waiting on the same object.
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        # Flag not found: record that in the Result status.
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()
        # First argument is the Executor; None selects the event loop's
        # default thread-pool executor. The rest is the callable and its
        # positional arguments. Waiting on the returned future makes a
        # failed save visible here instead of being silently dropped.
        yield from loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine: fetch the flag for ``cc`` and save it.

    ``semaphore`` is expected to be an ``asyncio.Semaphore``; it limits
    the number of concurrent requests.

    Returns ``Result(status, cc)``. Any fetch failure other than
    ``web.HTTPNotFound`` is re-raised as ``FetchError(cc)`` chained to the
    original exception (``raise X from Y``, PEP 3134).
    """
    try:
        # The semaphore is used as a context manager inside ``yield from``
        # so only this coroutine waits when the limit is reached; leaving
        # the block releases it for other coroutines waiting on it.
        with (yield from semaphore):
            flag_data = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        # Flag not found: reflect that in the Result status.
        outcome, note = HTTPStatus.not_found, 'not found'
    except Exception as cause:
        raise FetchError(cc) from cause
    else:
        # save_flag is what actually writes the flag file to disk.
        target_name = cc.lower() + '.gif'
        save_flag(flag_data, target_name)
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, verbose=False):
    """Fetch the flag for ``cc`` with ``get_flag`` and store it as a GIF.

    A 404 response is reported as ``HTTPStatus.not_found``; any other
    ``requests.exceptions.HTTPError`` is re-raised.

    Returns ``Result(status, cc)``.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        if http_err.response.status_code != 404:
            raise
        outcome, note = HTTPStatus.not_found, 'not found'
    else:
        save_flag(flag_data, cc.lower() + '.gif')
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine that downloads and saves one flag.

    ``get_flag`` is driven with ``yield from`` while ``semaphore`` is held.
    ``web.HTTPNotFound`` becomes a ``not_found`` result; any other
    exception is wrapped in ``FetchError(cc)``.

    Returns ``Result(status, cc)``.
    """
    try:
        with (yield from semaphore):
            payload = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        result_status = HTTPStatus.not_found
        text = 'not found'
    except Exception as original:
        raise FetchError(cc) from original
    else:
        gif_name = cc.lower() + '.gif'
        save_flag(payload, gif_name)
        result_status = HTTPStatus.ok
        text = 'OK'

    if verbose and text:
        print(cc, text)
    return Result(result_status, cc)
def download_one(cc, base_url, verbose=False):
    """Download one flag and report whether it was found.

    Returns ``Result(status, cc)``: the first field is a ``HTTPStatus``
    enum member and the second is the country-code string.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        response = http_err.response
        if response.status_code != 404:
            raise
        # A 404 is absorbed here and is not passed up to the caller.
        outcome = HTTPStatus.not_found
        note = 'not found'
    else:
        gif_name = cc.lower() + '.gif'
        save_flag(flag_data, gif_name)
        outcome = HTTPStatus.ok
        note = 'ok'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
async def download_one(session, cc, base_url, semaphore, verbose):
    """Fetch the flag for ``cc`` through ``session`` and save it.

    The request is made while ``semaphore`` is held. ``web.HTTPNotFound``
    yields a ``not_found`` result; any other exception is re-raised as
    ``FetchError(cc)`` chained to the original.

    Returns ``Result(status, cc)``.
    """
    try:
        async with semaphore:
            flag_data = await get_flag(session, base_url, cc)
    except web.HTTPNotFound:
        outcome, note = HTTPStatus.not_found, 'not found'
    except Exception as cause:
        raise FetchError(cc) from cause
    else:
        # save_flag is called directly, without an executor.
        gif_name = cc.lower() + '.gif'
        save_flag(flag_data, gif_name)
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine that downloads and saves one flag.

    ``semaphore`` is a synchronisation device that limits the number of
    concurrent requests.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)`` (carrying the country code, chained to the
    original exception) for any failure other than ``web.HTTPNotFound``.
    """
    try:
        # Using the semaphore as a context manager in ``yield from`` avoids
        # blocking the whole system: only this coroutine waits when the
        # counter is at its limit. Per the original note, this guarantees
        # no more than concur_req get_flag coroutines are active at once.
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        # Must be the status enum member, matching HTTPStatus.ok below;
        # a bare HTTPNotFound name is not a status value.
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        save_flag(image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'ok'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, verbose=False):
    """Download the flag for ``cc`` and save it, handling HTTP 404 specially.

    ``verbose`` corresponds to the ``-v/--verbose`` option: when set, the
    country code and status message are printed.

    Returns ``Result(status, cc)``.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        status_code = http_err.response.status_code
        if status_code != 404:
            # Other HTTP errors bubble up to the caller.
            raise
        # HTTPStatus is an Enum (per the original note).
        outcome = HTTPStatus.not_found
        note = 'not found'
    else:
        save_flag(flag_data, cc.lower() + '.gif')
        outcome = HTTPStatus.ok
        note = 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc: str, base_url: str, verbose: bool = False):
    """Fetch the flag for ``cc`` and save it as ``<cc>.gif``.

    A 404 response maps to ``HTTPStatus.not_found``; any other
    ``requests.exceptions.HTTPError`` is re-raised.

    Returns ``Result(status, cc)``.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        if http_err.response.status_code != 404:
            raise
        outcome, note = HTTPStatus.not_found, 'not found'
    else:
        save_flag(flag_data, f'{cc}.gif')
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, verbose=False):
    """Download the flag for ``cc`` and save it as a GIF.

    An ``httpx.HTTPError`` that carries a 404 response is reported as
    ``HTTPStatus.not_found``; every other ``httpx.HTTPError`` is re-raised.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.
    """
    try:
        image = get_flag(base_url, cc)
    except httpx.HTTPError as exc:
        # Use the public exception name rather than the private
        # ``httpx._exceptions`` module. Not every HTTPError has a
        # ``response`` attribute, so read it defensively and let the
        # original error propagate when it is absent.
        resp = getattr(exc, 'response', None)
        if resp is not None and resp.status_code == 404:
            status = HTTPStatus.not_found
            msg = 'not found'
        else:
            raise
    else:
        save_flag(image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose:
        print(cc, msg)

    return Result(status, cc)
async def download_one(client, cc, base_url, semaphore, verbose):
    """Fetch the flag for ``cc`` through ``client`` and save it.

    The request is made while ``semaphore`` is held.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        async with semaphore:
            image = await get_flag(client, base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        # NOTE(review): relies on ``client`` exposing a ``loop`` attribute.
        # Await the executor future so a failed save is raised here rather
        # than being lost, and so OK is only reported after the write.
        await client.loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, verbose=False):
    """Basic download function: fetch one flag and save it.

    Only HTTP 404 is handled here; other ``requests.exceptions.HTTPError``
    instances are re-raised and bubble up to the caller.

    When ``verbose`` is set (the ``-v/--verbose`` command-line option) the
    country code and status message are printed.

    Returns the ``Result`` named tuple, whose ``status`` field is
    ``HTTPStatus.not_found`` or ``HTTPStatus.ok``.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        response = http_err.response
        is_missing = response.status_code == 404
        if not is_missing:
            raise
        outcome = HTTPStatus.not_found
        note = 'not found'
    else:
        gif_name = cc.lower() + '.gif'
        save_flag(flag_data, gif_name)
        outcome = HTTPStatus.ok
        note = 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine that downloads one flag and saves it
    through the event loop's executor.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()
        # None selects the loop's default executor; the remaining arguments
        # are the callable and its positional arguments. Waiting on the
        # future keeps a failed save from being silently discarded.
        yield from loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
async def download_one(cc, base_url, semaphore, verbose=False):
    """Download the flag for ``cc`` and save it via the loop's executor.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``aiohttp.web.HTTPNotFound``.
    """
    try:
        # ``with (await semaphore)`` is no longer supported by asyncio;
        # ``async with`` is the supported way to hold the semaphore.
        async with semaphore:
            image = await get_flag(base_url, cc)
    except aiohttp.web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = "not found"
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()
        # None selects the event loop's default ThreadPoolExecutor.
        # Awaiting the future surfaces save errors here and ensures OK is
        # only reported once the file has been written.
        await loop.run_in_executor(
            None, save_flag, image, cc.lower() + ".gif")
        status = HTTPStatus.ok
        msg = "OK"

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
async def download_one(session, cc, base_url, semaphore, verbose):
    """Fetch the flag for ``cc`` through ``session`` and save it via the
    event loop's executor.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        async with semaphore:
            image = await get_flag(session, base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        # The event loop maintains a ThreadPoolExecutor behind the scenes;
        # passing None as the executor selects that default instance.
        loop = asyncio.get_event_loop()
        # Await the future so save errors are raised here, not dropped.
        await loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
async def download_one(cc, base_url, semaphore, verbose):
    """Download the flag and country name for ``cc`` and save the flag as
    ``<country>-<cc>.gif`` (spaces in the country name become underscores).

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        # The semaphore is acquired separately for each request and is
        # released in between.
        async with semaphore:
            image = await get_flag(base_url, cc)
        async with semaphore:
            country = await get_country(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        country = country.replace(' ', '_')
        filename = '{}-{}.gif'.format(country, cc)
        loop = asyncio.get_event_loop()
        # Await the executor future so a failed save is raised here
        # instead of being lost in a future nobody inspects.
        await loop.run_in_executor(None, save_flag, image, filename)
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, verbose=False):
    """Fetch the flag and country name for ``cc`` and save the flag as
    ``<country>-<cc>.gif`` (spaces in the country name become underscores).

    A 404 from either request maps to ``HTTPStatus.not_found``; any other
    ``requests.exceptions.HTTPError`` is re-raised.

    Returns ``Result(status, cc)``.
    """
    try:
        flag_data = get_flag(base_url, cc)
        country_name = get_country(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        if http_err.response.status_code != 404:
            raise
        outcome, note = HTTPStatus.not_found, 'not found'
    else:
        safe_name = country_name.replace(' ', '_')
        save_flag(flag_data, '{}-{}.gif'.format(safe_name, cc))
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
async def download_one(
        session: aiohttp.ClientSession,
        cc: str,
        base_url: str,
        semaphore: asyncio.Semaphore,
        verbose: bool) -> Result:
    """Fetch the flag for ``cc`` through ``session`` and save it as
    ``<cc>.gif``.

    The request is made while ``semaphore`` is held. An
    ``aiohttp.ClientResponseError`` with status 404 becomes a
    ``not_found`` result; any other status is re-raised as
    ``FetchError(cc)`` chained to the original error.
    """
    try:
        async with semaphore:
            flag_data = await get_flag(session, base_url, cc)
    except aiohttp.ClientResponseError as resp_err:
        if resp_err.status != 404:
            raise FetchError(cc) from resp_err
        outcome, note = HTTPStatus.not_found, 'not found'
    else:
        save_flag(flag_data, f'{cc}.gif')
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, semaphor, verbose):
    """Generator-based coroutine that downloads one flag and saves it
    through the event loop's executor.

    ``semaphor`` (the parameter keeps its existing spelling so callers are
    unaffected) is the semaphore held while the request is made.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        # Use the parameter actually received; the body previously
        # referred to a differently spelled name that was never bound.
        with (yield from semaphor):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        # Get a reference to the event loop object.
        loop = asyncio.get_event_loop()
        # The first argument to run_in_executor is an executor instance;
        # if None, the event loop's default thread pool executor is used.
        # The remaining arguments are the callable and its positional
        # arguments. Waiting on the future surfaces save errors here.
        yield from loop.run_in_executor(
            None, save_flag, image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine: fetch the flag for ``cc`` and save it.

    ``semaphore`` is expected to be an ``asyncio.Semaphore``, the
    synchronisation device that limits the number of concurrent requests.

    Returns ``Result(status, cc)``. Any fetch failure other than
    ``web.HTTPNotFound`` is re-raised as ``FetchError(cc)`` carrying the
    country code.
    """
    try:
        # Used as a context manager inside ``yield from`` so only this
        # coroutine waits when the semaphore is at its limit. Leaving the
        # block releases it for other coroutines waiting on the same object.
        with (yield from semaphore):
            flag_data = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        # Flag not found: set the Result status accordingly.
        outcome = HTTPStatus.not_found
        note = 'not found'
    except Exception as cause:
        raise FetchError(cc) from cause
    else:
        gif_name = cc.lower() + '.gif'
        save_flag(flag_data, gif_name)
        outcome = HTTPStatus.ok
        note = 'OK'

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
async def download_one(cc, base_url, semaphore, verbose):
    """Second-level coroutine that drives ``get_flag``.

    Depending on how ``get_flag`` behaves, this either saves the fetched
    content or handles the exception it raised.

    Returns a ``Result`` named tuple recording how this single task
    finished; its status is a ``HTTPStatus`` enum member.
    """
    try:
        # The semaphore hands out a fixed quota of a shared resource to the
        # worker coroutines, limiting how many may run this section at once.
        async with semaphore:
            flag_data = await get_flag(base_url, cc)
    except web.HTTPNotFound:
        outcome, note = HTTPStatus.not_found, 'not found'
    except Exception as cause:
        # Explicit chaining: ``cause`` becomes FetchError's ``__cause__``.
        raise FetchError(cc) from cause
    else:
        # Run the save_flag I/O function in the loop's default executor
        # and wait for it to finish.
        event_loop = asyncio.get_event_loop()
        gif_name = cc.lower() + '.gif'
        await event_loop.run_in_executor(None, save_flag, flag_data, gif_name)
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose and note:
        print(cc, note)
    return Result(outcome, cc)
async def download_one(session: aiohttp.ClientSession,
                       cc: str,
                       base_url: str,
                       semaphore: asyncio.Semaphore,
                       verbose: bool) -> Result:
    """Fetch the flag and country name for ``cc`` and save the flag as
    ``<country>.gif`` (spaces in the country name become underscores).

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original error, for any
    ``aiohttp.ClientResponseError`` whose status is not 404.
    """
    try:
        # The semaphore is acquired separately for each request and is
        # released in between.
        async with semaphore:
            image = await get_flag(session, base_url, cc)
        async with semaphore:
            country = await get_country(session, base_url, cc)
    except aiohttp.ClientResponseError as exc:
        if exc.status == 404:
            status = HTTPStatus.not_found
            msg = 'not found'
        else:
            raise FetchError(cc) from exc
    else:
        filename = country.replace(' ', '_')
        filename = f'{filename}.gif'
        loop = asyncio.get_running_loop()
        # Await the executor future so a failed save is raised here
        # instead of being lost, and OK is reported only after the write.
        await loop.run_in_executor(None, save_flag, image, filename)
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, semaphor, verbose):
    """Generator-based coroutine that downloads and saves one flag.

    ``semaphor`` (the parameter keeps its existing spelling so callers are
    unaffected) is the semaphore held while the request is made.

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)`` with the country code, explicitly chained
    (``raise X from Y``) to the original exception, for any fetch failure
    other than ``web.HTTPNotFound``.
    """
    try:
        # The semaphore is used as a context manager in a ``yield from``
        # expression so the system as a whole is not blocked: only this
        # coroutine waits while the semaphore is at its limit. Per the
        # original note, this guarantees no more than concur_req get_flag
        # coroutines are started at any time. Leaving the block releases
        # the semaphore for another coroutine waiting on the same object.
        # The body previously used a differently spelled, unbound name.
        with (yield from semaphor):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        # Flag not found: set the Result status accordingly.
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        # This call is what actually saves the flag image to disk.
        save_flag(image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_one(cc, base_url, verbose=False):
    """Download one flag, treating HTTP 404 as a normal outcome.

    ``requests.exceptions.HTTPError`` is caught so that code 404 can be
    handled specifically; any other ``HTTPError`` is re-raised, and other
    exception types simply propagate to the caller.

    With ``verbose`` set (the ``-v/--verbose`` command-line option) the
    country code and status message are printed, which is how progress is
    shown in verbose mode.

    Returns the ``Result`` namedtuple, whose ``status`` field is
    ``HTTPStatus.not_found`` or ``HTTPStatus.ok``.
    """
    try:
        flag_data = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as http_err:
        response = http_err.response
        if response.status_code != 404:
            raise
        # Per the original note, HTTPStatus is an Enum imported from
        # flags2_common.
        outcome, note = HTTPStatus.not_found, 'not found'
    else:
        gif_name = cc.lower() + '.gif'
        save_flag(flag_data, gif_name)
        outcome, note = HTTPStatus.ok, 'OK'

    if verbose:
        print(cc, note)
    return Result(outcome, cc)
def download_one(cc, base_url, semaphore, verbose):
    """Generator-based coroutine that downloads the flag and country name
    for ``cc`` and saves the flag as ``<country>-<cc>.gif`` (spaces in the
    country name become underscores).

    Returns ``Result(status, cc)`` with ``status`` set to
    ``HTTPStatus.ok`` or ``HTTPStatus.not_found``.

    Raises ``FetchError(cc)``, chained to the original exception, for any
    fetch failure other than ``web.HTTPNotFound``.
    """
    try:
        # get_flag and get_country sit in separate ``with`` blocks so the
        # semaphore is held for the shortest possible time.
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
        with (yield from semaphore):
            country = yield from get_country(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        country = country.replace(' ', '_')
        filename = '{}-{}.gif'.format(country, cc)
        loop = asyncio.get_event_loop()
        # Wait on the executor future so a failed save is raised here
        # instead of being lost in a future nobody inspects.
        yield from loop.run_in_executor(None, save_flag, image, filename)
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)