def make_dir(dir_path: str) -> None:
    """
    Make the directories recursively.

    :param dir_path: The path to that directory.
    :return: None
    """
    try:
        os_makedirs(dir_path, exist_ok=True)
    except FileExistsError:
        # Raised when dir_path already exists but is not a directory.
        Logger.log([make_dir, f"{dir_path} exists. "])
def set_max_worker_size(self, size: int) -> None:
    """
    Set the maximum number of workers.

    :param size: The new MAX_WORKER_SIZE.
    :return: None
    """
    self.MAX_WORKER_SIZE = size
    Logger.log([
        f"[{__class__.__name__}]", self.set_max_worker_size,
        f'set max_worker_size to: [{self.MAX_WORKER_SIZE}]'
    ])
def adjust_worker_size(self):
    """
    Adjust the worker size according to the length of self.url_list,
    capped at self.MAX_WORKER_SIZE.

    :return: None
    """
    # Ceiling division: one extra worker for the leftover urls,
    # e.g. 12 urls with work_per_worker == 5 -> 3 workers (before the cap).
    worker_size = len(self.url_list) // self.work_per_worker
    self.worker_size = worker_size + 1 if len(self.url_list) % self.work_per_worker else worker_size
    self.worker_size = min(self.worker_size, self.MAX_WORKER_SIZE)
    Logger.log([
        f"[{__class__.__name__}]", self.adjust_worker_size,
        f"adjusted worker size to: [{self.worker_size}]"
    ])
def fill_up_self_work_queue(self):
    """
    Fill up self.work_queue with one task per url_dict in self.url_list.

    :return: None
    """
    Logger.log([
        f"[{__class__.__name__}]", self.fill_up_self_work_queue,
        'Start filling up work queue...'
    ])
    with queue_lock:
        for url_dict in self.url_list:
            self.work_queue.put((url_dict, ))
    Logger.log([
        f"[{__class__.__name__}]", self.fill_up_self_work_queue, 'Done. '
    ])
def rest(base=None, gain=1.0, gap=None) -> None:
    """
    Sleep for a short random interval, roughly [1.3-3.6]s, plus base if given.

    :param base: Base number of seconds added to the nap.
    :param gain: Multiplier applied to the nap (after base is added).
    :param gap: Iterable (not a dict); a random element of it is added at the end.
    :return: None
    """
    snap = [random_randint(1, 3), random_randint(1, 3), random_randint(1, 3)]
    ndigit = [random_randint(1, 3), random_randint(1, 3), random_randint(1, 3)]
    random_shuffle(ndigit)
    # Pick one of the six orderings of the three snap values and combine them:
    # the first term dominates, the other two only add small fractional parts.
    orders = [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
    choice = random_choice([0, 1, 2, 3, 4, 5])
    a, b, c = orders[choice]
    snap = (snap[a] / ndigit[ndigit[0] - 1]
            + snap[b] / (10 ** ndigit[ndigit[1] - 1])
            + snap[c] / (10 ** ndigit[ndigit[2] - 1]))
    snap = round(snap, ndigit[choice % 3])
    if snap < 1.5:
        snap += 1
    if base:
        snap += int(base)
    if gain != 1:
        snap *= gain
    if gap is not None:
        snap += random_choice(gap)
    Logger.log([rest, f"Now resting for [{snap}]s... "])
    time_sleep(snap)
    Logger.log([rest, "Done. "])
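# Hedged usage sketch for rest(), not part of the original module: the helper
# name is hypothetical and the numbers only illustrate how the arguments
# combine -- (nap + int(base)) * gain + random_choice(gap).
def _rest_usage_example():
    # nap of roughly 1.3-3.6s, plus 2s base, scaled by 1.5, plus 0, 1 or 2 extra seconds
    rest(base=2, gain=1.5, gap=[0, 1, 2])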
def grouping_workers(self):
    """
    Create, start and register self.worker_size download workers.

    :return: None
    """
    worker_names = []
    Logger.log([
        f"[{__class__.__name__}]", self.grouping_workers,
        'Start grouping up workers...'
    ])
    for i in range(self.worker_size):
        worker_names.append(f"Worker-{i}")
        worker = DlWorker(i, worker_names[i], self.work_queue, self.headers)
        worker.start()
        self.worker_group.append(worker)
    Logger.log([
        f"[{__class__.__name__}]", self.grouping_workers,
        f'Grouped [{len(self.worker_group)}] workers. '
    ])
def run(self):
    """
    The worker starts downloading.

    :return: None
    """
    while not exitFlag:
        # Check and take a task while holding the lock so two workers
        # cannot race for the last item.
        queue_lock.acquire()
        if not self.queue.empty():
            data = self.queue.get()[0]
            queue_lock.release()
            url = data["url"]
            Logger.log([f"[{self.name}] ", f'is getting: "{url}"'])
            data["content"] = get_this_url(url, self.headers, text_mode=False)["content"]
            saver(data, text_mod=False)
        else:
            queue_lock.release()
        sleep(0.1)
def saver(msg: dict, text_mod=True):
    """
    Save downloaded content to disk.

    :param msg: A dict: {"content": ..., "saving_path": ..., "url": ...}
    :param text_mod: Open with "w" if set to True, "wb" if set to False.
    :return: None
    """
    content, url = msg["content"], msg["url"]
    if content == TIMED_OUT:
        Logger.log([saver, f"\"{url}\" failed due to {TIMED_OUT}. "])
    elif content == UNKNOWN_ERROR:
        Logger.log([saver, f"\"{url}\" failed due to {UNKNOWN_ERROR}. "])
    else:
        # Guess the image format from the first bytes of the content.
        content_format = pic_format(content[:8])
        saving_path = f'{msg["saving_path"]}.{content_format}'
        content_name = '/'.join(saving_path.split("/")[-2:])
        Logger.log([saver, f"Saving '{content_name}' to '{saving_path}'"])
        make_dir('/'.join(saving_path.split('/')[:-1]))
        if text_mod:
            with open(saving_path, 'w', encoding='utf-8') as fp:
                fp.write(content)
        else:
            with open(saving_path, "wb") as fp:
                fp.write(content)
def get_this_url(_url: str, _headers: dict, text_mode=True):
    """
    Fetch a url.

    :param _url: Target url
    :param _headers: Request headers
    :param text_mode: Return r.text if set to True, r.content if set to False.
    :return: {"content": error_msg if an error occurs else r.text or r.content, "url": ...}
    """
    res = {"content": UNKNOWN_ERROR, "url": _url}
    try:
        r = requests_get(url=_url, headers=_headers)
        r.raise_for_status()
        if text_mode:
            r.encoding = r.apparent_encoding
            res["content"] = r.text
        else:
            res["content"] = r.content
    except TimeoutError:
        Logger.log([get_this_url, f'{TimeoutError}: {_url}'])
        res["content"] = TIMED_OUT
    finally:
        # Any other exception leaves res["content"] as UNKNOWN_ERROR.
        return res
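# Hedged usage sketch combining get_this_url() and saver(), not part of the
# original module: the helper name, url and saving_path are placeholders, and
# it assumes the same headers dict the workers use.
def _download_one_example(headers: dict):
    msg = get_this_url("https://example.com/some_picture", headers, text_mode=False)
    msg["saving_path"] = "downloads/example/some_picture"  # saver() appends the format suffix
    saver(msg, text_mod=False)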
def run(self):
    """
    Group up the workers and start downloading.

    :return: None
    """
    self.fill_up_self_work_queue()
    self.grouping_workers()
    Logger.log(
        [f'[{__class__.__name__}]', self.run, "=== start downloading ==="])
    # Wait until every task has been taken off the queue.
    while not self.work_queue.empty():
        sleep(0.1)
    sleep(1)
    global exitFlag
    exitFlag = True
    Logger.log(
        [f'[{__class__.__name__}]', self.run, "=== end downloading ==="])
    for worker in self.worker_group:
        worker.join()
    Logger.log([f'[{__class__.__name__}]', self.run, "Workers joined. "])
    # Reset the flag for the next run.
    exitFlag = False