Exemplo n.º 1
0
def make_dir(dir_path: str) -> None:
    """
    Create a directory together with any missing parent directories. \n
    An empty ``dir_path`` names no directory, so it is skipped silently.
    A FileExistsError from the underlying call is logged instead of
    propagated; every other error is left to the caller. \n
    :param dir_path: The path to that directory.
    :return: None
    """
    if not dir_path:
        # Callers that derive the directory from a bare file name end up with
        # "" here; creating "nothing" must not blow up.
        return
    try:
        # assumes os_makedirs is os.makedirs imported under an alias - TODO confirm
        os_makedirs(dir_path, exist_ok=True)
    except FileExistsError:
        Logger.log([make_dir, f"{dir_path} exists. "])
Exemplo n.º 2
0
 def set_max_worker_size(self, size):
     """
     Store a new upper bound for the number of workers and log it. \n
     :param size: The value assigned to self.MAX_WORKER_SIZE.
     :return: None
     """
     self.MAX_WORKER_SIZE = size
     tag = f"[{__class__.__name__}]"
     # Read the attribute back so the log shows what is actually stored.
     report = f'set max_worker_size to: [{self.MAX_WORKER_SIZE}]'
     Logger.log([tag, self.set_max_worker_size, report])
Exemplo n.º 3
0
 def adjust_worker_size(self):
     """
     Derive self.worker_size from the length of self.url_list. \n
     One worker is planned per self.work_per_worker urls, rounded up, and the
     result is capped at self.MAX_WORKER_SIZE. \n
     :return: None
     """
     # The remainder has to come from the url count itself: taking it from the
     # quotient left small lists (fewer urls than work_per_worker) with zero
     # workers, so nothing would ever drain the queue.
     full_batches, leftover = divmod(len(self.url_list), self.work_per_worker)
     wanted = full_batches + 1 if leftover else full_batches
     self.worker_size = min(wanted, self.MAX_WORKER_SIZE)
     Logger.log([
         f"[{__class__.__name__}]", self.adjust_worker_size,
         f"adjusted worker size to: [{self.worker_size}]"
     ])
Exemplo n.º 4
0
 def fill_up_self_work_queue(self):
     """
     Fill self.work_queue with one task per entry of self.url_list. \n
     Each task is a one-element tuple wrapping the url dict. \n
     :return: None
     """
     Logger.log([
         f"[{__class__.__name__}]", self.fill_up_self_work_queue,
         'Start filling up work queue...'
     ])
     # The with-block releases queue_lock even when put() raises, so a failure
     # here cannot leave the lock held forever.
     # assumes queue_lock is a threading lock (context manager) - TODO confirm
     with queue_lock:
         for url_dict in self.url_list:
             self.work_queue.put((url_dict, ))
     Logger.log([
         f"[{__class__.__name__}]", self.fill_up_self_work_queue, 'Done. '
     ])
Exemplo n.º 5
0
def rest(base=None, gain=1.0, gap=None) -> None:
    """
    Sleep for a short, randomized amount of time. \n
    The random part is roughly [1.3-3.6]s; base, gain and gap then adjust it
    in that order. \n
    :param base: Extra seconds added to the random part (truncated with int()). Skipped when falsy.
    :param gain: The amplifier. The running total is multiplied by it.
    :param gap: Optional non-empty sequence; one randomly chosen element is added last.
    """
    digits = [random_randint(1, 3), random_randint(1, 3), random_randint(1, 3)]
    ndigit = [random_randint(1, 3), random_randint(1, 3), random_randint(1, 3)]
    random_shuffle(ndigit)
    choice = random_choice([0, 1, 2, 3, 4, 5])

    # Each choice maps to one ordering of the three random digits. The sixth
    # entry used to repeat digit 0 and never used digit 2; it is now the one
    # permutation that was missing.
    orderings = (
        (0, 1, 2),
        (0, 2, 1),
        (1, 0, 2),
        (1, 2, 0),
        (2, 0, 1),
        (2, 1, 0),
    )
    first, second, third = orderings[choice]
    # The first term supplies the whole seconds, the other two add decimals.
    snap = (digits[first] / ndigit[ndigit[0] - 1] +
            digits[second] / 10**ndigit[ndigit[1] - 1] +
            digits[third] / 10**ndigit[ndigit[2] - 1])
    snap = round(snap, ndigit[choice % 3])

    if snap < 1.5:
        snap += 1
    if base:
        snap += int(base)
    if gain != 1:
        snap *= gain
    if gap is not None:
        snap += random_choice(gap)

    Logger.log([rest, f"Now resting for [{snap}]s... "])
    # Sleep for the logged duration; truncating to int() discarded the
    # fractional part that the computation above exists to produce.
    time_sleep(snap)
    Logger.log([rest, "Done. "])
Exemplo n.º 6
0
 def grouping_workers(self):
     """
     Create and start self.worker_size DlWorker instances. \n
     Every started worker is appended to self.worker_group. \n
     :return: None
     """
     tag = f"[{__class__.__name__}]"
     Logger.log([tag, self.grouping_workers, 'Start grouping up workers...'])
     for index in range(self.worker_size):
         label = f"Worker-{index}"
         worker = DlWorker(index, label, self.work_queue, self.headers)
         worker.start()
         self.worker_group.append(worker)
     grouped = len(self.worker_group)
     Logger.log([tag, self.grouping_workers, f'Grouped [{grouped}] workers. '])
Exemplo n.º 7
0
 def run(self):
     """
     The worker starts downloading.\n
     Until the module-level exitFlag becomes true, the worker repeatedly takes
     one task from self.queue, downloads its url as bytes and hands the
     result to saver. \n
     :return: None
     """
     while not exitFlag:
         # Check and pop under the same lock. With the emptiness check outside
         # the lock, a worker that lost the race for the last task blocked in
         # get() forever while still holding queue_lock.
         with queue_lock:
             task = None if self.queue.empty() else self.queue.get()
         if task is not None:
             data = task[0]
             url = data["url"]
             Logger.log([f"[{self.name}] ", f'is getting: "{url}"'])
             data["content"] = get_this_url(url,
                                            self.headers,
                                            text_mode=False)["content"]
             saver(data, text_mod=False)
         sleep(0.1)
Exemplo n.º 8
0
def saver(msg: dict, text_mod=True):
    """
    Write downloaded content to disk, or log why there is nothing to write. \n
    The target file is msg["saving_path"] plus the extension reported by
    pic_format for the first 8 items of the content. Missing parent
    directories are created first. \n
    :param msg: A dict. {"content": ..., "saving_path": ..., "url": ...}
    :param text_mod: "w" if set to True, "wb" if set to False
    :return: None
    """
    content, url = msg["content"], msg["url"]

    # Failed downloads carry a marker instead of data; report and stop.
    if content == TIMED_OUT:
        Logger.log([saver, f"\"{url}\" failed due to {TIMED_OUT}. "])
        return
    if content == UNKNOWN_ERROR:
        Logger.log([saver, f"\"{url}\" failed due to {UNKNOWN_ERROR}. "])
        return

    content_format = pic_format(content[:8])
    saving_path = f'{msg["saving_path"]}.{content_format}'
    parts = saving_path.split("/")
    # Only the last directory and the file name are shown as the content name.
    content_name = '/'.join(parts[-2:])
    Logger.log([saver, f"Saving '{content_name}' to '{saving_path}'"])

    parent_dir = '/'.join(parts[:-1])
    if parent_dir:
        # A path without a directory part has no directory to create.
        make_dir(parent_dir)

    if text_mod:
        with open(saving_path, 'w', encoding='utf-8') as fp:
            fp.write(content)
    else:
        with open(saving_path, "wb") as fp:
            fp.write(content)
Exemplo n.º 9
0
def get_this_url(_url: str, _headers: dict, text_mode=True):
    """
    Fetch a url and report the outcome in a dict. \n
    Request failures are never raised to the caller; they are logged and
    encoded in the "content" entry instead. \n
    :param _url: Target url
    :param _headers: Request headers
    :param text_mode: Return r.text if set to True. Return r.content if set to False
    :return: {"content": r.text or r.content on success, TIMED_OUT on a
             timeout, UNKNOWN_ERROR on any other failure, "url": _url}
    """
    # assumes requests_get is requests.get imported under an alias - TODO confirm.
    # Imported locally because only the alias is known to exist at module level.
    from requests.exceptions import Timeout as RequestsTimeout

    res = {"content": UNKNOWN_ERROR, "url": _url}
    try:
        r = requests_get(url=_url, headers=_headers)
        r.raise_for_status()
        if text_mode:
            r.encoding = r.apparent_encoding
            res["content"] = r.text
        else:
            res["content"] = r.content
    except (TimeoutError, RequestsTimeout) as err:
        # requests reports timeouts with its own exception class, which the
        # builtin TimeoutError alone never matched.
        Logger.log([get_this_url, f'{type(err).__name__}: {_url}'])
        res["content"] = TIMED_OUT
    except Exception as err:
        # Keep the best-effort contract (content stays UNKNOWN_ERROR) without
        # a return-in-finally, which also swallowed KeyboardInterrupt.
        Logger.log([get_this_url, f'{err!r}: {_url}'])
    return res
Exemplo n.º 10
0
 def run(self):
     """
     Queue the work, start the workers and wait until they are done. \n
     Once self.work_queue is empty the module-level exitFlag is raised so the
     workers leave their loops, every worker is joined, and the flag is
     lowered again. \n
     :return: None
     """
     global exitFlag
     tag = f'[{__class__.__name__}]'

     self.fill_up_self_work_queue()
     self.grouping_workers()
     Logger.log([tag, self.run, "=== start downloading ==="])

     # Poll until every task has been taken off the queue.
     while not self.work_queue.empty():
         sleep(0.1)
     sleep(1)

     exitFlag = True
     Logger.log([tag, self.run, "=== end downloading ==="])
     for member in self.worker_group:
         member.join()
     Logger.log([tag, self.run, "Workers's joined. "])

     # For the next run.
     exitFlag = False