def check_all_complete():
    """Drive the checker until no link is left pending.

    Each pass walks ``LINKS``, marks entries whose status is neither
    ``'checked'`` nor ``'proccess'`` as ``'proccess'`` and, once ``WOKERS``
    entries have been marked, hands the batch to ``start()``.  A further
    ``start()`` call after the walk handles a batch smaller than ``WOKERS``.
    The function returns once no entry has status ``'unchecked'`` or
    ``'proccess'``.
    """
    while True:
        claimed = 0
        for entry in LINKS.values():
            # Already finished or already handed to a batch: nothing to do.
            if entry['status'] in ('checked', 'proccess'):
                continue
            entry['status'] = 'proccess'
            claimed += 1
            if claimed == WOKERS:
                start()
                # Leave the walk right after the batch has run; the next
                # pass of the outer loop rescans LINKS from the beginning.
                break
        start()

        still_pending = any(
            entry['status'] in ('unchecked', 'proccess')
            for entry in LINKS.values()
        )
        if not still_pending:
            return
def start():
    """Fetch every link currently marked ``'proccess'`` in parallel.

    One worker thread is created per pending URL.  Links flagged ``'own'``
    are fetched with ``get_page``; all others go through
    ``head_foreign_page``.  Both helpers are expected to return a
    ``(text, status_code)`` pair.  As each request completes, the entry is
    marked ``'checked'`` and its ``'result'`` is set to the stringified
    status code.  For own pages the links found in the returned text are
    additionally passed to ``analyse_and_add_links``.

    Returns immediately when no link is marked ``'proccess'``.
    """
    pending = [url for url, info in LINKS.items() if info['status'] == 'proccess']
    if not pending:
        return

    with concurrent.futures.ThreadPoolExecutor(max_workers=len(pending)) as pool:
        jobs = {}
        for url in pending:
            fetch = get_page if LINKS[url]['own'] else head_foreign_page
            jobs[pool.submit(fetch, url)] = url

        for job in concurrent.futures.as_completed(jobs):
            url = jobs[job]
            text, status_code = job.result()
            is_own = LINKS[url]['own']
            LINKS[url]['status'] = 'checked'
            LINKS[url]['result'] = str(status_code)
            if is_own:
                # Only pages of the checked site are scanned for more links.
                links = get_links(text)
                analyse_and_add_links(links)
if __name__ == '__main__':
    # Script entry point: check the start page, then every link found from it.
    #
    # The start page is fetched synchronously and recorded as an already
    # checked "own" link so the batch loop does not request it again.
    text, status_code = get_page(SITE)
    LINKS[SITE] = {'status': 'checked', 'own': True, 'result': str(status_code)}

    # Register the links found on the start page, then process LINKS until
    # nothing is left pending.
    links = get_links(text)
    analyse_and_add_links(links)
    check_all_complete()

    # Report every link together with its recorded state.
    for url, v in LINKS.items():
        print(url, v)

    # Disabled debug summary of internal vs. external links:
    # for link in links:
    #     if link[0] == '/':
    #         own += 1
    #     if link[0] == 'h':
    #         foreign += 1
    #     print(link)
    # print(f'Internal links: {own} External links: {foreign} Total: {own + foreign}:{len(links)}')