def get_urls(cls):
    """Drain Test.urls: fetch each pending URL, parse it, and append any
    newly discovered URLs back onto the work list.

    Relies on class-level state: Test.urls (pending work list),
    Test.get_url (pops the next URL), and Test.count (running total of
    processed pages). PageParser(url).get_urls() is assumed to return an
    iterable of URL strings -- TODO confirm against PageParser.
    """
    # Truthiness check instead of len(...) > 0 (PEP 8 idiom).
    while Test.urls:
        url = Test.get_url()
        try:
            Test.count += 1
            print(Test.count, url)
            analysis = PageParser(url)
            # Clearer name than the original 'test'; these are the
            # links discovered on the fetched page.
            discovered = analysis.get_urls()
            Test.urls += discovered
        except Exception:
            # Best-effort crawl: a failing page is skipped rather than
            # aborting the whole run. Narrowed from a bare 'except:' so
            # KeyboardInterrupt/SystemExit still propagate.
            pass
def get_urls(cls):
    """Drain Test.urls: fetch each pending URL, parse it, and append any
    newly discovered URLs back onto the work list.

    Relies on class-level state: Test.urls (pending work list),
    Test.get_url (pops the next URL), and Test.count (running total of
    processed pages). PageParser(url).get_urls() is assumed to return an
    iterable of URL strings -- TODO confirm against PageParser.
    """
    # Truthiness check instead of len(...) > 0 (PEP 8 idiom).
    while Test.urls:
        url = Test.get_url()
        try:
            Test.count += 1
            print(Test.count, url)
            analysis = PageParser(url)
            # Clearer name than the original 'test'; these are the
            # links discovered on the fetched page.
            discovered = analysis.get_urls()
            Test.urls += discovered
        except Exception:
            # Best-effort crawl: a failing page is skipped rather than
            # aborting the whole run. Narrowed from a bare 'except:' so
            # KeyboardInterrupt/SystemExit still propagate.
            pass
def get_urls(url):
    """Process one crawled URL: remove it from the pending store, parse
    the page, and route each discovered link into the 'data' store.

    'data' is an external store with delete/check/insert operations --
    presumably a de-duplicating URL queue; semantics of check() (seen
    before?) should be verified against its definition.
    """
    # Removed the original's unused 'global urls, counter' declaration:
    # nothing in this body reads or assigns either name.
    try:
        # Optional: write each crawled URL to a file, at the cost of
        # extra I/O per page (translated from the original comment).
        # with open('url_list.txt', 'a') as url_log:
        #     url_log.write(url + '\n')
        data.delete(url)
        print(url)
        analysis = PageParser(url)
        for link in analysis.get_urls():
            # NOTE(review): deleting a link when check() succeeds looks
            # inverted for a de-dup queue -- confirm data.check semantics.
            if data.check(link):
                data.delete(link)
            else:
                data.insert(link)
    except Exception:
        # Best-effort crawl: a failing page is skipped rather than
        # aborting the whole run. Narrowed from a bare 'except:' so
        # KeyboardInterrupt/SystemExit still propagate.
        pass