def deepCrawl(crawled):
    """Crawl one level deeper from every entry of *crawled*.

    For each entry, a ``Crawl`` is built from the entry's ``'url'`` value,
    ``filter()`` is called on it, and whatever ``get()`` returns is appended
    to a single flat list.

    Args:
        crawled: Iterable of mappings that each provide a ``'url'`` key.

    Returns:
        A new list holding the ``get()`` results of every entry, in input
        order. The input itself is not modified.
    """
    def _crawl_entry(entry):
        # Same three steps per entry: construct, filter, fetch.
        crawler = Crawl(entry['url'])
        crawler.filter()
        return crawler.get()

    collected = []
    for entry in crawled:
        collected += _crawl_entry(entry)
    return collected
def Run():
    """Run a crawl from the Ctrip start URLs and save every price panel.

    A ``Crawl`` is created from ``Ctrip.StartURL()`` and run; each item of
    its ``price_pannel_list`` is then passed to ``SplunkLog.Save``.
    ``Finish()`` is called on the crawler whether or not the run or the
    saving raised.
    """
    crawler = Crawl(Ctrip.StartURL())
    try:
        crawler.Run()
        for panel in crawler.price_pannel_list:
            SplunkLog.Save(panel)
    finally:
        # Always release the crawler, even when Run()/Save() fails.
        crawler.Finish()
def appCrawl(url):
    """Crawl *url*, crawl one level deeper, then post-process the results.

    Steps, in order:
      1. Build a ``Crawl`` for *url*, call ``filter()`` and take ``get()``.
      2. Append the results of ``SubCrawl.deepCrawl`` over those entries.
      3. Pass the combined list through ``SubCrawl.deleteOverlap``,
         ``SubCrawl.replaceAttack`` and ``SubCrawl.getAttackable``.

    Args:
        url: Value handed to the ``Crawl`` constructor.

    Returns:
        Whatever ``SubCrawl.getAttackable`` returns for the processed list.
    """
    root = Crawl(url)
    root.filter()

    found = []
    found += root.get()
    # deepCrawl builds its own list before anything is appended here.
    found += SubCrawl.deepCrawl(found)

    deduplicated = SubCrawl.deleteOverlap(found)
    rewritten = SubCrawl.replaceAttack(deduplicated)
    return SubCrawl.getAttackable(rewritten)
def crawl(self):
    """Ask for a target URL, prepare its output directory and build a crawler.

    The URL is read from standard input. An empty answer falls back to
    ``http://testphp.vulnweb.com``, and ``http://`` is prepended when the
    answer does not already start with ``http://`` or ``https://``.

    The output directory is ``output/<host>`` next to this source file and
    is stored in ``self.output_dir``. If it already exists the user is asked
    whether to continue: answering ``N`` (any case) stops the program, any
    other answer deletes the old directory. The directory is then created.

    Returns:
        A ``Crawl`` built from the normalized ``self.url``.

    Raises:
        SystemExit: When the user declines to overwrite existing results.
    """
    import sys  # local import: only needed for the early-exit path

    self.url = input(">>> Enter url of website: ").strip()
    if not self.url:
        self.url = "http://testphp.vulnweb.com"
    # Check the full scheme prefixes: a bare startswith("http") would also
    # accept scheme-less hosts such as "httpbin.org" and leave them unprefixed.
    if not self.url.startswith(("http://", "https://")):
        self.url = "http://" + self.url

    # Host is everything between the scheme and the first "/".
    host = self.url.replace("http://", "").replace("https://", "").split("/")[0]
    current_path = os.path.dirname(os.path.realpath(__file__))
    self.output_dir = os.path.join(current_path, "output/" + host)

    if os.path.exists(self.output_dir):
        chose = input("Scan results are available at output/{}, continue? (Y/N | Default = Y)".format(host))
        if chose.strip().upper() == "N":
            print("Stopping.....")
            # sys.exit() raises the same SystemExit as the site-provided
            # exit(), but is also available when the site module is not loaded.
            sys.exit()
        # Any other answer means "continue": drop the stale results.
        shutil.rmtree(self.output_dir)
    os.makedirs(self.output_dir)

    return Crawl(self.url)
sDB_User = _cf["StockAnalysis"]["DB_User"] sDB_Pwd = _cf["StockAnalysis"]["DB_Pwd"] sDB_Name = _cf["StockAnalysis"]["DB_Name"] #endregion log = Log.hyLog() #第一種函數宣告, 要用 instance (或是呼叫函數時第一個引數帶入物件) start_date = datetime.date(2019, 11, 1) #.strftime("%Y%m%d") end_date = datetime.date.today() #.strftime("%Y%m%d") day = datetime.timedelta(days=1) #獲取昨天的日期 log.writeLog(apname=_APName, text="要處理的時間 ({} ~ {})".format(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))) # initial crawl object craw = Crawl(sDownloadFilePath) db = DB(_APName, sDB_Host, sDB_User, sDB_Pwd, sDB_Name) sleep_sec = 5 lastprocmonth = 0 while start_date <= end_date: try: # 星期六,日不處理 if start_date.weekday() == 5 or start_date.weekday() == 6: start_date = start_date + day continue #暫時的 code procemonthdata = False if start_date.month != lastprocmonth: procemonthdata = True lastprocmonth = start_date.month