Example #1
def deepCrawl(crawled):
    # Crawl each already-discovered URL one level deeper,
    # collecting the filtered results into a single list.
    tmp = []
    for each in crawled:
        crawl = Crawl(each['url'])
        crawl.filter()
        tmp.extend(crawl.get())
    return tmp
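The Crawl class itself is not shown on this page. A minimal sketch of the interface Example #1 assumes (a hypothetical Crawl that wraps one URL, a filter() pass, and a get() that returns dicts carrying a 'url' key); pasted together with the function above, it runs end to end:

class Crawl:
    """Hypothetical stand-in for the Crawl class used in these examples."""

    def __init__(self, url):
        self.url = url
        self._results = []

    def filter(self):
        # placeholder: a real implementation would fetch the page and
        # drop unwanted or external links
        self._results = [{'url': self.url + '/child'}]

    def get(self):
        # return filtered results as dicts with a 'url' key,
        # matching how deepCrawl reads each['url']
        return self._results

# usage: one level of deep crawling over previously collected results
seed = [{'url': 'http://example.com'}]
print(deepCrawl(seed))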
Example #2
def Run():
    start_urls = Ctrip.StartURL()
    my_crawl = Crawl(start_urls)
    try:
        my_crawl.Run()
        # persist every collected price panel to Splunk
        price_pannel_list = my_crawl.price_pannel_list
        for price_pannel in price_pannel_list:
            SplunkLog.Save(price_pannel)
    finally:
        # always release the crawler, even if saving fails
        my_crawl.Finish()
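Example #2 treats the crawler as a resource: Finish() runs in the finally block even if saving to Splunk raises. Ctrip, SplunkLog, and the Crawl lifecycle are not defined on this page; the hypothetical stubs below only illustrate the assumed contract and make the snippet runnable:

class Ctrip:
    @staticmethod
    def StartURL():
        # hypothetical seed URLs
        return ["https://www.ctrip.com/flights"]

class SplunkLog:
    @staticmethod
    def Save(price_pannel):
        # hypothetical: forward one record to Splunk; printed here instead
        print(price_pannel)

class Crawl:
    # hypothetical: collects price panels between Run() and Finish()
    def __init__(self, start_urls):
        self.start_urls = start_urls
        self.price_pannel_list = []  # spelling kept from the original attribute

    def Run(self):
        self.price_pannel_list = [{"url": u} for u in self.start_urls]

    def Finish(self):
        # release HTTP sessions, webdrivers, or other resources here
        pass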
Example #3
def appCrawl(url):
    crawl = Crawl(url)
    crawl.filter()
    crawled = []
    crawled.extend(crawl.get())
    # follow the first-level results one level deeper
    crawled.extend(SubCrawl.deepCrawl(crawled))
    # dedupe, inject attack payloads, and keep only attackable targets
    crawled = SubCrawl.deleteOverlap(crawled)
    crawled = SubCrawl.replaceAttack(crawled)
    crawled = SubCrawl.getAttackable(crawled)
    return crawled
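Example #3 chains four SubCrawl helpers into a pipeline, but their bodies are not shown here. The sketch below only illustrates the assumed shapes (each helper takes and returns a list of crawled entries); the implementations are hypothetical placeholders:

class SubCrawl:
    @staticmethod
    def deleteOverlap(crawled):
        # hypothetical: drop duplicate URLs while preserving order
        seen, unique = set(), []
        for entry in crawled:
            if entry['url'] not in seen:
                seen.add(entry['url'])
                unique.append(entry)
        return unique

    @staticmethod
    def replaceAttack(crawled):
        # hypothetical: substitute request parameters with attack payloads
        return crawled

    @staticmethod
    def getAttackable(crawled):
        # hypothetical: keep only entries that accept injected input
        return crawled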
Example #4
def crawl(self):
    self.url = input(">>> Enter url of website: ")
    if not self.url:
        self.url = "http://testphp.vulnweb.com"
    if not self.url.startswith("http"):
        self.url = "http://" + self.url
    # strip the scheme and any path to get the bare host name
    host = self.url.replace("http://", "").replace("https://", "").split("/")[0]
    current_path = os.path.dirname(os.path.realpath(__file__))
    self.output_dir = os.path.join(current_path, "output", host)
    if os.path.exists(self.output_dir):
        choice = input("Scan results are available at output/{}, continue? (Y/N | Default = Y) ".format(host))
        if choice.upper() == "N":
            print("Stopping...")
            exit()
        else:
            # discard the previous results before re-scanning
            shutil.rmtree(self.output_dir)
    os.makedirs(self.output_dir)
    crawl = Crawl(self.url)
    return crawl
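The string replace/split in Example #4 works for plain http/https URLs; a slightly more robust way to extract the host is urllib.parse from the standard library, shown here with an illustrative URL:

from urllib.parse import urlparse

url = "https://testphp.vulnweb.com/listproducts.php?cat=1"
host = urlparse(url).netloc  # 'testphp.vulnweb.com'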
Example #5
    sDB_User = _cf["StockAnalysis"]["DB_User"]
    sDB_Pwd = _cf["StockAnalysis"]["DB_Pwd"]
    sDB_Name = _cf["StockAnalysis"]["DB_Name"]
    #endregion

    log = Log.hyLog()  # first style of declaration: call through an instance (or pass the object as the first argument when calling)

    start_date = datetime.date(2019, 11, 1)  #.strftime("%Y%m%d")
    end_date = datetime.date.today()  #.strftime("%Y%m%d")
    day = datetime.timedelta(days=1)  # one-day step used to advance the date
    log.writeLog(apname=_APName,
                 text="Date range to process ({} ~ {})".format(start_date.strftime("%Y%m%d"),
                                                               end_date.strftime("%Y%m%d")))

    # initialize the crawl object
    craw = Crawl(sDownloadFilePath)
    db = DB(_APName, sDB_Host, sDB_User, sDB_Pwd, sDB_Name)
    sleep_sec = 5
    lastprocmonth = 0
    while start_date <= end_date:
        try:
            # skip Saturdays and Sundays
            if start_date.weekday() == 5 or start_date.weekday() == 6:
                start_date = start_date + day
                continue
            # temporary code
            procemonthdata = False
            if start_date.month != lastprocmonth:
                procemonthdata = True
                lastprocmonth = start_date.month
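The loop in Example #5 walks one day at a time and skips weekends; a self-contained sketch of just that iteration pattern (the per-day processing body is omitted, as the original snippet is truncated):

import datetime

start_date = datetime.date(2019, 11, 1)
end_date = datetime.date.today()
day = datetime.timedelta(days=1)
while start_date <= end_date:
    # weekday() returns 5 for Saturday and 6 for Sunday
    if start_date.weekday() >= 5:
        start_date += day
        continue
    # ... process start_date here ...
    start_date += day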