Exemplo n.º 1
0
class Service_Sina_Auto(Service_Abstract):
    """Service that scrapes a Sina Auto URL and saves every scraped item."""

    def __init__(self):
        super(Service_Sina_Auto, self).__init__()
        # Mapper that scrape() uses to save each scraped item.
        self.sinaBaseMapper = Model_Mapper_Sina_Base()

    def scrape(self, url):
        """Scrape ``url`` and save each returned item through the mapper.

        A new ``Model_Scraper_Sina_Auto`` is created on every call and kept
        on ``self.scraper``.

        Args:
            url: Passed unchanged to the scraper's ``scrape`` method.

        Returns:
            bool: ``True`` when the scraper returned a non-empty result and
            ``save`` was called for every item without raising; ``False``
            when the result was falsy/empty or any exception was raised.
            Exceptions are printed, never propagated. Items saved before a
            failure are not undone here.
        """
        self.scraper = Model_Scraper_Sina_Auto()
        try:
            data = self.scraper.scrape(url)
            if not data:
                return False
            # Data insertion happens here: one save() call per scraped item.
            for item in data:
                self.sinaBaseMapper.save(item)
            # Always answer with a real bool; previously this path could fall
            # through and implicitly return None.
            return len(data) > 0
        except Exception as err:
            # Best-effort boundary: report the failure and signal it to the
            # caller instead of letting the exception escape.
            print(err)
            return False
Exemplo n.º 2
0
    def __init__(self):
        """Create the mapper and service objects held by this instance.

        Every attribute is a freshly constructed object; no arguments are
        passed to any of the constructors.
        """
        # Mappers.
        # NOTE(review): the attribute names say "download queue", but the
        # classes instantiated are the Sina/Tencent *base* mappers -- confirm
        # this is intended.
        self.downloadQueueMapper = Model_Mapper_Sina_Base()
        self.tencentDownloadQueueMapper = Model_Mapper_Tencent_Base()
        # Sina services, one per Service_Sina_* class.
        self.sinaBaseService = Service_Sina_Base()
        self.sinaAutoService = Service_Sina_Auto()
        self.sinaBlogService = Service_Sina_Blog()
        self.sinaEduService = Service_Sina_Edu()
        self.sinaTravelService = Service_Sina_Travel()
        self.sinaCityService = Service_Sina_City()
        self.sinaVideoService = Service_Sina_Video()

        # Tencent services, one per Service_Tencent_* class.
        self.tencentBaseService = Service_Tencent_Base()
        self.tencentSportsService = Service_Tencent_Sports()
        self.tencentAutoService = Service_Tencent_Auto()
        self.tencentGongyiService = Service_Tencent_Gongyi()
Exemplo n.º 3
0
 def __init__(self):
     """Run the parent initializer, then create the Sina base mapper."""
     super(Service_Sina_Travel, self).__init__()
     # Mapper instance stored for later use by this service.
     self.sinaBaseMapper = Model_Mapper_Sina_Base()
Exemplo n.º 4
0
class Service_Sina_Travel(Service_Abstract):
    """Service that scrapes a Sina Travel URL and saves every scraped item."""

    def __init__(self):
        super(Service_Sina_Travel, self).__init__()
        # Mapper that scrape() uses to save each scraped item.
        self.sinaBaseMapper = Model_Mapper_Sina_Base()

    def scrape(self, url):
        """Scrape ``url`` and save each returned item through the mapper.

        A new ``Model_Scraper_Sina_Travel`` is created on every call and kept
        on ``self.scraper``. The scraper result is treated as a collection of
        groups: every element of the result is itself iterated, and each
        inner item is saved.

        Args:
            url: Passed unchanged to the scraper's ``scrape`` method.

        Returns:
            bool: ``True`` when the scraper returned a non-empty result and
            ``save`` was called for every inner item without raising;
            ``False`` when the result was falsy/empty or any exception was
            raised. Exceptions are printed, never propagated. Items saved
            before a failure are not undone here.
        """
        self.scraper = Model_Scraper_Sina_Travel()
        try:
            data = self.scraper.scrape(url)
            if not data:
                return False
            # Data insertion happens here: one save() call per inner item.
            for items in data:
                for item in items:
                    self.sinaBaseMapper.save(item)
            # Always answer with a real bool; previously this path could fall
            # through and implicitly return None.
            return len(data) > 0
        except Exception as err:
            # Best-effort boundary: report the failure and signal it to the
            # caller instead of letting the exception escape.
            print(err)
            return False


    # def get_travelpage_by_phantomjs(self, key, url):
    #     try:
    #         downloader_perform = Downloader_Selenium("all")
    #         downloader_perform.set_page_load_timeout(60)
    #         downloader_perform.get_html(url)
    #     except:
    #         downloader_perform.quit()
    #         self.get_travelpage_by_phantomjs(key, url)
    #     time.sleep(2)
    #     # Scrape the Sina Travel home page, then move on to the next step: paging through the results (OK)
    #     try:
    #         count = 0
    #         while count < 5:
    #             time.sleep(2)
    #             downloader_perform.scrollTo(0, 100000)
    #             time.sleep(2)
    #             downloader_perform.scrollTo(0, 100000)
    #             time.sleep(2)
    #             downloader_perform.scrollTo(0, 100000)
    #             time.sleep(5)
    #             html = downloader_perform.get_page_source()
    #             if (html != ""):
    #                 time.sleep(2)
    #                 process(key, html)
    #                 try:
    #                     pageCount = getCurrnetPage(html)
    #                 except:
    #                     print ("no page count")
    #                 print (pageCount)
    #                 time.sleep(2)
    #                 try:
    #                     downloader_perform.driver.find_element_by_xpath("//span[@class='pagebox_next']").click()
    #                 except:
    #                     print ("wro")
    #                 # time.sleep(5)
    #                 # try:
    #                 #     anotherpageCount = getCurrnetPage(html)
    #                 #     if (pageCount == anotherpageCount):
    #                 #         downloader_perform.driver.find_element_by_xpath("//span[@class='pagebox_next']").click()
    #                 # except:
    #                 #     print ("wrong")
    #                 time.sleep(5)
    #                 count += 1
    #         downloader_perform.quit()
    #     except Exception as err:
    #         print (err)
    #         downloader_perform.quit()
    #         self.get_travelpage_by_phantomjs(key, url)
    #     return False