Пример #1
0
    def setUp(self):
        """Build an Ungoliant SUT wired to a mock for every collaborator.

        Configures the spider with a basic URL configuration and a crawl
        limit of 1 so each test exercises a single crawl step.
        """
        self.siteMock = mock(SiteConfiguration)

        self.filterMock = mock(UrlFilter)
        self.fetcherMock = mock(UrlFetcher)
        self.pageMock = mock(PageExtractor)
        self.regularCrawlerMock = mock(RegularCrawler)
        # BUG FIX: this was assigned to self.complexConfigMock, which the
        # UrlComplexConfiguration mock below immediately overwrote, leaving
        # the ComplexCrawler mock unreachable. Renamed so both survive setUp.
        self.complexCrawlerMock = mock(ComplexCrawler)

        self.basicConfigMock = mock(UrlBasicConfiguration)
        self.complexConfigMock = mock(UrlComplexConfiguration)

        self.sut = Ungoliant(site_config=self.siteMock,
                             fetcher=self.fetcherMock,
                             extractor=self.pageMock,
                             url_filter=self.filterMock,
                             crawler=self.regularCrawlerMock)
        self.sut.set_url_config(self.basicConfigMock)
        self.sut.set_max_crawl(1)
Пример #2
0
class UngoliantTest(unittest.TestCase):
    """Unit tests for the Ungoliant spider, with all collaborators mocked."""

    def setUp(self):
        """Wire an Ungoliant SUT to mocks; basic config, crawl limit 1."""
        self.siteMock = mock(SiteConfiguration)

        self.filterMock = mock(UrlFilter)
        self.fetcherMock = mock(UrlFetcher)
        self.pageMock = mock(PageExtractor)
        self.regularCrawlerMock = mock(RegularCrawler)
        # BUG FIX: this was assigned to self.complexConfigMock, which the
        # UrlComplexConfiguration mock below immediately overwrote, leaving
        # the ComplexCrawler mock unreachable. Renamed so both survive setUp.
        self.complexCrawlerMock = mock(ComplexCrawler)

        self.basicConfigMock = mock(UrlBasicConfiguration)
        self.complexConfigMock = mock(UrlComplexConfiguration)

        self.sut = Ungoliant(site_config=self.siteMock,
                             fetcher=self.fetcherMock,
                             extractor=self.pageMock,
                             url_filter=self.filterMock,
                             crawler=self.regularCrawlerMock)
        self.sut.set_url_config(self.basicConfigMock)
        self.sut.set_max_crawl(1)

    def test_verQueSpiderInicializoCorrectamente(self):
        # Spider construction should have touched the site-configuration mock.
        # NOTE(review): `.called` is unittest.mock API; confirm the `mock()`
        # factory used here actually exposes it.
        assert (self.siteMock.called)

    def test_crawlUnSiteConUnaConfiguracionBasicaFunciona(self):
        # A crawl with a basic configuration should delegate to the regular
        # crawler, passing the spider itself.
        self.sut.crawl()
        self.regularCrawlerMock.assert_called_with(self.sut)
Пример #3
0
class UngoliantTest(unittest.TestCase):
    """Unit tests for the Ungoliant spider, with all collaborators mocked."""

    def setUp(self):
        """Wire an Ungoliant SUT to mocks; basic config, crawl limit 1."""
        self.siteMock = mock(SiteConfiguration)

        self.filterMock = mock(UrlFilter)
        self.fetcherMock = mock(UrlFetcher)
        self.pageMock = mock(PageExtractor)
        self.regularCrawlerMock = mock(RegularCrawler)
        # BUG FIX: this was assigned to self.complexConfigMock, which the
        # UrlComplexConfiguration mock below immediately overwrote, leaving
        # the ComplexCrawler mock unreachable. Renamed so both survive setUp.
        self.complexCrawlerMock = mock(ComplexCrawler)

        self.basicConfigMock = mock(UrlBasicConfiguration)
        self.complexConfigMock = mock(UrlComplexConfiguration)

        self.sut = Ungoliant(site_config=self.siteMock, fetcher=self.fetcherMock, extractor=self.pageMock, url_filter=self.filterMock, crawler=self.regularCrawlerMock)
        self.sut.set_url_config(self.basicConfigMock)
        self.sut.set_max_crawl(1)

    def test_verQueSpiderInicializoCorrectamente(self):
        # Spider construction should have touched the site-configuration mock.
        # NOTE(review): `.called` is unittest.mock API; confirm the `mock()`
        # factory used here actually exposes it.
        assert(self.siteMock.called)

    def test_crawlUnSiteConUnaConfiguracionBasicaFunciona(self):
        # A crawl with a basic configuration should delegate to the regular
        # crawler, passing the spider itself.
        self.sut.crawl()
        self.regularCrawlerMock.assert_called_with(self.sut)
Пример #4
0
 def setUp(self):
     """Build an Ungoliant SUT wired to a mock for every collaborator.

     Configures the spider with a basic URL configuration and a crawl
     limit of 1 so each test exercises a single crawl step.
     """
     self.siteMock = mock(SiteConfiguration)

     self.filterMock = mock(UrlFilter)
     self.fetcherMock = mock(UrlFetcher)
     self.pageMock = mock(PageExtractor)
     self.regularCrawlerMock = mock(RegularCrawler)
     # BUG FIX: this was assigned to self.complexConfigMock, which the
     # UrlComplexConfiguration mock below immediately overwrote, leaving
     # the ComplexCrawler mock unreachable. Renamed so both survive setUp.
     self.complexCrawlerMock = mock(ComplexCrawler)

     self.basicConfigMock = mock(UrlBasicConfiguration)
     self.complexConfigMock = mock(UrlComplexConfiguration)

     self.sut = Ungoliant(site_config=self.siteMock, fetcher=self.fetcherMock, extractor=self.pageMock, url_filter=self.filterMock, crawler=self.regularCrawlerMock)
     self.sut.set_url_config(self.basicConfigMock)
     self.sut.set_max_crawl(1)
Пример #5
0
'''
Created on Jan 3, 2016

@author: igzo
'''
from src.main.Sites import sites
from src.model.Ungoliant import Ungoliant
from src.model.fetcher.JSFetcher import JSFetcher
from src.model.crawler.ComplexCrawler import ComplexCrawler




if __name__ == '__main__':

    # Materialize the keys as a list: dict.keys() returns a non-indexable
    # view on Python 3, and list() keeps this working on both versions.
    # NOTE(review): dict key order is not guaranteed here, so index 1 picks
    # an arbitrary site — confirm this is intentional.
    url_sites = list(sites.keys())
    site = sites[url_sites[1]]

    # Crawl the chosen site with a JS-capable fetcher and the complex crawler.
    spider = Ungoliant(site_config=site)

    spider.set_max_crawl(150)
    spider.set_crawler(ComplexCrawler())
    spider.set_fetcher(JSFetcher())

    crawled = spider.crawl()
    # Parenthesized print is valid Python 2 and required Python 3 syntax.
    print(len(crawled))
    print(crawled)
Пример #6
0
@author: igzo
'''

import logging
from src.model.Ungoliant import Ungoliant
from src.main.Sites import sites
from src.model.fetcher.JSFetcher import JSFetcher

if __name__ == '__main__':

    # TODO: extract a reusable logger module and verify more JS-driven pages.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger('ungoliant')

    # Materialize the keys as a list: dict.keys() returns a non-indexable
    # view on Python 3, and list() keeps this working on both versions.
    # NOTE(review): dict key order is not guaranteed here, so index 4 picks
    # an arbitrary site — confirm this is intentional.
    url_sites = list(sites.keys())
    site = sites[url_sites[4]]

    #site = sites['cityboxoffice.com']
    #site = sites['engageinteractive.co.uk']

    # Crawl the chosen site with a JS-capable fetcher, capped at 150 pages.
    spider = Ungoliant(site_config=site)
    spider.set_max_crawl(150)
    spider.set_fetcher(JSFetcher())

    crawled = spider.crawl()

    # Parenthesized print is valid Python 2 and required Python 3 syntax.
    print(len(crawled))
    print(crawled)