import unittest
from mock import Mock as mock  # Python 2-era mock library; Mock(spec) creates a spec'd mock

# Import paths below follow the src.model layout used by the driver scripts;
# the exact modules for the configuration/filter/extractor classes are assumed.
from src.model.Ungoliant import Ungoliant
from src.model.crawler.RegularCrawler import RegularCrawler
from src.model.crawler.ComplexCrawler import ComplexCrawler
from src.model.fetcher.UrlFetcher import UrlFetcher
from src.model.SiteConfiguration import SiteConfiguration
from src.model.UrlFilter import UrlFilter
from src.model.PageExtractor import PageExtractor
from src.model.UrlBasicConfiguration import UrlBasicConfiguration
from src.model.UrlComplexConfiguration import UrlComplexConfiguration


class UngoliantTest(unittest.TestCase):

    def setUp(self):
        self.siteMock = mock(SiteConfiguration)
        self.filterMock = mock(UrlFilter)
        self.fetcherMock = mock(UrlFetcher)
        self.pageMock = mock(PageExtractor)
        self.regularCrawlerMock = mock(RegularCrawler)
        # The original assigned both the ComplexCrawler and the
        # UrlComplexConfiguration mocks to self.complexConfigMock, so the
        # first was silently overwritten; the crawler mock gets its own name.
        self.complexCrawlerMock = mock(ComplexCrawler)
        self.basicConfigMock = mock(UrlBasicConfiguration)
        self.complexConfigMock = mock(UrlComplexConfiguration)
        self.sut = Ungoliant(site_config=self.siteMock,
                             fetcher=self.fetcherMock,
                             extractor=self.pageMock,
                             url_filter=self.filterMock,
                             crawler=self.regularCrawlerMock)
        self.sut.set_url_config(self.basicConfigMock)
        self.sut.set_max_crawl(1)

    def test_verQueSpiderInicializoCorrectamente(self):
        # "check that the spider initialised correctly"
        assert self.siteMock.called

    def test_crawlUnSiteConUnaConfiguracionBasicaFunciona(self):
        # "crawling a site with a basic configuration works"
        self.sut.crawl()
        self.regularCrawlerMock.assert_called_with(self.sut)
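# A conventional unittest entry point so the module can also be run directly
# with `python`; not part of the original file, added as a convenience sketch.
if __name__ == '__main__':
    unittest.main()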
'''
Created on Jan 3, 2016

@author: igzo
'''
from src.main.Sites import sites
from src.model.Ungoliant import Ungoliant
from src.model.fetcher.JSFetcher import JSFetcher
from src.model.crawler.ComplexCrawler import ComplexCrawler

if __name__ == '__main__':
    # Pick the second configured site and crawl it with the JS-capable
    # fetcher and the complex crawler.
    url_sites = list(sites.keys())
    site = sites[url_sites[1]]

    spider = Ungoliant(site_config=site)
    spider.set_max_crawl(150)
    spider.set_crawler(ComplexCrawler())
    spider.set_fetcher(JSFetcher())

    crawled = spider.crawl()
    print(len(crawled))
    print(crawled)
'''
@author: igzo
'''
import logging

from src.model.Ungoliant import Ungoliant
from src.main.Sites import sites
from src.model.fetcher.JSFetcher import JSFetcher

if __name__ == '__main__':
    # TODO: see if a logger module can be created, and verify more pages with JS
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger('ungoliant')

    # Pick the fifth configured site; alternatives kept from earlier runs.
    url_sites = list(sites.keys())
    site = sites[url_sites[4]]
    # site = sites['cityboxoffice.com']
    # site = sites['engageinteractive.co.uk']

    spider = Ungoliant(site_config=site)
    spider.set_max_crawl(150)
    spider.set_fetcher(JSFetcher())

    crawled = spider.crawl()
    print(len(crawled))
    print(crawled)
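# Both driver scripts above index into `sites` from src.main.Sites, whose
# contents are not shown in this section. A minimal sketch consistent with
# that usage (hostname keys mapping to SiteConfiguration objects) might look
# like the following; the import path and constructor arguments are
# assumptions for illustration, not the project's actual definitions.
from src.model.SiteConfiguration import SiteConfiguration  # assumed path

sites = {
    'cityboxoffice.com': SiteConfiguration('http://cityboxoffice.com'),
    'engageinteractive.co.uk': SiteConfiguration('http://engageinteractive.co.uk'),
}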