def test_loadConfiguration(self):
    """Check that LinkCrawler can load the "unittest" site configuration.

    The test passes when ``loadConfigurationSite("unittest")`` returns a
    truthy value.
    """
    # print(...) with a single pre-formatted string emits the same text
    # whether print is the Python 2 statement or the Python 3 function.
    print(">>> test_loadConfiguration")
    app = LinkCrawler()
    configIsLoaded = app.loadConfigurationSite("unittest")
    print("<<< test_loadConfiguration [configIsLoaded: %s]\n" % configIsLoaded)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(configIsLoaded)
def _process_html(self, path, url, task, site):
    """Run a LinkCrawler over one HTML file and queue its relative links.

    ``crawler.crawling`` is given the real on-disk path of ``path`` (via
    ``site.real_path``), the page ``url`` and a link callback.  Every link
    the callback accepts is recorded in ``url_mapping``; afterwards each
    recorded link without a ":" is resolved against ``url`` and added to
    ``task`` as a ``"<absolute url>==><local path>"`` action.

    Args:
        path: site-relative path of the HTML file being processed.
        url: URL the file was fetched from; base for resolving links.
        task: object exposing ``add_action(str)``.
        site: object exposing ``real_path(path)``.
    """
    url_mapping = {}

    def _process_link(link):
        """Return the replacement for ``link`` and remember the mapping.

        Links rejected by ``self._accept_download`` are returned unchanged
        and not recorded.  Presumably called by the crawler once per link
        it finds -- confirm against LinkCrawler.crawling.
        """
        if not self._accept_download(link):
            return link
        if link not in url_mapping:
            if ":" in link:  # mailto:, javascript:, http:
                # Links carrying a scheme are kept exactly as written.
                url_mapping[link] = link
            else:
                url_mapping[link] = utils.absolute_path(path, link)
        return url_mapping[link]

    crawler = LinkCrawler()
    crawler.crawling(site.real_path(path), url, _process_link)

    # The loop variable is deliberately not called ``path``: that would
    # rebind the parameter that _process_link closes over.
    for link, local_path in url_mapping.items():
        if ":" in link:
            # Scheme-qualified links were mapped to themselves above and
            # are not queued.
            continue
        absolute_link = utils.absolute_url(url, link)
        self.logger.info("add spider:%s==>%s" % (absolute_link, local_path))
        task.add_action("%s==>%s" % (absolute_link, local_path))
def test_getInHMS(self):
    """Check that Iprods_Date_Utils.getInHMS(3600) returns a truthy value."""
    # NOTE(review): this file contains two definitions named test_getInHMS;
    # if both belong to the same class, only the later one is ever run.
    print(">>> test_getInHMS")
    app = LinkCrawler()
    # presumably 10 corresponds to logging.DEBUG -- confirm in LinkCrawler
    app.LOG_LEVEL = 10
    timeInHMS = Iprods_Date_Utils.getInHMS(3600)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_getInHMS [seconds: 3600, timeInHMS: %s]\n" % timeInHMS)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(timeInHMS)
def test_getInHMS(self):
    """Check that Iprods_Date_Utils.getInHMS(3600) returns a truthy value."""
    # NOTE(review): this file contains two definitions named test_getInHMS;
    # if both belong to the same class, only the later one is ever run.
    print(">>> test_getInHMS")
    app = LinkCrawler()
    # presumably 10 corresponds to logging.DEBUG -- confirm in LinkCrawler
    app.LOG_LEVEL = 10
    timeInHMS = Iprods_Date_Utils.getInHMS(3600)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_getInHMS [seconds: 3600, timeInHMS: %s]\n" % timeInHMS)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(timeInHMS)
def test_smtpsend(self):
    """Check that LinkCrawler.smtpsend returns something other than None.

    The "unittest" site configuration is loaded first; the test fails
    immediately when ``loadConfigurationSite`` returns ``False``.
    """
    print(">>> test_smtpsend")
    app = LinkCrawler()
    configIsLoaded = app.loadConfigurationSite("unittest")
    if configIsLoaded is False:
        # self.fail() raises, so nothing below runs without a configuration.
        self.fail("load configuration failed")

    sendResult = app.smtpsend(
        "*****@*****.**",
        "*****@*****.**",
        "",
        "",
        "Webreport Unittest test_smtpsend",
        "Lorem ipsum",
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_smtpsend [sendResult: %s]\n" % sendResult)
    # assertNotEqual replaces the deprecated failIfEqual alias.
    self.assertNotEqual(sendResult, None)
def test_getUrlStatus(self):
    """Check that Checker.getStatusCode returns a truthy value for a URL.

    The "unittest" site configuration is loaded first; the test fails
    immediately when ``loadConfigurationSite`` returns ``False``.
    """
    print(">>> test_getUrlStatus")
    app = LinkCrawler()
    # The Checker is built before the configuration is loaded, as in the
    # original statement order.
    checker = Checker(app)
    configIsLoaded = app.loadConfigurationSite("unittest")
    if configIsLoaded is False:
        # self.fail() raises, so nothing below runs without a configuration.
        self.fail("load configuration failed")

    statusCode = checker.getStatusCode("http://www.google.de")
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_getUrlStatus [statusCode: %s]\n" % statusCode)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(statusCode)
def test_extractLinks(self):
    """Check that Reader.extractLinks returns a truthy result for a page.

    The "unittest" site configuration is loaded first; the test fails
    immediately when ``loadConfigurationSite`` returns ``False``.
    """
    print(">>> test_extractLinks")
    app = LinkCrawler()
    # The Reader is built before the configuration is loaded, as in the
    # original statement order.
    reader = Reader(app)
    configIsLoaded = app.loadConfigurationSite("unittest")
    if configIsLoaded is False:
        # self.fail() raises, so nothing below runs without a configuration.
        self.fail("load configuration failed")

    page_url = 'http://www.scandio.de'
    response = reader.getResponse(page_url)
    # presumably index 3 of the response holds the page body -- confirm
    # against Reader.getResponse
    responseData = response[3]
    links = reader.extractLinks(responseData, page_url)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_extractLinks [links: %s]\n" % links)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(links)
def test_write2file(self):
    """Check that Iprods_File_Utils.write2file runs without raising.

    The target file name is built from ``LinkCrawler.REPORT_PATH`` and
    ``Iprods_File_Utils.getValidFileName("unittest")`` plus ".txt".  Any
    exception raised while building the name or writing is printed and
    turns into a test failure.
    """
    print(">>> test_write2file")
    writeResult = True
    # Named report_file rather than ``file`` to avoid shadowing the builtin.
    report_file = ""
    try:
        app = LinkCrawler()
        report_file = "%s%s.txt" % (
            app.REPORT_PATH,
            Iprods_File_Utils.getValidFileName("unittest"),
        )
        Iprods_File_Utils.write2file(
            report_file, "Webreport Unittest test_write2file")
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt and SystemExit
        # still propagate instead of being reported as a write failure.
        writeResult = False
        exctype, value = sys.exc_info()[:2]
        print("exctype: %s, value: %s" % (exctype, value))
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("<<< test_write2file [file: %s]\n" % report_file)
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(writeResult)
def test_completeRelativePath(self):
    """Check Reader.completeRelativePath on a fixed table of argument pairs.

    Each pair is passed as the two positional arguments of
    ``completeRelativePath``; the result must be truthy.
    """
    print(">>> test_completeRelativePath\n")
    app = LinkCrawler()
    reader = Reader(app)
    cases = [
        ['/en/de/index.html', '/aa/bb/index.htm'],
        ['/en/de/index.html', 'cc/dd/index.htm'],
        ['/en/de/index.html', '../cc/dd/index.htm'],
        ['/en/', 'search_iframe_en.htm'],
        ['/en/', '/search_iframe_en.htm'],
        ['/en/', '../search_iframe_en.htm'],
    ]
    # Unpack each pair directly instead of indexing by position.
    for first, second in cases:
        relPath = reader.completeRelativePath(first, second)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(
            "test_completeRelativePath [path: %s, parent: %s, relPath: %s]\n"
            % (first, second, relPath))
        # NOTE(review): every pair is asserted here; confirm this matches
        # the intended scope of the check.
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(relPath)
    print("<<< test_completeRelativePath \n")