Пример #1
0
    def test_loadConfiguration(self):
        """Check that loading the "unittest" site configuration succeeds.

        Passes when ``LinkCrawler.loadConfigurationSite("unittest")`` returns
        a truthy value.
        """
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(">>> test_loadConfiguration")
        app = LinkCrawler()
        configIsLoaded = app.loadConfigurationSite("unittest")

        print("<<< test_loadConfiguration [configIsLoaded: %s]\n" % configIsLoaded)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(configIsLoaded)
Пример #2
0
    def test_loadConfiguration(self):
        """Verify that the "unittest" site configuration can be loaded.

        The test passes when ``loadConfigurationSite("unittest")`` on a fresh
        ``LinkCrawler`` returns a truthy value.
        """
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(">>> test_loadConfiguration")
        app = LinkCrawler()
        configIsLoaded = app.loadConfigurationSite("unittest")

        print("<<< test_loadConfiguration [configIsLoaded: %s]\n" % configIsLoaded)
        # failUnless is a deprecated alias of assertTrue.
        self.assertTrue(configIsLoaded)
Пример #3
0
    def _process_html(self, path, url, task, site):
        """Crawl one HTML file and register an action for each relative link.

        A ``LinkCrawler`` is run over ``site.real_path(path)`` with ``url`` and
        a link-rewriting callback. Every link the callback accepts is recorded
        in a mapping; afterwards each recorded link without a ``":"`` is turned
        into an absolute URL and added to ``task`` as ``"<url>==><target>"``.

        Args:
            path: Path of the HTML document; passed to ``site.real_path`` and
                used as the base for ``utils.absolute_path``.
            url: URL of the document; base for ``utils.absolute_url``.
            task: Object exposing ``add_action(str)``.
            site: Object exposing ``real_path(path)``.
        """
        # original link -> rewritten target, filled in by the callback below
        url_mapping = {}

        def _process_link(link):
            """Return the replacement for ``link`` (unchanged if not accepted)."""
            if not self._accept_download(link):
                return link

            if link not in url_mapping:
                if ":" in link:  # mailto:, javascript:, http:
                    url_mapping[link] = link
                else:
                    url_mapping[link] = utils.absolute_path(path, link)

            return url_mapping[link]

        crawler = LinkCrawler()
        crawler.crawling(site.real_path(path), url, _process_link)

        # The loop variable must not be called ``path``: the callback above
        # closes over the ``path`` parameter, and rebinding it here would
        # change what the callback sees if it were ever invoked later.
        for link, target in url_mapping.items():
            if ":" in link:
                # Links carrying a scheme were mapped to themselves; skip them.
                continue

            action = "%s==>%s" % (utils.absolute_url(url, link), target)
            self.logger.info("add spider:%s" % action)
            task.add_action(action)
Пример #4
0
    def test_getInHMS(self):
        """Check that ``Iprods_Date_Utils.getInHMS(3600)`` returns a truthy value."""
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_getInHMS")
        # NOTE(review): ``app`` is not used by the assertion; it is kept
        # because constructing LinkCrawler may have side effects. Confirm.
        app = LinkCrawler()
        app.LOG_LEVEL = 10  # presumably logging.DEBUG; verify against LinkCrawler
        timeInHMS = Iprods_Date_Utils.getInHMS(3600)

        print("<<< test_getInHMS [seconds: 3600, timeInHMS: %s]\n" % timeInHMS)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(timeInHMS)
Пример #5
0
    def test_getInHMS(self):
        """Verify that converting 3600 seconds via ``getInHMS`` yields a truthy result."""
        # print(...) with one argument is portable across Python 2 and 3.
        print(">>> test_getInHMS")
        # NOTE(review): the crawler instance is not referenced afterwards; it
        # is kept in case its construction has side effects. Confirm.
        app = LinkCrawler()
        app.LOG_LEVEL = 10  # presumably the DEBUG level; TODO confirm
        timeInHMS = Iprods_Date_Utils.getInHMS(3600)

        print("<<< test_getInHMS [seconds: 3600, timeInHMS: %s]\n" % timeInHMS)
        # failUnless is a deprecated alias of assertTrue.
        self.assertTrue(timeInHMS)
Пример #6
0
    def test_smtpsend(self):
        """Check that ``LinkCrawler.smtpsend`` returns something other than None.

        Fails immediately when loading the "unittest" configuration returns
        ``False``.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_smtpsend")
        app = LinkCrawler()
        configIsLoaded = app.loadConfigurationSite("unittest")

        # Guard clause: self.fail() raises, so nothing below runs without a
        # configuration and sendResult is always bound when it is read.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        sendResult = app.smtpsend("*****@*****.**", "*****@*****.**", "", "", "Webreport Unittest test_smtpsend", "Lorem ipsum")

        print("<<< test_smtpsend [sendResult: %s]\n" % sendResult)
        # assertNotEqual replaces the deprecated failIfEqual alias.
        self.assertNotEqual(sendResult, None)
Пример #7
0
    def test_getUrlStatus(self):
        """Check that ``Checker.getStatusCode`` returns a truthy value for a live URL.

        Fails immediately when loading the "unittest" configuration returns
        ``False``. Requires network access to http://www.google.de.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_getUrlStatus")
        app = LinkCrawler()
        checker = Checker(app)
        configIsLoaded = app.loadConfigurationSite("unittest")

        # Guard clause: self.fail() raises, so statusCode is always bound
        # by the time it is read below.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        statusCode = checker.getStatusCode("http://www.google.de")

        print("<<< test_getUrlStatus [statusCode: %s]\n" % statusCode)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(statusCode)
Пример #8
0
    def test_getUrlStatus(self):
        """Verify that a status code is obtained for http://www.google.de.

        The test aborts with a failure when the "unittest" configuration
        load returns ``False``; otherwise it asserts that
        ``Checker.getStatusCode`` returns a truthy value. Needs network access.
        """
        # print(...) with one argument is portable across Python 2 and 3.
        print(">>> test_getUrlStatus")
        app = LinkCrawler()
        checker = Checker(app)
        configIsLoaded = app.loadConfigurationSite("unittest")

        # self.fail() raises, so the remainder only runs with a configuration.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        statusCode = checker.getStatusCode("http://www.google.de")

        print("<<< test_getUrlStatus [statusCode: %s]\n" % statusCode)
        # failUnless is a deprecated alias of assertTrue.
        self.assertTrue(statusCode)
Пример #9
0
    def test_extractLinks(self):
        """Check that ``Reader.extractLinks`` finds links on a fetched page.

        Fetches http://www.scandio.de through ``Reader.getResponse``, passes
        element 3 of the response to ``extractLinks`` and asserts the result
        is truthy. Fails immediately when loading the "unittest"
        configuration returns ``False``. Requires network access.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_extractLinks")
        app = LinkCrawler()
        reader = Reader(app)
        configIsLoaded = app.loadConfigurationSite("unittest")

        # Guard clause: self.fail() raises, so links is always bound below.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        pageUrl = 'http://www.scandio.de'
        response = reader.getResponse(pageUrl)
        responseData = response[3]  # presumably the response body; verify against Reader
        links = reader.extractLinks(responseData, pageUrl)

        print("<<< test_extractLinks [links: %s]\n" % links)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(links)
Пример #10
0
    def test_smtpsend(self):
        """Verify that sending a test mail yields a result other than None.

        The test aborts with a failure when the "unittest" configuration
        load returns ``False``; otherwise it asserts that
        ``LinkCrawler.smtpsend`` does not return ``None``.
        """
        # print(...) with one argument is portable across Python 2 and 3.
        print(">>> test_smtpsend")
        app = LinkCrawler()
        configIsLoaded = app.loadConfigurationSite("unittest")

        # self.fail() raises, so the remainder only runs with a configuration
        # and sendResult is always bound when it is read.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        sendResult = app.smtpsend("*****@*****.**",
                                  "*****@*****.**", "", "",
                                  "Webreport Unittest test_smtpsend",
                                  "Lorem ipsum")

        print("<<< test_smtpsend [sendResult: %s]\n" % sendResult)
        # failIfEqual is a deprecated alias of assertNotEqual.
        self.assertNotEqual(sendResult, None)
Пример #11
0
    def test_extractLinks(self):
        """Verify that links are extracted from http://www.scandio.de.

        The test aborts with a failure when the "unittest" configuration
        load returns ``False``. Otherwise it fetches the page with
        ``Reader.getResponse``, feeds element 3 of the response to
        ``Reader.extractLinks`` and asserts the result is truthy. Needs
        network access.
        """
        # print(...) with one argument is portable across Python 2 and 3.
        print(">>> test_extractLinks")
        app = LinkCrawler()
        reader = Reader(app)
        configIsLoaded = app.loadConfigurationSite("unittest")

        # self.fail() raises, so the remainder only runs with a configuration.
        if configIsLoaded is False:
            self.fail("load configuration failed")

        response = reader.getResponse('http://www.scandio.de')
        responseData = response[3]  # presumably the page content; TODO confirm
        links = reader.extractLinks(responseData, 'http://www.scandio.de')

        print("<<< test_extractLinks [links: %s]\n" % links)
        # failUnless is a deprecated alias of assertTrue.
        self.assertTrue(links)
Пример #12
0
 def _process_html(self, path, url, task, site):
     """Crawl an HTML document and queue an action per relative link.

     Runs ``LinkCrawler.crawling`` on ``site.real_path(path)`` with ``url``
     and a callback that rewrites links. Accepted links are remembered in a
     mapping; once crawling returns, each remembered link that contains no
     ``":"`` is resolved against ``url`` and handed to ``task.add_action``
     as ``"<absolute url>==><target>"``.

     Args:
         path: Path of the HTML document; given to ``site.real_path`` and
             used as the base for ``utils.absolute_path``.
         url: URL of the document; base for ``utils.absolute_url``.
         task: Object exposing ``add_action(str)``.
         site: Object exposing ``real_path(path)``.
     """
     # original link -> rewritten target, populated by the callback
     url_mapping = {}

     def _process_link(link):
         """Return the rewritten form of ``link``; unaccepted links pass through."""
         if not self._accept_download(link):
             return link

         if link not in url_mapping:
             # Links with a scheme (mailto:, javascript:, http:) stay as-is.
             if ":" in link:
                 url_mapping[link] = link
             else:
                 url_mapping[link] = utils.absolute_path(path, link)

         return url_mapping[link]

     crawler = LinkCrawler()
     crawler.crawling(site.real_path(path), url, _process_link)

     # Use a name other than ``path`` for the mapped value: the callback
     # closes over the ``path`` parameter, so rebinding it here would
     # silently change the callback's base path.
     for link, target in url_mapping.items():
         if ":" in link:
             continue

         action = "%s==>%s" % (utils.absolute_url(url, link), target)
         self.logger.info("add spider:%s" % action)
         task.add_action(action)
Пример #13
0
    def test_write2file(self):
        """Check that ``Iprods_File_Utils.write2file`` writes without raising.

        The target file name is built from ``LinkCrawler.REPORT_PATH`` and
        ``Iprods_File_Utils.getValidFileName("unittest")`` plus ``.txt``.
        Any exception raised while building the name or writing is printed
        and turns the test into a failure.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_write2file")
        writeResult = True
        # Named reportFile rather than ``file`` to avoid shadowing the builtin.
        reportFile = ""

        try:
            app = LinkCrawler()
            reportFile = "%s%s.txt" % (
                app.REPORT_PATH,
                Iprods_File_Utils.getValidFileName("unittest"))
            Iprods_File_Utils.write2file(reportFile,
                                         "Webreport Unittest test_write2file")
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt and SystemExit still propagate.
            writeResult = False
            exctype, value = sys.exc_info()[:2]
            print("exctype: %s, value: %s" % (exctype, value))

        print("<<< test_write2file [file: %s]\n" % reportFile)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(writeResult)
Пример #14
0
    def test_completeRelativePath(self):
        """Check ``Reader.completeRelativePath`` on a fixed table of argument pairs.

        For every pair the two elements are passed, in order, to
        ``completeRelativePath`` and the result must be truthy.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(">>> test_completeRelativePath\n")
        app = LinkCrawler()
        reader = Reader(app)

        # (first argument, second argument) pairs for completeRelativePath
        cases = [
            ('/en/de/index.html', '/aa/bb/index.htm'),
            ('/en/de/index.html', 'cc/dd/index.htm'),
            ('/en/de/index.html', '../cc/dd/index.htm'),
            ('/en/', 'search_iframe_en.htm'),
            ('/en/', '/search_iframe_en.htm'),
            ('/en/', '../search_iframe_en.htm'),
        ]

        # Iterate the pairs directly instead of indexing via range(len(...)).
        for first, second in cases:
            relPath = reader.completeRelativePath(first, second)
            print("test_completeRelativePath [path: %s, parent: %s, relPath: %s]\n" % (
                first, second, relPath))
            # assertTrue replaces the deprecated failUnless alias.
            self.assertTrue(relPath)

        print("<<< test_completeRelativePath \n")