示例#1
0
 def test_sourceUnique(self):
     """Check isSourceUnique against lists with and without the exact url.

     The url is expected to be reported unique when the list holds only
     differently-spelled entries, and not unique when the list contains
     the identical string.
     """
     candidate = 'www.google.com'
     # No entry is character-for-character equal to the candidate.
     spelled_differently = ['google.com', 'http://google.com']
     # First entry is the candidate itself.
     contains_candidate = ['www.google.com', 'something.net']

     seen_as_unique = UrlComparator.isSourceUnique(candidate, spelled_differently)
     self.assertTrue(seen_as_unique)

     seen_as_duplicate = UrlComparator.isSourceUnique(candidate, contains_candidate)
     self.assertFalse(seen_as_duplicate)
示例#2
0
    def test_sourceUnique(self):
        """Check isSourceUnique against lists with and without the exact url.

        The url is expected to be reported unique when the list holds only
        differently-spelled entries, and not unique when the list contains
        the identical string.
        """
        candidate = 'www.google.com'
        # No entry is character-for-character equal to the candidate.
        spelled_differently = ['google.com', 'http://google.com']
        # First entry is the candidate itself.
        contains_candidate = ['www.google.com', 'something.net']

        seen_as_unique = UrlComparator.isSourceUnique(candidate, spelled_differently)
        self.assertTrue(seen_as_unique)

        seen_as_duplicate = UrlComparator.isSourceUnique(candidate, contains_candidate)
        self.assertFalse(seen_as_duplicate)
示例#3
0
 def test_normunique(self):
     """Check isNormalizeUnique for identical, fragment-only and unrelated urls.

     Expected outcomes: not unique against the identical url, not unique
     against a url that differs only in its '#fragment', and unique
     against the bare 'wikipedia.org' entry.
     """
     target = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'

     # Each case pairs a comparison list with the assertion it must satisfy.
     cases = (
         # exactly the same url
         (['http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'],
          self.assertFalse),
         # same url apart from the fragment
         (['http://en.wikipedia.org/wiki/Unit_testing#Language-'],
          self.assertFalse),
         # a different url
         (['wikipedia.org'], self.assertTrue),
     )

     for others, check in cases:
         check(UrlComparator.isNormalizeUnique(target, others))
示例#4
0
    def test_normunique(self):
        """Check isNormalizeUnique for identical, fragment-only and unrelated urls.

        Expected outcomes: not unique against the identical url, not unique
        against a url that differs only in its '#fragment', and unique
        against the bare 'wikipedia.org' entry.
        """
        target = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'

        # Each case pairs a comparison list with the assertion it must satisfy.
        cases = (
            # exactly the same url
            (['http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'],
             self.assertFalse),
            # same url apart from the fragment
            (['http://en.wikipedia.org/wiki/Unit_testing#Language-'],
             self.assertFalse),
            # a different url
            (['wikipedia.org'], self.assertTrue),
        )

        for others, check in cases:
            check(UrlComparator.isNormalizeUnique(target, others))
示例#5
0
 def test_wikiExample(self):
     """Two Wikipedia urls differing only in fragment should compare as 0."""
     with_limitations_anchor = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'
     with_language_anchor = 'http://en.wikipedia.org/wiki/Unit_testing#Language-'

     wanted = 0
     outcome = UrlComparator.compareNormalizeUrl(with_limitations_anchor,
                                                 with_language_anchor)

     # Custom failure text showing both values.
     failure_text = ''.join(['expected: ', str(wanted), ', actual: ', str(outcome)])
     self.assertEqual(wanted, outcome, failure_text)
示例#6
0
    def test_wikiExample(self):
        """Two Wikipedia urls differing only in fragment should compare as 0."""
        with_limitations_anchor = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'
        with_language_anchor = 'http://en.wikipedia.org/wiki/Unit_testing#Language-'

        wanted = 0
        outcome = UrlComparator.compareNormalizeUrl(with_limitations_anchor,
                                                    with_language_anchor)

        # Custom failure text showing both values.
        failure_text = ''.join(['expected: ', str(wanted), ', actual: ', str(outcome)])
        self.assertEqual(wanted, outcome, failure_text)
示例#7
0
 # Drop empty / whitespace-only entries. A list comprehension (instead of
 # filter()) always yields a real list, which the slice copy and .remove()
 # below depend on regardless of Python version.
 urls = [s for s in urls if s.strip()]

 # Print a short report for every remaining url.
 for url in urls:
     # Validate the url; the validator instance is reused by the
     # canonicalizer further down.
     uv = UrlValidator()
     isValid = uv.validate(url)

     # Copy of the list with one occurrence of the current url removed, so
     # the url is only compared against the other entries.
     wo_url_in_urls = urls[:]
     wo_url_in_urls.remove(url)

     # Canonical form and canonical uniqueness stay None for invalid urls.
     normURL = None
     isSrcUnique = UrlComparator.isSourceUnique(url, wo_url_in_urls)
     isNormUnique = None

     if isValid:
         uc = UrlCanonicalizer()
         normURL = uc.canonicalizerValidator(uv)
         isNormUnique = UrlComparator.isNormalizeUnique(url, wo_url_in_urls, False)

     # Single-argument print(...) produces the same output whether it is
     # parsed as a Python 2 statement or a Python 3 function call.
     print('Source: ' + url)
     print('Valid: ' + str(isValid))
     print('Canonical: ' + ('None' if normURL is None else normURL))
     print('Source unique: ' + str(isSrcUnique))
     print('Canonicalized URL unique: ' + ('N/A' if isNormUnique is None else str(isNormUnique)))

     print('')
 
示例#8
0
 def test_normalizedWWWDotDifferentUrl(self):
     """'www.google.com' should order before 'nba.com' when normalized.

     NOTE(review): presumably normalization ignores the leading 'www.',
     so the hosts compare as 'google.com' vs 'nba.com' -- confirm against
     UrlComparator.
     """
     with_www = 'www.google.com'
     without_www = 'nba.com'

     ordering = UrlComparator.compareNormalizeUrl(with_www, without_www)
     self.assertTrue(ordering < 0)
示例#9
0
 def test_sourcecomparison(self):
     """'www.google.com' should order after 'nba.com' as source urls.

     NOTE(review): presumably the source comparison looks at the raw
     strings, where 'w' sorts after 'n' -- confirm against UrlComparator.
     """
     with_www = 'www.google.com'
     without_www = 'nba.com'

     ordering = UrlComparator.compareSourceUrl(with_www, without_www)
     self.assertTrue(ordering > 0)
示例#10
0
    def test_caseSensitiveCases(self):
        """Paths differing only in letter case must not compare as equal.

        The capitalised '/Images' path is expected to order before the
        lower-case '/images' path.
        """
        capitalised_path = 'www.google.com/Images'
        lowercase_path = 'www.google.com/images'

        ordering = UrlComparator.compareNormalizeUrl(capitalised_path, lowercase_path)
        self.assertTrue(ordering < 0)
示例#11
0
    def test_normalizedEqualDifferentQueryUrl(self):
        """Same query parameters in a different order/separator compare as 0.

        One url separates its parameters with ';' and lists q first, the
        other uses '&' and lists id first.
        """
        semicolon_query = 'www.google.com/?q=cse403;id=1'
        ampersand_query = 'www.google.com/?id=1&q=cse403'

        ordering = UrlComparator.compareNormalizeUrl(semicolon_query, ampersand_query)
        self.assertTrue(ordering == 0)
示例#12
0
    def test_normalizedWWWDotDifferentUrl(self):
        """'www.google.com' should order before 'nba.com' when normalized.

        NOTE(review): presumably normalization ignores the leading 'www.',
        so the hosts compare as 'google.com' vs 'nba.com' -- confirm against
        UrlComparator.
        """
        with_www = 'www.google.com'
        without_www = 'nba.com'

        ordering = UrlComparator.compareNormalizeUrl(with_www, without_www)
        self.assertTrue(ordering < 0)
示例#13
0
    def test_normalGreaterLesser(self):
        """Normalized comparison must be consistent in both argument orders.

        'www.google.com' is expected to order before 'www.nba.com', and
        swapping the arguments is expected to flip the sign.
        """
        google = 'www.google.com'
        nba = 'www.nba.com'

        forward = UrlComparator.compareNormalizeUrl(google, nba)
        self.assertTrue(forward < 0)

        backward = UrlComparator.compareNormalizeUrl(nba, google)
        self.assertTrue(backward > 0)
示例#14
0
 def test_normalGreaterLesser(self):
     """Normalized comparison must be consistent in both argument orders.

     'www.google.com' is expected to order before 'www.nba.com', and
     swapping the arguments is expected to flip the sign.
     """
     google = 'www.google.com'
     nba = 'www.nba.com'

     forward = UrlComparator.compareNormalizeUrl(google, nba)
     self.assertTrue(forward < 0)

     backward = UrlComparator.compareNormalizeUrl(nba, google)
     self.assertTrue(backward > 0)
示例#15
0
 def test_normalizedEqualDifferentQueryUrl(self):
     """Same query parameters in a different order/separator compare as 0.

     One url separates its parameters with ';' and lists q first, the
     other uses '&' and lists id first.
     """
     semicolon_query = 'www.google.com/?q=cse403;id=1'
     ampersand_query = 'www.google.com/?id=1&q=cse403'

     ordering = UrlComparator.compareNormalizeUrl(semicolon_query, ampersand_query)
     self.assertTrue(ordering == 0)
示例#16
0
 def test_caseSensitiveCases(self):
     """Paths differing only in letter case must not compare as equal.

     The capitalised '/Images' path is expected to order before the
     lower-case '/images' path.
     """
     capitalised_path = 'www.google.com/Images'
     lowercase_path = 'www.google.com/images'

     ordering = UrlComparator.compareNormalizeUrl(capitalised_path, lowercase_path)
     self.assertTrue(ordering < 0)
示例#17
0
 def test_sourcecomparison(self):
     """'www.google.com' should order after 'nba.com' as source urls.

     NOTE(review): presumably the source comparison looks at the raw
     strings, where 'w' sorts after 'n' -- confirm against UrlComparator.
     """
     with_www = 'www.google.com'
     without_www = 'nba.com'

     ordering = UrlComparator.compareSourceUrl(with_www, without_www)
     self.assertTrue(ordering > 0)