示例#1
0
 def test_all(self):
     """Check one URL with every component and one malformed URL.

     The first URL carries a scheme, host, port, path, query string and
     fragment and must be accepted; the second must be rejected.
     """
     full_url = "http://google.com:80/blah/blah?this=test&blah&blah=#icky"
     self.assertTrue(is_valid_url(full_url))

     malformed_url = "htt?p://google.z:800/bla()/?this is a = test#blah test"
     self.assertFalse(is_valid_url(malformed_url))
示例#2
0
 def test_with_port(self):
     """Check URLs that include a port separator.

     Port 8000 must be accepted; an empty port and port 100000 must both
     be rejected.
     """
     self.assertTrue(is_valid_url("http://google.com:8000"))
     for rejected in ("http://google.com:", "http://google.com:100000"):
         self.assertFalse(is_valid_url(rejected))
示例#3
0
 def test_scheme_and_domain_is_valid_url(self):
     """Check that URLs made of only a scheme and a domain are accepted."""
     scheme_and_domain_urls = (
         "http://google.com",
         "http://www.google.com",
         "https://google.com",
     )
     for accepted in scheme_and_domain_urls:
         self.assertTrue(is_valid_url(accepted))
示例#4
0
 def test_empty(self):
     """Check that the empty string is rejected."""
     empty_url = ""
     self.assertFalse(is_valid_url(empty_url))
示例#5
0
 def test_all(self):
   """Accept a URL with every component present; reject a malformed one."""
   accepted = is_valid_url(
       "http://google.com:80/blah/blah?this=test&blah&blah=#icky")
   self.assertTrue(accepted)

   rejected = is_valid_url(
       "htt?p://google.z:800/bla()/?this is a = test#blah test")
   self.assertFalse(rejected)
示例#6
0
 def test_with_port(self):
   """Accept port 8000; reject an empty port and the port 100000."""
   four_digit_port = "http://google.com:8000"
   empty_port = "http://google.com:"
   six_digit_port = "http://google.com:100000"

   self.assertTrue(is_valid_url(four_digit_port))
   self.assertFalse(is_valid_url(empty_port))
   self.assertFalse(is_valid_url(six_digit_port))
示例#7
0
 def test_scheme_and_domain_is_valid_url(self):
   """URLs consisting of just a scheme and a domain must validate."""
   candidates = [
       "http://google.com",
       "http://www.google.com",
       "https://google.com",
   ]
   for candidate in candidates:
     self.assertTrue(is_valid_url(candidate))
示例#8
0
 def test_empty(self):
   """The empty string must not validate."""
   outcome = is_valid_url("")
   self.assertFalse(outcome)
示例#9
0
    # Read the whole input at once and split it into lines; splitlines()
    # drops the line terminators.
    try:
        lines = infile.read().splitlines()
    except IOError, e:
        # Report the read failure through the parser object.
        # NOTE(review): presumably an argparse/optparse parser whose error()
        # exits the program; otherwise `lines` is unbound below -- confirm.
        parser.error(str(e))
    finally:
        # Always release the input handle, whether or not the read succeeded.
        infile.close()
        infile = None

    # Tally how often each raw line and each canonicalized line occurs so the
    # report below can say whether an entry is unique.
    source_counter = Counter(lines)
    # canonical_lines is indexed by position in the loop below, so map() has
    # to return a list here (Python 2 behavior, matching the print statements).
    canonical_lines = map(canonicalize, lines)
    canonical_counter = Counter(canonical_lines)

    # Print a report for every input line, in input order.
    for index in range(0, len(lines)):
        url = lines[index]
        curl = canonical_lines[index]
        valid = is_valid_url(url)
        print "Source: " + url
        print "Valid: " + ("true" if valid else "false")
        # The canonical form is only reported for URLs that validated.
        if valid:
            print "Canonical: " + curl
        print "Source unique: " + ("true"
                                   if source_counter[url] == 1 else "false")
        # Uniqueness of the canonical form is likewise only reported for
        # valid URLs.
        if valid:
            print "Canonicalized URL unique: " + (
                "true" if canonical_counter[curl] == 1 else "false")

    # write output
    # NOTE(review): the entries in `lines` come from splitlines() and carry no
    # line terminator, and write() adds none, so the source lines are written
    # back as one unbroken run of text -- confirm this is intended.
    try:
        for line in lines:
            outfile.write(line)
    except IOError, e:
示例#10
0
  # Read the whole input at once and split it into lines; splitlines() drops
  # the line terminators.
  try:
    lines = infile.read().splitlines()
  except IOError, e:
    # Report the read failure through the parser object.
    # NOTE(review): presumably an argparse/optparse parser whose error() exits
    # the program; otherwise `lines` is unbound below -- confirm.
    parser.error(str(e))
  finally:
    # Always release the input handle, whether or not the read succeeded.
    infile.close()
    infile = None

  # Tally how often each raw line and each canonicalized line occurs so the
  # report below can say whether an entry is unique.
  source_counter = Counter(lines)
  # canonical_lines is indexed by position in the loop below, so map() has to
  # return a list here (Python 2 behavior, matching the print statements).
  canonical_lines = map(canonicalize, lines)
  canonical_counter = Counter(canonical_lines)

  # Print a report for every input line, in input order.
  for index in range(0, len(lines)):
    url = lines[index]
    curl = canonical_lines[index]
    valid = is_valid_url(url)
    print "Source: " + url
    print "Valid: " + ("true" if valid else "false")
    # The canonical form is only reported for URLs that validated.
    if valid:
      print "Canonical: " + curl
    print "Source unique: " + ("true" if source_counter[url] == 1 else "false")
    # Uniqueness of the canonical form is likewise only reported for valid
    # URLs.
    if valid:
      print "Canonicalized URL unique: " + ("true" if canonical_counter[curl] == 1 else "false")

  # write output
  # NOTE(review): the entries in `lines` come from splitlines() and carry no
  # line terminator, and write() adds none, so the source lines are written
  # back as one unbroken run of text -- confirm this is intended.
  try:
    for line in lines:
      outfile.write(line)
  except IOError, e:
    # Report the write failure through the parser object.
    parser.error(str(e))
  finally: