示例#1
0
  def testParseFromString(self):
    """Parse a full URL and check each component, then the round trip."""
    original_url = "http://google.com:443/search?query=hi#anchor2"
    parsed = rdf_standard.URI()
    parsed.ParseFromString(original_url)

    # Component name -> value expected after parsing original_url.
    expected_components = (
        ("transport", "http"),
        ("host", "google.com:443"),
        ("path", "/search"),
        ("query", "query=hi"),
        ("fragment", "anchor2"),
    )
    for component, expected_value in expected_components:
      self.assertEqual(getattr(parsed, component), expected_value)

    # Serializing the parsed value must reproduce the input string exactly.
    self.assertEqual(parsed.SerializeToString(), original_url)
示例#2
0
    def testURI(self):
        """Build a URI from keyword fields and check fields and serialization."""
        # Listed once so construction and verification cannot drift apart.
        expected_fields = (
            ("transport", "http"),
            ("host", "google.com"),
            ("path", "/index"),
            ("query", "q=hi"),
            ("fragment", "anchor1"),
        )
        built = rdf_standard.URI(**dict(expected_fields))

        for field_name, expected_value in expected_fields:
            self.assertEqual(getattr(built, field_name), expected_value)

        expected_url = "http://google.com/index?q=hi#anchor1"
        self.assertEqual(built.SerializeToString(), expected_url)
示例#3
0
    def Parse(self, stat, file_obj, unused_knowledge_base):
        """Yield one AttributedDict holding the file path and its usable URIs.

        Candidates come from ``self.FindPotentialURIs(file_obj)``; each one is
        parsed into an ``rdf_standard.URI`` and kept only if it has a
        transport and at least one of host or path.

        Args:
            stat: Object whose ``pathspec.path`` names the parsed file.
            file_obj: Passed through to ``FindPotentialURIs``.
            unused_knowledge_base: Not used by this method.

        Yields:
            A single ``rdf_protodict.AttributedDict`` with ``filename`` and
            ``uris`` entries.
        """
        candidates = self.FindPotentialURIs(file_obj)
        accepted = []

        for candidate in candidates:
            parsed = rdf_standard.URI()
            parsed.ParseFromString(candidate)

            # Without a transport the candidate was not really a URL.
            if not parsed.transport:
                continue
            # A transport alone is not enough: a host or a path must exist.
            if not (parsed.host or parsed.path):
                continue
            accepted.append(parsed)

        source_path = stat.pathspec.path
        yield rdf_protodict.AttributedDict(filename=source_path, uris=accepted)
示例#4
0
 def GenerateSample(self, number=0):
   """Return an http URI whose host is ``<number>.example.com``."""
   sample_host = "%s.example.com" % number
   return rdf_standard.URI(transport="http", host=sample_host)