Пример #1
0
    def get(self):
        """Answer with JSON metadata scraped from the page at ``url``.

        Reads the ``url`` request parameter, fetches it with a 10 second
        deadline and, when the fetch returns HTTP 200, searches the body
        case-insensitively with the ``title`` and ``image`` patterns.  The
        first capture group of each match is stored under ``"name"`` and
        ``"image"`` respectively.

        On every path that does not raise, the response body is the JSON
        encoding of the collected dict (possibly empty).  A non-200 upstream
        status is copied onto the response; a missing or empty ``url``
        parameter is answered with 400 instead of being handed to the
        fetcher.
        """
        url = self.request.get("url")
        meta = {}

        if not url:
            # Nothing to fetch: report a client error rather than passing an
            # empty URL to urlfetch.
            self.response.status_code = 400
        else:
            # NOTE(review): urlfetch.fetch presumably raises on malformed
            # URLs and network failures; those still propagate -- confirm
            # whether they should be mapped to an error response as well.
            fetcheddata = urlfetch.fetch(url, deadline=10)

            if fetcheddata.status_code == 200:
                html = fetcheddata.content

                # re.search stops at the first occurrence, so any later
                # title in the document is ignored.
                titleMatch = re.search(title, html, flags=re.IGNORECASE)
                if titleMatch:
                    meta["name"] = titleMatch.group(1)

                imageMatch = re.search(image, html, flags=re.IGNORECASE)
                if imageMatch:
                    # NOTE(review): the "http:" prefix presumes the image
                    # pattern captures a protocol-relative ("//host/...")
                    # URL -- confirm against the pattern definition.
                    meta["image"] = "http:" + imageMatch.group(1)
            else:
                # Mirror the upstream failure status to the caller.
                self.response.status_code = fetcheddata.status_code

        self.response.headers['Content-Type'] = "application/json"
        self.response.out.write(simplejson2.dumps(meta, ensure_ascii=False))
Пример #2
0
  def get(self):
    """Answer with JSON metadata (name, description, icons) for ``url``.

    Reads the ``url`` request parameter and fetches it with a 10 second
    deadline.  The collected dict always records ``"web_url"`` (the
    fetcher's ``final_url`` when set, otherwise the requested URL) and
    ``"urls"`` (a one-element list holding that same value).  When the fetch
    returns HTTP 200 the body is searched case-insensitively for:

    * ``title``       -> ``"name"``: first capture group of the first match.
    * ``description`` -> ``"description"``: third capture group of the first
      match, cut to at most 132 characters.
    * ``favicon``     -> ``"icons"``: a dict built from the pairs that
      ``parseFavIcon`` returns for every match.

    The upstream status code is copied onto the response whether or not it
    is 200.  A missing or empty ``url`` parameter is answered with 400 and
    an empty JSON object instead of being handed to the fetcher.
    """
    url = self.request.get("url")
    meta = {}

    if not url:
      # Nothing to fetch: report a client error rather than passing an
      # empty URL to urlfetch.
      self.response.status_code = 400
    else:
      # NOTE(review): urlfetch.fetch presumably raises on malformed URLs and
      # network failures; those still propagate -- confirm whether they
      # should be mapped to an error response as well.
      fetcheddata = urlfetch.fetch(url, deadline=10)
      meta["web_url"] = fetcheddata.final_url or url
      meta["urls"] = [meta["web_url"]]

      # Scheme + host of the page, passed to parseFavIcon together with
      # each favicon match.
      urlInfo = urlparse(meta["web_url"])
      baseUrl = urlInfo.scheme + "://" + urlInfo.netloc + "/"

      if fetcheddata.status_code == 200:
        html = fetcheddata.content

        # re.search stops at the first occurrence, so any later title in
        # the document is ignored.
        titleMatch = re.search(title, html, flags=re.IGNORECASE)
        if titleMatch:
          meta["name"] = titleMatch.group(1)

        descriptionMatch = re.search(description, html, flags=re.IGNORECASE)
        if descriptionMatch:
          meta["description"] = descriptionMatch.group(3)[0:132]

        # re.finditer only yields match objects, so no None filtering is
        # needed; dict() consumes the (key, value) pairs directly.
        meta["icons"] = dict(
          parseFavIcon(baseUrl, m)
          for m in re.finditer(favicon, html, flags=re.IGNORECASE))

      # The upstream status is mirrored unconditionally, including 200.
      self.response.status_code = fetcheddata.status_code

    self.response.headers['Content-Type'] = "application/json"
    self.response.out.write(simplejson2.dumps(meta, ensure_ascii=False))
Пример #3
0
  def get(self):
    """Answer with JSON metadata scraped from the page at ``url``.

    Reads the ``url`` request parameter, fetches it with a 10 second
    deadline and, when the fetch returns HTTP 200, searches the body
    case-insensitively with the ``title`` and ``image`` patterns.  The first
    capture group of each match is stored under ``"name"`` and ``"image"``
    respectively.

    On every path that does not raise, the response body is the JSON
    encoding of the collected dict (possibly empty).  A non-200 upstream
    status is copied onto the response; a missing or empty ``url`` parameter
    is answered with 400 instead of being handed to the fetcher.
    """
    url = self.request.get("url")
    meta = {}

    if not url:
      # Nothing to fetch: report a client error rather than passing an
      # empty URL to urlfetch.
      self.response.status_code = 400
    else:
      # NOTE(review): urlfetch.fetch presumably raises on malformed URLs and
      # network failures; those still propagate -- confirm whether they
      # should be mapped to an error response as well.
      fetcheddata = urlfetch.fetch(url, deadline=10)

      if fetcheddata.status_code == 200:
        html = fetcheddata.content

        # re.search stops at the first occurrence, so any later title in
        # the document is ignored.
        titleMatch = re.search(title, html, flags=re.IGNORECASE)
        if titleMatch:
          meta["name"] = titleMatch.group(1)

        imageMatch = re.search(image, html, flags=re.IGNORECASE)
        if imageMatch:
          # NOTE(review): the "http:" prefix presumes the image pattern
          # captures a protocol-relative ("//host/...") URL -- confirm
          # against the pattern definition.
          meta["image"] = "http:" + imageMatch.group(1)
      else:
        # Mirror the upstream failure status to the caller.
        self.response.status_code = fetcheddata.status_code

    self.response.headers['Content-Type'] = "application/json"
    self.response.out.write(simplejson2.dumps(meta, ensure_ascii=False))