Пример #1
0
    def get_auto_refresh_url(self, page):
        """Return the target of a meta-refresh tag found inside ``<noscript>``.

        The page is lower-cased, the ``<noscript>...</noscript>`` section
        returned by ``get_in`` is scanned for ``<meta ... />`` tags, and the
        first tag with ``http-equiv="refresh"`` and a well-formed
        ``content="<delay>;url=<target>"`` attribute wins.  Before returning,
        the method sleeps for ``<delay>`` seconds when the delay exceeds one
        millisecond, and the target is passed through ``iso_to_char``.

        Malformed refresh tags (no ``content`` attribute, no ``;`` separator,
        no ``=``, a key other than ``url``, or a non-numeric delay) are
        skipped instead of raising.

        Args:
            page: HTML text of the page to inspect; ``None`` is tolerated.

        Returns:
            The refresh target, or ``None`` when no usable tag is present.
        """
        if page is None:
            return None
        # NOTE(review): all matching is done on the lower-cased page, so the
        # returned URL is lower-cased as well -- confirm that targets are not
        # case-sensitive.
        page = page.lower()
        noscript = get_in(page, '<noscript>', '</noscript>')
        if not noscript:
            return None
        for meta in get_in_list(noscript, '<meta', '/>'):
            if get_in(meta, 'http-equiv="', '"') != 'refresh':
                continue
            refresh = get_in(meta, 'content="', '"')
            # Presumably get_in yields a falsy value when the attribute is
            # missing (the noscript check above relies on the same thing).
            if not refresh:
                continue
            delay, semicolon, newurl_spec = refresh.partition(";")
            key, equals, url = newurl_spec.partition("=")
            # Both separators must be present; previously a spec without "="
            # left ``key``/``url`` unbound and crashed on the next line.
            if not semicolon or not equals or key.strip().lower() != "url":
                continue
            try:
                pause = float(delay)
            except ValueError:
                # Non-numeric delay: treat the tag as malformed.
                continue

            # Honour the delay requested by the page before following it.
            if pause > 1E-3:
                time.sleep(pause)
            return iso_to_char(url)
        return None
Пример #2
0
 def get_auto_refresh_url(self, page):
     """Return the target of a meta-refresh tag found inside ``<noscript>``.

     The page is lower-cased, the ``<noscript>...</noscript>`` section
     returned by ``get_in`` is scanned for ``<meta ... />`` tags, and the
     first tag with ``http-equiv="refresh"`` and a well-formed
     ``content="<delay>;url=<target>"`` attribute wins.  Before returning,
     the method sleeps for ``<delay>`` seconds when the delay exceeds one
     millisecond, and the target is passed through ``iso_to_char``.

     Malformed refresh tags (no ``content`` attribute, no ``;`` separator,
     no ``=``, a key other than ``url``, or a non-numeric delay) are
     skipped instead of raising.

     Args:
         page: HTML text of the page to inspect; ``None`` is tolerated.

     Returns:
         The refresh target, or ``None`` when no usable tag is present.
     """
     if page is None:
         return None
     # NOTE(review): all matching is done on the lower-cased page, so the
     # returned URL is lower-cased as well -- confirm that targets are not
     # case-sensitive.
     page = page.lower()
     noscript = get_in(page, '<noscript>', '</noscript>')
     if not noscript:
         return None
     for meta in get_in_list(noscript, '<meta', '/>'):
         if get_in(meta, 'http-equiv="', '"') != 'refresh':
             continue
         refresh = get_in(meta, 'content="', '"')
         # Presumably get_in yields a falsy value when the attribute is
         # missing (the noscript check above relies on the same thing).
         if not refresh:
             continue
         delay, semicolon, newurl_spec = refresh.partition(";")
         key, equals, url = newurl_spec.partition("=")
         # Both separators must be present; previously a spec without "="
         # left ``key``/``url`` unbound and crashed on the next line.
         if not semicolon or not equals or key.strip().lower() != "url":
             continue
         try:
             pause = float(delay)
         except ValueError:
             # Non-numeric delay: treat the tag as malformed.
             continue

         # Honour the delay requested by the page before following it.
         if pause > 1E-3:
             time.sleep(pause)
         return iso_to_char(url)
     return None
Пример #3
0
    def auto_refresh(self, res):
        """Follow an automatic redirect embedded in a fetched page, if any.

        Two mechanisms are recognised, both matched against a lower-cased
        copy of ``res``:

        * an ``onload="javascript:dosubmit();"`` handler -- the original
          page is handed to ``self.web.submit`` and its result returned;
        * the first ``<meta ... />`` tag containing ``http-equiv="refresh"``
          with ``content="<delay>;url=<target>"`` -- the method sleeps for
          ``<delay>`` seconds when it exceeds one millisecond, then returns
          ``self.get_page`` of the target (after ``iso_to_char``).

        Only the first refresh tag is considered.  If it is malformed (no
        ``content`` attribute, no ``;`` separator, no ``=``, a key other
        than ``url``, or a non-numeric delay) the page is returned
        unchanged instead of raising.

        Args:
            res: Page text as previously fetched, or ``None``.

        Returns:
            ``None`` when ``res`` is ``None``; otherwise the result of the
            followed redirect, or ``res`` itself when there is nothing
            usable to follow.
        """
        if res is None:
            return None
        lower_res = res.lower()
        if 'onload="javascript:dosubmit();"' in lower_res:
            return self.web.submit(res)

        # NOTE(review): the target is extracted from the lower-cased text,
        # so the URL that gets fetched is lower-cased too -- confirm that
        # targets are not case-sensitive.
        for meta in get_in_list(lower_res, '<meta', '/>'):
            if 'http-equiv="refresh"' not in meta:
                continue
            refresh = get_in(meta, 'content="', '"')
            # Presumably get_in yields a falsy value when the attribute is
            # missing -- TODO confirm against its definition.
            if not refresh:
                return res
            delay, semicolon, newurl_spec = refresh.partition(";")
            key, equals, newurl = newurl_spec.partition("=")
            # Both separators must be present; previously a spec without "="
            # left ``key``/``newurl`` unbound and crashed on the next line.
            if not semicolon or not equals or key.strip().lower() != "url":
                return res
            try:
                pause = float(delay)
            except ValueError:
                # Non-numeric delay: treat the tag as malformed.
                return res

            # Honour the delay requested by the page before following it.
            if pause > 1E-3:
                time.sleep(pause)
            return self.get_page(iso_to_char(newurl))
        return res
Пример #4
0
 def auto_refresh(self, res):
     """Follow an automatic redirect embedded in a fetched page, if any.

     Two mechanisms are recognised, both matched against a lower-cased
     copy of ``res``:

     * an ``onload="javascript:dosubmit();"`` handler -- the original
       page is handed to ``self.web.submit`` and its result returned;
     * the first ``<meta ... />`` tag containing ``http-equiv="refresh"``
       with ``content="<delay>;url=<target>"`` -- the method sleeps for
       ``<delay>`` seconds when it exceeds one millisecond, then returns
       ``self.get_page`` of the target (after ``iso_to_char``).

     Only the first refresh tag is considered.  If it is malformed (no
     ``content`` attribute, no ``;`` separator, no ``=``, a key other
     than ``url``, or a non-numeric delay) the page is returned
     unchanged instead of raising.

     Args:
         res: Page text as previously fetched, or ``None``.

     Returns:
         ``None`` when ``res`` is ``None``; otherwise the result of the
         followed redirect, or ``res`` itself when there is nothing
         usable to follow.
     """
     if res is None:
         return None
     lower_res = res.lower()
     if 'onload="javascript:dosubmit();"' in lower_res:
         return self.web.submit(res)

     # NOTE(review): the target is extracted from the lower-cased text,
     # so the URL that gets fetched is lower-cased too -- confirm that
     # targets are not case-sensitive.
     for meta in get_in_list(lower_res, '<meta', '/>'):
         if 'http-equiv="refresh"' not in meta:
             continue
         refresh = get_in(meta, 'content="', '"')
         # Presumably get_in yields a falsy value when the attribute is
         # missing -- TODO confirm against its definition.
         if not refresh:
             return res
         delay, semicolon, newurl_spec = refresh.partition(";")
         key, equals, newurl = newurl_spec.partition("=")
         # Both separators must be present; previously a spec without "="
         # left ``key``/``newurl`` unbound and crashed on the next line.
         if not semicolon or not equals or key.strip().lower() != "url":
             return res
         try:
             pause = float(delay)
         except ValueError:
             # Non-numeric delay: treat the tag as malformed.
             return res

         # Honour the delay requested by the page before following it.
         if pause > 1E-3:
             time.sleep(pause)
         return self.get_page(iso_to_char(newurl))
     return res