def __init__(self, url, proxy=None, trace=False):
    """Set up an authenticated urllib2 opener for the server at `url`.

    Args:
      url: base server URL; a trailing '/' is appended if missing.
      proxy: optional proxy mapping passed to urllib2.ProxyHandler.
      trace: stored on self._trace for later use by request code.
    """
    if not url.endswith('/'):
        url = url + '/'
    self.url = url
    # Locate a per-user directory for the persistent cookie file:
    # Windows APPDATA first, then the older "Local Settings" layout,
    # then Unix HOME; fall back to the current working directory.
    if 'APPDATA' in os.environ:
        homepath = os.environ["APPDATA"]
    elif 'USERPROFILE' in os.environ:
        homepath = os.path.join(os.environ["USERPROFILE"],
                                "Local Settings", "Application Data")
    elif 'HOME' in os.environ:
        homepath = os.environ["HOME"]
    else:
        homepath = ''
    self.cookie_file = os.path.join(homepath, ".post-review-cookies.txt")
    self._cj = cookielib.MozillaCookieJar(self.cookie_file)
    self._password_mgr = ReviewBoardHTTPPasswordMgr(self.url)
    # Build an opener with proxy, cookie, and basic/digest auth support.
    # HttpErrorHandler and ReviewBoardHTTPPasswordMgr are project-local
    # classes defined elsewhere in this file/package.
    self._opener = opener = urllib2.build_opener(
        urllib2.ProxyHandler(proxy),
        urllib2.UnknownHandler(),
        urllib2.HTTPHandler(),
        HttpErrorHandler(),
        urllib2.HTTPErrorProcessor(),
        urllib2.HTTPCookieProcessor(self._cj),
        urllib2.HTTPBasicAuthHandler(self._password_mgr),
        urllib2.HTTPDigestAuthHandler(self._password_mgr))
    # Install globally so plain urllib2.urlopen() calls also use this opener.
    urllib2.install_opener(self._opener)
    self._trace = trace
def main(output_dir):
    """Render served .soy templates to HTML and copy static resources.

    Walks the current directory; for every non-private .soy file it fetches
    the corresponding .html page from the local dev server on port 9811 and
    hands the content to copy_to_output_dir (defined elsewhere).  .css and
    .js files are copied verbatim.

    Args:
      output_dir: destination directory passed through to copy_to_output_dir.
    """
    # Create opener.
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    # Iterate over the files in the docs directory and copy them, as
    # appropriate.
    for root, dirs, files in os.walk('.'):
        for file_name in files:
            if file_name.endswith('.soy') and not file_name.startswith('__'):
                # Strip the './' prefix, if appropriate.  (Rebinding `root`
                # here is harmless: the strip is idempotent for the rest of
                # this directory's files.)
                if root.startswith('./'):
                    root = root[2:]
                # Construct the URL where the .soy file is being served.
                soy_file = file_name
                html_file = root + '/' + soy_file[:-len('.soy')] + '.html'
                url = 'http://localhost:9811/' + html_file
                # Fetch url and copy its contents to output_dir.
                req = urllib2.Request(url)
                res = opener.open(req)
                html = res.read()
                copy_to_output_dir(html_file, output_dir, html)
            elif file_name.endswith('.css') or file_name.endswith('.js'):
                # Copy the static resource to output_dir.
                relative_path = os.path.join(root, file_name)
                with open(relative_path) as resource_file:
                    resource = resource_file.read()
                copy_to_output_dir(relative_path, output_dir, resource)
def request_url(self, url, retry_times=20):
    """Fetch `url`, retrying up to `retry_times` times with a randomly
    chosen header set and (when several are configured) a random proxy.

    The decoded page content is accumulated in `pageContent`; on failure
    `msg` records the last error.  (This excerpt ends at the retry loop;
    the function presumably returns these values further down — not
    visible here.)
    """
    pageContent = None
    msg = ''
    while not pageContent and retry_times > 0:
        retry_times -= 1
        header = random.choice(self.headers)
        proxy = {}
        # NOTE(review): with exactly one configured proxy this `> 1` test
        # means the proxy is never used — confirm whether `>= 1` was
        # intended.
        if len(self.proxies) > 1:
            proxy = random.choice(self.proxies)
        proxyAdded = urllib2.ProxyHandler(proxy)
        cj = cookielib.CookieJar()
        ck_handler = urllib2.HTTPCookieProcessor(cj)
        if not self.enable_cookie:
            # Swap in an inert handler so no cookies are stored.
            ck_handler = urllib2.UnknownHandler()
        proxyOpener = urllib2.build_opener(proxyAdded, ck_handler)
        proxyOpener.addheaders = header
        try:
            pageContent = self.decode_page(
                proxyOpener.open(url, timeout=20.0))
            #print pageContent
        except Exception, e:
            #print "error in request url :", trace_info(),proxy,url
            msg = "request url error: " + trace_info()
            pageContent = None
def get_opener(cookiejar=None):
    """Assemble a urllib2 opener with proxy/http/https support.

    No HTTPRedirectHandler is installed, so redirects are not followed.

    Args:
      cookiejar: optional cookie jar; when given, a cookie processor is
        appended so responses can set cookies on it.

    Returns:
      A urllib2.OpenerDirector instance.
    """
    handlers = [
        urllib2.ProxyHandler(),
        urllib2.UnknownHandler(),
        urllib2.HTTPHandler(),
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPErrorProcessor(),
        urllib2.HTTPSHandler(),
    ]
    if cookiejar:
        handlers.append(urllib2.HTTPCookieProcessor(cookiejar))
    director = urllib2.OpenerDirector()
    for handler in handlers:
        director.add_handler(handler)
    return director
def _GetOpener(self):
    """Build the opener used by the authentication code.

    Authentication code needs to see 302 responses itself, so the
    OpenerDirector is assembled WITHOUT an HTTPRedirectHandler.

    Returns:
      A urllib2.OpenerDirector object with cookie support.
    """
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    # BUG FIX: the original referenced a bare `cookie_jar`, which is not
    # bound anywhere in this method and would raise NameError at runtime.
    # The sibling _GetOpener implementations in this file use the
    # instance's jar, so do the same here.
    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
    return opener
def __init__(self, user=None, password=None):
    """Initialise the crawl suite: build a cookie-aware opener (with the
    project's RedirectHandler) and authenticate as `user`.
    """
    super(CrawlSuite, self).__init__()
    director = urllib2.OpenerDirector()
    for handler in (urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPSHandler(),
                    urllib2.HTTPCookieProcessor(),
                    RedirectHandler()):
        director.add_handler(handler)
    self.opener = director
    self._authenticate(user, password)
    self.user = user
def __init__(self, proxy, verbose=0):
    """Store settings and build an opener, optionally routed through `proxy`.

    Args:
      proxy: http proxy address, or a falsy value to use the environment
        defaults.
      verbose: verbosity level retained on self.verbose.
    """
    self.proxy = proxy
    self.verbose = verbose
    self.opener = opener = urllib2.OpenerDirector()
    # The proxy handler goes first: explicit if one was supplied,
    # otherwise the environment-based default.
    if proxy:
        proxy_handler = urllib2.ProxyHandler({'http': self.proxy})
    else:
        proxy_handler = urllib2.ProxyHandler()
    opener.add_handler(proxy_handler)
    for handler in (urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPDefaultErrorHandler(),
                    urllib2.HTTPSHandler(),
                    urllib2.HTTPErrorProcessor()):
        opener.add_handler(handler)
def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    opener.add_handler(fancy_urllib.FancyProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    opener.add_handler(ContentEncodingHandler())
    # AUTH_DOMAIN selects the AppScale-specific cookie handling below.
    auth_domain = ''
    if 'AUTH_DOMAIN' in os.environ:
        auth_domain = os.environ['AUTH_DOMAIN'].lower()
    if self.save_cookies:
        if auth_domain == 'appscale':
            # AppScale branch: only ensure the cookie directory exists;
            # the jar's filename is not set here.
            cookies_dir = os.path.expanduser(
                HttpRpcServer.APPSCALE_COOKIE_DIR)
            if not os.path.exists(cookies_dir):
                os.mkdir(cookies_dir)
        else:
            self.cookie_jar.filename = os.path.expanduser(
                HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)
            if os.path.exists(self.cookie_jar.filename):
                try:
                    self.cookie_jar.load()
                    self.authenticated = True
                    logger.debug("Loaded authentication cookies from %s",
                                 self.cookie_jar.filename)
                except (OSError, IOError, cookielib.LoadError), e:
                    # Unreadable or corrupt cookie file: disable
                    # persistence rather than failing.
                    logger.debug(
                        "Could not load authentication cookies; %s: %s",
                        e.__class__.__name__, e)
                    self.cookie_jar.filename = None
            else:
                # Pre-create the cookie file with mode 0600 so saved
                # credentials are never world-readable.
                try:
                    fd = os.open(self.cookie_jar.filename, os.O_CREAT, 0600)
                    os.close(fd)
                except (OSError, IOError), e:
                    logger.debug("Could not create authentication cookies file " + \
                                 "; %s: %s" % (e.__class__.__name__, e))
                    self.cookie_jar.filename = None
def _GetHTTPOpener():
    """Create an http opener used to interact with Google's ClientLogin.

    Returns:
      An http opener capable of handling anything needed to interact with
      Google's ClientLogin: proxies, http and https.
    """
    http_opener = urllib2.OpenerDirector()
    for handler in (urllib2.ProxyHandler(),
                    urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPDefaultErrorHandler(),
                    urllib2.HTTPErrorProcessor(),
                    urllib2.HTTPSHandler()):
        http_opener.add_handler(handler)
    return http_opener
def setUp(self):
    """Enter registry test mode and prepare a session-bound URL opener.

    A fresh server-side session is created and its id is attached to the
    opener as a Cookie header, so every request issued through
    self.opener runs inside a session that is rolled back after the test.
    """
    super(HttpCase, self).setUp()
    self.registry.enter_test_mode()
    # setup a magic session_id that will be rollbacked
    self.session = openerp.http.root.session_store.new()
    self.session_id = self.session.sid
    self.session.db = get_db_name()
    openerp.http.root.session_store.save(self.session)
    # setup an url opener helper
    self.opener = urllib2.OpenerDirector()
    self.opener.add_handler(urllib2.UnknownHandler())
    self.opener.add_handler(urllib2.HTTPHandler())
    self.opener.add_handler(urllib2.HTTPSHandler())
    self.opener.add_handler(urllib2.HTTPCookieProcessor())
    # RedirectHandler is project-local (defined elsewhere in this file).
    self.opener.add_handler(RedirectHandler())
    self.opener.addheaders.append(('Cookie', 'session_id=%s' % self.session_id))
def __init__(self, *args, **kargs):
    """Opener with basic protocol support and a private cookie jar."""
    urllib2.OpenerDirector.__init__(self, *args, **kargs)
    # Basic protocol, redirect, ftp/file and error support.
    for handler in (urllib2.ProxyHandler(),
                    urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPDefaultErrorHandler(),
                    urllib2.HTTPRedirectHandler(),
                    urllib2.FTPHandler(),
                    urllib2.FileHandler(),
                    urllib2.HTTPErrorProcessor()):
        self.add_handler(handler)
    # Cookie support.  (Not strictly needed at the moment, but one never
    # knows when it might come in handy.)
    self.cj = cookielib.CookieJar()
    self.add_handler(urllib2.HTTPCookieProcessor(self.cj))
def _GetOpener(self):
    """Build an OpenerDirector that keeps cookies in self.cookie_jar and
    never follows redirects (no HTTPRedirectHandler is installed).

    Returns:
      A urllib2.OpenerDirector object.
    """
    director = urllib2.OpenerDirector()
    for handler in (urllib2.ProxyHandler(),
                    urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPDefaultErrorHandler(),
                    urllib2.HTTPSHandler(),
                    urllib2.HTTPErrorProcessor(),
                    urllib2.HTTPCookieProcessor(self.cookie_jar)):
        director.add_handler(handler)
    return director
def _build_opener(self):
    """Assemble the opener used for issuing requests and store it on self."""
    director = urllib2.OpenerDirector()
    for handler in (
            # No explicit proxy configured; uses the default behaviour.
            urllib2.ProxyHandler(),
            # Raises URLError for URLs with an unsupported scheme.
            urllib2.UnknownHandler(),
            # Plain http requests.
            urllib2.HTTPHandler(),
            # Turns error responses into HTTPError exceptions.
            urllib2.HTTPDefaultErrorHandler(),
            # https requests.
            urllib2.HTTPSHandler(),
            # Status codes outside [200, 300) are converted into error
            # responses.
            urllib2.HTTPErrorProcessor()):
        director.add_handler(handler)
    self.opener = director
def resolve(self, item, captcha_cb=None, select_cb=None):
    """Fetch the page behind `item`'s URL with a minimal http-only opener.

    Only the retrieval step is visible in this excerpt; on an HTTPError
    the traceback is printed and the method returns early.  `captcha_cb`
    and `select_cb` are not used in the visible portion.
    """
    item = item.copy()
    util.init_urllib()
    url = self._url(item['url'])
    page = ''
    try:
        # Minimal opener: plain http only — no redirects, cookies or
        # proxies — installed globally for the urlopen call below.
        opener = urllib2.OpenerDirector()
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.UnknownHandler())
        urllib2.install_opener(opener)
        request = urllib2.Request(url)
        request.add_header('User-Agent', util.UA)
        response = urllib2.urlopen(request)
        page = response.read()
        response.close()
    except urllib2.HTTPError, e:
        traceback.print_exc()
        return
def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    # fancy_urllib variants add proxy/SSL niceties on top of the stock
    # urllib2 handlers.
    opener.add_handler(fancy_urllib.FancyProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(fancy_urllib.FancyHTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    opener.add_handler(ContentEncodingHandler())
    if self.save_cookies:
        self.cookie_jar.filename = os.path.expanduser(
            HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)
        if os.path.exists(self.cookie_jar.filename):
            try:
                self.cookie_jar.load()
                self.authenticated = True
                logger.debug("Loaded authentication cookies from %s",
                             self.cookie_jar.filename)
            except (OSError, IOError, cookielib.LoadError), e:
                # Failed to load cookies. The target file path is bad.
                logger.debug(
                    "Could not load authentication cookies; %s: %s",
                    e.__class__.__name__, e)
                self.cookie_jar.filename = None
        else:
            # Create an empty cookie file. This must be created with the file
            # permissions set upfront in order to be secure.
            try:
                fd = os.open(self.cookie_jar.filename, os.O_CREAT, 0600)
                os.close(fd)
            except (OSError, IOError), e:
                # Failed to create cookie file. Don't try to save cookies.
                logger.debug(
                    "Could not create authentication cookies file; %s: %s",
                    e.__class__.__name__, e)
                self.cookie_jar.filename = None
def test_badly_named_methods(self):
    # test work-around for three methods that accidentally follow the
    # naming conventions for handler methods
    # (*_open() / *_request() / *_response())

    # These used to call the accidentally-named methods, causing a
    # TypeError in real code; here, returning self from these mock
    # methods would either cause no exception, or AttributeError.
    from urllib2 import URLError
    o = OpenerDirector()
    # Two handler "levels": the first defines do_open/proxy_open, the
    # second defines redirect_request — all accidentally handler-shaped.
    meth_spec = [
        [("do_open", "return self"), ("proxy_open", "return self")],
        [("redirect_request", "return self")],
    ]
    handlers = add_ordered_mock_handlers(o, meth_spec)
    o.add_handler(urllib2.UnknownHandler())
    for scheme in "do", "proxy", "redirect":
        # Each scheme matches one badly-named method; opening it must fall
        # through to UnknownHandler and raise URLError instead of invoking
        # the mock.
        self.assertRaises(URLError, o.open, scheme + "://example.com/")
def __init__(self, user=None, password=None):
    """Build the crawl suite's opener inside registry test mode and
    authenticate as `user`.
    """
    super(CrawlSuite, self).__init__()
    registry = openerp.registry(tools.config['db_name'])
    try:
        # switch registry to test mode, so that requests can be made
        registry.enter_test_mode()
        self.opener = urllib2.OpenerDirector()
        self.opener.add_handler(urllib2.UnknownHandler())
        self.opener.add_handler(urllib2.HTTPHandler())
        self.opener.add_handler(urllib2.HTTPSHandler())
        self.opener.add_handler(urllib2.HTTPCookieProcessor())
        # RedirectHandler is project-local (defined elsewhere).
        self.opener.add_handler(RedirectHandler())
        self._authenticate(user, password)
        self.user = user
    finally:
        # Always restore normal registry mode, even if authentication
        # failed.
        registry.leave_test_mode()
def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    # fancy_urllib variants add proxy/SSL niceties on top of the stock
    # urllib2 handlers.
    opener.add_handler(fancy_urllib.FancyProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(fancy_urllib.FancyHTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    if self.save_cookies:
        self.cookie_jar.filename = os.path.expanduser(
            HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)
        if os.path.exists(self.cookie_jar.filename):
            try:
                self.cookie_jar.load()
                self.authenticated = True
                logger.info("Loaded authentication cookies from %s",
                            self.cookie_jar.filename)
            except (OSError, IOError, cookielib.LoadError), e:
                # Unreadable or corrupt cookie file: fall back to
                # non-persistent cookies.
                logger.debug(
                    "Could not load authentication cookies; %s: %s",
                    e.__class__.__name__, e)
                self.cookie_jar.filename = None
        else:
            # Pre-create the cookie file with mode 0600 so saved
            # credentials are never world-readable.
            try:
                fd = os.open(self.cookie_jar.filename, os.O_CREAT, 0600)
                os.close(fd)
            except (OSError, IOError), e:
                logger.debug(
                    "Could not create authentication cookies file; %s: %s",
                    e.__class__.__name__, e)
                self.cookie_jar.filename = None
def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    if self.save_cookies:
        # Persistent cookies live in the user's home directory.
        self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
        self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
        if os.path.exists(self.cookie_file):
            try:
                self.cookie_jar.load()
                self.authenticated = True
                StatusUpdate("Loaded authentication cookies from %s" %
                             self.cookie_file)
            except (cookielib.LoadError, IOError):
                # Failed to load cookies - just ignore them.
                pass
        else:
            # Create an empty cookie file with mode 600
            fd = os.open(self.cookie_file, os.O_CREAT, 0600)
            os.close(fd)
        # Always chmod the cookie file
        os.chmod(self.cookie_file, 0600)
    else:
        # Don't save cookies across runs of update.py.
        self.cookie_jar = cookielib.CookieJar()
    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
    return opener
def url_inventory(self, request): ''' creates a csv file compiling the http status of artbase artworks and their urls and locations ''' context_instance = RequestContext(request) opts = self.model._meta admin_site = self.admin_site completed_message = None writer = csv.writer(open(os.path.join(settings.MEDIA_ROOT, "artbase/data/artbase_broken_urls_inventory_%s.csv" \ % datetime.date.today(), "wb"))) writer.writerow([ 'ARTWORK ID', 'ARTWORK TITLE', 'ARTWORK URL', 'URL STATUS', 'LOCATION', 'LOCATION STATUS', 'UPDATE NOTICE' ]) all_works = ArtworkStub.objects.filter(status="approved") handler = urllib2.UnknownHandler() opener = urllib2.build_opener(handler) urllib2.install_opener(opener) # timeout in seconds timeout = 15 socket.setdefaulttimeout(timeout) if request.method == "POST": if request.POST.get("run_inventory"): for work in all_works: url_response = None url_error_msg = None url_status = None location_response = None location_status = None location_error_msg = None updating_notice = None #a few hacks to make sure the url is formatted correctly if "/artbase/" in work.url: if "http://archive.rhizome.org" not in work.url: if "http://" not in work.url: work.url = "http://archive.rhizome.org%s" % work.url updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)" if "/artbase/" in work.location: if "http://archive.rhizome.org" not in work.location: if "http://" not in work.location: work.location = "http://archive.rhizome.org%s" % work.location updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)" if "http://" not in work.url: work.url = "http://%s" % work.url updating_notice = "Make sure has url including 'http://'" if "http://" not in work.location: work.location = "http://%s" % work.location updating_notice = "Make sure has url including 'http://'" try: url_response = urllib2.urlopen(work.url, timeout=15) except (urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL, 
httplib.HTTPException, httplib.UnknownProtocol), e: if hasattr(e, 'reason'): if isinstance(e.reason, socket.timeout): url_error_msg = 'Failed to reach server. TIMED OUT ' else: url_error_msg = 'Failed to reach server. Reason: %s ' % e.reason elif hasattr(e, 'code'): url_error_msg = "The server couldn't fulfill the request. Error code: %s" % e.code else: url_error_msg = "Failed to reach server!" except: url_error_msg = "Failed to reach server!"
""" def find_user_password(self, realm, authuri): creds = codetricks.stealVar("_temp_credentials") if creds is not None: return creds _restrictedURLOpener = urllib2.OpenerDirector() _restrictedURLOpener.add_handler(urllib2.HTTPRedirectHandler()) _restrictedURLOpener.add_handler(urllib2.HTTPHandler()) _restrictedURLOpener.add_handler(urllib2.HTTPSHandler()) _restrictedURLOpener.add_handler(urllib2.HTTPErrorProcessor()) _restrictedURLOpener.add_handler( urllib2.HTTPBasicAuthHandler(_UrlopenRemotePasswordMgr())) _restrictedURLOpener.add_handler(urllib2.FTPHandler()) _restrictedURLOpener.add_handler(urllib2.UnknownHandler()) _restrictedURLOpener.addheaders = [("user-agent", "GAVO DaCHS HTTP client")] def urlopenRemote(url, data=None, creds=(None, None)): """works like urllib2.urlopen, except only http, https, and ftp URLs are handled. The function also massages the error messages of urllib2 a bit. urllib2 errors always become IOErrors (which is more convenient within the DC). creds may be a pair of username and password. Those credentials will be presented in http basic authentication to any server that cares to ask. For both reasons, don't use any valuable credentials here. """
def __call__(self, value): try: super(RelativeURLValidator, self).__call__(value) except ValidationError as e: # Trivial case failed. Try for possible IDN domain if value: value = smart_text(value) scheme, netloc, path, query, fragment = urlparse.urlsplit( value) try: netloc = netloc.encode('idna') # IDN -> ACE except UnicodeError: # invalid domain part raise e url = urlparse.urlunsplit( (scheme, netloc, path, query, fragment)) super(RelativeURLValidator, self).__call__(url) else: raise else: url = value if self.verify_exists: broken_error = ValidationError( _(u'This URL appears to be a broken link.'), code='invalid_link') if url.startswith('http://') or url.startswith('ftp://'): headers = { "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", "Accept-Language": "en-us,en;q=0.5", "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", "Connection": "close", "User-Agent": self.user_agent, } url = url.encode('utf-8') try: req = urllib2.Request(url, None, headers) req.get_method = lambda: 'HEAD' #Create an opener that does not support local file access opener = urllib2.OpenerDirector() #Don't follow redirects, but don't treat them as errors either error_nop = lambda *args, **kwargs: True http_error_processor = urllib2.HTTPErrorProcessor() http_error_processor.http_error_301 = error_nop http_error_processor.http_error_302 = error_nop http_error_processor.http_error_307 = error_nop handlers = [ urllib2.UnknownHandler(), urllib2.HTTPHandler(), urllib2.HTTPDefaultErrorHandler(), urllib2.FTPHandler(), http_error_processor ] try: import ssl handlers.append(urllib2.HTTPSHandler()) except: #Python isn't compiled with SSL support pass map(opener.add_handler, handlers) if platform.python_version_tuple() >= (2, 6): opener.open(req, timeout=10) else: opener.open(req) except ValueError: raise ValidationError(_(u'Enter a valid URL.'), code='invalid') except: # urllib2.URLError, httplib.InvalidURL, etc. 
raise broken_error else: # Resolve the relative URL try: resolve(url) except Http404: raise broken_error
code='invalid_link') try: req = urllib2.Request(url, None, headers) req.get_method = lambda: 'HEAD' #Create an opener that does not support local file access opener = urllib2.OpenerDirector() #Don't follow redirects, but don't treat them as errors either error_nop = lambda *args, **kwargs: True http_error_processor = urllib2.HTTPErrorProcessor() http_error_processor.http_error_301 = error_nop http_error_processor.http_error_302 = error_nop http_error_processor.http_error_307 = error_nop handlers = [ urllib2.UnknownHandler(), urllib2.HTTPHandler(), urllib2.HTTPDefaultErrorHandler(), urllib2.FTPHandler(), http_error_processor ] try: import ssl handlers.append(urllib2.HTTPSHandler()) except: #Python isn't compiled with SSL support pass map(opener.add_handler, handlers) if platform.python_version_tuple() >= (2, 6): opener.open(req, timeout=10) else: opener.open(req)
broken_error = ValidationError( _(u'This URL appears to be a broken link.'), code='invalid_link') try: req = urllib2.Request(url, None, headers) req.get_method = lambda: 'HEAD' #Create an opener that does not support local file access opener = urllib2.OpenerDirector() #Don't follow redirects, but don't treat them as errors either error_nop = lambda *args, **kwargs: True http_error_processor = urllib2.HTTPErrorProcessor() http_error_processor.http_error_301 = error_nop http_error_processor.http_error_302 = error_nop http_error_processor.http_error_307 = error_nop handlers = [urllib2.UnknownHandler(), urllib2.HTTPHandler(), urllib2.HTTPDefaultErrorHandler(), urllib2.FTPHandler(), http_error_processor] try: import ssl except ImportError: # Python isn't compiled with SSL support pass else: handlers.append(urllib2.HTTPSHandler()) map(opener.add_handler, handlers) if platform.python_version_tuple() >= (2, 6): opener.open(req, timeout=10) else:
def run_report_and_create_csv(self, archived): ''' creates work and report ''' if archived: writer = csv.writer( open( os.path.join( settings.MEDIA_ROOT, "artbase/data/archived_broken_urls_inventory.csv"), "wb")) else: writer = csv.writer( open( os.path.join( settings.MEDIA_ROOT, "artbase/data/artbase_broken_urls_inventory.csv"), "wb")) writer.writerow([ 'ARTWORK ID', 'ARTWORK TITLE', 'ARTWORK URL', 'URL STATUS', 'LOCATION', 'LOCATION STATUS', 'UPDATE NOTICE' ]) if archived: all_works = ArtworkStub.objects.filter(status="approved").filter( location_type="cloned") else: all_works = ArtworkStub.objects.filter(status="approved") handler = urllib2.UnknownHandler() opener = urllib2.build_opener(handler) urllib2.install_opener(opener) # timeout in seconds timeout = 15 socket.setdefaulttimeout(timeout) for work in all_works: url_response = None url_error_msg = None url_status = None location_response = None location_status = None location_error_msg = None updating_notice = None #a few hacks to make sure the url is formatted correctly if "/artbase/" in work.url: if "http://archive.rhizome.org" not in work.url: if "http://" not in work.url: work.url = "http://archive.rhizome.org%s" % work.url updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)" if "/artbase/" in work.location: if "http://archive.rhizome.org" not in work.location: if "http://" not in work.location: work.location = "http://archive.rhizome.org%s" % work.location updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)" if "http://" not in work.url: work.url = "http://%s" % work.url updating_notice = "Make sure has url including 'http://'" if "http://" not in work.location: work.location = "http://%s" % work.location updating_notice = "Make sure has url including 'http://'" try: url_response = urllib2.urlopen(work.url, timeout=15) except (urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL, httplib.HTTPException, 
httplib.UnknownProtocol), e: if hasattr(e, 'reason'): if isinstance(e.reason, socket.timeout): url_error_msg = 'Failed to reach server. TIMED OUT ' else: url_error_msg = 'Failed to reach server. Reason: %s ' % e.reason elif hasattr(e, 'code'): url_error_msg = "The server couldn't fulfill the request. Error code: %s" % e.code else: url_error_msg = "Failed to reach server!" except: