def httpExists(url):
    """Report whether ``url`` answers an HTTP HEAD request with status 200.

    The network location and path are taken from ``url``.  When the network
    location contains a colon, the text after the first colon is parsed as
    the port number; otherwise the connection's default port is used.

    A 302 response is followed by calling this function again on the
    ``Location`` header resolved against ``url``.  Any other status, an
    invalid port number, or any exception raised while talking to the
    server is printed and reported as "not found".

    NOTE(review): a second ``def httpExists`` appears later in this file and
    rebinds the name; confirm which copy is meant to survive.

    Args:
        url: The URL to probe.

    Returns:
        True if the (possibly redirected) request ends in status 200,
        otherwise False.
    """
    host, path = urlparse.urlsplit(url)[1:3]
    if ':' in host:
        # port specified, try to use it
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            print('invalid port number %r' % (port,))
            return False
    else:
        # no port specified, use default port
        port = None

    connection = None
    found = False
    try:
        connection = httplib.HTTPConnection(host, port=port)
        connection.request("HEAD", path)
        resp = connection.getresponse()
        if resp.status == 200:       # normal 'found' status
            found = True
        elif resp.status == 302:     # recurse on temporary redirect
            found = httpExists(urlparse.urljoin(url,
                               resp.getheader('location', '')))
        else:                        # everything else -> not found
            print("Status %d %s : %s" % (resp.status, resp.reason, url))
    except Exception as e:
        # Best-effort probe: any failure is reported and counts as
        # "not found" rather than propagating to the caller.
        print('%s %s %s' % (e.__class__, e, url))
        found = False
    finally:
        # Release the socket explicitly instead of waiting for garbage
        # collection.
        if connection is not None:
            connection.close()
    # The original fell off the end here and implicitly returned None.
    return found
def httpExists(url):
    """Report whether ``url`` answers an HTTP HEAD request with status 200.

    The network location and path are taken from ``url``.  When the network
    location contains a colon, the text after the first colon is parsed as
    the port number; otherwise the connection's default port is used.

    On a 302 response the URL and its ``Location`` header are printed and
    the function calls itself on the redirect target (resolved against
    ``url``).  Any other status, an invalid port number, or any exception
    raised while talking to the server is printed and reported as
    "not found".

    NOTE(review): an earlier ``def httpExists`` exists in this file; this
    later definition is the one bound to the name after import.

    Args:
        url: The URL to probe.

    Returns:
        True if the (possibly redirected) request ends in status 200,
        otherwise False.
    """
    host, path = urlparse.urlsplit(url)[1:3]
    if ':' in host:
        # port specified, try to use it
        host, port = host.split(":", 1)
        try:
            port = int(port)
        except ValueError:
            print('invalid port number %r' % (port,))
            return False
    else:
        # no port specified, let the connection pick its default
        port = None

    connection = None
    # Default to "not found" so every branch below leaves ``found`` bound;
    # the original left it unassigned for statuses other than 200/302.
    found = False
    try:
        connection = httplib.HTTPConnection(host, port=port)
        connection.request("HEAD", path)
        resp = connection.getresponse()
        if resp.status == 200:
            found = True
        elif resp.status == 302:
            location = resp.getheader('location', '')
            print('%s %s' % (url, location))
            found = httpExists(urlparse.urljoin(url, location))
        else:
            print("Status %d %s : %s" % (resp.status, resp.reason, url))
    except Exception as e:
        # Best-effort probe: any failure is reported and counts as
        # "not found" rather than propagating to the caller.
        print('%s %s %s' % (e.__class__, e, url))
        found = False
    finally:
        # Release the socket explicitly instead of waiting for garbage
        # collection.
        if connection is not None:
            connection.close()
    # The original fell off the end here and implicitly returned None.
    return found
def find_broken(urls):
    """Return the entries of ``urls`` that ``httpExists`` does not confirm.

    Each URL is probed with ``httpExists``; those for which it returns a
    false value are collected in their original order.

    Args:
        urls: An iterable of URLs to check.

    Returns:
        A list of the URLs whose probe did not succeed.
    """
    # An orphaned ``else:``/``try:`` fragment (a stray copy of the tail of
    # httpExists) used to follow the return statement here.  It was a
    # syntax error and referenced names not defined in this function, so
    # it has been removed.
    return [url for url in urls if not httpExists(url)]

def _test():
    """Import the ``httpExists`` module and return ``doctest.testmod`` of it."""
    import doctest
    import httpExists as module_under_test
    return doctest.testmod(module_under_test)

if __name__ == "__main__":
    # The doctest run is left disabled, as in the original script.
    #_test()
    # Manual smoke check: print the probe result for a few sample URLs.
    for sample_url in ('http://www.baidu.com/a.html',
                       'http://www.mumayi.com/1.html',
                       'http://127.0.0.1/1.html'):
        print(httpExists(sample_url))