示例#1
0
文件: url.py 项目: code2u/jsb
def geturl2(url, decode=False):
    """ use urllib2 to fetch an url.

        url: address to request; the User-Agent header is set from
            useragent().
        decode: when true the body is run through get_encoding() and
            fromenc() before being wrapped in istr, otherwise the body is
            wrapped exactly as read.

        returns an istr whose .info attribute holds the response headers
        (the value of result.info()).  exceptions raised while opening or
        reading the url propagate to the caller.
    """
    logging.warning('fetching %s', url)
    request = urllib2.Request(url)
    request.add_header('User-Agent', useragent())
    opener = urllib2.build_opener()
    result = opener.open(request)
    # always release the connection, even when reading the body fails
    try:
        tmp = result.read()
        info = result.info()
    finally:
        result.close()
    if decode:
        encoding = get_encoding(tmp)
        logging.info('%s encoding: %s', url, encoding)
        res = istr(fromenc(tmp, encoding, url))
    else:
        res = istr(tmp)
    res.info = info
    return res
示例#2
0
def geturl2(url, decode=False):
    """ use urllib2 to fetch an url.

        url: address to request, sent with the User-Agent returned by
            useragent().
        decode: if true, get_encoding() is applied to the body and the
            result handed to fromenc() before wrapping; if false the raw
            body is wrapped.

        returns an istr with the response headers (result.info()) stored
        on its .info attribute.  urllib2 errors are not caught here.
    """
    logging.info('fetching %s', url)
    request = urllib2.Request(url)
    request.add_header('User-Agent', useragent())
    opener = urllib2.build_opener()
    result = opener.open(request)
    try:
        tmp = result.read()
        info = result.info()
    finally:
        # a failing read() must not leave the response open
        result.close()
    if decode:
        encoding = get_encoding(tmp)
        logging.info('%s encoding: %s', url, encoding)
        res = istr(fromenc(tmp, encoding, url))
    else:
        res = istr(tmp)
    res.info = info
    return res
示例#3
0
def geturl2(url, decode=False):
    """ use urllib2 to fetch an url

        url: address to request; a User-Agent header from useragent() is
            added to the request.
        decode: when true the body goes through get_encoding() and
            fromenc() before being wrapped in istr.

        returns an istr; the response header information is attached as
        its .info attribute.  errors from opening or reading the url are
        left to the caller.
    """
    rlog(10, 'url', 'fetching %s' % url)
    request = urllib2.Request(url)
    request.add_header('User-Agent', useragent())
    opener = urllib2.build_opener()
    result = opener.open(request)
    # try/finally guarantees the response is closed if read() raises
    try:
        tmp = result.read()
        info = result.info()  # add header information to .info attribute
    finally:
        result.close()
    if decode:
        encoding = get_encoding(tmp)
        rlog(0, 'url', '%s encoding: %s' % (url, encoding))
        res = istr(fromenc(tmp, encoding, url))
    else:
        res = istr(tmp)
    res.info = info
    return res
示例#4
0
def geturl2(url, decode=False):
    """ use urllib2 to fetch an url

        url: the address to fetch (requested with the useragent() value
            as User-Agent header).
        decode: true to convert the body with fromenc() using the result
            of get_encoding(); false to keep the body as read.

        returns the body as an istr with the headers of the response set
        on .info.  nothing raised by urllib2 is handled here.
    """
    rlog(10, 'url', 'fetching %s' % url)
    request = urllib2.Request(url)
    request.add_header('User-Agent', useragent())
    opener = urllib2.build_opener()
    result = opener.open(request)
    try:
        tmp = result.read()
        info = result.info() # add header information to .info attribute
    finally:
        # do not leak the connection when the body cannot be read
        result.close()
    if decode:
        encoding = get_encoding(tmp)
        rlog(0, 'url', '%s encoding: %s' % (url, encoding))
        res = istr(fromenc(tmp, encoding, url))
    else:
        res = istr(tmp)
    res.info = info
    return res
示例#5
0
def geturl2(url, decode=False, timeout=5):
    """ use urllib2 to fetch an url.

        url: address to request; the User-Agent header comes from
            useragent().
        decode: when true the body is converted with fromenc() using the
            value returned by get_encoding(), otherwise it is kept as read.
        timeout: passed through to opener.open() as its timeout.

        returns an istr with two extra attributes: .status (the code of
        the response) and .info (the response headers).

        raises URLNotEnabled when the module-level enabled flag is false;
        errors from opening or reading the url propagate unchanged.
    """
    # enabled is only read here, so no global declaration is required
    if not enabled:
        raise URLNotEnabled(url)
    logging.warning('fetching %s', url)
    request = urllib2.Request(url)
    request.add_header('User-Agent', useragent())
    opener = urllib2.build_opener()
    result = opener.open(request, timeout=timeout)
    # collect everything needed from the response before it is closed and
    # make sure it is closed even if reading fails
    try:
        tmp = result.read()
        info = result.info()
        status = result.code
    finally:
        result.close()
    if decode:
        encoding = get_encoding(tmp)
        logging.info('%s encoding: %s', url, encoding)
        res = istr(fromenc(tmp, encoding, url))
    else:
        res = istr(tmp)
    res.status = status
    res.info = info
    return res
示例#6
0
 def strip(self, some_html):
     """ reset self.theString, feed some_html (converted with fromenc
         using "ascii") to self.feed(), close, and return self.theString.
     """
     self.theString = u""
     markup = fromenc(some_html, "ascii")
     self.feed(markup)
     self.close()
     stripped = self.theString
     return stripped
示例#7
0
def striphtml(txt):
    """ return txt after passing it through fromenc() and Stripper.strip() """
    return Stripper().strip(fromenc(txt))
示例#8
0
 def handle_data(self, data):
     """ append the fromenc() conversion of data to self.theString """
     self.theString = self.theString + fromenc(data)
示例#9
0
def striphtml(txt):
    """ run txt through a fresh Stripper and return what strip() gives back """
    html_stripper = Stripper()
    result = html_stripper.strip(fromenc(txt))
    return result
示例#10
0
 def handle_data(self, data):
     """ data handler: extend self.theString with fromenc(data) """
     collected = self.theString
     self.theString = collected + fromenc(data)