Example #1
def getLocation(userName):
    req = urllib2.Request('https://flightaware.com/adsb/stats/user/'+userName)
    req.add_header('user-agent', cons.headers())
    html = urllib2.urlopen(req).read()
    loc = re.search(r'"latitude":(-?[0-9]+\.[0-9]+),"longitude":(-?[0-9]+\.[0-9]+),', html)
    location = (loc.group(1), loc.group(2))
    return location
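
Note: every snippet in this listing calls a headers() (or cons.headers()) helper that is not shown; it apparently returns a User-Agent string for the user-agent header. A minimal sketch of such a helper, with the exact strings being only an assumption, could look like this:

import random

def headers():
    # Hypothetical helper (not from the original projects): return a
    # browser-like User-Agent string so requests look less like a script.
    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1 Safari/605.1.15',
    ]
    return random.choice(user_agents)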
Example #2
def read_url(url, encoding="gb18030"):
    try:
        req = urllib.request.Request(url)
        req.add_header('user-agent', headers())
        content = urllib.request.urlopen(req, timeout=5).read()
        content = gzip.decompress(content).decode(encoding)
    except Exception as e:
        print(e)
        print(inspect.stack()[1][3] + ' ran into an error')
        sleep(5)
        req = urllib.request.Request(url)
        req.add_header('user-agent', headers())
        content = urllib.request.urlopen(req).read()
        content = gzip.decompress(content).decode("gb18030")  # pages declared as gb2312 should be decoded as gb18030 (a superset)
    soup = BeautifulSoup(content, "lxml")
    return soup
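
A typical call of read_url, with a placeholder URL, hands the returned BeautifulSoup object straight to the usual soup methods:

# Hypothetical usage of read_url(); the URL is only a placeholder.
soup = read_url('http://example.com/list.html')
for a in soup.find_all('a'):
    print(a.get('href'))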
Example #3
def getInfo(userId):
    req = urllib2.Request('https://mm.taobao.com/self/aiShow.htm?&userId=%s' %
                          userId)
    req.add_header('user-agent', headers())
    html = urllib2.urlopen(req).read().decode('gbk').encode('utf-8')
    #print html
    return html
Example #4
def getAlbumList(userId):
    req = urllib2.Request(
        'https://mm.taobao.com/self/model_album.htm?&user_id=%s' % userId)
    req.add_header('user-agent', headers())
    html = urllib2.urlopen(req).read().decode('gbk').encode('utf-8')
    #print html

    reg = r'class="mm-first" href="//(.*?)"'
    return re.findall(reg, html)[::2]
Example #5
def getPicture(userId, album_id):
    req = urllib.request.Request(
        'https://mm.taobao.com/album/json/get_album_photo_list.htm?user_id=%s&album_id=%s'
        % (userId, album_id))
    req.add_header('user-agent', headers())
    html = urllib.request.urlopen(req).read().decode('gbk')
    result = json.loads(html)
    for k in result['picList']:
        print(k['picUrl'])
Example #6
def getUserName():
    req = urllib2.Request('https://flightaware.com/ajax/ignoreuser/adsb/adsb_stats.rvt?table=sites&start=0&length=1000')
    req.add_header('user-agent', cons.headers())
    html = urllib2.urlopen(req).read()
    data = json.loads(html)
    # print data['data'][0]['user_username']
    names = list()
    for item in data['data']:
        names.append(item['user_username'])
    return names
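
Examples #1 and #6 appear to come from the same FlightAware scraper and can be chained: getUserName() lists the ADS-B site owners, and getLocation() then resolves each name to a latitude/longitude pair. A sketch of that loop, assuming both functions and their module-level imports are in scope:

locations = {}
for name in getUserName():
    try:
        locations[name] = getLocation(name)
    except AttributeError:
        # re.search found no coordinates on that user's page, so loc was None.
        pass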
Example #7
def getUrlList():
    req = urllib.request.Request(
        'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'
    )
    req.add_header('user-agent', headers())
    html = urllib.request.urlopen(
        req,
        data=
        b'q&viewFlag=A&sortType=default&searchStyle=&searchRegion=city%3A&searchFansNum=&currentPage=1&pageSize=100'
    ).read().decode('gbk')
    # print(html)
    result = json.loads(html)
    return result['data']['searchDOList']
Example #8
def getAlbumUrl(userId):
    req = urllib.request.Request(
        'https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%%20=%s'
        % userId)
    req.add_header('user-agent', headers())
    html = urllib.request.urlopen(req).read().decode('gbk')
    reg = r'<a class="mm-first" href="//(.*?)" target="_blank">'
    reg1 = r'mm.taobao.com/self/album_photo.htm?(.*?)&album_id=(.*?)&album_flag=0'  # defined but never used below
    albumList = re.findall(reg, html)[::2]
    albumId = []
    for j in albumList:
        albumId.append(j.split('album_id=')[1].split('&')[0])
    return albumId
Example #9
def getUrlList():
    req = urllib2.Request(
        'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'
    )
    req.add_header('user-agent', headers())
    # The endpoint expects a POST rather than a GET, so the query string is
    # passed as data.
    html = urllib2.urlopen(
        req,
        data='q&viewFlag=A&sortType=default&searchStyle=&searchRegion=city%3A&searchFansNum=&currentPage=1&pageSize=100'
    ).read().decode('gbk').encode('utf-8')

    # The response body is JSON. A regex could pick the values out of the raw
    # string, but it is simpler to parse it into a dict and index the slice we
    # need, so json.loads is used instead.
    result = json.loads(html)
    return result['data']['searchDOList']
Example #10
def getInfo(userId):
    req = urllib.request.Request(
        'https://mm.taobao.com/self/aiShow.htm?userId=%s' % userId)
    req.add_header('user-agent', headers())
    html = urllib.request.urlopen(req).read().decode('gbk')
    print(html)
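
The taobao snippets likewise form a pipeline: getUrlList() fetches model records, getAlbumUrl() maps a user id to album ids, and getPicture() prints the photo URLs of one album. A rough sketch of the whole chain; the 'userId' key on each search record is an assumption, not confirmed by the listing:

for model in getUrlList():
    user_id = model['userId']          # assumed field name on searchDOList entries
    for album_id in getAlbumUrl(user_id):
        getPicture(user_id, album_id)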