def login(uid, pwd):
    """Submit the pixiv login form and return the opener used to do so.

    Args:
        uid: Account identifier, sent as the ``pixiv_id`` form field.
        pwd: Account password, sent as the ``password`` form field.

    Returns:
        The opener created by ``getopener(headers)``. The login response
        is closed without being inspected, so a rejected login is not
        detected here.
    """
    op = getopener(headers)

    # Visit the login page to obtain the post_key required for login.
    # The context manager closes the response even if read() raises.
    with op.open(pixiv_url_login) as op_key:
        data = op_key.read()
    data = utils.ungzip(data).decode()

    # Build the form data that has to be submitted to log in.
    post_data = urllib.parse.urlencode({
        'pixiv_id': uid,
        'password': pwd,
        'post_key': getpostkey(data),
        'source': 'accounts',
    }).encode('utf-8')

    # Submit the login data; the response body itself is not needed.
    with op.open(pixiv_url_login_post, post_data):
        pass

    # Return the opener (per the original note, it carries cookie management).
    return op
def openurl(url, timeout, use_gzip=True, headers=None):
    """Fetch ``url`` and return the document parsed from its body.

    Args:
        url: Address to request.
        timeout: Timeout passed to ``urllib2.urlopen``.
        use_gzip: When true, an ``Accept-encoding: gzip`` header is added
            to the request.
        headers: Optional mapping of extra request headers. Defaults to no
            extra headers.

    Returns:
        The result of ``html.document_fromstring`` applied to
        ``utils.ungzip(connection)``.

    Raises:
        IOError: If building the request, opening the connection or
            parsing the response fails. The message keeps the original
            ``open url: ... error(...): ...`` layout.
    """
    # Avoid a shared mutable default argument.
    if headers is None:
        headers = {}
    try:
        request = urllib2.Request(url=url, headers=headers)
        if use_gzip:
            request.add_header('Accept-encoding', 'gzip')
        connection = urllib2.urlopen(request, timeout=timeout)
        try:
            return html.document_fromstring(utils.ungzip(connection))
        finally:
            # Close the connection even when decompression/parsing fails.
            connection.close()
    except Exception as e:
        # String exceptions are not raisable, and not every exception has
        # errno/strerror, so read them defensively and raise a real error.
        raise IOError("open url: %s error(%s): %s" % (
            url,
            getattr(e, 'errno', None),
            getattr(e, 'strerror', None) or e,
        ))
def download_first(op, mode, picDir):
    """Fetch the ranking page for ``mode`` and download what it lists.

    Args:
        op: Opener whose ``open`` method is used for the request.
        mode: Value sent as the ``mode`` query parameter.
        picDir: Directory forwarded to ``download_illustration``.

    Returns:
        The value of ``get_tt`` for the fetched page, or ``None`` when the
        response status is not 200.
    """
    ranking_url = pixiv_url_ranking + '?' + urllib.parse.urlencode(
        {'mode': mode})

    token = None
    entries = None
    with op.open(ranking_url) as response:
        if response.status == 200:
            page = utils.ungzip(response.read()).decode()
            token = get_tt(page)
            entries = analysis_html(page)

    # Nothing parsed (or a non-200 reply): skip the download step.
    if not entries:
        return token

    download_illustration(op, entries, picDir)
    return token
def get_picture_stream(self, url, illust_id, ungzip=True):
    """Download the picture at ``url`` and return its content.

    Args:
        url: Address of the picture.
        illust_id: Illustration id, used to build the ``Referer`` header
            from ``self.host``.
        ungzip: When true (the default) the response is passed through
            ``utils.ungzip``; when false the raw response body is
            returned.

    Returns:
        The result of ``utils.ungzip(response)`` or of
        ``response.read()``, depending on ``ungzip``.
    """
    referer = "%s/member_illust.php?mode=big&illust_id=%s" % (
        self.host, illust_id)
    request = urllib2.Request(
        url=url,
        headers={
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'image/png,image/*;q=0.8,*/*;q=0.5',
            'Referer': referer,
        })
    response = urllib2.urlopen(request)
    try:
        # The original test was inverted (`if not ungzip`), so the default
        # ungzip=True skipped decompression even though gzip is requested.
        # NOTE(review): assumes utils.ungzip tolerates bodies the server
        # did not compress -- confirm against utils.
        if ungzip:
            return utils.ungzip(response)
        return response.read()
    finally:
        # Release the connection even if reading/decompressing fails.
        response.close()
def download_more(op, mode, p, fm, tt, picDir):
    """Fetch one further ranking page and download what it lists.

    Args:
        op: Opener whose ``open`` method is used for the request.
        mode: Value sent as the ``mode`` query parameter.
        p: Value sent as the ``p`` query parameter.
        fm: Value sent as the ``format`` query parameter.
        tt: Value sent as the ``tt`` query parameter.
        picDir: Directory forwarded to ``download_illustration``.
    """
    query = {
        'mode': mode,
        'p': p,
        'format': fm,
        'tt': tt,
    }
    page_url = pixiv_url_ranking + '?' + urllib.parse.urlencode(query)

    entries = None
    with op.open(page_url) as response:
        if response.status == 200:
            payload = utils.ungzip(response.read()).decode()
            entries = analysis_json(payload)

    # Nothing parsed (or a non-200 reply): nothing to download.
    if not entries:
        return
    download_illustration(op, entries, picDir)
def process_source(source):
    """Retrieve ``source`` into a temporary file and unpack it if needed.

    The retrieved buffer is written to a fresh temporary file, whose
    archive type is then queried via ``utils.get_archive_type``:

    * ``"gzip"`` / ``"bzip"``: the file is unpacked with ``utils.ungzip``
      / ``utils.unbzip`` and the temporary file is removed.
    * ``False``: the temporary file itself is the result.

    Args:
        source: Passed unchanged to ``utils.retrieve``.

    Returns:
        A tuple ``(fname, True)``. ``fname`` is ``None`` when the archive
        type is none of the values listed above.
    """
    fname = None
    _buffer = utils.retrieve(source)

    # mkstemp() hands back an already-open descriptor; write through it so
    # it gets closed instead of leaking (the original discarded it and
    # re-opened the path).
    fd, tmpfname = tempfile.mkstemp()
    with os.fdopen(fd, "w") as tmpf:
        # NOTE(review): text mode kept from the original; if the buffer
        # holds binary archive data, "wb" may be required -- confirm.
        tmpf.write(_buffer.getvalue())

    archive = utils.get_archive_type(tmpfname)
    if archive == "gzip":
        fname = utils.ungzip(tmpfname)
        os.remove(tmpfname)
    elif archive == "bzip":
        fname = utils.unbzip(tmpfname)
        os.remove(tmpfname)
    elif archive is False:
        fname = tmpfname
    # NOTE(review): for any other archive type the temporary file is left
    # on disk and fname stays None -- confirm whether that is intended.
    return (fname, True)
def _save_image(op, url, picDir):
    """Fetch ``url`` into ``picDir``; return True only if it was written."""
    try:
        with op.open(url) as response:
            if response.status != 200:
                return False
            target = os.path.join(picDir, url.split('/')[-1])
            with open(target, 'wb') as out:
                out.write(response.read())
    except Exception:
        # Best effort: a failure here just means the caller should try
        # the next candidate URL (or report the item as failed).
        return False
    return True


def download_illustration(op, no, picDir):
    """Download every illustration listed on one ranking-area page.

    The page is requested with the module-level ``query_type`` and ``no``
    as query parameters and parsed with ``analysis``. For each item the
    first original URL is tried; if it cannot be saved, the second one is
    tried. A line is printed for every item saying whether it was saved.

    Args:
        op: Opener whose ``open`` method is used for all requests.
        no: Value sent as the ``no`` query parameter.
        picDir: Directory the image files are written to; the file name is
            the last path segment of the URL that succeeded.
    """
    print("正在下载中……")
    params = urllib.parse.urlencode({'type': query_type, 'no': no})
    visit = pixiv_url_ranking_area + '?' + params

    # Context manager closes the response even if read() raises.
    with op.open(visit) as op_visit:
        page = utils.ungzip(op_visit.read()).decode()

    for item in analysis(page):
        # `or` short-circuits: originalUrl2 is only read and fetched when
        # the first URL could not be saved.
        if (_save_image(op, item.originalUrl1, picDir)
                or _save_image(op, item.originalUrl2, picDir)):
            print('插图已成功下载 -> %s' % item.get_info())
        else:
            # Surface the failure instead of silently swallowing it.
            print('插图下载失败 -> %s' % item.get_info())
        # Wait 1 second; crawling too fast is easy to get noticed.
        time.sleep(1)
    print("下载完成!")