def parse_data_nokey(self, response):
    """Parse and emit the first-level entries of a share without an extraction code.

    For every entry in ``data['list']`` a ``FileItem`` is yielded; for every
    directory entry a follow-up ``Request`` handled by ``parse_dir`` is
    yielded as well. After the loop a single ``UserItem`` built from
    ``response.meta`` is yielded.

    :param response: response whose body is the JSON listing. The callback
        reads ``shorturl``, ``expiredtype``, ``share_username``,
        ``share_photo`` and ``ctime`` from ``response.meta``.
    :return: generator of items and requests. Nothing is yielded when the
        payload carries a non-zero ``errno``; any exception is logged and
        ends the generator.
    """
    try:
        data = json.loads(response.text)
        if data['errno'] != 0:
            logger.error('parse no-key data fail, url:{}, errno:{}'.format(
                response.url, str(data['errno'])))
            return

        for entry in data['list']:
            is_dir = int(entry['isdir'])
            yield FileItem(url=response.meta['shorturl'],
                           pwd=None,
                           expiredtype=response.meta['expiredtype'],
                           fs_id=entry['fs_id'],
                           parent_id=0,
                           size=entry['size'],
                           isdir=is_dir,
                           local_ctime=entry['local_ctime'],
                           local_mtime=entry['local_mtime'],
                           # parse_dir treats md5 as optional; do the same
                           # here so an entry without it does not abort the
                           # whole listing via KeyError.
                           md5=entry.get('md5'),
                           path=entry['path'],
                           server_ctime=entry['server_ctime'],
                           server_filename=entry['server_filename'],
                           share_id=data['share_id'],
                           uk=data['uk'])

            if is_dir == 1:
                # NOTE(review): server_filename is interpolated unescaped;
                # names containing '&' or '#' would change the query string.
                # Consider URL-quoting the dir component.
                url = ('https://pan.baidu.com/share/list?uk={}&shareid={}&order=other&desc=1&showempty=0&web=1&'
                       'dir=/sharelink{}-{}/{}&channel=chunlei&web=1&app_id=250528').format(
                    data['uk'], data['share_id'], data['uk'],
                    entry['fs_id'], entry['server_filename'])
                meta = {
                    'uk': data['uk'],
                    'share_id': data['share_id'],
                    # fs_id of the first-level directory; parse_dir reuses it
                    # when building the dir= prefix for deeper levels.
                    'fs_id': entry['fs_id'],
                    'parent_id': entry['fs_id'],
                    'filepath': entry['server_filename'],
                }
                yield Request(url=url, dont_filter=True,
                              callback=self.parse_dir, meta=meta)

        yield UserItem(url=response.meta['shorturl'],
                       pwd=None,
                       share_username=response.meta['share_username'],
                       share_photo=response.meta['share_photo'],
                       ctime=response.meta['ctime'])
        logger.info(
            'parse no-key first data succ, url:{}, key:{}, share_id:{}, uk:{}'
            .format(response.url, self.pwd, data['share_id'], data['uk']))
    except Exception as e:
        logger.error(
            'parse no-key first data fail: exception, url:{}, err_msg:{}'.
            format(response.url, e))
def parse_dir(self, response):
    """Parse one directory listing and recurse into its sub-directories.

    Yields a ``FileItem`` for every entry in ``data['list']`` and, for each
    directory entry, a ``Request`` back into this callback.

    :param response: response whose body is the JSON listing. The callback
        reads ``uk``, ``share_id``, ``fs_id``, ``parent_id`` and
        ``filepath`` from ``response.meta``.
    :return: generator of items and requests. Nothing is yielded when the
        payload carries a non-zero ``errno``; any exception is logged and
        ends the generator.
    """
    try:
        data = json.loads(response.text)
        if data['errno'] != 0:
            logger.error('parse dir data fail, url: %s, errno:%s' %
                         (response.url, str(data['errno'])))
            # Stop here, as parse_data_nokey does; falling through would
            # only hit data['list'] and log a second, misleading exception.
            return

        meta_in = response.meta
        for entry in data['list']:
            is_dir = int(entry['isdir'])
            yield FileItem(url=None,
                           pwd=None,
                           expiredtype=None,
                           fs_id=entry['fs_id'],
                           parent_id=meta_in['parent_id'],
                           size=entry['size'],
                           isdir=is_dir,
                           local_ctime=entry['local_ctime'],
                           local_mtime=entry['local_mtime'],
                           md5=entry.get('md5'),
                           path=entry['path'],
                           server_ctime=entry['server_ctime'],
                           server_filename=entry['server_filename'],
                           share_id=None,
                           uk=None)
            logger.info('parse data succ, fs_id:{}, parent_id:{}'.format(
                entry['fs_id'], meta_in['parent_id']))

            if is_dir == 1:
                # Path of the child relative to the share root; used both in
                # the dir= query component and in the meta passed down.
                child_path = meta_in['filepath'] + '/' + entry['server_filename']
                url = ('https://pan.baidu.com/share/list?uk={}&shareid={}&order=other&desc=1&showempty=0&web=1&'
                       'dir=/sharelink{}-{}/{}&channel=chunlei&web=1&app_id=250528').format(
                    meta_in['uk'], meta_in['share_id'], meta_in['uk'],
                    meta_in['fs_id'], child_path)
                meta = {
                    'uk': meta_in['uk'],
                    'share_id': meta_in['share_id'],
                    # fs_id is passed through unchanged; only parent_id
                    # tracks the directory being descended into.
                    'fs_id': meta_in['fs_id'],
                    'parent_id': entry['fs_id'],
                    'filepath': child_path,
                }
                yield Request(url=url, cookies=self.cookies, dont_filter=True,
                              callback=self.parse_dir, meta=meta)
    except Exception as e:
        logger.error(
            'parse dir data fail: exception, url:{}, err_msg:{}'.format(
                response.url, e))
def parse_data(self, response):
    """Extract file and user items from the ``window.yunData`` blob of a share page.

    The JSON object assigned to ``window.yunData`` is located with a regular
    expression in ``response.text`` and decoded. One ``FileItem`` is yielded
    per entry of its ``file_list``, followed by one ``UserItem`` built from
    its ``uinfo`` object.

    :param response: page response; ``response.text`` and ``response.url``
        are read.
    :return: generator of items. Nothing is yielded (an error is logged)
        when the blob is missing, is not valid JSON, or has an empty
        ``file_list``.
    """
    pattern = r'window.yunData = ([\s\S]*?});'
    match = re.search(pattern, response.text)
    if match is None:
        # re.search returns None on no match; calling .group() on it would
        # raise AttributeError out of the callback.
        logging.error("window.yunData not found %s", response.url)
        return
    try:
        data = json.loads(match.group(1))
    except ValueError:
        logging.error("window.yunData is not valid JSON %s", response.url)
        return

    files = data.get("file_list", [])
    if not files:
        logging.error("len(files) < 1 %s", response.url)
        return

    for entry in files:
        yield FileItem(
            url=response.url,
            fs_id=entry["fs_id"],
            server_filename=entry["server_filename"],
            size=int(entry['size']),
            server_mtime=int(entry["server_mtime"]),
            server_ctime=int(entry["server_ctime"]),
            local_mtime=int(entry["local_mtime"]),
            local_ctime=int(entry["local_ctime"]),
            isdir=int(entry["isdir"]),
            isdelete=int(entry["isdelete"]),
            status=int(entry["status"]),
            category=int(entry["category"]),
            # Previously filled from entry["category"], which looks like a
            # copy-paste slip. NOTE(review): assumes entries carry a "share"
            # key; falls back to 0 when absent - confirm against the payload.
            share=int(entry.get("share", 0)),
            path_md5=entry["path_md5"],
            path=entry["path"],
            parent_path=unquote(entry["parent_path"]),
            md5=entry["md5"],
            thumbs=entry.get("thumbs"),
            dCnt=int(data["dCnt"]),
            ctime=int(data["ctime"]),
            expiredType=data["expiredType"],
            # Parenthesised to make the precedence explicit: the sum is the
            # value only when expiredType is positive, otherwise 0.
            expires=(int(data["ctime"]) + data["expiredType"])
            if data["expiredType"] > 0 else 0,
            sharesuk=data["sharesuk"],
            shareid=data["shareid"],
            pansuk=data["pansuk"],
            uk=data["uinfo"]['uk'],
            last_updated=datetime.datetime.utcnow()
        )

    sharer = data["uinfo"]
    yield UserItem(
        uname=sharer['uname'],
        avatar_url=sharer['avatar_url'],
        uk=sharer['uk'],
        third=sharer['third'],
        relation_type=sharer['relation_type'],
        last_updated=datetime.datetime.utcnow()
    )
def parse_data(self, response):
    """Extract file and user items from a JSON share-listing response.

    One ``FileItem`` is yielded per entry of ``data['list']``, followed by
    one ``UserItem`` for the sharer.

    :param response: response whose body is the JSON listing. The callback
        reads ``shorturl``, ``ctime``, ``expiredType``, ``share_username``
        and ``share_photo`` from ``response.meta``.
    :return: generator of items. Nothing is yielded when the payload
        carries a non-zero ``errno``; any exception is logged with its
        traceback and ends the generator.
    """
    try:
        data = json.loads(response.text)
        if data['errno'] != 0:
            # logging interpolates with %-style placeholders; the previous
            # "{}" placeholders made the formatting fail and lost the message.
            logging.error("数据接口错误,errno: %s, url: %s",
                          data["errno"], response.url)
            return

        meta = response.meta
        for entry in data['list']:
            yield FileItem(
                url=meta['shorturl'],
                fs_id=entry["fs_id"],
                server_filename=entry["server_filename"],
                size=int(entry['size']),
                server_mtime=int(entry["server_mtime"]),
                server_ctime=int(entry["server_ctime"]),
                local_mtime=int(entry["local_mtime"]),
                local_ctime=int(entry["local_ctime"]),
                isdir=int(entry["isdir"]),
                category=int(entry["category"]),
                path=entry["path"],
                md5=entry["md5"],
                thumbs=entry.get("thumbs"),
                ctime=meta['ctime'],
                expiredType=meta['expiredType'],
                # Parenthesised to make the precedence explicit: the sum is
                # the value only when expiredType is positive, otherwise 0.
                expires=(meta['ctime'] + meta['expiredType'])
                if meta['expiredType'] > 0 else 0,
                shareid=data["share_id"],
                uk=data["uk"],
                last_updated=datetime.datetime.utcnow()
            )

        yield UserItem(
            uname=meta['share_username'],
            avatar_url=meta['share_photo'],
            uk=data["uk"],
            last_updated=datetime.datetime.utcnow()
        )
    except Exception:
        # "except Exception" rather than a bare except: a bare clause would
        # also trap GeneratorExit when the consumer closes this generator and
        # report it as a parse failure. logging.exception records the
        # traceback alongside the message.
        logging.exception("数据解析错误 %s", response.url)