Exemplo n.º 1
0
    def parse_data_nokey(self, response):
        """
        Parse and emit the first-level files/directories of a share without an extraction code.

        Yields one ``FileItem`` per entry of the response's ``list``, one
        follow-up ``Request`` (handled by ``parse_dir``) per directory entry,
        and finally one ``UserItem`` built from ``response.meta``.

        :param response: response whose body is JSON containing ``errno``,
            ``list``, ``share_id`` and ``uk``; ``response.meta`` must provide
            ``shorturl``, ``expiredtype``, ``share_username``, ``share_photo``
            and ``ctime``.
        :return: generator of items and requests. Nothing is yielded when
            ``errno`` is non-zero; any exception is logged and ends the
            generator.
        """
        try:
            data = json.loads(response.text)
            if data['errno'] != 0:
                logger.error('parse no-key data fail, url:{}, errno:{}'.format(
                    response.url, str(data['errno'])))
                return

            for file in data['list']:
                is_dir = int(file['isdir'])
                yield FileItem(url=response.meta['shorturl'],
                               pwd=None,
                               expiredtype=response.meta['expiredtype'],
                               fs_id=file['fs_id'],
                               # First-level entries have no parent directory.
                               parent_id=0,
                               size=file['size'],
                               isdir=is_dir,
                               local_ctime=file['local_ctime'],
                               local_mtime=file['local_mtime'],
                               # md5 may be absent from an entry (presumably
                               # for directories - confirm); a hard lookup
                               # would raise and abort the whole listing.
                               md5=file.get('md5'),
                               path=file['path'],
                               server_ctime=file['server_ctime'],
                               server_filename=file['server_filename'],
                               share_id=data['share_id'],
                               uk=data['uk'])

                if is_dir == 1:
                    # Directory: request its listing so parse_dir can walk it.
                    url = 'https://pan.baidu.com/share/list?uk={}&shareid={}&order=other&desc=1&showempty=0&web=1&' \
                          'dir=/sharelink{}-{}/{}&channel=chunlei&web=1&app_id=250528'.format(
                              data['uk'], data['share_id'],
                              data['uk'], file['fs_id'], file['server_filename'])
                    meta = {
                        'uk': data['uk'],
                        'share_id': data['share_id'],
                        # fs_id of the top-level directory; it is part of the
                        # "dir=/sharelink{uk}-{fs_id}/..." path built above.
                        'fs_id': file['fs_id'],
                        'parent_id': file['fs_id'],
                        'filepath': file['server_filename'],
                    }
                    yield Request(url=url,
                                  dont_filter=True,
                                  callback=self.parse_dir,
                                  meta=meta)

            yield UserItem(url=response.meta['shorturl'],
                           pwd=None,
                           share_username=response.meta['share_username'],
                           share_photo=response.meta['share_photo'],
                           ctime=response.meta['ctime'])
            logger.info(
                'parse no-key first data succ, url:{}, key:{}, share_id:{}, uk:{}'
                .format(response.url, self.pwd, data['share_id'], data['uk']))
        except Exception as e:
            logger.error(
                'parse no-key first data fail: exception, url:{}, err_msg:{}'.
                format(response.url, e))
Exemplo n.º 2
0
 def parse_dir(self, response):
     """
     Parse one directory listing and recurse into its sub-directories.

     Yields one ``FileItem`` per entry of the response's ``list`` and, for
     each directory entry, a ``Request`` whose callback is this method again.

     :param response: response whose body is JSON containing ``errno`` and
         ``list``; ``response.meta`` must provide ``uk``, ``share_id``,
         ``fs_id``, ``parent_id`` and ``filepath``.
     :return: generator of items and requests. Nothing is yielded when
         ``errno`` is non-zero; any exception is logged and ends the
         generator.
     """
     try:
         data = json.loads(response.text)
         if data['errno'] != 0:
             logger.error('parse dir data fail, url: %s, errno:%s' %
                          (response.url, str(data['errno'])))
             # Stop here: without this return the code fell through to
             # data['list'] on an error payload.
             return

         meta_in = response.meta
         for file in data['list']:
             yield FileItem(url=None,
                            pwd=None,
                            expiredtype=None,
                            fs_id=file['fs_id'],
                            parent_id=meta_in['parent_id'],
                            size=file['size'],
                            isdir=int(file['isdir']),
                            local_ctime=file['local_ctime'],
                            local_mtime=file['local_mtime'],
                            # md5 is optional on an entry.
                            md5=file.get('md5'),
                            path=file['path'],
                            server_ctime=file['server_ctime'],
                            server_filename=file['server_filename'],
                            share_id=None,
                            uk=None)
             logger.info('parse data succ, fs_id:{}, parent_id:{}'.format(
                 file['fs_id'], meta_in['parent_id']))

             if int(file['isdir']) == 1:
                 # Path of the sub-directory relative to the share root.
                 sub_path = meta_in['filepath'] + '/' + file['server_filename']
                 url = 'https://pan.baidu.com/share/list?uk={}&shareid={}&order=other&desc=1&showempty=0&web=1&' \
                       'dir=/sharelink{}-{}/{}&channel=chunlei&web=1&app_id=250528'.format(
                           meta_in['uk'], meta_in['share_id'],
                           meta_in['uk'], meta_in['fs_id'], sub_path)
                 meta = {
                     'uk': meta_in['uk'],
                     'share_id': meta_in['share_id'],
                     # fs_id is passed through unchanged; it is the id used
                     # in the "/sharelink{uk}-{fs_id}/" prefix above.
                     'fs_id': meta_in['fs_id'],
                     # Children of the sub-directory point back at it.
                     'parent_id': file['fs_id'],
                     'filepath': sub_path,
                 }
                 yield Request(url=url,
                               cookies=self.cookies,
                               dont_filter=True,
                               callback=self.parse_dir,
                               meta=meta)
     except Exception as e:
         logger.error(
             'parse dir data fail: exception, url:{}, err_msg:{}'.format(
                 response.url, e))
Exemplo n.º 3
0
    def parse_data(self, response):
        """
        Extract file and user items from the ``window.yunData`` blob of a share page.

        The JSON object assigned to ``window.yunData`` is cut out of
        ``response.text`` with a regex and decoded. One ``FileItem`` is yielded
        per entry of its ``file_list``, followed by one ``UserItem`` built from
        its ``uinfo``.

        :param response: response providing ``text`` (page source) and ``url``.
        :return: generator of items. Nothing is yielded (an error is logged)
            when the blob is missing from the page or ``file_list`` is empty.
        :raises ValueError: if the extracted blob is not valid JSON.
        """
        pattern = r'window.yunData = ([\s\S]*?});'
        match = re.search(pattern, response.text)
        if match is None:
            # re.search returns None when the page carries no yunData blob;
            # calling .group() on it would raise AttributeError.
            logging.error("window.yunData not found %s", response.url)
            return

        data = json.loads(match.group(1))
        files = data.get("file_list", [])
        if not files:
            logging.error("len(files) < 1 %s", response.url)
            return

        for file in files:
            yield FileItem(
                url=response.url,
                fs_id=file["fs_id"],
                server_filename=file["server_filename"],
                size=int(file['size']),
                server_mtime=int(file["server_mtime"]),
                server_ctime=int(file["server_ctime"]),
                local_mtime=int(file["local_mtime"]),
                local_ctime=int(file["local_ctime"]),
                isdir=int(file["isdir"]),
                isdelete=int(file["isdelete"]),
                status=int(file["status"]),
                category=int(file["category"]),
                # NOTE(review): this field previously duplicated "category",
                # which looks like a copy-paste slip. It now reads the
                # entry's "share" key, defaulting to 0 when absent - confirm
                # the key against a live payload.
                share=int(file.get("share", 0)),
                path_md5=file["path_md5"],
                path=file["path"],
                parent_path=unquote(file["parent_path"]),
                # Treated as optional so one entry without md5 does not
                # abort the whole page.
                md5=file.get("md5"),
                thumbs=file.get("thumbs"),
                dCnt=int(data["dCnt"]),
                ctime=int(data["ctime"]),
                expiredType=data["expiredType"],
                # ctime + expiredType when expiredType is positive, else 0.
                expires=(int(data["ctime"]) + data["expiredType"]
                         if data["expiredType"] > 0 else 0),
                sharesuk=data["sharesuk"],
                shareid=data["shareid"],
                pansuk=data["pansuk"],
                uk=data["uinfo"]['uk'],
                last_updated=datetime.datetime.utcnow()
            )

        yield UserItem(
            uname=data["uinfo"]['uname'],
            avatar_url=data["uinfo"]['avatar_url'],
            uk=data["uinfo"]['uk'],
            third=data["uinfo"]['third'],
            relation_type=data["uinfo"]['relation_type'],
            last_updated=datetime.datetime.utcnow()
        )
Exemplo n.º 4
0
    def parse_data(self, response):
        """
        Extract file and user items from a JSON share-listing response.

        Yields one ``FileItem`` per entry of the response's ``list`` followed
        by one ``UserItem`` built from ``response.meta`` and the payload's
        ``uk``.

        :param response: response whose body is JSON containing ``errno``,
            ``list``, ``share_id`` and ``uk``; ``response.meta`` must provide
            ``shorturl``, ``ctime``, ``expiredType``, ``share_username`` and
            ``share_photo``.
        :return: generator of items. Nothing is yielded when ``errno`` is
            non-zero; any exception is logged with its traceback and ends the
            generator.
        """
        try:
            data = json.loads(response.text)
            if data['errno'] != 0:
                # logging interpolates with %-style placeholders; the former
                # "{}" placeholders made the record fail to format.
                logging.error("数据接口错误,errno: %s, url: %s", data["errno"], response.url)
                return

            meta = response.meta
            for file in data['list']:
                yield FileItem(
                    url=meta['shorturl'],
                    fs_id=file["fs_id"],
                    server_filename=file["server_filename"],
                    size=int(file['size']),
                    server_mtime=int(file["server_mtime"]),
                    server_ctime=int(file["server_ctime"]),
                    local_mtime=int(file["local_mtime"]),
                    local_ctime=int(file["local_ctime"]),
                    isdir=int(file["isdir"]),
                    category=int(file["category"]),
                    path=file["path"],
                    # Treated as optional so one entry without md5 does not
                    # abort the whole listing.
                    md5=file.get("md5"),
                    thumbs=file.get("thumbs"),
                    ctime=meta['ctime'],
                    expiredType=meta['expiredType'],
                    # ctime + expiredType when expiredType is positive, else 0.
                    expires=(meta['ctime'] + meta['expiredType']
                             if meta['expiredType'] > 0 else 0),
                    shareid=data["share_id"],
                    uk=data["uk"],
                    last_updated=datetime.datetime.utcnow()
                )

            yield UserItem(
                uname=meta['share_username'],
                avatar_url=meta['share_photo'],
                uk=data["uk"],
                last_updated=datetime.datetime.utcnow()
            )
        except Exception:
            # Not a bare "except:": inside a generator that would also catch
            # GeneratorExit when the consumer closes it and log a bogus error.
            logging.exception("数据解析错误 %s", response.url)