def getUpdate(self):
    """Report whether the daily illustration ranking has been refreshed.

    Fetches page 1 of the daily illust ranking and compares the ``date``
    field of its first entry with ``getBeforeDate(1)`` (presumably
    yesterday's date -- confirm against pixivUtils).

    Returns:
        The result of that comparison, or ``False`` when the request
        fails, the status is not 200, the payload is empty, or any other
        error occurs (errors are logged, never raised).
    """
    endpoint = 'https://www.pixiv.net/touch/ajax_api/ajax_api.php?mode=ranking&mode_rank=daily&date=&content=illust&p=1'
    refererHeader = __config__.referer['dailyIllust']
    try:
        response = requests.get(
            endpoint,
            proxies=__config__.proxies,
            headers={
                'user-agent': getUserAget(),
                'referer': refererHeader
            },
            timeout=10)
        response.encoding = 'utf-8'
        # Anything but 200 is reported through the generic handler below.
        if response.status_code != 200:
            raise Exception('res_code: ' + str(response.status_code))
        entries = json.loads(response.text)
        if len(entries) == 0:
            return False
        newest = entries[0]
        return newest['date'] == getBeforeDate(1)
    except requests.exceptions.RequestException:
        writeErrorLog('getUpdate: 请求出错')
    except Exception:
        writeErrorLog('getUpdate: 其他出错, ' + traceback.format_exc())
    return False
def requestsUser(self, userId):
    """Fetch a user's details, download the avatar and store the user.

    Args:
        userId: The user id as a string; it is concatenated directly
            into the request URL.

    On a ``requests`` error the call is retried recursively after a random
    5-10 second pause, using ``self.time`` as the attempt counter; the
    counter is reset to 0 once it reaches 5. Every other error is logged
    and swallowed, so this method does not raise for failures inside the
    ``try`` block.
    """
    url = 'https://www.pixiv.net/touch/ajax/user/details?id=' + userId
    try:
        # timeout matches getUpdate/requestsRankUrl; without one a stalled
        # connection would block the crawler indefinitely.
        res = requests.get(url,
                           proxies=__config__.proxies,
                           headers={
                               'user-agent': getUserAget(),
                               'referer': url
                           },
                           timeout=10)
        res.encoding = 'utf-8'
        if res.status_code != 200:
            raise Exception('res_code: ' + str(res.status_code))
        resJson = json.loads(res.text)
        if 'user_details' in resJson:
            userModel = pixivUser(resJson['user_details'])
            userModel.downloadHeadImg()
            userModel.insertDb()
    except requests.exceptions.RequestException:
        # NOTE(review): the message says 20s but the actual pause below is
        # a random 5-10s; the text is kept as-is to leave log output stable.
        writeErrorLog('requestsUser: 连接url: ' + url + ' 出错, 20s后重试')
        self.time = self.time + 1
        if self.time < 5:
            time.sleep(random.randint(5, 10))
            self.requestsUser(userId)
        else:
            self.time = 0
    except Exception:
        writeErrorLog('requestsUser: ' + 'url, ' + traceback.format_exc())
def downloadHeadImg(self):
    """Download this user's avatar into the configured avatar directory.

    The image at ``self.headimg`` is saved as ``<id>_head<ext>`` under
    ``__config__.download['baseHeadPath']`` (created if missing), where
    ``<ext>`` is whatever ``os.path.splitext`` returns for the URL.

    Raises:
        Exception: when the HTTP request itself fails
            (``requests.exceptions.RequestException``); the original error
            is attached as the cause.

    Any other failure (non-200 status, filesystem error) is appended to
    ``errorHeadimg.txt`` and logged instead of being raised.
    """
    # Normalise once: the id is used in string concatenation everywhere
    # below and would raise TypeError if it were not a str.
    userId = str(self.modelData['id'])
    try:
        res = requests.get(self.headimg,
                           proxies=__config__.proxies,
                           headers={
                               'referer':
                               'https://www.pixiv.net/member.php?id=' + userId
                           },
                           timeout=10)
        if res.status_code != 200:
            raise Exception('res_code: ' + str(res.status_code))
        fileName = userId + '_head' + os.path.splitext(self.headimg)[1]
        filePath = __config__.download['baseHeadPath']
        os.makedirs(filePath, exist_ok=True)
        # Context manager guarantees the file is closed even if the write
        # fails part-way.
        with open(os.path.join(filePath, fileName), 'wb') as fp:
            fp.write(res.content)
        writeErrorLog('当前头像: ' + userId + '下载成功')
    except requests.exceptions.RequestException as e:
        raise Exception('头像请求失败') from e
    except Exception:
        with open('errorHeadimg.txt', 'a', encoding='utf-8') as error:
            error.write(userId + ' ' + self.headimg + '\n')
        writeErrorLog('当前头像: ' + userId + '下载失败, ' +
                      traceback.format_exc())
def insertDb(self):
    """Insert ``self.modelData`` as one row of the ``ranking`` table.

    Keys of ``self.modelData`` become the column list; every value is
    converted with ``str`` and single-quoted, with embedded single quotes
    doubled. On any failure the record is appended as JSON to
    ``errorRanking.txt`` and logged; nothing is raised for errors inside
    the ``try`` block.
    """
    try:
        columns = []
        literals = []
        for column, value in self.modelData.items():
            columns.append(column)
            literals.append("'" + str(value).replace("'", "''") + "'")
        # NOTE(review): the statement is assembled by string concatenation
        # and column names are not escaped at all. Switching to the
        # driver's parameter placeholders would be safer, but the DB
        # driver behind dbCursor is not visible from this block.
        dbCursor.execute("INSERT INTO ranking(" + ','.join(columns) +
                         ") VALUES (" + ','.join(literals) + ")")
    except Exception:
        dumped = json.dumps(self.modelData)
        # Context manager so the error file is flushed and closed right
        # away instead of leaking the handle.
        with open('errorRanking.txt', 'a', encoding='utf-8') as error:
            error.write(dumped + '\n')
        writeErrorLog('当前排行: 写入数据库失败, ' + dumped + ', ' +
                      traceback.format_exc())
def insertDb(self):
    """Insert ``self.modelData`` as one row of the ``illust`` table.

    Keys of ``self.modelData`` become the column list; every value is
    converted with ``str`` and single-quoted, with embedded single quotes
    doubled. Success is logged. On any failure the illust id is appended
    to ``errorArticle.txt`` and the traceback is logged.
    """
    try:
        columns = []
        literals = []
        for column, value in self.modelData.items():
            columns.append(column)
            literals.append("'" + str(value).replace("'", "''") + "'")
        # NOTE(review): the statement is assembled by string concatenation
        # and column names are not escaped at all. Switching to the
        # driver's parameter placeholders would be safer, but the DB
        # driver behind dbCursor is not visible from this block.
        dbCursor.execute("INSERT INTO illust(" + ','.join(columns) +
                         ") VALUES (" + ','.join(literals) + ")")
        writeErrorLog('当前作品: ' + str(self.modelData['id']) + '已写入数据库')
    except Exception:
        # str() keeps the handler itself from raising TypeError when the
        # id is not a string; `with` closes the error file promptly.
        with open('errorArticle.txt', 'a', encoding='utf-8') as error:
            error.write(str(self.modelData['id']) + '\n')
        writeErrorLog('当前作品: ' + str(self.modelData['id']) +
                      '写入数据库失败, ' + traceback.format_exc())
def requestsArticle(self, url, articleId):
    """Fetch an illustration's details, download it and store it.

    Args:
        url: Full details-endpoint URL to request.
        articleId: Illustration id; only used to build the referer header.

    When the JSON response has a ``body`` key, a ``pixivArticle`` is built
    from ``body['illust_details']``, then downloaded and inserted. On a
    ``requests`` error the call is retried recursively after a random
    5-10 second pause, using ``self.time`` as the attempt counter; the
    counter is reset to 0 once it reaches 5. Every other error is logged
    and swallowed.
    """
    referer = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + str(
        articleId)
    try:
        # timeout matches getUpdate/requestsRankUrl; without one a stalled
        # connection would block the crawler indefinitely.
        res = requests.get(url,
                           proxies=__config__.proxies,
                           headers={
                               'user-agent': getUserAget(),
                               'referer': referer,
                               'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8'
                           },
                           timeout=10)
        res.encoding = 'utf-8'
        if res.status_code != 200:
            raise Exception('res_code: ' + str(res.status_code))
        resJson = json.loads(res.text)
        if 'body' in resJson:
            articleModel = pixivArticle(resJson['body']['illust_details'])
            articleModel.downloadImg()
            articleModel.insertDb()
    except requests.exceptions.RequestException:
        # NOTE(review): the message says 20s but the actual pause below is
        # a random 5-10s; the text is kept as-is to leave log output stable.
        writeErrorLog('requestsArticle: 连接url: ' + url + ' 出错, 20s后重试')
        self.time = self.time + 1
        if self.time < 5:
            time.sleep(random.randint(5, 10))
            self.requestsArticle(url, articleId)
        else:
            self.time = 0
    except Exception:
        writeErrorLog('requestsArticle:' + 'url, ' + traceback.format_exc())
def downloadImg(self, url, id, fileNameWithExt):
    """Download one image file of an illustration and derive its previews.

    Args:
        url: Direct URL of the image.
        id: Illustration id; used for the referer header, the target
            sub-directory and the failure log.
        fileNameWithExt: File name (with extension) to save the image as.

    The file is written to ``<basePath>/<id>/<fileNameWithExt>`` and then
    handed to ``self.getThumbnailAndMiddleImg``. A ``requests`` error is
    retried up to 5 times with a 20 second pause between attempts; once
    retries are exhausted, or on any other failure (including a non-200
    status), the id is appended to ``errorDownload.txt`` and logged.
    """

    def recordFailure(detail=''):
        # Persist the id for a later re-run and log the failure.
        with open('errorDownload.txt', 'a', encoding='utf-8') as error:
            error.write(str(id) + '\n')
        writeErrorLog('当前作品: ' + str(id) + '下载失败,已写入errorDownload.txt' +
                      ', url: ' + url + detail)

    headers = {
        'referer':
        'https://www.pixiv.net/member_illust.php?mode=big&illust_id=' +
        str(id),
        'user-agent':
        getUserAget()
    }
    try:
        # The timeout bounds connect time and each wait for data, not the
        # whole transfer, so large images still download.
        res = requests.get(url,
                           proxies=__config__.proxies,
                           headers=headers,
                           timeout=10)
        if res.status_code != 200:
            # Previously a non-200 answer was dropped without any trace.
            raise Exception('res_code: ' + str(res.status_code))
        filePath = os.path.join(__config__.download['basePath'], str(id))
        os.makedirs(filePath, exist_ok=True)
        fullPath = os.path.join(filePath, fileNameWithExt)
        with open(fullPath, 'wb') as fp:
            fp.write(res.content)
        self.getThumbnailAndMiddleImg(fullPath, fileNameWithExt)
        writeErrorLog('当前作品: ' + str(id) + '下载成功' + ', url: ' + url)
    except requests.exceptions.RequestException:
        if self.__retry_times < 5:
            time.sleep(20)
            self.__retry_times = self.__retry_times + 1
            # The retry must forward fileNameWithExt; omitting it made
            # every retry fail with TypeError.
            self.downloadImg(url, id, fileNameWithExt)
        else:
            # Reset so a later download on this instance gets its own
            # retry budget.
            self.__retry_times = 0
            recordFailure()
    except Exception:
        recordFailure(', ' + traceback.format_exc())
def requestsRankUrl(self, mode, content, date, page):
    """Crawl one ranking page and fetch every user/illust not yet stored.

    Args:
        mode: Ranking mode, sent as ``mode_rank``.
        content: Content type, sent as ``content_rank``.
        date: Ranking date, sent as ``date``.
        page: 1-based page number, sent as ``p``.

    For each entry with both ``user_id`` and ``illust_id``: the ranking
    row is inserted, then the ``user`` and ``illust`` tables are checked
    and ``requestsUser`` / ``requestsArticle`` are called for ids that are
    not present yet. Pauses between steps throttle the crawl. On a
    ``requests`` error the page is retried recursively after a random
    5-10 second pause, using ``self.time`` as the attempt counter; every
    other error is logged and swallowed.
    """
    baseUrl = 'https://www.pixiv.net/touch/ajax_api/ajax_api.php'
    referer = __config__.referer['dailyIllust']
    if page > 1:
        referer = referer + '&p=' + str(page - 1)
    fullUrl = baseUrl + '?' + qsStringify({
        'mode': 'ranking',
        'mode_rank': mode,
        'content_rank': content,
        'p': page,
        'date': date
    })
    try:
        writeErrorLog('mode: ' + mode + ' page: ' + str(page))
        res = requests.get(fullUrl,
                           proxies=__config__.proxies,
                           headers={
                               'user-agent': getUserAget(),
                               'referer': referer
                           },
                           timeout=10)
        res.encoding = 'utf-8'
        if res.status_code != 200:
            raise Exception('res_code: ' + str(res.status_code))
        resJson = json.loads(res.text)
        for item in resJson:
            if not (item.get('user_id') and item.get('illust_id')):
                continue
            # The ids come from the remote response and are spliced into
            # SQL text below, so coerce them to str (they may arrive as
            # ints) and accept digits only.
            userId = str(item['user_id'])
            illustId = str(item['illust_id'])
            if not (userId.isdigit() and illustId.isdigit()):
                writeErrorLog('requestsRankUrl: id非数字, 已跳过, user_id: ' +
                              userId + ', illust_id: ' + illustId)
                continue

            rankModel = pixivRank(item, mode, content)
            rankModel.insertDb()
            time.sleep(0.5)

            dbCursor.execute("SELECT * FROM user WHERE id = " + userId)
            if not dbCursor.fetchall():
                self.requestsUser(userId)
                time.sleep(random.randint(5, 10))
            else:
                writeErrorLog('当前用户: ' + userId + '已存在')
                time.sleep(0.5)

            dbCursor.execute("SELECT * FROM illust WHERE id = " + illustId)
            if not dbCursor.fetchall():
                artRefer = 'https://www.pixiv.net/ranking.php?mode=' + mode + '&content=' + content + '&p=' + str(
                    page)
                artUrl = 'https://www.pixiv.net/touch/ajax/illust/details?illust_id=' + illustId + '&ref=' + artRefer
                self.requestsArticle(artUrl, illustId)
                time.sleep(random.randint(5, 10))
            else:
                writeErrorLog('当前作品: ' + illustId + '已存在')
                time.sleep(0.5)
    except requests.exceptions.RequestException:
        # NOTE(review): the message says 20s but the actual pause below is
        # a random 5-10s; the text is kept as-is to leave log output stable.
        writeErrorLog('requestsRankUrl: 连接url: ' + fullUrl + ' 出错, 20s后重试')
        self.time = self.time + 1
        if self.time < 5:
            time.sleep(random.randint(5, 10))
            self.requestsRankUrl(mode, content, date, page)
        else:
            self.time = 0
    except Exception:
        writeErrorLog('requestsRankUrl: ' + 'fullUrl, ' +
                      traceback.format_exc())
from pixivUtils import writeErrorLog, getBeforeDate
from pixivConfig import pixivConfig
import time
import configparser

url = pixivUrl()
isUpdate = False
# Poll once a minute until getUpdate() reports that the ranking changed.
while not isUpdate:
    isUpdate = url.getUpdate()
    if not isUpdate:
        time.sleep(60)
writeErrorLog('排行已更新,开始下载')


# Update the ranking date
def updateRankDate():
    """Store ``getBeforeDate(1)`` as ``update`` in the ``[date]`` section.

    The target file is ``pixivConfig().rank['updatePath']``. Existing
    content is read first so other sections survive, then the whole file
    is rewritten. Opening with ``'w'`` truncates and creates the file,
    whereas the earlier ``'r+'`` left stale trailing bytes behind a
    shorter write and failed when the file did not exist yet.
    """
    __config__ = pixivConfig()
    updatePath = __config__.rank['updatePath']
    conf = configparser.ConfigParser()
    try:
        # A missing file is silently ignored by ConfigParser.read().
        conf.read(updatePath)
    except configparser.Error:
        # Unparseable content: start from a clean parser instead.
        conf = configparser.ConfigParser()
    if not conf.has_section('date'):
        conf.add_section('date')
    conf.set('date', 'update', getBeforeDate(1))
    # Write to file
    with open(updatePath, 'w') as conffile:
        conf.write(conffile)