forked from allanpk716/BiliBiliDownloader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PreProcess.py
120 lines (104 loc) · 5.06 KB
/
PreProcess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
import os
import operator as op
from VideoInfo import VideoInfo
from UperInfo import UperInfo
import asyncio
from Common import RandomSleep
from ruia_pyppeteer import PyppeteerRequest as Request
from Items import PageItem, BiliBiliItem, UserItem, VideoCountItem
class PreProcess():
    """Collect local and remote state for each uploader before downloading.

    The instance holds a list of uploader objects (``uperList``) and fills
    in, per uploader: the videos already present on disk
    (``VideoInfoDic_loaclFileName``), the number of listing pages
    (``PageCount``), the reported video count (``NeedDownloadFilmCount``)
    and the listing page URLs (``PageList``).
    """

    # Process() raises once its per-uploader attempt counter exceeds this
    # value while PageCount is still <= 0.
    MAX_PAGE_COUNT_RETRY = 10

    def __init__(self, **kwargs):
        """Store the logger and the uploader list.

        Keyword Args:
            logger: object exposing ``info``/``error``/``warning``. When it
                is missing or ``None`` a standard-library logger is used, so
                that the error path below can always report.
            uperList: iterable of uploader objects (required).

        Raises:
            Exception: if ``uperList`` is not supplied.
        """
        super().__init__()
        # ----------------------------------------------------------------
        # Logging. Imported locally so this class does not depend on the
        # module's import block providing ``logging``.
        import logging
        logger = kwargs.get('logger')
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        # ----------------------------------------------------------------
        # Uploader list; every other method iterates over it.
        if 'uperList' not in kwargs:
            info = "Error:You Need Set uperList"
            self.logger.error(info)
            raise Exception(info)
        self.uperList = kwargs['uperList']
        # ----------------------------------------------------------------
        self.logger.info("PreProcess Init Done.")

    def ScanLoclInfo(self, saveRootPath):
        """Scan ``saveRootPath`` for videos that were already downloaded.

        Every directory whose name equals an uploader's ``UserName`` is
        treated as that uploader's folder. Each ``.flv`` / ``.mp4`` file in
        it is recorded in ``uper.VideoInfoDic_loaclFileName`` under its file
        name without the extension, as a ``VideoInfo`` flagged
        ``isDownloaded``.

        Args:
            saveRootPath: root directory handed to ``os.walk``.
        """
        # Built once; if two uploaders share a UserName the later one wins,
        # which matches the previous linear scan that never stopped early.
        uperByName = {uper.UserName: uper for uper in self.uperList}

        for fpath, _dirnames, fnames in os.walk(saveRootPath):
            # fpath:  directory currently being visited
            # fnames: names of the files inside it (no path component)
            folderName = os.path.split(fpath)[1] if fpath else ''
            nowUper = uperByName.get(folderName)
            if nowUper is None:
                continue
            for fname in fnames:
                # splitext removes only the trailing extension; a plain
                # str.replace would also delete the same text appearing
                # earlier in the file name.
                baseName, extensionName = os.path.splitext(fname)
                if extensionName not in ('.flv', '.mp4'):
                    continue
                vi = VideoInfo('')
                vi.loaclFileName = baseName
                vi.isDownloaded = True
                nowUper.VideoInfoDic_loaclFileName[vi.loaclFileName] = vi

        self.logger.info('ScanLoclInfo Done.')
        self.logger.info("ScanLoclInfo Result" + "----" * 20)
        for uper in self.uperList:
            self.logger.info('Local ' + uper.UserName + ' Got ' + str(len(uper.VideoInfoDic_loaclFileName)) + " Videos.")

    def ProcessOneUper(self, uper):
        """Fetch one uploader's main video page and update its counters.

        Sets ``uper.PageCount`` and ``uper.NeedDownloadFilmCount``. When the
        fetch fails (``GetPageInfo`` returns ``None`` values) both are set to
        0 instead of raising, so the caller's retry loop can try again.

        Args:
            uper: uploader object providing ``UserName`` and
                ``MainVideoPageUrl``.
        """
        self.logger.info('Analysis ' + uper.UserName + ' MainVideoPage Start···')
        # NOTE(review): the shared event loop is kept on purpose; whether the
        # pyppeteer-backed request tolerates a fresh loop per call is not
        # visible from this file.
        loop = asyncio.get_event_loop()
        pageInfo, videoCountInfo, _userInfo = loop.run_until_complete(
            self.GetPageInfo(uper.MainVideoPageUrl))

        if pageInfo is None:
            uper.PageCount = 0
        else:
            uper.PageCount = pageInfo.count

        if videoCountInfo is None:
            # A failed fetch yields None here; dereferencing it would crash
            # with AttributeError and bypass the retry logic in Process().
            uper.NeedDownloadFilmCount = 0
            self.logger.warning('Analysis ' + uper.UserName + ' MainVideoPage got no video count.')
        else:
            uper.NeedDownloadFilmCount = int(videoCountInfo.count)
        self.logger.info('Analysis ' + uper.UserName + ' MainVideoPage Done.')

    def Process(self):
        """Resolve every uploader's page count and build its page URL list.

        Raises:
            Exception: if an uploader's ``PageCount`` is still <= 0 after the
                attempt counter exceeds ``MAX_PAGE_COUNT_RETRY``.
        """
        # Find the number of listing pages of every uploader.
        for uper in self.uperList:
            # The total page count is mandatory, so keep trying.
            pgAllTime = 0
            while uper.PageCount <= 0:
                if pgAllTime > self.MAX_PAGE_COUNT_RETRY:
                    raise Exception("Error:Try get " + uper.UserName + ' PageCount fail.')
                self.ProcessOneUper(uper)
                RandomSleep()
                pgAllTime += 1
        self.logger.info('GetPageInfo Done.')
        # ----------------------------------------------------------------
        # Enumerate every listing page (1..PageCount) of every uploader.
        for uper in self.uperList:
            for index in range(1, uper.PageCount + 1):
                nowPageUrl = "https://space.bilibili.com/{0}/video?tid=0&page={1}&keyword=&order=pubdate".format(uper.UserId, index)
                uper.PageList.append(nowPageUrl)
        self.logger.info('PreProcess.Process Done.')

    async def GetPageInfo(self, url):
        """Fetch ``url`` with JavaScript loading enabled and parse three items.

        Args:
            url: page to request.

        Returns:
            Tuple ``(pageInfo, videoCountInfo, userInfo)`` produced by
            ``PageItem``, ``VideoCountItem`` and ``UserItem``. On any
            exception the error is logged and ``(None, None, None)`` is
            returned.
        """
        try:
            self.logger.info("Requesting " + url)
            request = Request(url, load_js=True)
            response = await request.fetch()
            self.logger.info("fetched " + url)

            self.logger.info("PageItem.get_item start")
            pageInfo = await PageItem.get_item(html=response.html)
            self.logger.info("PageItem.get_item end")

            self.logger.info("VideoCountItem.get_item start")
            videoCountInfo = await VideoCountItem.get_item(html=response.html)
            self.logger.info("VideoCountItem.get_item end")

            self.logger.info("UserItem.get_item start")
            userInfo = await UserItem.get_item(html=response.html)
            self.logger.info("UserItem.get_item end")
        except Exception as ex:
            # Deliberate best effort: callers treat None as "retry later".
            self.logger.error('GetPageInfo Error: ' + str(ex))
            return None, None, None
        return pageInfo, videoCountInfo, userInfo