def __init__(self, mode=0):
    """Set up configuration state and the image/video data sources.

    Args:
        mode: run phase — 0 = training, 1 = testing, 2 = real-world use.
    """
    self.mode = mode
    # Symbolic identifiers for the supported network architectures.
    self.MLP = 0  # multi-layer perceptron
    self.RNN = 1  # recurrent neural network
    self.CNN = 2  # convolutional neural network
    # Symbolic identifiers for the run phases (mirror the `mode` argument).
    self.training = 0
    self.testing = 1
    self.real_world = 2
    # Dataset / video settings; filled in by self.Initial() below.
    self.paths = None
    self.number_of_samples = None
    self.number_of_classes = None
    self.image_width = None
    self.image_height = None
    self.image_type = None
    self.video_path = None
    self.video_scale = None
    self.video_overlap_size = None
    self.video_patch_size = None
    self.Initial()
    # Build the data sources from the freshly initialized settings.
    self.Data = Dataset(self.paths, self.number_of_samples,
                        self.number_of_classes, self.image_width,
                        self.image_height, self.image_type)
    self.VidData = VideoData(self.video_path, self.video_patch_size,
                             self.video_overlap_size)
def get_video_list(self, limit=0):
    """Fetch the channel's videos, newest first, via the YouTube search API.

    Pages backwards in time by moving the `publishedBefore` cursor to one
    second before the oldest video seen so far, 50 results per request.

    Args:
        limit: maximum number of videos to return; 0 means "no limit"
            (up to 50 full pages).

    Returns:
        list: `VideoData` objects with `id`, `title` and `published` set.
    """
    max_loops = 50
    max_results = 50
    if limit:
        max_loops = math.floor(limit / 50)
        max_results = limit % 50
        # BUG FIX: when `limit` is an exact multiple of 50 the original
        # code set max_results to 0, which issued a final request with
        # maxResults=0. Fold the empty remainder into a full last page.
        if max_results == 0:
            max_loops -= 1
            max_results = 50
    end_of_videos = False
    videos = []
    timestamp = str(datetime.utcnow().replace(microsecond=0).isoformat()) \
        + "Z"
    while not end_of_videos:
        # Full 50-result pages while whole loops remain; the remainder
        # (max_results) is requested on the final page.
        result_limit = max_results if not max_loops else 50
        result = self.service.search().list(
            part="snippet",
            channelId=self.channel_id,
            maxResults=result_limit,
            order="date",
            type='video',
            publishedBefore=timestamp).execute()
        for video in result.get("items", []):
            videos.append(VideoData())
            videos[-1].id = video.get("id").get("videoId")
            snippet = video.get("snippet", [])
            videos[-1].title = snippet.get("title")
            videos[-1].published = snippet.get("publishedAt")
        # Loop End
        max_loops = max_loops - 1
        if len(result.get("items", [])) < max_results or max_loops < 0:
            end_of_videos = True
        else:
            timestamp = videos[-1].published
            # BUG FIX: publishedAt may come back with or without a
            # fractional-second part; the original only accepted ".%f"
            # and raised ValueError on plain "...:SSZ" timestamps.
            fmt = ("%Y-%m-%dT%H:%M:%S.%fZ" if "." in timestamp
                   else "%Y-%m-%dT%H:%M:%SZ")
            datestamp = datetime.strptime(timestamp, fmt)
            # Step one second back so the next page excludes this video.
            datestamp = datestamp - timedelta(0, 1)
            timestamp = str(datestamp.replace(microsecond=0).isoformat()) \
                + "Z"
    return videos
def characterArt_demo():
    """Demo: grab one video's danmaku and comments, then render its cover
    image as character art annotated with that text."""
    url = 'https://www.bilibili.com/video/av79839682'
    clip = VideoData(url)
    print('正在获取弹幕')
    clip.getDanmaku()
    print('正在获取评论')
    clip.getVideoComment()
    title = clip.video_info['title']
    # Per-video assets live in a folder named after the video title.
    folder = 'C:\\Users\\William\\Desktop\\bili\\videos_data\\{}'.format(
        title)
    os.chdir(folder)
    cover = 'cover.jpg'
    danmaku = 'danmaku.txt'
    replies = 'replies.txt'
    print('正在生成字符画')
    characterart.createCharacterArt(cover, danmaku, replies)
    print('已成功保存字符画')
from videodata import VideoData

# Crawl a single video; constructing VideoData creates the output folder
# and fetches the basic info plus the cover image.
video_url = 'https://www.bilibili.com/video/av80565276'
video = VideoData(video_url)
video.getDanmaku()       # danmaku (bullet comments)
video.getVideoComment()  # regular comments
video.getVideoTags()     # tags
video.getVideoData()     # danmaku + comments + tags in one call

import bilibilidata

# Signature: getRank(url=None, rank_type='all')
bilibilidata.getRank()  # default: site-wide ranking
# rank_type accepts: all, origin, bangumi, cinema, rookie
bilibilidata.getRank(rank_type='bangumi')
# Or pass an explicit ranking-page URL.
rank_url = 'https://www.bilibili.com/ranking/cinema/23/0/3'
bilibilidata.getRank(url=rank_url)
def tagsWordcloud_demo():
    """Demo: build a word cloud from the tags of every video on the ranking.

    Steps: download the site-wide ranking CSV, crawl each ranked video's
    tags (best effort — failures are skipped), read the per-video
    ``tags.txt`` files, then render and save the word cloud PNG.
    """
    bilibilidata.getRank()
    os.chdir(r'C:\Users\William\Desktop\bili\bilibili_data')
    rank_data = pd.read_csv('all.csv')
    print('成功获取排行榜数据')
    rank_url = rank_data.url
    rank_title = rank_data.title
    url_head = r'https:'
    # The CSV stores protocol-relative URLs ("//www..."); prepend "https:".
    for i in range(len(rank_url)):
        rank_url[i] = url_head + rank_url[i]
    # 爬取排行榜每一个视频的基本信息
    print('正在爬取排行榜每一个视频的基本信息')
    process_num = 1
    for each in rank_url:
        # Best effort: skip videos that fail to crawl but keep counting.
        # BUG FIX: the original used a bare `except:`, which also swallows
        # SystemExit/KeyboardInterrupt; narrow it to Exception.
        try:
            os.chdir(r'C:\Users\William\Desktop\bili')
            video = VideoData(each)
            video.getVideoTags()
            print(process_num)
            process_num += 1
        except Exception:
            process_num += 1
    # Read each video's saved tags; skip titles whose folder/file is missing.
    tags = list()
    os.chdir(r'C:\Users\William\Desktop\bili')
    for each in rank_title:
        try:
            os.chdir(
                r'C:\Users\William\Desktop\bili\videos_data\{}'.format(each))
            with open('tags.txt', encoding='utf-8') as file:
                tags.append(file.readlines())
        except Exception:
            pass
    # Flatten all tag lines into one space-separated text.
    # (join instead of the original quadratic `+=` concatenation loop)
    tags_txt = ''.join(
        tag for video_tags in tags for tag in video_tags
    ).replace('\n', ' ')
    # Load the stop-word list, one word per line.
    path = 'C:\\Users\\William\\Desktop\\bili\\百度停用词表.txt'
    with open(path, encoding='utf-8') as file:
        stopwords = {line.replace("\n", "") for line in file.readlines()}
    # Render and save the tag word cloud.
    wc = WordCloud(background_color='white', font_path=font_path,
                   stopwords=stopwords, width=1920, height=1080, scale=2)
    wc.generate(tags_txt)
    plt.imshow(wc)
    plt.axis('off')
    os.chdir('C:\\Users\\William\\Desktop\\bili')
    plt.savefig('./排行榜标签词云.png')
def getDanmaku_demo():
    """Demo: download the danmaku (bullet comments) of a single video."""
    url = 'https://www.bilibili.com/video/av78273046'
    clip = VideoData(url)
    clip.getDanmaku()
def getVideoComment_demo():
    """Demo: download the comments of a bangumi episode page."""
    url = 'https://www.bilibili.com/bangumi/play/ep285946'
    episode = VideoData(url)
    episode.getVideoComment()
def getVideoInfo_demo():
    """Demo: fetch a video's basic info (done by the VideoData constructor)."""
    url = 'https://www.bilibili.com/video/av68733672'
    VideoData(url)