def listen_ss(self, p: str, time_str: str, interval_sec: int = 60):
    """
    Computes its wait time independently; not meant to be used together with
    BangumiController.py. Polls for the latest danmu of a bangumi episode on a
    schedule. At initialization, any episode URL from the same season works.
    For an already-published episode, pass the current time as time_str. The
    point of passing a time is to cut down unnecessary "is the target episode
    available yet" checks and thereby the chance of getting banned. For a
    future episode, the script starts once the scheduled time arrives, and it
    will not fail even if the show goes public a few minutes late. Danmu
    fetching begins as soon as the episode becomes watchable, and the fetch
    interval is adjusted dynamically from the danmu increment between two
    consecutive fetches.
    :param p: video part (episode number); it may be unpublished, as long as
              the URL used at initialization belongs to the same series
    :param time_str: video release time in "yyyy-mm-ddThh:mm" format,
                     e.g. "2020-01-02T03:04"
    :param interval_sec: initial fetch interval in seconds, must be > 10
    :return:
    """
    target_time = Converter.str_to_timestamp(time_str)
    interval_sec = max(interval_sec, 11)
    sec_wait = max(11, target_time - int(time.time()))
    print("wait:", sec_wait, "seconds")
    time.sleep(sec_wait - 10)
    # poll until the target episode becomes available
    while True:
        url = "https://www.bilibili.com/bangumi/play/" + self.ssid
        response = Spider.get_html(url)
        ep_json = self.get_epinfo_in_html(response)
        new_series = ep_json['epList']
        if len(new_series) >= int(p):
            print("episode available, start fetching")
            time.sleep(5)
            target_ep = new_series[int(p) - 1]["id"]
            new_url = "https://www.bilibili.com/bangumi/play/ep" + str(target_ep)
            self._get_info_ep(new_url)
            break
        print("target episode not found, waiting", interval_sec, "seconds")
        time.sleep(interval_sec)
    previous_danmu = None
    while True:
        content_bytes = Spider.get_current_danmu(self.cid, self.url)
        now = datetime.fromtimestamp(time.time(), timezone(timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')
        print(now, "fetched danmu")
        with open(self.fileName + '_latest_' + str(int(time.time())) + '.xml', 'wb') as f:
            f.write(content_bytes)
        danmu = DanmuFile.init_from_str(content_bytes.decode('utf-8'))
        if previous_danmu is not None:
            _, inc, _ = DanmuCombinator.diff(previous_danmu, danmu)
            ratio = len(inc) / int(danmu.max_limit)
            print("increment ratio:", ratio)
            if ratio > 0.5:
                # pool is cycling fast: shrink the interval, but keep it > 10s
                interval_sec = max(int(interval_sec / 5), 11)
                print("interval changed to:", interval_sec)
            if ratio < 0.3:
                # pool is cycling slowly: back off, capped at 30 minutes
                interval_sec = min(int(interval_sec * 1.5), 1800)
                print("interval changed to:", interval_sec)
        previous_danmu = danmu
        time.sleep(int(interval_sec))
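# Usage sketch for listen_ss. Assumption: a DanmuMaster-like object has been
# initialized from any episode URL of the same season (the constructor is not
# part of this excerpt, so `master` and its setup are hypothetical). Episode 5
# is scheduled for 2020-01-02T03:04; polling starts then with a 60 s interval.
#
#     master = DanmuMaster()  # hypothetical setup, see class constructor
#     master.listen_ss(p="5", time_str="2020-01-02T03:04", interval_sec=60)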
def listen_ss_once(self):
    content_bytes = Spider.get_current_danmu(self.cid, self.url)
    if content_bytes is None:
        return -1
    now = datetime.fromtimestamp(time.time(), timezone(timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')
    print('[TASK]', now, "fetched: [", self.title, "]")
    with open(self.fileName + '_latest_' + str(int(time.time())) + '.xml', 'wb') as f:
        f.write(content_bytes)
    danmu = DanmuFile.init_from_str(content_bytes.decode('utf-8'))
    ratio = -1
    if self.danmu_set is not None:
        dep, inc, com = DanmuCombinator.diff(self.danmu_set, danmu)
        dep_int, inc_int, com_int = len(dep), len(inc), len(com)
        print("[TASK] existing danmu [", dep_int + com_int, "], new danmu [", inc_int, end=' ], ')
        ratio = inc_int / int(danmu.max_limit)
        print("increment ratio: [", format(ratio, '0.5f'), "]")
    else:
        print("[TASK] first fetch")
    self.danmu_set = danmu
    self.timeProgress = int(time.time())
    return ratio
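# Scheduler sketch. Assumption: this shows how a caller such as
# BangumiController.py might use the returned ratio; the real controller is
# not part of this excerpt, so `task` and the thresholds mirror listen_ss
# rather than confirmed controller code. A high increment ratio means the
# danmu pool is cycling fast, so poll sooner; -1 means the request failed or
# this was the first fetch, so the interval is left unchanged.
#
#     interval = 60
#     while True:
#         ratio = task.listen_ss_once()
#         if ratio > 0.5:
#             interval = max(interval // 5, 11)
#         elif 0 <= ratio < 0.3:
#             interval = min(int(interval * 1.5), 1800)
#         time.sleep(interval)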
def _get_current_danmu(self):
    content_bytes = Spider.get_current_danmu(self.cid, self.url)
    if content_bytes is None:
        return False
    # danmu file to be merged with the history danmu
    with open(self.fileName + '.xml', 'wb') as f:
        f.write(content_bytes)
    # danmu currently in the (size-limited) danmu pool
    with open(self.fileName + '_latest.xml', 'wb') as f:
        f.write(content_bytes)
    return True
def _get_info_av(self, url: str):
    html = Spider.get_html(url)
    pattern = re.compile(r'"cid":(\d+),"page":%s' % self.page)
    pattern1 = re.compile(r'"title":"(.*?)","pubdate":(\d+)')
    self.cid = re.search(pattern, html).group(1)
    self.title, timeUnix_str = re.search(pattern1, html).groups()
    self.timeUnix = int(timeUnix_str)
    folder = "harvest/" + self.no + '_' + DanmuMaster.process_filename(self.title) + "/"
    if not os.path.exists(folder):
        os.mkdir(folder)
    file_name = DanmuMaster.process_filename(self.no + '_' + self.title + '_p' + self.page)
    self.fileName = folder + file_name
def _get_history_danmu(self, date: str):
    """
    Send a history danmu request for a specific date and return the XML string.
    :param date: date string in 'YYYY-MM-DD' format
    :return: xml string in UTF-8 encoding
    """
    content_bytes = Spider.get_history_danmu(self.cid, self.url, date, self.cookie_path)
    xml_str = content_bytes.decode('utf-8')
    with open(self.fileName + '_' + date + '.xml', 'wb') as f:
        f.write(content_bytes)
    print('data length', len(xml_str))
    return xml_str
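# Usage sketch (hypothetical dates): pull the archived danmu pool for a few
# specific days; each call also writes a dated .xml snapshot next to fileName.
#
#     for day in ("2020-01-02", "2020-01-03"):
#         xml_str = master._get_history_danmu(day)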
def check_ep_exist(self):
    response = Spider.get_html(self.url)
    if response is None:
        print("[WARNING] failed to fetch:", self.title)
        return False
    ep_json = self.get_epinfo_in_html(response)
    ep_int = int(self.no[2:])
    new_series = ep_json['epList']
    for ep in new_series:
        if ep['id'] == ep_int:
            self.init_from_ep_json(ep_json, ep_int, self.cookie_path)
            print("[TASK] new episode:", ep_json['h1Title'], "is out")
            return True
    return False
def _get_info_ep(self, url: str):
    html = None
    none_times = 0
    # retry up to 5 times before giving up
    while html is None and none_times < 5:
        html = Spider.get_html(url)
        none_times += 1
    if html is None:
        print("failed to fetch bangumi info after repeated requests")
        exit(1)
    ep_json = self.get_epinfo_in_html(html)
    self._resolve_ep_json(ep_json)
def _get_current_danmu(self):
    content_bytes = None
    none_count = 0
    # retry up to 5 times before giving up
    while content_bytes is None and none_count < 5:
        content_bytes = Spider.get_current_danmu(self.cid, self.url)
        none_count += 1
    if content_bytes is None:
        print("repeated requests failed.")
        return False
    # danmu file to be merged with the history danmu
    with open(self.fileName + '.xml', 'wb') as f:
        f.write(content_bytes)
    # danmu currently in the (size-limited) danmu pool
    with open(self.fileName + '_latest.xml', 'wb') as f:
        f.write(content_bytes)
    return True
def _get_info_ep(self, url: str):
    html = Spider.get_html(url)
    ep_json = self.get_epinfo_in_html(html)
    self._resolve_ep_json(ep_json)
def _get_history_month(self, month: str):
    content_bytes = Spider.get_history_month(self.cid, self.url, month, self.cookie_path)
    json_str = content_bytes.decode('utf-8')
    return json_str
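# Usage sketch tying _get_history_month and _get_history_danmu together.
# Assumption: the returned JSON carries the list of dates that actually have
# archived danmu; the exact key ("data" below) is an assumption about the
# payload shape and is not confirmed by this excerpt.
#
#     import json
#     month_json = json.loads(master._get_history_month("2020-01"))
#     for day in month_json.get("data") or []:
#         master._get_history_danmu(day)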