def sendPhoto(url, item):
    """Send the photo found in ``item`` to ``channel`` with its caption.

    Nothing is sent when ``findSrc`` yields no source for ``item``.
    The caption is the text of the ``<span itemprop="caption">`` inside
    ``item``.
    """
    result = Result()
    image_src = findSrc(item)
    if image_src:
        result.imgs = [image_src]
        caption_tag = item.find('span', itemprop='caption')
        result.cap = caption_tag.text
        album_sender.send(channel, url, result)
def _isKnownBanner(file_size, w, h):
    """Return True if the image matches a known header/footer banner."""
    # (min_size, max_size, width, height). Size bounds are exclusive;
    # None for the size bounds means "any file size".
    signatures = (
        (36000, 36200, 1080, 1080),  # Jiemian Culture header
        (27000, 27300, 640, 640),    # Sixiang Shichang
        (None, None, 750, 234),      # Jiemian Culture header
        (6000, 9000, 347, 347),      # Jiemian Culture header
        (87000, 91000, 900, 500),    # Meiguo Huaren Zatan header
        (53000, 56000, 795, 504),    # WeChat footer
        (57000, 61000, 1011, 282),   # Duan Shi Ji header
    )
    for low, high, width, height in signatures:
        if (w, h) != (width, height):
            continue
        if low is None or low < file_size < high:
            return True
    return False


def _findCoverImage(content):
    """Return the ``src`` of the first usable ``<img>`` in content, or None."""
    for item in content.findAll('img'):
        path = item.get('src')
        if not path:
            continue
        try:
            cached_url.get(path, mode='b', force_cache=True)
            file_path = cached_url.getFilePath(path)
            # Image.open keeps the underlying file open until the image is
            # closed, so read the dimensions inside a context manager.
            with Image.open(file_path) as img:
                w, h = img.size
        except Exception:
            # Best effort: skip images that cannot be fetched or decoded.
            continue
        file_size = os.stat(file_path).st_size
        if _isKnownBanner(file_size, w, h):
            continue
        # Accept only images that are neither extremely elongated nor tiny.
        if w * 0.25 < h < w * 4 and min(w, h) > 100 and max(w, h) > 300:
            return path
    return None


def getAlbum(url, force_cache=True, word_limit=200, paragraph_limit=3,
             append_source=False, append_url=True):
    """Build an album (at most one cover image plus a caption) for an article.

    Args:
        url: Article URL.
        force_cache: Forwarded to ``_getArticle``.
        word_limit: Forwarded to ``cutCaptionHtml`` to shorten the caption.
        paragraph_limit: The caption keeps at most ``paragraph_limit * 2``
            lines. When it is below 5, non-empty lines of 20 characters or
            fewer are dropped as well.
        append_source: When true, ``album.url`` is set to ``url``.
        append_url: When true, ``url`` is appended to the caption.

    Returns:
        An ``AlbumResult`` with ``imgs`` and ``cap_html_v2`` populated.
    """
    # NOTE(review): ``.text`` is used below as a parsed HTML node (findAll,
    # replace_with); presumably a BeautifulSoup tag -- confirm in _getArticle.
    content = _getArticle(url, force_cache=force_cache).text
    album = AlbumResult()
    cover = _findCoverImage(content)
    if cover:
        album.imgs.append(cover)

    # Turn images, line breaks and paragraph ends into blank lines so the
    # flattened text keeps its paragraph structure.
    for tag in ['img', 'br']:
        for item in content.findAll(tag):
            item.replace_with('\n\n')
    for item in content.findAll('p'):
        item.append('\n\n')

    title = '【%s】\n\n' % getTitle(url)
    lines = [line.strip() for line in content.text.split('\n')]
    lines = [line for line in lines if isGoodLine(line)]
    if paragraph_limit < 5:
        lines = [line for line in lines if not line or len(line) > 20]
    caption = cutCaptionHtml('\n'.join(lines), word_limit)
    caption = caption.strip().strip('\ufeff').strip()
    lines = caption.split('\n')[:paragraph_limit * 2]
    album.cap_html_v2 = title + '\n'.join(lines).strip()
    if append_url:
        album.cap_html_v2 += '\n\n' + url
    if append_source:
        album.url = url
    return album
def get(url):
    """Fetch ``url`` and return a ``Result`` with title, caption and images.

    The title is read from the page's ``og:title`` meta tag and is also
    used as the HTML caption; images come from ``getImages`` applied to
    the fetched content.
    """
    result = Result()
    result.url = url
    html = cached_url.get(url, force_cache=True)
    title_meta = BeautifulSoup(html, 'html.parser').find(
        'meta', {'property': 'og:title'})
    result.title = title_meta['content']
    result.cap_html = result.title
    result.imgs = [*getImages(html)]
    return result
def get(path, force_cache=False, content=None):
    """Return a ``Result`` (images, caption, video, url) for a Douban page.

    ``m.douban.com`` in ``path`` is rewritten to ``www.douban.com`` first.
    When ``content`` is falsy it is fetched with ``getContent``.
    """
    path = path.replace('m.douban.com', 'www.douban.com')
    if not content:
        content = getContent(path, force_cache=force_cache)
    soup = BeautifulSoup(str(content), features='html.parser')

    album = Result()
    album.imgs = getImgs(soup, path)
    album.cap = getCap(soup, path)
    album.video = getVideo(soup)
    album.url = path
    return album
def get(path):
    """Return a ``Result`` for the post addressed by ``path``.

    The channel is the fourth ``/``-separated segment of ``path`` and the
    post id is the fifth segment converted to ``int``.
    """
    segments = path.split('/')
    channel, post_id = segments[3], int(segments[4])
    post = webgram.getPost(channel, post_id)

    album = Result()
    album.url = path
    album.cap_html = getCap(post)
    album.imgs = getImgs(post)
    album.video = post.getVideo()
    return album
def cut(update, context):
    """Handle a message by re-sending its image through ``album_sender``.

    Messages from the debug group and messages that belong to a media
    group are ignored. The image location is the file path of the attached
    document (or of the last entry in ``msg.photo``), falling back to the
    message text; it is only used when it starts with ``http``.
    """
    msg = update.effective_message
    in_debug_group = msg.chat_id == debug_group.id
    if in_debug_group or msg.media_group_id:
        return

    attachment = msg.document
    if not attachment:
        attachment = msg.photo and msg.photo[-1]
    link = attachment.get_file().file_path if attachment else None
    link = link or msg.text or ''

    if link.startswith('http'):
        album = AlbumResult()
        album.cap = msg.caption_markdown or msg.text_markdown or ''
        album.imgs = [link]
        album_sender.send_v2(msg.chat, album, send_all=True, size_factor=2.1)
def get(path, json=None):
    """Return a ``Result`` describing the status addressed by ``path``.

    Args:
        path: Status URL; its id is extracted with ``getWid``.
        json: Optional already-parsed status dict. When falsy, the status
            is fetched from ``prefix + wid`` and its ``'data'`` entry used.

    Returns:
        A populated ``Result``, or an empty ``Result`` when the status
        could not be fetched or parsed.
    """
    wid = getWid(path)
    r = Result()
    if not json:
        try:
            # NOTE(review): FullLoader is applied to remotely fetched
            # content; consider yaml.safe_load if the payload is plain JSON.
            json = yaml.load(cached_url.get(prefix + wid),
                             Loader=yaml.FullLoader)
            json = json['data']
        except Exception:
            # Best effort: a failed fetch/parse yields an empty result.
            return r
    if 'test' in sys.argv:
        # Debug dump of the raw payload; explicit UTF-8 so non-ASCII text
        # cannot fail on platforms with a narrower default encoding.
        with open('tmp/%s.json' % wid, 'w', encoding='utf-8') as f:
            f.write(str(json))

    retweeted = json.get('retweeted_status', {})
    # Fall back to the retweeted status when this one has no media itself.
    r.imgs = getImages(json) or getImages(retweeted)
    r.cap_html = json['text']
    r.title = json.get('status_title')
    r.cap_html_v2 = getCap(json)
    r.video = getVideo(json) or getVideo(retweeted)
    r.wid = json.get('id')
    r.rwid = retweeted.get('id', '')
    r.hash = getHash(json)
    r.url = path
    return r
def testPicBot():
    """Send a caption-only ``Result`` to ``chat`` and print the outcome."""
    album = Result()
    album.cap_html = 'test'
    print(album)
    outcome = album_sender.send_v2(chat, album, send_all=True, time_sleep=5)
    print(outcome)