示例#1
0
def sendPhoto(url, item):
    """Send the picture found in ``item`` as a single-image album.

    The image source comes from ``findSrc(item)``; when that yields nothing
    the function returns ``None`` without sending.  The caption is the text
    of the ``span`` element that carries ``itemprop='caption'``.  The album
    is delivered through ``album_sender.send`` to the module-level
    ``channel``.

    NOTE(review): ``item`` is presumably a BeautifulSoup tag.  If it has no
    caption span, ``find`` would return ``None`` and the ``.text`` access
    raises ``AttributeError`` -- confirm callers guarantee the span exists.
    """
    photo = Result()
    image_src = findSrc(item)
    if image_src:
        photo.imgs = [image_src]
        caption_tag = item.find('span', itemprop='caption')
        photo.cap = caption_tag.text
        album_sender.send(channel, url, photo)
示例#2
0
def _isKnownBanner(file_size, w, h):
    """Return True when an image matches one of the listed banner signatures.

    Each signature is ``(min_size, max_size, width, height)``.  The byte-size
    bounds are exclusive; ``None`` bounds mean only the dimensions are
    checked.
    """
    signatures = (
        (36000, 36200, 1080, 1080),  # Jiemian Culture header banner
        (27000, 27300, 640, 640),  # Sixiang Shichang
        (None, None, 750, 234),  # Jiemian Culture header banner
        (6000, 9000, 347, 347),  # Jiemian Culture header banner
        (87000, 91000, 900, 500),  # Meiguo Huaren Zatan header banner
        (53000, 56000, 795, 504),  # WeChat footer
        (57000, 61000, 1011, 282),  # Duanshiji header banner
    )
    for low, high, width, height in signatures:
        if (w, h) != (width, height):
            continue
        if low is None or low < file_size < high:
            return True
    return False


def getAlbum(url,
             force_cache=True,
             word_limit=200,
             paragraph_limit=3,
             append_source=False,
             append_url=True):
    """Build an ``AlbumResult`` holding one cover image and a short caption.

    Args:
        url: Address of the article; passed to ``_getArticle`` and
            ``getTitle``.
        force_cache: Forwarded to ``_getArticle``.
        word_limit: Forwarded to ``cutCaptionHtml`` as the length budget.
        paragraph_limit: At most ``paragraph_limit * 2`` caption lines are
            kept.  When it is below 5, non-empty lines of 20 characters or
            fewer are dropped as well.
        append_source: When true, ``album.url`` is set to ``url``.
        append_url: When true, ``url`` is appended to the caption after a
            blank line.

    Returns:
        The populated ``AlbumResult``.  ``imgs`` receives at most one entry:
        the first ``<img>`` that downloads, opens, is not a known banner and
        passes the size/aspect-ratio check.
    """
    content = _getArticle(url, force_cache=force_cache).text
    album = AlbumResult()
    for item in content.findAll('img'):
        path = item.get('src')
        if not path:
            continue
        try:
            cached_url.get(path, mode='b', force_cache=True)
            file_path = cached_url.getFilePath(path)
            # Close the image file as soon as the dimensions are known so
            # that handles do not pile up while scanning many <img> tags.
            with Image.open(file_path) as img:
                w, h = img.size
        except Exception:
            # Best effort: an image that cannot be fetched or decoded is
            # skipped.  KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        file_size = os.stat(file_path).st_size
        if _isKnownBanner(file_size, w, h):
            continue
        # Accept only reasonably large images that are not extreme strips.
        if w * 0.25 < h < w * 4 and min(w, h) > 100 and max(w, h) > 300:
            album.imgs.append(path)
            break
    # Turn images and line breaks into blank lines, and end every paragraph
    # with one, so the plain text below splits into paragraphs.
    for tag in ['img', 'br']:
        for item in content.findAll(tag):
            item.replace_with('\n\n')
    for item in content.findAll('p'):
        item.append('\n\n')
    title = '【%s】\n\n' % getTitle(url)
    lines = [line.strip() for line in content.text.split('\n')]
    lines = [line for line in lines if isGoodLine(line)]
    if paragraph_limit < 5:
        # Keep blank separator lines, drop short non-empty lines.
        lines = [line for line in lines if not line or len(line) > 20]
    caption = cutCaptionHtml('\n'.join(lines), word_limit)
    caption = caption.strip().strip('\ufeff').strip()
    kept = caption.split('\n')[:paragraph_limit * 2]
    album.cap_html_v2 = title + '\n'.join(kept).strip()
    if append_url:
        album.cap_html_v2 += '\n\n' + url
    if append_source:
        album.url = url
    return album
示例#3
0
def get(url):
    """Fetch ``url`` and return a ``Result`` describing the page.

    The page is fetched via ``cached_url.get`` with ``force_cache=True``.
    The title is the ``content`` attribute of the ``og:title`` meta tag and
    is also used as ``cap_html``.  Images come from ``getImages`` applied to
    the fetched content.

    NOTE(review): a page without an ``og:title`` meta tag would presumably
    make ``find`` return ``None`` and the subscript raise ``TypeError`` --
    confirm.
    """
    result = Result()
    result.url = url
    page = cached_url.get(url, force_cache=True)
    og_title = BeautifulSoup(page, 'html.parser').find(
        'meta', {'property': 'og:title'})
    result.title = og_title['content']
    result.cap_html = result.title
    result.imgs = list(getImages(page))
    return result
示例#4
0
def get(path, force_cache=False, content=None):
    """Parse a page into a ``Result`` with images, caption, video and url.

    Args:
        path: Page address; ``m.douban.com`` is rewritten to
            ``www.douban.com`` before anything else happens.
        force_cache: Forwarded to ``getContent`` when the page is fetched.
        content: Pre-fetched page content.  When falsy, the content is
            obtained with ``getContent``.

    Returns:
        A ``Result`` whose ``url`` is the rewritten path.
    """
    path = path.replace('m.douban.com', 'www.douban.com')
    if not content:
        content = getContent(path, force_cache=force_cache)
    soup = BeautifulSoup(str(content), features='html.parser')
    parsed = Result()
    parsed.imgs = getImgs(soup, path)
    parsed.cap = getCap(soup, path)
    parsed.video = getVideo(soup)
    parsed.url = path
    return parsed
示例#5
0
def get(path):
    """Load the post addressed by ``path`` and return it as a ``Result``.

    ``path`` is split on ``/``.  The fourth segment is used as the channel
    and the fifth, converted to ``int``, as the post id.  This presumably
    matches addresses of the form ``scheme://host/channel/id`` -- verify
    against callers.  The post itself is obtained from ``webgram.getPost``.
    """
    segments = path.split('/')
    channel, post_id = segments[3], int(segments[4])
    post = webgram.getPost(channel, post_id)
    parsed = Result()
    parsed.url = path
    parsed.cap_html = getCap(post)
    parsed.imgs = getImgs(post)
    parsed.video = post.getVideo()
    return parsed
示例#6
0
def cut(update, context):
    """Handle a message by sending its image through ``album_sender``.

    Messages from ``debug_group`` and messages belonging to a media group
    are ignored.  The image location is the attached document's (or, failing
    that, the last photo's) ``file_path``, falling back to the message text.
    Nothing is sent unless that location starts with ``http``.
    """
    msg = update.effective_message
    if msg.chat_id == debug_group.id:
        return
    if msg.media_group_id:
        return

    # Prefer a document; otherwise take the last entry of the photo list.
    attachment = msg.document
    if not attachment and msg.photo:
        attachment = msg.photo[-1]

    location = ''
    if attachment:
        location = attachment.get_file().file_path
    if not location:
        location = msg.text or ''

    if location.startswith('http'):
        album = AlbumResult()
        album.cap = msg.caption_markdown or msg.text_markdown or ''
        album.imgs = [location]
        album_sender.send_v2(msg.chat, album, send_all=True, size_factor=2.1)
示例#7
0
def get(path, json=None):
    """Build a ``Result`` for the item addressed by ``path``.

    Args:
        path: Address of the item; ``getWid`` extracts its id from it.
        json: Already-parsed data for the item.  When falsy, the data is
            fetched from ``prefix + wid`` and its ``'data'`` entry is used.

    Returns:
        A populated ``Result``.  If the data cannot be fetched or parsed, an
        empty ``Result`` is returned instead.

    Raises:
        KeyError: If the data has no ``'text'`` entry.
    """
    wid = getWid(path)
    r = Result()
    if not json:
        try:
            # NOTE(review): yaml.load is applied to fetched content; consider
            # yaml.safe_load if the payload is plain data -- confirm first.
            json = yaml.load(cached_url.get(prefix + wid),
                             Loader=yaml.FullLoader)
            json = json['data']
        except Exception:
            # Best effort: any fetch/parse failure yields an empty result.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return r
    if 'test' in sys.argv:
        # Debug dump; explicit UTF-8 so non-ASCII text cannot fail on
        # platforms whose default encoding is narrower.
        with open('tmp/%s.json' % wid, 'w', encoding='utf-8') as f:
            f.write(str(json))
    # Images and video fall back to the retweeted item when the item itself
    # has none.
    retweeted = json.get('retweeted_status', {})
    r.imgs = getImages(json) or getImages(retweeted)
    r.cap_html = json['text']
    r.title = json.get('status_title')
    r.cap_html_v2 = getCap(json)
    r.video = getVideo(json) or getVideo(retweeted)
    r.wid = json.get('id')
    r.rwid = retweeted.get('id', '')
    r.hash = getHash(json)
    r.url = path
    return r
示例#8
0
def testPicBot():
    """Send a caption-only ``Result`` through ``album_sender.send_v2``.

    Prints the result object before sending and the value returned by
    ``send_v2`` afterwards.  The target is the module-level ``chat``.
    """
    album = Result()
    album.cap_html = 'test'
    print(album)
    outcome = album_sender.send_v2(chat, album, send_all=True, time_sleep=5)
    print(outcome)