def getUrl(item, post_link):
    """Pick the URL to publish for a post element.

    If the ``note-block`` div carries a ``data-url``, or ``matchKey`` accepts
    ``post_link`` against the book-review / note URL prefixes, that note URL
    (falling back to ``post_link``) is force-exported via
    ``export_to_telegraph``; the un-exported note URL is returned when the
    export result is falsy.

    Otherwise the ``href`` of the first anchor in the ``url-block`` div is
    exported and the result (or the raw href) is passed through ``clearUrl``.
    Returns ``None`` when there is no ``url-block`` div.
    """
    note_block = item.find('div', class_='note-block')
    note_url = note_block and note_block.get('data-url')
    note_prefixes = [
        'https://book.douban.com/review/',
        'https://www.douban.com/note/',
    ]
    if note_url or matchKey(post_link, note_prefixes):
        target = note_url or post_link
        return export_to_telegraph.export(target, force=True) or target

    link_block = item.find('div', class_='url-block')
    if not link_block:
        return None
    href = link_block.find('a')['href']
    return clearUrl(export_to_telegraph.export(href) or href)
def processNote(note, channels):
    """Export a note and post it to every channel, five seconds apart.

    Nothing is sent when ``db.existing.add(note)`` returns a falsy value
    (presumably meaning the note was already recorded -- confirm against
    ``db``).  The message is the forced telegraph export of ``note``, or
    ``note`` itself when the export result is falsy.
    """
    newly_added = db.existing.add(note)
    if not newly_added:
        return
    exported = export_to_telegraph.export(note, force=True)
    outgoing = exported or note
    for target in channels:
        # Pause before each send, including the first one.
        time.sleep(5)
        target.send_message(outgoing)
def sendLink(site, link, fixed_channel = None):
    """Deliver ``link`` to every channel subscribed to ``site``.

    For each ``(channel, config)`` pair from ``db.sub.channels``:

    * channels other than ``fixed_channel`` are skipped when it is given;
    * the link plus its two titles is checked with ``getMatchedKey`` against
      the channel's ``blocklist`` entry; on a hit the matched keys are
      appended to the log message and the channel is skipped;
    * douban links that are not notes are turned into an album via
      ``web_2_album`` (caption cut to 800 chars with images, 2000 without);
      otherwise ``'to_album'`` in the config requests an album from
      ``export_to_telegraph``;
    * ``'to_simplify'`` / ``'to_telegraph'`` in the config select the
      simplified or plain telegraph export as the text message.

    Exports and the album are computed at most once and reused across
    channels.  Send failures are printed and reported to ``debug_group``.

    Returns ``True`` if no send raised, ``False`` otherwise.
    """
    simplified_url = None
    telegraph_url = None
    album = None
    suffix = getAdditionalInfo(site)
    subscriptions = list(db.sub.channels(site, tele.bot))
    message = link
    all_sent = True

    for channel, config in subscriptions:
        if fixed_channel and channel.id != fixed_channel:
            continue

        haystack = (
            link
            + export_to_telegraph.getTitle(link)
            + export_to_telegraph.getTitle(link, toSimplified=True)
        )
        blocked_keys = getMatchedKey(haystack, blocklist.get(channel.id, []))
        if blocked_keys:
            message += ' filtered: ' + ' '.join(blocked_keys)
            continue

        if not album and '.douban.' in link and '/note/' not in link:
            album = web_2_album.get(link, force_cache = True)
            # Albums with images get a shorter caption limit.
            cap_limit = 800 if album.imgs else 2000
            album.cap = cutCaptionHtml(album.cap, cap_limit)
        if not album and 'to_album' in config:
            album = export_to_telegraph.getAlbum(link)

        wants_simplified = 'to_simplify' in config
        wants_telegraph = 'to_telegraph' in config
        if wants_simplified and not simplified_url:
            simplified_url = export_to_telegraph.export(
                link, force_cache = True, force=True, toSimplified=True) or link
        if wants_telegraph and not telegraph_url and not album:
            telegraph_url = export_to_telegraph.export(
                link, force_cache = True, force=True) or link

        # 'to_telegraph' takes precedence over 'to_simplify'.
        message = link
        if wants_simplified:
            message = simplified_url
        if wants_telegraph:
            message = telegraph_url

        try:
            if album:
                album_sender.send_v2(channel, album)
            else:
                channel.send_message(message + suffix, parse_mode='HTML')
        except Exception as e:
            print(e)
            all_sent = False
            debug_group.send_message(
                'send fail: %s %d %s' % (link, channel.id, e))

    log(message or link, site, [entry[0] for entry in subscriptions])
    return all_sent
def getTelegraph(msg, url):
    """Export ``url`` to telegraph using the token stored for the message source.

    The source id is the first element returned by ``getSource(msg)``.  When
    it has no entry in ``TELEGRAPH_TOKENS``, ``msgTelegraphToken(msg)`` is
    called first (presumably it registers a token -- confirm).  The export is
    forced and raises on failure; simplification is enabled when the message
    text contains ``bot_simplify``, and the source link is omitted when the
    chat id is listed in ``no_source_link``.
    """
    source_id, _second, _third = getSource(msg)
    if source_id not in TELEGRAPH_TOKENS:
        msgTelegraphToken(msg)
    export_to_telegraph.token = TELEGRAPH_TOKENS[source_id]

    wants_simplified = 'bot_simplify' in msg.text
    hide_source = str(msg.chat_id) in no_source_link._db.items
    return export_to_telegraph.export(
        url,
        throw_exception = True,
        force = True,
        toSimplified = wants_simplified,
        noSourceLink = hide_source)
def getTelegraph(msg, url):
    """Export ``url`` to telegraph using the token stored for the message source.

    The source id is the first element returned by ``getSource(msg)``.  When
    it has no entry in ``telegraph_tokens``, ``msgTelegraphToken(msg)`` is
    called first (presumably it registers a token -- confirm).  The export is
    forced and raises on failure.  Simplification is enabled when the message
    text contains ``bot_simplify`` or ends with ``" s"``; the source link is
    omitted when the chat id is listed in ``no_source_link``.
    """
    source_id, _second, _third = getSource(msg)
    if source_id not in telegraph_tokens:
        msgTelegraphToken(msg)
    export_to_telegraph.token = telegraph_tokens[source_id]

    wants_simplified = 'bot_simplify' in msg.text or msg.text.endswith(' s')
    hide_source = str(msg.chat_id) in no_source_link._db.items
    return export_to_telegraph.export(
        url,
        throw_exception=True,
        force=True,
        toSimplified=wants_simplified,
        noSourceLink=hide_source)
def adhoc():
    """One-off job: export every link in the links file and post it to a chat.

    Each line of the file is stripped, force-exported through
    ``export_to_telegraph`` and the result is sent to the hard-coded chat.
    Processing is best effort: a failure on one link is printed together
    with its traceback and the loop moves on to the next link.
    """
    female_channel = tele.bot.get_chat(-1001162153695)
    with open('所有链接.txt') as f:
        for link in f:
            try:
                r = export_to_telegraph.export(
                    link.strip(), force=True, throw_exception=True)
                female_channel.send_message(r)
            except Exception as e:
                print(e)
                # print_tb needs a traceback object; passing the exception
                # itself raises AttributeError and would abort the loop.
                traceback.print_tb(e.__traceback__)
def export():
    """Post at most one not-yet-seen link per source to ``taiwan_channel``.

    For every entry of ``source`` the links from ``link_extractor.getLinks``
    are scanned in order; the first one not in ``existing`` is exported
    (forced, simplified, raising on failure), sent, and recorded via ``add``.
    """
    for src in source:
        for candidate, _ in link_extractor.getLinks(src):
            if candidate not in existing:
                exported = export_to_telegraph.export(
                    candidate, force=True, toSimplified=True,
                    throw_exception=True)
                taiwan_channel.send_message(exported)
                add(candidate)
                break  # only add one for each source
def sendLink(site, link, fixed_channel=None):
    """Deliver ``link`` to every channel subscribed to ``site``, throttled.

    For each ``(channel, config)`` pair from ``db.sub.channels`` (optionally
    restricted to ``fixed_channel``) the link is sent either as an album
    (douban links that are not notes, via ``web_2_album``) or as a text
    message: the telegraph export when ``'to_telegraph'`` is in the config,
    else the simplified export when ``'to_simplify'`` is, else the raw link.

    Send failures are printed and reported to ``debug_group``.  After every
    attempt except the first, the function sleeps for a time that grows with
    the number of messages the attempt produced (a failed attempt counts
    as 10).
    """
    simplified_url = None
    telegraph_url = None
    album = None
    attempted_before = False

    for channel, config in db.sub.channels(site, tele.bot):
        if fixed_channel and channel.id != fixed_channel:
            continue

        wants_simplified = 'to_simplify' in config
        wants_telegraph = 'to_telegraph' in config

        if wants_simplified and not simplified_url:
            simplified_url = export_to_telegraph.export(
                link, force_cache=True, force=True, toSimplified=True) or link
        if '.douban.' in link and '/note/' not in link:
            album = web_2_album.get(link, force_cache=True)
        if wants_telegraph and not telegraph_url and not album:
            telegraph_url = export_to_telegraph.export(
                link, force_cache=True, force=True) or link

        # 'to_telegraph' takes precedence over 'to_simplify'.
        if wants_telegraph:
            message = telegraph_url
        elif wants_simplified:
            message = simplified_url
        else:
            message = link

        # Placeholder used for throttling when the send below raises.
        result = [1] * 10
        try:
            if album:
                result = album_sender.send_v2(channel, album)
            else:
                result = [channel.send_message(message)]
        except Exception as e:
            print(e)
            debug_group.send_message(
                'send fail: ' + str(channel.id) + ' ' + str(e))
        finally:
            if attempted_before:
                post_count = len(result)
                time.sleep((post_count**2) / 2 + post_count * 10)
            attempted_before = True
def formatContent(content, url_info):
    """Rewrite the URLs listed in ``url_info`` that occur in ``content``.

    ``url_info`` maps a URL as it appears in ``content`` to its real URL.
    For each mapped URL present in the text:

    * if the real URL has a ``photo`` path segment, the URL is removed;
    * else if the real URL exports to telegraph, the export replaces it;
    * else the trimmed real URL replaces it when the real URL is less than
      10 characters longer than the original, otherwise the trimmed
      original is used.

    Returns the rewritten content.
    """
    for shown_url, real_url in url_info.items():
        if shown_url not in content:
            continue
        if 'photo' in real_url.split('/'):
            content = content.replace(shown_url, '')
            continue

        telegraph_url = export_to_telegraph.export(real_url)
        if telegraph_url:
            replacement = telegraph_url
        elif len(real_url) < len(shown_url) + 10:
            replacement = trimUrl(real_url)
        else:
            replacement = trimUrl(shown_url)
        content = replaceUrl(content, shown_url, replacement)
    return content
def test():
    """Manual smoke test over pages 2-6 of the squatting2047 site.

    Every link not already in ``existing`` is recorded and counted.  From the
    10th new link onward the link is exported (forced, cached, simplified)
    and the result is sent to ``debug_group``; past the 20th it is also
    opened locally with the ``open`` command.
    """
    new_links = 0
    base = 'https://squatting2047.com/page/'
    for page_no in range(2, 7):
        page_url = base + str(page_no)
        for link in link_extractor.getLinks(page_url):
            if link in existing:
                continue
            existing.add(link)
            new_links += 1
            if new_links >= 10:
                simplified = export_to_telegraph.export(
                    link, force_cache = True, force=True, toSimplified=True)
                debug_group.send_message(simplified)
                if new_links > 20:
                    os.system('open %s -g' % simplified)
def getParsedText(text):
    """Render an iterable of parsed nodes into an HTML-ish string.

    * ``br`` nodes become a newline;
    * ``i`` nodes become ``<i>text</i>`` (nothing when their text is empty);
    * ``a`` nodes have their ``href`` replaced by its telegraph export when
      one is available (the ``rel`` attribute is dropped, and link text
      containing ``http`` is replaced by the exported URL), then fall
      through to the default rendering;
    * everything else is appended as ``str(node)``.

    Rendering stops early -- returning what was accumulated so far -- at a
    node whose string form starts with ``原文`` once the result already
    contains ``telegra``.
    """
    result = ''
    for node in text:
        tag = node.name
        if tag == 'br':
            result += '\n'
            continue
        if tag == 'i':
            if node.text:
                result += '<i>' + node.text + '</i>'
            continue
        if tag == 'a':
            telegraph_url = export_to_telegraph.export(node['href'])
            if telegraph_url:
                node['href'] = telegraph_url
                del node['rel']
                if 'http' in node.text:
                    node.contents[0].replaceWith(telegraph_url)
        rendered = str(node)
        if rendered.startswith('原文') and 'telegra' in result:
            return result
        result += rendered
    return result
def getTelegraph(msg, url):
    """Export ``url`` to telegraph using the token stored for the sender.

    When the sender's user id has no entry in ``TELEGRAPH_TOKENS``,
    ``msgTelegraphToken(msg)`` is called first (presumably it registers a
    token -- confirm).  The export is forced.
    """
    sender_id = msg.from_user.id
    if sender_id not in TELEGRAPH_TOKENS:
        msgTelegraphToken(msg)
    token = TELEGRAPH_TOKENS[sender_id]
    export_to_telegraph.token = token
    # DEBUG, remove second param when go prod
    result = export_to_telegraph.export(url, True, force = True)
    return result