def download_file(update, context, file_type):
    """
    Download a file sent to the Telegram bot.

    :param update: incoming Telegram update.
    :param context: callback context.
    :param file_type: `photo` or `voice`, the type of file to download.
    :return: (filename, new_filename) if file_type is valid, else False.
             filename is the path of the downloaded file; new_filename is
             the filename with the file's order number from the database.
    """
    user_id = update.effective_user.id
    if file_type == 'voice':
        file_id = update.message.voice.file_id
        filename = f'{user_id}_voice.ogg'
        path = CONFIG['VOICE_FOLDER_PATH']
    elif file_type == 'photo':
        file_id = update.message.photo[-1].file_id
        filename = f'{user_id}_photo.jpg'
        path = CONFIG['PHOTO_FOLDER_PATH']
    else:
        return False
    new_file = context.bot.get_file(file_id)
    new_file.download(os.path.join(path, filename))
    conn, cursor = create_conn()
    new_filename = os.path.join(
        path, get_name(cursor, user_id, file_type, f'{file_type}_path'))
    close_conn(conn)
    return filename, new_filename
def message_processing(update, context):
    """
    Save voice messages as wav files with a sample rate of 16 kHz, and
    save photos when a face is detected in them. All paths are stored in
    the `bot` database.

    :return: None
    """
    logger.info(f'Waiting for message_processing function for '
                f'{update.effective_user.name} at '
                f'{update.effective_message.date}')
    user_id = update.effective_user.id
    conn, cursor = create_conn()
    if update.message.voice:
        filename, new_filename = download_file(update, context, 'voice')
        new_filename = f'{new_filename}.wav'
        convert(os.path.join(CONFIG['VOICE_FOLDER_PATH'], filename),
                new_filename)
        insert_data(conn, cursor, 'voice', user_id, 'audio_path',
                    new_filename)
        answer_text = 'Thanks, I\'ve saved this voice message to my database.'
    elif update.message.photo:
        filename, new_filename = download_file(update, context, 'photo')
        new_filename = f'{new_filename}.jpg'
        photo_folder_path = CONFIG['PHOTO_FOLDER_PATH']
        if check_face(f'{photo_folder_path}/{user_id}_photo.jpg',
                      new_filename):
            insert_data(conn, cursor, 'photo', user_id, 'photo_path',
                        new_filename)
            answer_text = ('I saved this photo in the database because I\'ve '
                           'detected a face in it.')
        else:
            answer_text = ('I didn\'t save this photo in my database because '
                           'I haven\'t found a face in it.')
    else:
        context.bot.send_sticker(chat_id=update.effective_chat.id,
                                 sticker=CONFIG['STICKER_PATH'])
        answer_text = 'Send me a voice message or a photo, please.'
    context.bot.send_message(parse_mode=ParseMode.MARKDOWN,
                             chat_id=update.effective_chat.id,
                             text=answer_text)
    close_conn(conn)
    logger.info(f'Answer ready for {update.effective_user.name} '
                f'at {update.effective_message.date}')
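
# A minimal wiring sketch for message_processing above, assuming
# python-telegram-bot v13 (whose (update, context) callback signature the
# code uses); TOKEN is a hypothetical placeholder, not taken from the code.
from telegram.ext import Updater, MessageHandler, Filters

TOKEN = 'YOUR_BOT_TOKEN'  # placeholder

updater = Updater(TOKEN)
# message_processing already dispatches on voice/photo/other itself, so a
# single catch-all MessageHandler is enough here.
updater.dispatcher.add_handler(MessageHandler(Filters.all, message_processing))
updater.start_polling()
updater.idle()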
def db_sha1(domain, family, bookname):
    conn = db.create_conn(domain=domain, family=family)
    cursor = db.use_db(conn, domain, family)
    q = 'SELECT img_sha1 FROM image WHERE img_name = %s'
    cursor.execute(q, [bookname])
    data = cursor.fetchall()
    cursor.close()
    conn.close()
    # Return the stored SHA-1, or None when the image is unknown.
    return data[0][0] if len(data) else None
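
# A minimal usage sketch for db_sha1; the domain/family values and the
# image name are illustrative placeholders.
sha1 = db_sha1('commons', 'wiki', 'Some_book.djvu')
if sha1 is None:
    print('no image record found')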
def get_images_credit(cursor, images):
    if not len(images):
        return []
    # Image names are stored with underscores instead of spaces.
    images = [x.replace(' ', '_') for x in images]
    results = []
    # Collect image contributor names from Commons first, then from the
    # local wiki via the caller's cursor.
    conn = db.create_conn(domain='commons', family='wiki')
    common_cursor = db.use_db(conn, 'commons', 'wiki')
    for r in get_images_credit_db(common_cursor, images):
        results.append(r[0])
    common_cursor.close()
    conn.close()
    for r in get_images_credit_db(cursor, images):
        results.append(r[0])
    return results
def handle_scan_query(params, start_response):
    text = common_html.get_head('pages without scan',
                                css='shared.css').encode('utf-8') + '\n <body>\n'
    if params['lang']:
        try:
            offset = int(params.get('offset', 0))
            limit = min(500, int(params.get('limit', 500)))
            lang = params['lang']
            conn = db.create_conn(domain=lang, family='wikisource')
            cursor = db.use_db(conn, domain=lang, family='wikisource')
            ns = ws_category.domain_urls[lang][0]
            # Disambiguation pages and pages that already have a scan are excluded.
            page_ids = disamb_page(cursor) | page_with_scan(ns, cursor)
            all_p = all_pages(cursor)
            result = [(unicode(x[0], 'utf-8'), x[1]) for x in all_p
                      if x[2] not in page_ids]
            text += 'Total: ' + str(len(result)) + '<br />'
            next_link = prev_next_link(False, len(result), lang, limit, offset)
            prev_link = prev_next_link(True, len(result), lang, limit, offset)
            text += prev_link + ' ' + next_link + '<br /><br />'
            result = result[offset:offset + limit]
            for x in result:
                text += (u'<a href="//%s.wikisource.org/wiki/%s">' % (lang, x[0])
                         + x[0].replace('_', ' ') + u'</a>, ' + str(x[1])
                         + u'<br />')
            text += u'<br />' + prev_link + ' ' + next_link
            cursor.close()
            conn.close()
            ret_code = '200 OK'
        except Exception:
            utils.print_traceback()
            ret_code = '500 Internal Server Error'
            text = '<h1>' + ret_code + '</h1>'
    else:
        ret_code = '400 Bad Request'
        text = '<h1>' + ret_code + '</h1>'
    text += ' </body>\n</html>'
    return return_response(start_response, text.encode('utf-8'), False,
                           ret_code, 'text/html')
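
# A sketch of how a WSGI entry point might route to handle_scan_query; the
# query-string parsing and the application function are assumptions, not
# part of the original handler (Python 2, matching the code above).
from urlparse import parse_qs


def application(environ, start_response):
    qs = parse_qs(environ.get('QUERY_STRING', ''))
    params = {
        'lang': qs.get('lang', [''])[0],
        'offset': qs.get('offset', ['0'])[0],
        'limit': qs.get('limit', ['500'])[0],
    }
    return handle_scan_query(params, start_response)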
def get_credit(domain, family, books, pages, images):
    conn = db.create_conn(domain=domain, family=family)
    ns = get_source_ns(domain, family)
    cursor = db.use_db(conn, domain, family)
    books_name = []
    results = {}
    for book in books:
        contribs = get_book_credit(domain, family, cursor, book, ns)
        merge_contrib(results, contribs)
        books_name.append(get_index_ns(domain, family) + ':' + book)
    contribs = get_pages_credit(cursor, pages + books_name, ns)
    merge_contrib(results, contribs)
    for user_name in get_images_credit(cursor, images):
        results.setdefault(user_name, default_userdict())
        results[user_name]['count'] += 1
    # Close the cursor before its connection.
    cursor.close()
    conn.close()
    return results
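
# A minimal usage sketch for get_credit; the wiki and the titles are
# illustrative placeholders. Result keys are user names; each value holds
# at least a 'count' field, as maintained above.
credit = get_credit(domain='fr', family='wikisource',
                    books=['Some_book.djvu'], pages=['Some_page'],
                    images=['Some_image.jpg'])
for user_name in credit:
    print('%s: %d' % (user_name, credit[user_name]['count']))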
import asyncio
import logging
import random
from time import *

from aiogram import Bot, Dispatcher, types
from aiogram.contrib.fsm_storage.memory import MemoryStorage
from aiogram.dispatcher.filters import BoundFilter
from aiogram.types import ParseMode

# token, DB, create_conn and gen_prepared_query are expected to come from
# the project's own config and database modules.

print('PRILER BOT')

log = logging.getLogger('aiogram')
logging.basicConfig(level=logging.INFO)

bot = Bot(token=token, parse_mode=ParseMode.MARKDOWN)
storage = MemoryStorage()
dp = Dispatcher(bot, storage=storage)

loop = asyncio.get_event_loop()
conn = loop.run_until_complete(create_conn(**DB))
prepared_query = loop.run_until_complete(gen_prepared_query(conn))


class CheckFilter(BoundFilter):
    # Custom filter that passes only when the sender's admin status matches
    # the `is_admin` argument given in the handler decorator.
    key = 'is_admin'

    def __init__(self, is_admin):
        self.is_admin = is_admin

    async def check(self, message: types.Message):
        member = await bot.get_chat_member(message.chat.id,
                                           message.from_user.id)
        return member.is_chat_admin() == self.is_admin
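
# A minimal usage sketch for CheckFilter, assuming aiogram 2.x: the filter
# has to be bound to the dispatcher before its `is_admin` key can be used
# in handler decorators. The /ban command is an illustrative placeholder.
dp.filters_factory.bind(CheckFilter)


@dp.message_handler(commands=['ban'], is_admin=True)
async def ban_command(message: types.Message):
    # Reached only when the sender is a chat administrator.
    await message.reply('Admin command accepted.')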
def get_stats(domains):
    import urllib
    res = {}
    for dom in domains:
        print(dom)
        conn = db.create_conn(domain=dom, family='wikisource')
        cursor = db.use_db(conn, domain=dom, family='wikisource')
        ns = urls[dom][0]
        q = "select /* SLOW_OK */ count(page_id) as num from page where page_namespace=%d and page_is_redirect=0" % ns
        cursor.execute(q)
        row = cursor.fetchone()
        num_pages = int(row[0])
        if len(urls[dom]) > 1:
            cat3 = urllib.unquote(urls[dom][1])
            cat4 = urllib.unquote(urls[dom][2])
            cursor.execute(catreq(cat3, ns))
            row = cursor.fetchall()[0]
            num_q3 = int(row[0])
            cursor.execute(catreq(cat4, ns))
            row = cursor.fetchall()[0]
            num_q4 = int(row[0])
        else:
            num_q3 = 0
            num_q4 = 0
        if len(urls[dom]) > 3:
            cat0 = urls[dom][3]
            cat2 = urls[dom][4]
            cursor.execute(catreq(cat0, ns))
            row = cursor.fetchall()[0]
            num_q0 = int(row[0])
            cursor.execute(catreq(cat2, ns))
            row = cursor.fetchall()[0]
            num_q2 = int(row[0])
        else:
            num_q0 = 0
            num_q2 = 0
        q = "select /* SLOW_OK */ count(distinct tl_from) as num from templatelinks left join page on page_id=tl_from where tl_namespace=%d and page_namespace=0;" % ns
        cursor.execute(q)
        row = cursor.fetchone()
        num_trans = int(row[0])
        cursor.execute("select /* SLOW_OK */ count(distinct page_id) from page where page_namespace=0 and page_is_redirect=0;")
        row = cursor.fetchone()
        num_texts = int(row[0])
        # Disambiguation pages: first try the __DISAMBIG__ keyword.
        cursor.execute("select count(page_title) from page where page_namespace = 0 and page_is_redirect = 0 and page_id in (select pp_page from page_props where pp_propname = 'disambiguation')")
        row = cursor.fetchone()
        num_disambig = int(row[0])
        if num_disambig == 0:
            # Then test whether the disambiguation message is a template...
            q = "select /* SLOW_OK */ count(page_title) from page left join templatelinks on page_id=tl_from where page_namespace=0 and tl_namespace=10 and tl_title in ( select pl_title from page left join pagelinks on page_id=pl_from where pl_namespace=10 and page_namespace=8 and page_title='Disambiguationspage' )"
            cursor.execute(q)
            row = cursor.fetchone()
            num_disambig = int(row[0])
        if num_disambig == 0 and disambiguations.get(dom):
            q = "select /* SLOW_OK */ count(page_title) from page left join templatelinks on page_id=tl_from where page_namespace=0 and tl_namespace=10 and tl_title='%s'" % disambiguations.get(dom)
            cursor.execute(q)
            row = cursor.fetchone()
            num_disambig = int(row[0])
        if dom == 'no':
            import pywikibot
            qq = "select /* SLOW_OK */ page_title from page where page_namespace=0 and page_is_redirect=0 and page_id not in ( select distinct tl_from from templatelinks left join page on page_id=tl_from where tl_namespace=104 and page_namespace=0 ) and page_id not in ( %s );" % q.replace("count(page_title)", "page_id")
            cursor.execute(qq)
            rows = cursor.fetchall()
            site = pywikibot.getSite(dom, fam='wikisource')
            f = codecs.open(os.path.expanduser('~/public_html/data/nakedtexts_') + dom + '.html', 'w', 'utf-8')
            f.write("<html><head></head><body>")
            f.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />")
            f.write("<b>Naked texts at " + dom + ".wikisource.org</b> (%d)<br />" % len(rows))
            f.write("<ul>")
            for row in rows:
                pagename = row[0]
                page = pywikibot.Page(site, pagename.decode('utf8'))
                page_path = site.nice_get_address(page.title(asUrl=True))
                page_url = "http://" + dom + ".wikisource.org" + page_path
                s = "<li><a href=\"%s\">%s</a></li>" % (page_url, page.title())
                f.write(s)
            f.write("</ul>")
            f.write("</body></html>")
            f.close()
        res[dom] = (num_pages, num_q0, num_q2, num_q3, num_q4, num_trans,
                    num_texts, num_disambig)
        cursor.close()
        conn.close()
    return res
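
# A minimal usage sketch for get_stats; the domain list is an illustrative
# placeholder (entries must be keys of the module-level `urls` table), and
# the tuple layout follows the res[dom] assignment above.
stats = get_stats(['fr', 'de'])
for dom in stats:
    (num_pages, num_q0, num_q2, num_q3, num_q4,
     num_trans, num_texts, num_disambig) = stats[dom]
    print('%s: %d pages, %d texts, %d disambiguation pages' %
          (dom, num_pages, num_texts, num_disambig))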
def open_db(domain, family, cursor_class=None):
    conn = db.create_conn(domain=domain, family=family)
    cursor = db.use_db(conn, domain, family, cursor_class)
    return conn, cursor
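
# A minimal usage sketch for open_db; the caller owns both handles and
# should close the cursor before the connection.
conn, cursor = open_db('en', 'wikisource')
try:
    cursor.execute('SELECT 1')
finally:
    cursor.close()
    conn.close()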