def socket_connect_auth_user():
    """Socket.IO connect handler: admit only logged-in, non-suspended users.

    On success, records the connecting socket id under the user's entry in
    SIDS (a user may have several tabs/sockets) and acknowledges with a
    "status" event.  Token-authenticated API clients and anonymous visitors
    are messaged and disconnected.
    """
    g.db = db_session()
    user, api_client = get_logged_in_user()

    # Decide up front whether this connection must be refused.
    rejection = None
    if api_client or not user:
        rejection = "Authentication required"
    elif user.is_suspended:
        rejection = "You're banned and can't access chat right now."

    if rejection is not None:
        send(rejection)
        disconnect()
        g.db.close()
        return

    # Register this socket id for the user.
    SIDS.setdefault(user.id, []).append(request.sid)

    emit("status", {'status': "connected"})
    g.db.close()
def socket_disconnect_user():
    """Socket.IO disconnect handler: deregister the socket and clean up rooms.

    Removes this socket id from SIDS, leaves every joined room, announces the
    departure in rooms the user no longer belongs to, and clears any stale
    typing indicator for affected boards.
    """
    g.db = db_session()
    v, client = get_logged_in_user()
    if not v:
        # Anonymous socket: nothing to deregister.
        g.db.close()
        return
    try:
        # This sid may already be gone (double disconnect) — tolerate that.
        SIDS[v.id].remove(request.sid)
    except ValueError:
        pass
    for room in rooms():
        leave_room(room)
        # Only announce in rooms the user is not (or no longer) entitled to.
        if room not in v_rooms(v):
            send(f"← @{v.username} has left the chat", to=room)
            # Rooms are keyed by board fullname; may also be non-board rooms.
            board = get_from_fullname(room, graceful=True)
            if board:
                update_chat_count(board)
                # Drop any lingering "is typing" entry and notify the room.
                if v.username in TYPING.get(board.fullname, []):
                    TYPING[board.fullname].remove(v.username)
                    emit('typing', {'users': TYPING[board.fullname]},
                         to=board.fullname)
    g.db.close()
def del_function():
    """Remove the uploaded image and ban the offending post.

    Deletes the stored file, marks the post banned, and records a
    "ban_post" mod action attributed to the system account (user id 1).
    Relies on `name` and `new_post` from the enclosing scope.
    """
    session = db_session()

    # Nuke the stored asset first, then persist the ban.
    delete_file(name)
    new_post.is_banned = True
    session.add(new_post)
    session.commit()

    # Log the automated removal in the mod log.
    mod_action = ModAction(
        kind="ban_post",
        user_id=1,
        note="banned image",
        target_submission_id=new_post.id,
    )
    session.add(mod_action)
    session.commit()

    session.close()
def wrapper(*args, **kwargs):
    """Socket event decorator body: require a logged-in user, then call f.

    Rejects anonymous sockets with a message; otherwise ensures the current
    socket id is tracked in SIDS and invokes the wrapped handler with the
    user inserted as the final positional argument.
    """
    g.db = db_session()
    user, api_client = get_logged_in_user()

    # Guard clause: anonymous sockets get a message and nothing else.
    if not user:
        send("You are not logged in")
        g.db.close()
        return

    # Make sure this socket id is registered for the user.
    if request.sid not in SIDS.get(user.id, []):
        SIDS.setdefault(user.id, []).append(request.sid)

    f(*args, user, **kwargs)
    g.db.close()
def wrapper(*args, **kwargs):
    """Socket event decorator body: require a logged-in (web) user.

    Rejects API-token clients and anonymous sockets; warns suspended users.
    Ensures the current socket id is tracked in SIDS, then invokes the
    wrapped handler with the user as the final positional argument.
    """
    g.db = db_session()
    v, client = get_logged_in_user()
    if client or not v:
        send("Not logged in")
        # Fix: the original returned without closing the session, leaking a
        # DB connection on every unauthenticated socket event.
        g.db.close()
        return
    if v.is_suspended:
        send("You're banned and can't access chat right now.")
        # NOTE(review): unlike the connect handler, execution falls through
        # here and the handler still runs for suspended users — preserved
        # as-is, but confirm this is intentional.
    if request.sid not in SIDS.get(v.id, []):
        if v.id in SIDS:
            SIDS[v.id].append(request.sid)
        else:
            SIDS[v.id] = [request.sid]
    f(*args, v, **kwargs)
    g.db.close()
def info_packet(username, method="html"):
    """Build a user's data export and email it to them as attachments.

    Collects the user's posts, comments, and up/down-voted content into
    `packet`, rendering each section either as HTML (via templates) or as
    JSON, then mails one attachment per section.

    :param username: account to export
    :param method: "html" or "json" — which renderer each section uses
    """
    print(f"starting {username}")
    packet = {}
    # A request context is required for render_template inside the lambdas.
    with app.test_request_context("/my_info"):
        db = db_session()
        g.timestamp = int(time.time())
        g.db = db
        user = get_user(username)

        print('submissions')
        #submissions
        post_ids = db.query(Submission.id).filter_by(
            author_id=user.id).order_by(Submission.id.desc()).all()
        posts = get_posts([i[0] for i in post_ids], v=user)
        packet["posts"] = {
            'html': lambda: render_template("userpage.html",
                                            v=None,
                                            u=user,
                                            listing=posts,
                                            page=1,
                                            next_exists=False),
            'json': lambda: [x.self_download_json for x in posts]
        }

        print('comments')
        comment_ids = db.query(Comment.id).filter_by(
            author_id=user.id).order_by(Comment.id.desc()).all()
        comments = get_comments([i[0] for i in comment_ids], v=user)
        packet["comments"] = {
            'html': lambda: render_template("userpage_comments.html",
                                            v=None,
                                            u=user,
                                            comments=comments,
                                            page=1,
                                            next_exists=False),
            'json': lambda: [x.self_download_json for x in comments]
        }

        print('post_upvotes')
        upvote_query = db.query(Vote.submission_id).filter_by(
            user_id=user.id, vote_type=1).order_by(Vote.id.desc()).all()
        upvote_posts = get_posts([i[0] for i in upvote_query], v=user)
        upvote_posts = [i for i in upvote_posts]
        for post in upvote_posts:
            post.__dict__['voted'] = 1
        packet['upvoted_posts'] = {
            # Fix: the HTML export previously rendered `listing=posts`
            # (the user's own submissions) instead of the upvoted listing.
            'html': lambda: render_template(
                "home.html",
                v=None,
                listing=upvote_posts,
                page=1,
                next_exists=False),
            'json': lambda: [x.json_core for x in upvote_posts]
        }

        print('post_downvotes')
        downvote_query = db.query(Vote.submission_id).filter_by(
            user_id=user.id, vote_type=-1).order_by(Vote.id.desc()).all()
        downvote_posts = get_posts([i[0] for i in downvote_query], v=user)
        packet['downvoted_posts'] = {
            # Fix: same copy-paste bug — was `listing=posts`.
            'html': lambda: render_template(
                "home.html",
                v=None,
                listing=downvote_posts,
                page=1,
                next_exists=False),
            'json': lambda: [x.json_core for x in downvote_posts]
        }

        print('comment_upvotes')
        upvote_query = db.query(CommentVote.comment_id).filter_by(
            user_id=user.id, vote_type=1).order_by(CommentVote.id.desc()).all()
        upvote_comments = get_comments([i[0] for i in upvote_query], v=user)
        packet["upvoted_comments"] = {
            'html': lambda: render_template("notifications.html",
                                            v=None,
                                            comments=upvote_comments,
                                            page=1,
                                            next_exists=False),
            'json': lambda: [x.json_core for x in upvote_comments]
        }

        print('comment_downvotes')
        downvote_query = db.query(CommentVote.comment_id).filter_by(
            user_id=user.id, vote_type=-1).order_by(CommentVote.id.desc()).all()
        downvote_comments = get_comments([i[0] for i in downvote_query], v=user)
        packet["downvoted_comments"] = {
            'html': lambda: render_template("notifications.html",
                                            v=None,
                                            comments=downvote_comments,
                                            page=1,
                                            next_exists=False),
            'json': lambda: [x.json_core for x in downvote_comments]
        }

        # blocked_users=db.query(UserBlock.target_id).filter_by(user_id=user.id).order_by(UserBlock.id.desc()).all()
        # users=[get_account(base36encode(x[0])) for x in blocked_users]
        # packet["blocked_users"]={
        #     "html":lambda:render_template
        #     "json":lambda:[x.json_core for x in users]
        # }

        # One attachment per section; each lambda is evaluated here, inside
        # the request context, so render_template still works.
        send_mail(user.email,
                  "Your Ruqqus Data",
                  "Your Ruqqus data is attached.",
                  "Your Ruqqus data is attached.",
                  files={
                      f"{user.username}_{entry}.{method}":
                      io.StringIO(convert_file(packet[entry][method]()))
                      for entry in packet
                  })

    print("finished")
from sqlalchemy import * from ruqqus.__main__ import app, db_session from ruqqus import classes from ruqqus.mail import send_mail from ruqqus.helpers.get import get_user from flask import render_template db = db_session() title = input("Title: ") subject = input("Email subject: ") x = db.query(classes.user.User).filter( classes.user.User.is_activated == True, or_(classes.user.User.is_banned == 0, classes.user.User.unban_utc > 0), classes.user.User.is_deleted == False, classes.user.User.email != None) total = x.count() print(f"total mail to send: {total}") i = 0 unable = 0 success = 0 for user in x.order_by(classes.user.User.id.asc()).all(): #for user in [get_user('captainmeta4', nSession=db)]: i += 1 # for user in db.query(classes.user.User).filter_by(id=7).all(): try: with app.app_context(): html = render_template("email/mailing.html",
# NOTE(review): `category`, `visible`, and `json` below are members of an ORM
# model class whose `class` header lies outside this chunk; shown here with
# the surrounding module-level code.

# Parent Category row, eagerly joined on load.
category = relationship("Category", lazy="joined")

@property
def visible(self):
    # A per-row override (True/False) wins; otherwise inherit the parent
    # category's visibility.  NOTE(review): `in [True, False]` also matches
    # the ints 0/1 (0 == False) — confirm _visible only takes True/False/None.
    return self._visible if self._visible in [True, False
                                              ] else self.category.visible

@property
def json(self):
    # Minimal serialization used by the API.
    return {"id": self.id, "category_id": self.cat_id, "name": self.name}

# Snapshot of all categories in alphabetical order, built once at import
# time with its own throwaway session.
CATEGORIES = [
    i for i in db_session().query(Category).order_by(Category.name.asc()).all()
]

# class GuildCategory(Base, Stndrd, Age_times):
#     __tablename__ = "guildcategories"
#     id = Column(BigInteger, primary_key=True)
#     subcat_id = Column(Integer, ForeignKey("subcategories.id"), default=0)
#     board_id = Column(Integer, ForeignKey("boards.id"), default=0)
#     created_utc = Column(integer, default=0)
#     subcat = relationship("SubCategory", lazy="joined")
#     board = relationship("Board", lazy="joined")
#     def __init__(self, *args, **kwargs):
#         if "created_utc" not in kwargs:
#             kwargs["created_utc"] = int(time.time())
def thumbnail_thread(pid):
    """Background worker: find and upload a thumbnail for post `pid`.

    Strategy: if the post's domain is whitelisted for thumbnails and the URL
    is a direct image, use it; otherwise fetch the page, try the standard
    thumbnail meta tags, and finally fall back to scanning <img> elements
    for the first usable (non-SVG, >=30x30) image.  The chosen image is
    resized to 375x227 and uploaded to S3.
    """
    db = db_session()
    post = get_post(pid, session=db)

    #step 1: see if post is image
    #print("thumbnail thread")
    domain_obj = post.domain_obj
    if domain_obj and domain_obj.show_thumbnail:
        x = requests.get(post.url, headers=headers)
        if x.headers.get("Content-Type", "/").split("/")[0] == "image":
            #image post, using submitted url
            name = f"posts/{post.base36id}/thumb.png"
            tempname = name.replace("/", "_")
            with open(tempname, "wb") as file:
                for chunk in x.iter_content(1024):
                    file.write(chunk)
            aws.upload_from_file(name, tempname, resize=(375, 227))
            post.has_thumb = True
            post.is_image = True
            db.add(post)
            db.commit()
            return

    x = requests.get(post.url, headers=headers)
    if x.status_code != 200 or not x.headers["Content-Type"].startswith(
            ("text/html", "image/")):
        #print(f'not html post, status {x.status_code}')
        return

    if x.headers["Content-Type"].startswith("image/"):
        pass  #submitted url is image
    elif x.headers["Content-Type"].startswith("text/html"):
        soup = BeautifulSoup(x.content, 'html.parser')
        metas = ["ruqqus:thumbnail", "twitter:image", "og:image", "thumbnail"]
        for meta in metas:
            img = soup.find('meta', attrs={"name": meta, "content": True})
            if not img:
                img = soup.find('meta', attrs={
                    'property': meta,
                    'content': True
                })
            if not img:
                continue
            try:
                x = requests.get(img['content'], headers=headers)
            except Exception:
                # Unreachable/invalid meta URL — try the next tag.
                continue
            break

        if not img or not x or x.status_code != 200:
            imgs = soup.find_all('img', src=True)
            if imgs:
                #print("using <img> elements")
                pass
            else:
                #print('no image in doc')
                return

            #Loop through all images in document until we find one that works (and isn't svg)
            for img in imgs:
                src = img["src"]
                #print("raw src: "+src)

                #convert src into full url
                if src.startswith("https://"):
                    pass
                elif src.startswith("http://"):
                    # Fix: the original interpolated the *list* returned by
                    # str.split ("https://['', '...']"); take the part after
                    # the scheme instead.
                    src = f"https://{src.split('http://')[1]}"
                elif src.startswith('//'):
                    src = f"https:{src}"
                elif src.startswith('/'):
                    parsed_url = urlparse(post.url)
                    src = f"https://{parsed_url.netloc}/{src.lstrip('/')}"
                else:
                    src = f"{post.url}{'/' if not post.url.endswith('/') else ''}{src}"
                #print("full src: "+src)

                #load asset
                x = requests.get(src, headers=headers)
                if x.status_code != 200:
                    #print('not 200, next')
                    continue
                type = x.headers.get("Content-Type", "")
                if not type.startswith("image/"):
                    #print("not an image, next")
                    continue
                if type.startswith("image/svg"):
                    #print("svg image, next")
                    continue
                i = PILimage.open(BytesIO(x.content))
                if i.width < 30 or i.height < 30:
                    continue
                break

    name = f"posts/{post.base36id}/thumb.png"
    tempname = name.replace("/", "_")
    with open(tempname, "wb") as file:
        for chunk in x.iter_content(1024):
            file.write(chunk)
    aws.upload_from_file(name, tempname, resize=(375, 227))
    post.has_thumb = True
    db.add(post)
    db.commit()
    #db.close()
    try:
        remove(tempname)
    except FileNotFoundError:
        pass
def check_csam_url(url, v, delete_content_function):
    """Check an uploaded asset URL against CSAM detection and act on matches.

    Relies on Cloudflare's PhotoDNA integration: HTTP 451 from our bucket
    means a positive match.  Also runs a local perceptual-hash check.  On a
    match, bans the user (and alts), runs `delete_content_function` to
    remove the content, and deletes the file from storage.
    """
    parsed_url = urlparse(url)
    # Only assets we host can be checked this way.
    if parsed_url.netloc != BUCKET:
        return
    headers = {"User-Agent": "Ruqqus webserver"}
    # Retry for up to ~200s while the CDN scan completes.
    # NOTE(review): if all 10 attempts return neither 200 nor 451, `x` is
    # the last (non-conclusive) response and the code proceeds anyway —
    # confirm that is acceptable.
    for i in range(10):
        x = requests.get(url, headers=headers)
        if x.status_code in [200, 451]:
            break
        else:
            time.sleep(20)
    db = db_session()
    if x.status_code == 451:
        # Positive CDN match: permanently ban the user and all alts.
        v.ban_reason = "Sexualizing Minors"
        v.is_banned = 1
        db.add(v)
        for alt in v.alts_threaded(db):
            alt.ban_reason = "Sexualizing Minors"
            alt.is_banned = 1
            db.add(alt)
        delete_content_function()
        db.commit()
        db.close()
        delete_file(parsed_url.path.lstrip('/'))
        return
    # Secondary check: perceptual hash against the local blocklist.
    tempname = f"test_from_url_{parsed_url.path}"
    tempname = tempname.replace('/', '_')
    with open(tempname, "wb") as file:
        for chunk in x.iter_content(1024):
            file.write(chunk)
    h = check_phash(db, tempname)
    if h:
        now = int(time.time())
        # unban_utc of 0 means a permanent ban.
        unban = now + 60 * 60 * 24 * h.ban_time if h.ban_time else 0

        # ban user and alts
        v.ban_reason = h.ban_reason
        v.is_banned = 1
        v.unban_utc = unban
        db.add(v)
        for alt in v.alts_threaded(db):
            alt.ban_reason = h.ban_reason
            alt.is_banned = 1
            alt.unban_utc = unban
            db.add(alt)

        delete_content_function()
        db.commit()

        # nuke aws
        delete_file(parsed_url.path.lstrip('/'))
    # Always clean up the temp download and the session.
    remove(tempname)
    db.close()
def check_csam(post):
    """Check a link post's hosted asset for CSAM and act on a match.

    Relies on Cloudflare's PhotoDNA implementation: HTTP 451 from our bucket
    means a positive match.  Also runs a local perceptual-hash check.  On a
    match, bans the author (and alts), bans the post, and deletes the file
    from storage.
    """
    # ignore non-link posts
    if not post.url:
        return

    parsed_url = urlparse(post.url)
    # Only assets we host can be checked this way.
    if parsed_url.netloc != BUCKET:
        return

    headers = {"User-Agent": "Ruqqus webserver"}
    # Retry for up to ~200s while the CDN scan completes.
    for i in range(10):
        x = requests.get(post.url, headers=headers)
        if x.status_code in [200, 451]:
            break
        else:
            time.sleep(20)

    db = db_session()

    if x.status_code == 451:
        # ban user and alts
        post.author.ban_reason = "Sexualizing Minors"
        post.author.is_banned = 1
        # Fix: the original called db.add(v) where `v` is undefined in this
        # function (NameError); the modified object is the post's author.
        db.add(post.author)
        for alt in post.author.alts_threaded(db):
            alt.ban_reason = "Sexualizing Minors"
            alt.is_banned = 1
            db.add(alt)

        # remove content
        post.is_banned = True
        db.add(post)

        db.commit()

        # nuke aws
        delete_file(parsed_url.path.lstrip('/'))
        db.close()
        return

    #check phash
    tempname = f"test_post_{post.base36id}"
    with open(tempname, "wb") as file:
        for chunk in x.iter_content(1024):
            file.write(chunk)

    h = check_phash(db, tempname)
    if h:
        now = int(time.time())
        # unban_utc of 0 means a permanent ban.
        unban = now + 60 * 60 * 24 * h.ban_time if h.ban_time else 0

        # ban user and alts
        post.author.ban_reason = h.ban_reason
        post.author.is_banned = 1
        post.author.unban_utc = unban
        # Fix: same undefined-`v` bug as above.
        db.add(post.author)
        for alt in post.author.alts_threaded(db):
            alt.ban_reason = h.ban_reason
            alt.is_banned = 1
            alt.unban_utc = unban
            db.add(alt)

        # remove content
        post.is_banned = True
        db.add(post)

        db.commit()

        # nuke aws
        delete_file(parsed_url.path.lstrip('/'))

    remove(tempname)
    db.close()
def submit_post(v):
    """Handle a post submission by user `v`.

    Validates title/url/body, enforces guild and domain rules, runs spam and
    banned-link checks, creates the Submission (+aux row and self-upvote),
    handles an optional image upload (with async CSAM check), and spins off
    thumbnail generation.  Returns a dict of renderers keyed by "html"/"api".
    """
    # --- title normalization ---
    title = request.form.get("title", "").lstrip().rstrip()
    title = title.lstrip().rstrip()  # (redundant second strip — harmless)
    title = title.replace("\n", "")
    title = title.replace("\r", "")
    title = title.replace("\t", "")

    url = request.form.get("url", "")

    # Provisional guild for error pages; resolved again properly below.
    board = get_guild(request.form.get('board', 'general'), graceful=True)
    if not board:
        board = get_guild('general')

    if not title:
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error="Please enter a better title.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 400),
                "api": lambda: ({"error": "Please enter a better title"}, 400)
                }

    # if len(title)<10:
    #     return render_template("submit.html",
    #                            v=v,
    #                            error="Please enter a better title.",
    #                            title=title,
    #                            url=url,
    #                            body=request.form.get("body",""),
    #                            b=board
    #                            )
    elif len(title) > 500:
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error="500 character limit for titles.",
                                                 title=title[0:500],
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 400),
                "api": lambda: ({"error": "500 character limit for titles"}, 400)
                }

    # Must supply at least one of: valid url, text body, or file upload.
    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc) and not request.form.get(
            "body") and not request.files.get("file", None):
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error="Please enter a url or some text.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 400),
                "api": lambda: ({"error": "`url` or `body` parameter required."}, 400)
                }

    # sanitize title
    title = bleach.clean(title, tags=[])

    # Force https for submitted urls
    if request.form.get("url"):
        new_url = ParseResult(scheme="https",
                              netloc=parsed_url.netloc,
                              path=parsed_url.path,
                              params=parsed_url.params,
                              query=parsed_url.query,
                              fragment=parsed_url.fragment)
        url = urlunparse(new_url)
    else:
        url = ""

    body = request.form.get("body", "")

    # check for duplicate (exact same title/url/body in the same guild)
    dup = g.db.query(Submission).join(Submission.submission_aux).filter(
        Submission.author_id == v.id,
        Submission.deleted_utc == 0,
        Submission.board_id == board.id,
        SubmissionAux.title == title,
        SubmissionAux.url == url,
        SubmissionAux.body == body
    ).first()
    if dup:
        return redirect(dup.permalink)

    # check for domain specific rules
    parsed_url = urlparse(url)
    domain = parsed_url.netloc

    # check ban status
    domain_obj = get_domain(domain)
    if domain_obj:
        if not domain_obj.can_submit:
            # Certain domain ban reasons trigger an automatic user ban.
            if domain_obj.reason==4:
                v.ban(days=30, reason="Digitally malicious content")
            elif domain_obj.reason==7:
                v.ban(reason="Sexualizing minors")
            return {"html": lambda: (render_template("submit.html",
                                                     v=v,
                                                     error=BAN_REASONS[domain_obj.reason],
                                                     title=title,
                                                     url=url,
                                                     body=request.form.get(
                                                         "body", ""),
                                                     b=board
                                                     ), 400),
                    "api": lambda: ({"error": BAN_REASONS[domain_obj.reason]}, 400)
                    }
        # check for embeds
        # NOTE(review): eval() of a DB-stored function name — trusted data
        # only; flagged for awareness, left as-is.
        if domain_obj.embed_function:
            try:
                embed = eval(domain_obj.embed_function)(url)
            except BaseException:
                embed = ""
        else:
            embed = ""
    else:
        embed = ""

    # board (authoritative resolution, after stripping the "+" prefix)
    board_name = request.form.get("board", "general")
    board_name = board_name.lstrip("+")
    board_name = board_name.rstrip()

    board = get_guild(board_name, graceful=True)
    if not board:
        board = get_guild('general')

    if board.is_banned:
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=f"+{board.name} has been banned.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=get_guild("general", graceful=True)
                                                 ), 403),
                "api": lambda: (jsonify({"error": f"403 Forbidden - +{board.name} has been banned."}))
                }

    if board.has_ban(v):
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=f"You are exiled from +{board.name}.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=get_guild("general")
                                                 ), 403),
                "api": lambda: (jsonify({"error": f"403 Not Authorized - You are exiled from +{board.name}"}), 403)
                }

    if (board.restricted_posting or board.is_private) and not (
            board.can_submit(v)):
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=f"You are not an approved contributor for +{board.name}.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=get_guild(request.form.get("board", "general"),
                                                             graceful=True
                                                             )
                                                 ), 403),
                "api": lambda: (jsonify({"error": f"403 Not Authorized - You are not an approved contributor for +{board.name}"}), 403)
                }

    # similarity check — trigram distance against the author's last 24h
    now = int(time.time())
    cutoff = now - 60 * 60 * 24

    similar_posts = g.db.query(Submission).options(
        lazyload('*')
    ).join(
        Submission.submission_aux
    ).filter(
        #or_(
        #    and_(
        Submission.author_id == v.id,
        SubmissionAux.title.op('<->')(title) < app.config["SPAM_SIMILARITY_THRESHOLD"],
        Submission.created_utc > cutoff
        #    ),
        #    and_(
        #        SubmissionAux.title.op('<->')(title) < app.config["SPAM_SIMILARITY_THRESHOLD"]/2,
        #        Submission.created_utc > cutoff
        #    )
        #)
    ).all()

    if url:
        similar_urls = g.db.query(Submission).options(
            lazyload('*')
        ).join(
            Submission.submission_aux
        ).filter(
            #or_(
            #    and_(
            Submission.author_id == v.id,
            SubmissionAux.url.op('<->')(url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"],
            Submission.created_utc > cutoff
            #    ),
            #    and_(
            #        SubmissionAux.url.op('<->')(url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"]/2,
            #        Submission.created_utc > cutoff
            #    )
            #)
        ).all()
    else:
        similar_urls = []

    # Older accounts get a proportionally higher spam threshold.
    threshold = app.config["SPAM_SIMILAR_COUNT_THRESHOLD"]
    if v.age >= (60 * 60 * 24 * 7):
        threshold *= 3
    elif v.age >= (60 * 60 * 24):
        threshold *= 2

    if max(len(similar_urls), len(similar_posts)) >= threshold:
        # Auto-ban for spam: 1 day for the user and all non-suspended alts,
        # plus removal of every matching post.
        text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
        send_notification(v, text)

        v.ban(reason="Spamming.", days=1)
        for alt in v.alts:
            if not alt.is_suspended:
                alt.ban(reason="Spamming.", days=1)

        for post in similar_posts + similar_urls:
            post.is_banned = True
            post.is_pinned = False
            post.ban_reason = "Automatic spam removal. This happened because the post's creator submitted too much similar content too quickly."
            g.db.add(post)
            ma=ModAction(
                user_id=1,
                target_submission_id=post.id,
                kind="ban_post",
                board_id=post.board_id,
                note="spam"
            )
            g.db.add(ma)
        g.db.commit()
        return redirect("/notifications")

    # catch too-long body
    if len(str(body)) > 10000:
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error="10000 character limit for text body.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 400),
                "api": lambda: ({"error": "10000 character limit for text body."}, 400)
                }

    if len(url) > 2048:
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error="2048 character limit for URLs.",
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 400),
                "api": lambda: ({"error": "2048 character limit for URLs."}, 400)
                }

    # render text (markdown -> sanitized html)
    body=preprocess(body)
    with CustomRenderer() as renderer:
        body_md = renderer.render(mistletoe.Document(body))
    body_html = sanitize(body_md, linkgen=True)

    # Run safety filter
    bans = filter_comment_html(body_html)
    if bans:
        ban = bans[0]
        reason = f"Remove the {ban.domain} link from your post and try again."
        # NOTE(review): checks ban.reason but appends ban.reason_text —
        # presumably reason is a code and reason_text its display string.
        if ban.reason:
            reason += f" {ban.reason_text}"

        #auto ban for digitally malicious content
        if any([x.reason==4 for x in bans]):
            v.ban(days=30, reason="Digitally malicious content is not allowed.")
            abort(403)
        return {"html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=reason,
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get(
                                                     "body", ""),
                                                 b=board
                                                 ), 403),
                "api": lambda: ({"error": reason}, 403)
                }

    # check spam — every link in the body (plus the post url) against the
    # BadLink blocklist
    soup = BeautifulSoup(body_html, features="html.parser")
    links = [x['href'] for x in soup.find_all('a') if x.get('href')]
    if url:
        links = [url] + links

    for link in links:
        parse_link = urlparse(link)
        check_url = ParseResult(scheme="https",
                                netloc=parse_link.netloc,
                                path=parse_link.path,
                                params=parse_link.params,
                                query=parse_link.query,
                                fragment='')
        check_url = urlunparse(check_url)

        badlink = g.db.query(BadLink).filter(
            literal(check_url).contains(
                BadLink.link)).first()
        if badlink:
            if badlink.autoban:
                text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
                send_notification(v, text)
                v.ban(days=1, reason="spam")
                return redirect('/notifications')
            else:
                return {"html": lambda: (render_template("submit.html",
                                                         v=v,
                                                         error=f"The link `{badlink.link}` is not allowed. Reason: {badlink.reason}.",
                                                         title=title,
                                                         url=url,
                                                         body=request.form.get(
                                                             "body", ""),
                                                         b=board
                                                         ), 400),
                        "api": lambda: ({"error": f"The link `{badlink.link}` is not allowed. Reason: {badlink.reason}"}, 400)
                        }

    # check for embeddable video
    domain = parsed_url.netloc

    # Repost detection: earliest non-removed post with the same url in
    # the same guild.
    if url:
        repost = g.db.query(Submission).join(Submission.submission_aux).filter(
            SubmissionAux.url.ilike(url),
            Submission.board_id == board.id,
            Submission.deleted_utc == 0,
            Submission.is_banned == False
        ).order_by(
            Submission.id.asc()
        ).first()
    else:
        repost = None

    if request.files.get('file') and not v.can_submit_image:
        abort(403)

    # offensive
    is_offensive = False
    for x in g.db.query(BadWord).all():
        if (body and x.check(body)) or x.check(title):
            is_offensive = True
            break

    #politics
    is_politics=False
    for x in g.db.query(PoliticsWord).all():
        if (body and x.check(body)) or x.check(title):
            is_politics = True
            break

    new_post = Submission(
        author_id=v.id,
        domain_ref=domain_obj.id if domain_obj else None,
        board_id=board.id,
        original_board_id=board.id,
        over_18=(
            bool(
                request.form.get(
                    "over_18",
                    "")
            ) or board.over_18
        ),
        post_public=not board.is_private,
        repost_id=repost.id if repost else None,
        is_offensive=is_offensive,
        is_politics=is_politics,
        app_id=v.client.application.id if v.client else None,
        creation_region=request.headers.get("cf-ipcountry")
    )
    g.db.add(new_post)
    g.db.flush()

    new_post_aux = SubmissionAux(id=new_post.id,
                                 url=url,
                                 body=body,
                                 body_html=body_html,
                                 embed_url=embed,
                                 title=title
                                 )
    g.db.add(new_post_aux)
    g.db.flush()

    # Author's automatic self-upvote.
    vote = Vote(user_id=v.id,
                vote_type=1,
                submission_id=new_post.id
                )
    g.db.add(vote)
    g.db.flush()

    g.db.refresh(new_post)

    # check for uploaded image
    if request.files.get('file'):
        #check file size
        if request.content_length > 16 * 1024 * 1024 and not v.has_premium:
            g.db.rollback()
            abort(413)

        file = request.files['file']
        if not file.content_type.startswith('image/'):
            return {"html": lambda: (render_template("submit.html",
                                                     v=v,
                                                     error=f"Image files only.",
                                                     title=title,
                                                     body=request.form.get(
                                                         "body", ""),
                                                     b=board
                                                     ), 400),
                    "api": lambda: ({"error": f"Image files only"}, 400)
                    }

        name = f'post/{new_post.base36id}/{secrets.token_urlsafe(8)}'
        upload_file(name, file)

        # thumb_name=f'posts/{new_post.base36id}/thumb.png'
        #upload_file(name, file, resize=(375,227))

        # update post data
        new_post.url = f'https://{BUCKET}/{name}'
        new_post.is_image = True
        new_post.domain_ref = 1  # id of i.ruqqus.com domain
        g.db.add(new_post)

        #csam detection — callback run by the checker thread on a match;
        # uses its own session since it executes outside this request.
        def del_function(db):
            delete_file(name)
            new_post.is_banned=True
            db.add(new_post)
            db.commit()
            ma=ModAction(
                kind="ban_post",
                user_id=1,
                note="banned image",
                target_submission_id=new_post.id
            )
            db.add(ma)
            db.commit()

        csam_thread=threading.Thread(target=check_csam_url,
                                     args=(f"https://{BUCKET}/{name}",
                                           v,
                                           lambda:del_function(db=db_session())
                                           )
                                     )
        csam_thread.start()

    g.db.commit()

    # spin off thumbnail generation and csam detection as new threads
    if new_post.url or request.files.get('file'):
        new_thread = threading.Thread(target=thumbnail_thread,
                                      args=(new_post.base36id,)
                                      )
        new_thread.start()

    # expire the relevant caches: front page new, board new
    cache.delete_memoized(frontlist)
    g.db.commit()
    cache.delete_memoized(Board.idlist, board, sort="new")

    # print(f"Content Event: @{new_post.author.username} post
    # {new_post.base36id}")

    return {"html": lambda: redirect(new_post.permalink),
            "api": lambda: jsonify(new_post.json)
            }
def thumbnail_thread(pid, debug=False):
    """Background worker: find and upload a thumbnail for post `pid`.

    Strategy: direct image url (if the domain allows thumbnails), then the
    standard thumbnail meta tags of the fetched page, then the first usable
    (non-SVG, >=30x30) <img> element.  Also scrapes the page's <title> and
    meta description into the post's aux row.  The chosen image is resized
    to 375x227 and uploaded to S3.

    :param pid: base36 post id
    :param debug: print progress to stdout
    """
    db = db_session()

    post = get_post(pid, graceful=True, session=db)
    if not post:
        # account for possible follower lag
        time.sleep(60)
        post = get_post(pid, session=db)

    # step 1: see if post is image
    #print("thumbnail thread")
    domain_obj = post.domain_obj

    headers={"User-Agent": f"Ruqqus thumbnail acquisition for post {post.base36id}"}

    if debug:
        print(f"domain_obj {domain_obj}")
        if domain_obj:
            print(f"show thumb {domain_obj.show_thumbnail}")

    if domain_obj and domain_obj.show_thumbnail:
        if debug:
            print("trying direct url as image")

        # Prefer the embed url when it is a proper https link.
        if post.embed_url and post.embed_url.startswith("https://"):
            fetch_url=post.embed_url
        else:
            fetch_url=post.url

        try:
            x = requests.get(fetch_url, headers=headers)
        except:
            if debug:
                print("error connecting")
            return

        if x.status_code>=400:
            return

        if x.headers.get("Content-Type", "/").split("/")[0] == "image":
            # image post, using submitted url
            name = f"posts/{post.base36id}/thumb.png"
            tempname = name.replace("/", "_")

            with open(tempname, "wb") as file:
                for chunk in x.iter_content(1024):
                    file.write(chunk)

            aws.upload_from_file(name, tempname, resize=(375, 227))
            post.has_thumb = True
            post.is_image = True
            db.add(post)
            db.commit()
            return

    if debug:
        print("not direct image")

    try:
        x = requests.get(post.url, headers=headers)
    except:
        if debug:
            print("error connecting")
        return

    if x.status_code != 200 or not x.headers["Content-Type"].startswith(
            ("text/html", "image/")):
        if debug:
            print(f'not html post, status {x.status_code}')
        return

    if x.headers["Content-Type"].startswith("image/"):
        if debug:
            print("submitted url is image, use that")
        pass  # submitted url is image
    elif x.headers["Content-Type"].startswith("text/html"):
        if debug:
            print("parsing html doc")
        soup = BeautifulSoup(x.content, 'html.parser')

        #get meta title and description
        try:
            meta_title=soup.find('title')
            if meta_title:
                post.submission_aux.meta_title=str(meta_title.string)
            meta_desc = soup.find('meta', attrs={"name":"description"})
            if meta_desc:
                post.submission_aux.meta_description=meta_desc['content']
            if meta_title or meta_desc:
                db.add(post.submission_aux)
        except:
            # Best-effort scrape; ignore malformed documents.
            pass

        metas = ["ruqqus:thumbnail",
                 "twitter:image",
                 "og:image",
                 "thumbnail"
                 ]
        for meta in metas:
            if debug:
                print(f"Looking for meta tag: {meta}")
            # Tag may declare the key via name= or property=.
            img = soup.find('meta', attrs={"name": meta, "content": True})
            if not img:
                img = soup.find(
                    'meta', attrs={
                        'property': meta, 'content': True})
            if not img:
                continue
            try:
                if debug:
                    print(f"image load attempt from meta tag {meta}")
                x = requests.get(img['content'], headers=headers)
            except BaseException:
                if debug:
                    print("unable to connect")
                continue
            break

        if debug:
            print(img)
            print(x)

        if not img or not x or x.status_code != 200:
            if debug:
                print("no meta tags, looking for img")
            imgs = soup.find_all('img', src=True)
            if debug:
                print(f"found {len(imgs)} img elements")
            if imgs:
                #print("using <img> elements")
                pass
            else:
                #print('no image in doc')
                return

            # Loop through all images in document until we find one that works
            # (and isn't svg)
            for img in imgs:
                src = img["src"]
                #print("raw src: "+src)

                # convert src into full url
                if src.startswith("https://"):
                    pass
                elif src.startswith("http://"):
                    src = f"https://{src.split('http://')[1]}"
                elif src.startswith('//'):
                    src = f"https:{src}"
                elif src.startswith('/'):
                    parsed_url = urlparse(post.url)
                    src = f"https://{parsed_url.netloc}{src}"
                else:
                    src = f"{post.url}{'/' if not post.url.endswith('/') else ''}{src}"
                #print("full src: "+src)

                # load asset
                if debug:
                    print(f"attempting asset load {src}")
                x = requests.get(src, headers=headers)
                if x.status_code != 200:
                    if debug:
                        print(f"status code {x.status_code}, try next")
                    #print('not 200, next')
                    continue
                type = x.headers.get("Content-Type", "")
                if not type.startswith("image/"):
                    if debug:
                        print(f"bad type {type}, try next")
                    #print("not an image, next")
                    continue
                if type.startswith("image/svg"):
                    if debug:
                        print("svg, try next")
                    #print("svg image, next")
                    continue
                i = PILimage.open(BytesIO(x.content))
                if i.width < 30 or i.height < 30:
                    if debug:
                        print("image too small, next")
                    continue
                break
        else:
            if debug:
                print("meta tag found, no need to look for img tags")

    # `x` now holds the winning image response; persist and upload it.
    name = f"posts/{post.base36id}/thumb.png"
    tempname = name.replace("/", "_")

    with open(tempname, "wb") as file:
        for chunk in x.iter_content(1024):
            file.write(chunk)

    aws.upload_from_file(name, tempname, resize=(375, 227))
    post.has_thumb = True
    db.add(post)
    db.commit()
    # db.close()

    try:
        remove(tempname)
    except FileNotFoundError:
        pass
def get_logged_in_user():
    """Resolve the current request's authenticated user.

    Returns a (user, client) tuple: (user, ClientAuth) for an OAuth bearer
    token on /api/v1 paths, (user, None) for a cookie session, and
    (None, None) when unauthenticated.  On the dev server, access is limited
    to admins and premium users.
    """
    try:
        db = g.db
    except AttributeError:
        db = db_session()

    if request.path.startswith("/api/v1"):
        token = request.headers.get("Authorization")
        if not token:

            #let admins hit api/v1 from browser
            # x=request.session.get('user_id')
            # nonce=request.session.get('login_nonce')
            # if not x or not nonce:
            #     return None, None
            # user=g.db.query(User).filter_by(id=x).first()
            # if not user:
            #     return None, None
            # if user.admin_level >=3 and nonce>=user.login_nonce:
            #     return user, None

            return None, None

        token = token.split()
        if len(token) < 2:
            return None, None
        token = token[1]
        if not token:
            return None, None

        client = db.query(ClientAuth).filter(
            ClientAuth.access_token == token,
            ClientAuth.access_token_expire_utc > int(time.time())).first()

        # Fix: the original read client.user before checking client for None,
        # raising AttributeError on the dev server for a bad/expired token.
        if not client:
            x = (None, None)
        elif app.config[
                "SERVER_NAME"] == "dev.ruqqus.com" and client.user.admin_level < 2 and not client.user.has_premium:
            x = (None, None)
        else:
            x = (client.user, client)

    elif "user_id" in session:

        uid = session.get("user_id")
        nonce = session.get("login_nonce", 0)
        if not uid:
            # Fix: the original set x here but then fell through and
            # re-queried/overwrote it anyway.
            x = (None, None)
        else:
            v = db.query(User).options(
                joinedload(User.moderates).joinedload(
                    ModRelationship.board),
                #joinedload(Board.reports),
                joinedload(User.subscriptions).joinedload(Subscription.board)
                # joinedload(User.notifications)
            ).filter_by(id=uid, is_deleted=False).first()

            # Fix: the original dereferenced v (possibly None) for the dev
            # check and then unconditionally overwrote x in the following
            # if/else, so the dev-server restriction never took effect.
            if not v or nonce < v.login_nonce:
                # Unknown user or a stale session (login nonce rotated).
                x = (None, None)
            elif app.config[
                    "SERVER_NAME"] == "dev.ruqqus.com" and v.admin_level < 2 and not v.has_premium:
                x = (None, None)
            else:
                x = (v, None)

    else:
        x = (None, None)

    # Attach the OAuth client (or None) to the user object for downstream use.
    if x[0]:
        x[0].client = x[1]

    return x
def info_packet(username, method="html"):
    """Build a user's data export and email it to them as attachments.

    Collects the user's posts, comments, and up/down-voted content into
    `packet`, rendering each section either as HTML (via templates) or as
    JSON, then mails one attachment per section.

    :param username: account to export
    :param method: "html" or "json" — which renderer each section uses
    """
    print(f"starting {username}")
    packet={}
    # A request context is required for render_template inside the lambdas.
    with app.test_request_context("/my_info"):
        db=db_session()
        g.timestamp=int(time.time())
        g.db=db
        user=get_user(username)

        print('submissions')
        #submissions
        post_ids=db.query(Submission.id).filter_by(author_id=user.id).order_by(Submission.created_utc.desc()).all()
        post_ids=[i[0] for i in post_ids]
        print(f'have {len(post_ids)} ids')
        posts=get_posts(post_ids, v=user)
        print('have posts')
        packet["posts"]={
            'html':lambda:render_template("userpage.html",
                                          v=None,
                                          u=user,
                                          listing=posts,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.self_download_json for x in posts]
        }

        print('comments')
        comment_ids=db.query(Comment.id).filter_by(author_id=user.id).order_by(Comment.created_utc.desc()).all()
        comment_ids=[x[0] for x in comment_ids]
        print(f"have {len(comment_ids)} ids")
        comments=get_comments(comment_ids, v=user)
        print('have comments')
        packet["comments"]={
            'html':lambda:render_template("userpage_comments.html",
                                          v=None,
                                          u=user,
                                          comments=comments,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.self_download_json for x in comments]
        }

        print('post_upvotes')
        upvote_query=db.query(Vote.submission_id).filter_by(user_id=user.id, vote_type=1).order_by(Vote.id.desc()).all()
        upvote_posts=get_posts([i[0] for i in upvote_query], v=user)
        upvote_posts=[i for i in upvote_posts]
        for post in upvote_posts:
            post.__dict__['voted']=1
        packet['upvoted_posts']={
            # Fix: the HTML export previously rendered `listing=posts`
            # (the user's own submissions) instead of the upvoted listing.
            'html':lambda:render_template("home.html",
                                          v=None,
                                          listing=upvote_posts,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.json_core for x in upvote_posts]
        }

        print('post_downvotes')
        downvote_query=db.query(Vote.submission_id).filter_by(user_id=user.id, vote_type=-1).order_by(Vote.id.desc()).all()
        downvote_posts=get_posts([i[0] for i in downvote_query], v=user)
        packet['downvoted_posts']={
            # Fix: same copy-paste bug — was `listing=posts`.
            'html':lambda:render_template("home.html",
                                          v=None,
                                          listing=downvote_posts,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.json_core for x in downvote_posts]
        }

        print('comment_upvotes')
        upvote_query=db.query(CommentVote.comment_id).filter_by(user_id=user.id, vote_type=1).order_by(CommentVote.id.desc()).all()
        upvote_comments=get_comments([i[0] for i in upvote_query], v=user)
        packet["upvoted_comments"]={
            'html':lambda:render_template("notifications.html",
                                          v=None,
                                          comments=upvote_comments,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.json_core for x in upvote_comments]
        }

        print('comment_downvotes')
        downvote_query=db.query(CommentVote.comment_id).filter_by(user_id=user.id, vote_type=-1).order_by(CommentVote.id.desc()).all()
        downvote_comments=get_comments([i[0] for i in downvote_query], v=user)
        packet["downvoted_comments"]={
            'html':lambda:render_template("notifications.html",
                                          v=None,
                                          comments=downvote_comments,
                                          page=1,
                                          next_exists=False),
            'json':lambda:[x.json_core for x in downvote_comments]
        }

        # blocked_users=db.query(UserBlock.target_id).filter_by(user_id=user.id).order_by(UserBlock.id.desc()).all()
        # users=[get_account(base36encode(x[0])) for x in blocked_users]
        # packet["blocked_users"]={
        #     "html":lambda:render_template
        #     "json":lambda:[x.json_core for x in users]
        # }

        # One attachment per section; each lambda is evaluated here, inside
        # the request context, so render_template still works.
        send_mail(
            user.email,
            "Your Ruqqus Data",
            "Your Ruqqus data is attached.",
            "Your Ruqqus data is attached.",
            files={f"{user.username}_{entry}.{method}":
                   io.StringIO(convert_file(str(packet[entry][method]())))
                   for entry in packet}
        )

    print("finished")

# @app.route("/my_info", methods=["POST"])
# @auth_required
# @validate_formkey
# def my_info_post(v):
#     if not v.is_activated:
#         return redirect("/settings/security")
#     method=request.values.get("method","html")
#     if method not in ['html','json']:
#         abort(400)
#     thread=threading.Thread(target=info_packet, args=(v.username,), kwargs={'method':method})
#     thread.setDaemon(True)
#     thread.start()
#     #info_packet(g.db, v)
#     return "started"

# @app.route("/my_info", methods=["GET"])
# @auth_required
# def my_info_get(v):
#     return render_template("my_info.html", v=v)
def thumbnail_thread(pid, debug=False):
    """Generate and upload a thumbnail image for submission *pid*.

    Fetches the post's url. If the response is HTML, the page's meta
    tags and <img> elements are scanned for a usable raster image; if
    it is already an image, that image is used directly. The chosen
    image is saved to a temp file, uploaded to storage (resized), and
    the post is flagged as having a thumbnail.

    :param pid: submission id to thumbnail
    :param debug: when True, progress messages are printed to stdout
    :return: tuple (success: bool, message: str)
    """

    def print_(x):
        # Debug-only printing; swallow encoding errors from odd urls/titles.
        if debug:
            try:
                print(x)
            except Exception:
                pass

    db = db_session()

    post = get_post(pid, graceful=True, session=db)
    if not post:
        # Account for possible replica/follower lag, then retry once.
        time.sleep(60)
        post = get_post(pid, graceful=True, session=db)
        if not post:
            # BUG FIX: previously fell through and raised AttributeError
            # on post.url instead of failing cleanly.
            db.close()
            return False, f"Post {pid} not found"

    fetch_url = post.url

    # Mimic a Chrome browser agent; many sites reject default UA strings.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"}

    try:
        print_(f"loading {fetch_url}")
        x = requests.get(fetch_url, headers=headers)
    except Exception:
        print_(f"unable to connect to {fetch_url}")
        db.close()
        return False, "Unable to connect to source"

    if x.status_code != 200:
        db.close()
        return False, f"Source returned status {x.status_code}."

    # If the content is already an image, use it directly; otherwise
    # parse the HTML and hunt for a candidate image.
    if x.headers.get("Content-Type", "").startswith("text/html"):
        soup = BeautifulSoup(x.content, 'html.parser')

        # Best-effort: capture page title/description onto the post's aux
        # record; never let metadata parsing block thumbnailing.
        try:
            meta_title = soup.find('title')
            if meta_title:
                post.submission_aux.meta_title = str(meta_title.string)[0:500]
            meta_desc = soup.find('meta', attrs={"name": "description"})
            if meta_desc:
                post.submission_aux.meta_description = meta_desc['content'][0:1000]
            if meta_title or meta_desc:
                db.add(post.submission_aux)
                db.commit()
        except Exception as e:
            print(f"Error while parsing for metadata: {e}")

        # Build an ordered list of candidate image urls: preferred meta
        # tags first, then every <img> element on the page.
        thumb_candidate_urls = []
        meta_tags = [
            "ruqqus:thumbnail",
            "twitter:image",
            "og:image",
            "thumbnail"
        ]
        for tag_name in meta_tags:
            print_(f"Looking for meta tag: {tag_name}")
            # Some sites use name=, others property=, for the same tag.
            tag = soup.find('meta', attrs={"name": tag_name, "content": True})
            if not tag:
                tag = soup.find('meta', attrs={'property': tag_name, 'content': True})
            if tag:
                thumb_candidate_urls.append(expand_url(post.url, tag['content']))

        for tag in soup.find_all("img", attrs={'src': True}):
            thumb_candidate_urls.append(expand_url(post.url, tag['src']))

        # Try candidates in order until one is a usable raster image.
        for url in thumb_candidate_urls:
            print_(f"Trying url {url}")
            try:
                image_req = requests.get(url, headers=headers)
            except Exception:
                print_(f"Unable to connect to candidate url {url}")
                continue
            if image_req.status_code >= 400:
                # BUG FIX: previously logged x.status_code (the original
                # page response) instead of the candidate's status.
                print_(f"status code {image_req.status_code}")
                continue
            content_type = image_req.headers.get("Content-Type", "")
            if not content_type.startswith("image/"):
                print_(f'bad type {content_type}, try next')
                continue
            if content_type.startswith("image/svg"):
                # SVG can't be validated/rasterized by PIL here.
                print_("svg, try next")
                continue
            image = PILimage.open(BytesIO(image_req.content))
            if image.width < 30 or image.height < 30:
                print_("image too small, next")
                continue
            print_("Image is good, upload it")
            break
        else:
            # Exhausted all candidate urls (or there never were any).
            print_("Unable to find image")
            db.close()
            return False, "No usable images"

    elif x.headers.get("Content-Type", "").startswith("image/"):
        # The post url itself is a direct image.
        print_("post url is direct image")
        image_req = x
        image = PILimage.open(BytesIO(x.content))

    else:
        print_(f'Unknown content type {x.headers.get("Content-Type")}')
        db.close()
        return False, f'Unknown content type {x.headers.get("Content-Type")} for submitted content'

    print_("Have image, uploading")

    name = f"posts/{post.base36id}/thumb.png"
    tempname = name.replace("/", "_")

    with open(tempname, "wb") as file:
        for chunk in image_req.iter_content(1024):
            file.write(chunk)

    aws.upload_from_file(name, tempname, resize=(375, 227))

    post.has_thumb = True
    db.add(post)
    db.commit()
    db.close()

    try:
        remove(tempname)
    except FileNotFoundError:
        pass

    return True, "Success"