def url(self):
    return urlunparse(ParseResult(**self.params))
def base_url(self) -> str:
    """Return the base URL without query string or fragment."""
    return urlunparse(
        ParseResult(self.scheme, self.host, self.path, '', '', ''))
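For context, a minimal sketch of how these two accessors fit together, assuming a hypothetical request-like object whose `params` holds the six ParseResult fields; the class and values below are illustrative, not from the original:

from urllib.parse import ParseResult, urlunparse

class _Req:
    # hypothetical request-like object, for illustration only
    scheme = 'https'
    host = 'example.com'
    path = '/a/b'

    @property
    def params(self):
        # the six ParseResult fields, keyed by name
        return dict(scheme=self.scheme, netloc=self.host, path=self.path,
                    params='', query='page=2', fragment='top')

    def url(self):
        return urlunparse(ParseResult(**self.params))

    def base_url(self) -> str:
        return urlunparse(
            ParseResult(self.scheme, self.host, self.path, '', '', ''))

r = _Req()
assert r.url() == 'https://example.com/a/b?page=2#top'
assert r.base_url() == 'https://example.com/a/b'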
def get_app(config, _app=None, with_external_mods=True, with_flask_admin=True):
    # Make sure app is a singleton
    if _app is not None:
        return _app

    app = Flask(__name__)

    # set default client encoding for SQLAlchemy connection
    parsed_url = urlparse(config["SQLALCHEMY_DATABASE_URI"])
    config["SQLALCHEMY_DATABASE_URI"] = ParseResult(
        parsed_url.scheme,
        parsed_url.netloc,
        parsed_url.path,
        parsed_url.params,
        urlencode({"client_encoding": "utf-8"}),
        parsed_url.fragment,
    ).geturl()
    app.config.update(config)

    # Bind app to DB
    DB.init_app(app)

    # pass parameters to the usershub authentication sub-module, DON'T CHANGE THIS
    app.config["DB"] = DB
    # pass the ID_APP to the submodule to avoid token conflicts between apps on the same server
    app.config["ID_APP"] = app.config["ID_APPLICATION_GEONATURE"]

    with app.app_context():
        from geonature.utils.logs import mail_handler

        if app.config["MAILERROR"]["MAIL_ON_ERROR"]:
            logging.getLogger().addHandler(mail_handler)

        # DB.create_all()

        from pypnusershub.routes import routes
        app.register_blueprint(routes, url_prefix="/auth")

        from pypnnomenclature.routes import routes
        app.register_blueprint(routes, url_prefix="/nomenclatures")

        if with_flask_admin:
            from pypnnomenclature.admin import admin

        from geonature.core.gn_permissions.routes import routes
        app.register_blueprint(routes, url_prefix="/permissions")

        from geonature.core.gn_permissions.backoffice.views import routes
        app.register_blueprint(routes, url_prefix="/permissions_backoffice")

        from geonature.core.routes import routes
        app.register_blueprint(routes, url_prefix="")

        from geonature.core.users.routes import routes
        app.register_blueprint(routes, url_prefix="/users")

        from geonature.core.gn_synthese.routes import routes
        app.register_blueprint(routes, url_prefix="/synthese")

        from geonature.core.gn_meta.routes import routes
        app.register_blueprint(routes, url_prefix="/meta")

        from geonature.core.ref_geo.routes import routes
        app.register_blueprint(routes, url_prefix="/geo")

        from geonature.core.gn_exports.routes import routes
        app.register_blueprint(routes, url_prefix="/exports")

        from geonature.core.auth.routes import routes
        app.register_blueprint(routes, url_prefix="/gn_auth")

        from geonature.core.gn_monitoring.routes import routes
        app.register_blueprint(routes, url_prefix="/gn_monitoring")

        from geonature.core.gn_commons.routes import routes
        app.register_blueprint(routes, url_prefix="/gn_commons")

        # errors
        from geonature.core.errors import routes

        app.wsgi_app = ReverseProxied(app.wsgi_app, script_name=config["API_ENDPOINT"])

        CORS(app, supports_credentials=True)

        # Load third-party modules
        if with_external_mods:
            for conf, manifest, module in list_and_import_gn_modules(app):
                app.register_blueprint(
                    module.backend.blueprint.blueprint,
                    url_prefix=conf["MODULE_URL"],
                )

    _app = app
    return app
def _buildurl(scheme="", netloc="", path="", params="", query="", fragment=""):
    struct = ParseResult(scheme, netloc, path, params, query, fragment)
    return urlunparse(struct)
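A quick usage check (keyword arguments keep the intent readable at call sites; outputs verified against the stdlib's urlunparse):

assert _buildurl(scheme='https', netloc='example.com', path='/search',
                 query='q=parse') == 'https://example.com/search?q=parse'
assert _buildurl() == ''  # all-empty components collapse to an empty URL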
def setUp(self):
    webapp.app.config['TESTING'] = True  # to get better error messages
    self.app = webapp.app.test_client()

    # set some defaults
    test_results = [
        {
            'content': 'first test content',
            'title': 'First Test',
            'url': 'http://first.test.xyz',
            'engines': ['youtube', 'startpage'],
            'engine': 'startpage',
            'parsed_url': ParseResult(
                scheme='http', netloc='first.test.xyz', path='/',
                params='', query='', fragment=''),  # noqa
        },
        {
            'content': 'second test content',
            'title': 'Second Test',
            'url': 'http://second.test.xyz',
            'engines': ['youtube', 'startpage'],
            'engine': 'youtube',
            'parsed_url': ParseResult(
                scheme='http', netloc='second.test.xyz', path='/',
                params='', query='', fragment=''),  # noqa
        },
    ]

    timings = [
        {'engine': 'startpage', 'total': 0.8, 'load': 0.7},
        {'engine': 'youtube', 'total': 0.9, 'load': 0.6},
    ]

    def search_mock(search_self, *args):
        search_self.result_container = Mock(
            get_ordered_results=lambda: test_results,
            answers=dict(),
            corrections=set(),
            suggestions=set(),
            infoboxes=[],
            unresponsive_engines=set(),
            results=test_results,
            results_number=lambda: 3,
            results_length=lambda: len(test_results),
            get_timings=lambda: timings,
            redirect_url=None)

    self.setattr4test(Search, 'search', search_mock)

    def get_current_theme_name_mock(override=None):
        if override:
            return override
        return 'legacy'

    self.setattr4test(webapp, 'get_current_theme_name', get_current_theme_name_mock)

    self.maxDiff = None  # to see full diffs
module = AnsibleModule(
    argument_spec=dict(
        base_url=dict(required=False, type='str', default='http://localhost:8080/'),
        value=dict(required=False, type='int', default=None),
        command=dict(required=False, type='str', default=None)))

# Collect the parameters to embed in the request URI
params = dict()
for key in ['command', 'value']:
    if module.params[key] is not None:
        params[key] = module.params[key]

# Build the request URI
parsed = urlparse(module.params['base_url'])
url = urlunparse(
    ParseResult(scheme=parsed.scheme,
                netloc=parsed.netloc,
                path=parsed.path,
                params='',
                query=urlencode(params),
                fragment=''))

# Issue the HTTP request
request = Request(url)
try:
    with urlopen(request) as response:
        body = response.read()
    module.exit_json(changed=True,
                     msg=body.decode('utf-8'),
                     ansible_facts={'url': url})
except URLError as err:
    module.fail_json(msg=str(err.reason), ansible_facts={'url': url})
def autoversion_filter(filename):
    asset_version = get_asset_version(filename)
    if asset_version is None:
        return filename
    original = urlparse(filename)._asdict()
    original.update(query=original.get('query') + '&v=' + asset_version)
    return ParseResult(**original).geturl()
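A rough usage sketch; `get_asset_version` is not defined in the snippet, so it is stubbed here purely for illustration:

def get_asset_version(filename):  # stub for illustration only
    return '42'

assert autoversion_filter('/static/app.css?x=1') == '/static/app.css?x=1&v=42'
# with no pre-existing query the '&' separator is still emitted,
# which browsers tolerate:
assert autoversion_filter('/static/app.css') == '/static/app.css?&v=42'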
async def url_resolver(*_args, **_kwargs):
    return ParseResult(
        scheme="", netloc="", path="", params="", query="", fragment=""
    )
def url_root(self) -> str:
    return urlunparse(
        ParseResult(
            self.scheme,
            self.host,
            self.path.rsplit('/', 1)[0] + '/',
            '',
            '',
            '',
        ),
    )
from os.path import join
from urllib.parse import ParseResult

import pytest
from requests import exceptions

from page_loader.html import prepare
from page_loader.url import is_local, parse

HTML = 'tests/fixtures/index.html'
NAME_HTML = 'ru-hexlet-io-courses.html'
EXPECT_HTML = 'tests/fixtures/download_index.html'
IMG = 'tests/fixtures/nodejs.png'
URL_IMG = 'https://ru.hexlet.io/assets/professions/nodejs.png'
PARSE_URL_IMG = ParseResult(
    scheme='',
    netloc='',
    path='/assets/professions/nodejs.png',
    params='',
    query='',
    fragment='',
)
URL = 'https://ru.hexlet.io/courses'
NAME_DIR = 'ru-hexlet-io-courses_files'
NAME_IMG = 'ru-hexlet-io-assets-professions-nodejs.png'
URL_JS = 'https://ru.hexlet.io/packs/js/runtime.js'
NAME_JS = "ru-hexlet-io-packs-js-runtime.js"
URL_CSS = 'https://ru.hexlet.io/assets/application.css'
NAME_CSS = "ru-hexlet-io-assets-application.css"
ASSETS = {
    join(NAME_DIR, NAME_CSS): URL_CSS,
    join(NAME_DIR, NAME_HTML): URL,
    join(NAME_DIR, NAME_IMG): URL_IMG,
    join(NAME_DIR, NAME_JS): URL_JS,
}
def url_preprocess(parsed_url):
    if parsed_url.netloc not in MIRRORS:
        raise Exception("Supported domains are: %s" % MIRRORS)
    # rewrite the mirror host to the main domain, dropping params and fragment;
    # empty strings (rather than None) keep the result safe to geturl()
    return ParseResult(parsed_url.scheme, MAIN_DOMAIN, parsed_url.path,
                       '', parsed_url.query, '')
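A hedged example of the rewrite, with MIRRORS and MAIN_DOMAIN given hypothetical values (the snippet does not define them):

from urllib.parse import urlparse

MIRRORS = ('mirror1.example.org', 'mirror2.example.org')  # hypothetical values
MAIN_DOMAIN = 'example.org'

rewritten = url_preprocess(urlparse('https://mirror1.example.org/file?dl=1'))
assert rewritten.geturl() == 'https://example.org/file?dl=1'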
def _decorated(request, *args, **kwargs):
    competition_id = kwargs.pop("competition_id", None)
    division_id = kwargs.pop("division_id", None)
    season_id = kwargs.pop("season_id", None)
    stage_id = kwargs.pop("stage_id", None)
    team_id = kwargs.pop("team_id", None)
    venue_id = kwargs.pop("venue_id", None)
    ground_id = kwargs.pop("ground_id", None)
    pool_id = kwargs.pop("pool_id", None)
    match_id = kwargs.pop("match_id", None)
    datestr = kwargs.pop("datestr", None)
    timestr = kwargs.pop("timestr", None)

    if competition_id:
        competition = get_object_or_404(Competition, pk=competition_id)
        kwargs["competition"] = competition

        if season_id:
            season = get_object_or_404(
                competition.seasons.select_related(
                    "competition",
                ).prefetch_related(
                    "divisions__rank_division",
                    "referees__person__user",
                    "referees__club",
                    "timeslots",
                ),
                pk=season_id,
            )
            kwargs["season"] = season

            if division_id:
                division = get_object_or_404(
                    season.divisions.select_related("season__competition"),
                    pk=division_id,
                )
                kwargs["division"] = division

                if stage_id:
                    stage = get_object_or_404(
                        division.stages.select_related(
                            "division__season__competition"),
                        pk=stage_id,
                    )
                    kwargs["stage"] = stage

                    if team_id:
                        team = get_object_or_404(
                            stage.undecided_teams.select_related(
                                "stage__division__season__competition"),
                            pk=team_id,
                        )
                        kwargs["team"] = team

                    if pool_id:
                        pool = get_object_or_404(
                            stage.pools.select_related(
                                "stage__division__season__competition"),
                            pk=pool_id,
                        )
                        kwargs["pool"] = pool

                    if match_id:
                        match = get_object_or_404(
                            stage.matches.select_related(
                                "stage__division__season__competition"),
                            pk=match_id,
                        )
                        kwargs["match"] = match

                elif team_id:
                    team = get_object_or_404(
                        division.teams.select_related(
                            "division__season__competition"),
                        pk=team_id,
                    )
                    kwargs["team"] = team

            elif venue_id:
                venue = get_object_or_404(
                    season.venues.select_related("season__competition"),
                    pk=venue_id)
                kwargs["venue"] = venue

                if ground_id:
                    ground = get_object_or_404(
                        venue.grounds.select_related(
                            "venue__season__competition"),
                        pk=ground_id,
                    )
                    kwargs["ground"] = ground

                    if match_id:
                        match = get_object_or_404(ground.matches, pk=match_id)
                        kwargs["match"] = match

                elif match_id:
                    match = get_object_or_404(venue.matches, pk=match_id)
                    kwargs["match"] = match

    if datestr:
        kwargs["date"] = parse(datestr).date()

    if timestr:
        kwargs["time"] = datetime.time(*time.strptime(timestr, "%H%M")[3:5])

    kwargs["base_url"] = ParseResult(
        "https" if request.is_secure() else "http",
        request.get_host(),
        "/",
        "",
        "",
        "",
    ).geturl()

    extra_context = kwargs.pop("extra_context", {})
    extra_context.update(kwargs)
    kwargs["extra_context"] = extra_context

    return f(request, *args, **kwargs)
def strip_scheme(url):
    from urllib.parse import urlparse, ParseResult
    parsed_result = urlparse(url)
    # blank out scheme and netloc, then re-assemble what remains
    return ParseResult(*(('', '') + parsed_result[2:])).geturl()
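With the stray `urlparse(args.target)` expression removed (it referenced an undefined `args`), the helper's behavior is worth pinning down, since blanking index 1 drops the host as well as the scheme:

assert strip_scheme('https://example.com/a/b?x=1') == '/a/b?x=1'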
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return urlparse(url)
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = base
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return urlparse(url)
    if scheme in uses_netloc:
        if netloc:
            return ParseResult(scheme, netloc, path, params, query, fragment)
        netloc = bnetloc
    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return ParseResult(scheme, netloc, path, params, query, fragment)

    base_parts = bpath.split('/')
    if base_parts[-1] != '':
        # the last item is not a directory, so will not be taken into account
        # in resolving the relative path
        del base_parts[-1]

    # for rfc3986, ignore all base path should the first character be root.
    if path[:1] == '/':
        segments = path.split('/')
    else:
        segments = base_parts + path.split('/')
        # filter out elements that would cause redundant slashes on re-joining
        # the resolved_path
        segments[1:-1] = filter(None, segments[1:-1])

    resolved_path = []
    for seg in segments:
        if seg == '..':
            try:
                resolved_path.pop()
            except IndexError:
                # ignore any .. segments that would otherwise cause an
                # IndexError when popped from resolved_path if resolving
                # for rfc3986
                pass
        elif seg == '.':
            continue
        else:
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # do some post-processing here. if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append('')

    return ParseResult(scheme, netloc, '/'.join(resolved_path) or '/',
                       params, query, fragment)
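A usage sketch; note this variant takes and returns parsed tuples rather than strings, and relies on `uses_relative` and `uses_netloc`, which are importable from urllib.parse:

from urllib.parse import urlparse, uses_relative, uses_netloc  # noqa: F401

base = urlparse('http://a/b/c/d;p?q')
assert urljoin(base, 'g').geturl() == 'http://a/b/c/g'
assert urljoin(base, '../..').geturl() == 'http://a/'
assert urljoin(base, '//other/x').geturl() == 'http://other/x'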
def handleLogin():
    current_app.logger.info('handleLogin [%s]' % request.method)

    me = None
    redirectURI = '%s/success' % current_app.config['BASEURL']
    fromURI = request.args.get('from_uri')

    current_app.logger.info('redirectURI [%s] fromURI [%s]' % (redirectURI, fromURI))
    form = LoginForm(me='',
                     client_id=current_app.config['CLIENT_ID'],
                     redirect_uri=redirectURI,
                     from_uri=fromURI)

    if form.validate_on_submit():
        current_app.logger.info('me [%s]' % form.me.data)
        me = 'https://%s/' % baseDomain(form.me.data, includeScheme=False)
        scope = ''
        authEndpoints = ninka.indieauth.discoverAuthEndpoints(me)

        if 'authorization_endpoint' in authEndpoints:
            authURL = None
            for url in authEndpoints['authorization_endpoint']:
                authURL = url
                break

            if authURL is not None:
                if me == current_app.config['BASEURL']:
                    scope = 'post update delete'
                url = ParseResult(authURL.scheme,
                                  authURL.netloc,
                                  authURL.path,
                                  authURL.params,
                                  urllib.urlencode({
                                      'me': me,
                                      'redirect_uri': form.redirect_uri.data,
                                      'client_id': form.client_id.data,
                                      'scope': scope,
                                      'response_type': 'id'
                                  }),
                                  authURL.fragment).geturl()

                if current_app.dbRedis is not None:
                    key = 'login-%s' % me
                    data = current_app.dbRedis.hgetall(key)
                    if data and 'token' in data:
                        # clear any existing auth data
                        current_app.dbRedis.delete('token-%s' % data['token'])
                        current_app.dbRedis.hdel(key, 'token')

                    current_app.dbRedis.hset(
                        key, 'auth_url',
                        ParseResult(authURL.scheme, authURL.netloc,
                                    authURL.path, '', '', '').geturl())
                    current_app.dbRedis.hset(key, 'from_uri', form.from_uri.data)
                    current_app.dbRedis.hset(key, 'redirect_uri', form.redirect_uri.data)
                    current_app.dbRedis.hset(key, 'client_id', form.client_id.data)
                    current_app.dbRedis.hset(key, 'scope', scope)
                    # expire in N minutes unless successful
                    current_app.dbRedis.expire(key, current_app.config['AUTH_TIMEOUT'])

                current_app.logger.info('redirecting to [%s]' % url)
                return redirect(url)
        else:
            return 'insert fancy no auth endpoint found error message here', 403

    templateContext = {}
    templateContext['title'] = 'Sign In'
    templateContext['form'] = form
    return render_template('login.jinja', **templateContext)
def urlunsplit(components):
    # despite the name, this expects the six fields of a ParseResult,
    # with the query (index 4) supplied as a mapping for urlencode
    _lst = list(components)
    _lst[4] = urlencode(_lst[4])
    return ParseResult(*_lst).geturl()
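Despite its name, this wrapper expects six components (a ParseResult shape) and a mapping in the query slot, which is what the `urlencode` call implies; for example:

assert urlunsplit(
    ('https', 'example.com', '/s', '', {'q': 'a b'}, '')
) == 'https://example.com/s?q=a+b'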
def _normalise_url(parsed, remove_frag: bool = True):
    d: dict = parsed._asdict()
    d["scheme"] = d["scheme"].lower()
    d["netloc"] = d["netloc"].lower()
    if remove_frag:
        d["fragment"] = ""
    return ParseResult(**d)
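With the `remove_frag` flag now honored (the original cleared the fragment unconditionally), the two modes behave as follows:

from urllib.parse import urlparse

p = urlparse('HTTPS://Example.COM/Path#frag')
assert _normalise_url(p).geturl() == 'https://example.com/Path'
assert _normalise_url(p, remove_frag=False).geturl() == 'https://example.com/Path#frag'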
def __init__(self, username, password,
             site='https://tutor-qa.openstax.org',
             email=None, email_username=None, email_password=None,
             driver_type='chrome', capabilities=None, pasta_user=None,
             wait_time=DEFAULT_WAIT_TIME, opera_driver='',
             existing_driver=None, **kwargs):
    """Base user constructor.

    username (string): website username
    password (string): website password
    site (string): website URL
    driver_type (string): web browser type
    pasta_user (PastaSauce): optional API access for saucelabs
    capabilities (dict): browser settings; copy object to avoid overwrite
        Defaults:
            DesiredCapabilities.ANDROID.copy()
            DesiredCapabilities.CHROME.copy()
            DesiredCapabilities.EDGE.copy()
            DesiredCapabilities.FIREFOX.copy()
            DesiredCapabilities.HTMLUNIT.copy()
            DesiredCapabilities.HTMLUNITWITHJS.copy()
            DesiredCapabilities.INTERNETEXPLORER.copy()
            DesiredCapabilities.IPAD.copy()
            DesiredCapabilities.IPHONE.copy()
            DesiredCapabilities.OPERA.copy()
            DesiredCapabilities.PHANTOMJS.copy()
            DesiredCapabilities.SAFARI.copy()
        Keys:
            platform
            browserName
            version
            javascriptEnabled
    wait (int): standard time, in seconds, to wait for Selenium commands
    opera_driver (string): Chromium location
    """
    self.username = username
    self.password = password
    # normalize the site URL: add a scheme if one is missing, then force https
    parse = list(
        urlparse(
            site if urlparse(site).scheme else '%s%s' % ('//', site)
        )
    )
    parse[0] = b'https'
    for index, value in enumerate(parse):
        parse[index] = value.decode('utf-8') if isinstance(value, bytes) \
            else value
    parse = ParseResult(*parse)
    self.url = parse.geturl()
    self.email = email
    self.email_username = email_username
    self.email_password = email_password
    self.assign = Assignment()
    super(User, self).__init__(driver_type=driver_type,
                               capabilities=capabilities,
                               pasta_user=pasta_user,
                               wait_time=wait_time,
                               opera_driver=opera_driver,
                               existing_driver=existing_driver,
                               **kwargs)
def url_root(self) -> str:
    return urlunparse(
        ParseResult(self.scheme, self.host, self.path.rsplit("/", 1)[0] + "/", "", "", "")
    )
import pytest

from tartiflette_plugin_scalars.url import URL
from urllib.parse import ParseResult


@pytest.mark.parametrize(
    "input_val,exception,output_val",
    [
        (False, TypeError, None),
        ("", ValueError, None),
        ("dailymtion", ValueError, None),
        (
            "https://www.dailymotion.com/play",
            None,
            ParseResult(
                scheme="https",
                netloc="www.dailymotion.com",
                path="/play",
                params="",
                query="",
                fragment="",
            ),
        ),
        (
            ParseResult(
                scheme="https",
                netloc="www.dailymotion.com",
                path="/play",
                params="",
                query="",
                fragment="",
            ),
            None,
            ParseResult(
def prepend_url(url):
    p = urlparse(url, 'http')
    # a scheme-less input like 'example.com/x' parses with everything in .path,
    # so treat .path as the netloc in that case
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''
    p = ParseResult('http', netloc, path, *p[3:])
    return p.geturl()
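Two sanity checks; note that an existing scheme is replaced with http rather than preserved:

assert prepend_url('example.com/x') == 'http://example.com/x'
assert prepend_url('https://example.com/x') == 'http://example.com/x'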
#! /usr/bin/env python3
import json
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from urllib.parse import urlunparse, ParseResult

if __name__ == '__main__':
    user_agent = "Mozilla/5.0 (Linux; Android 4.1.2; SHL21 Build/S4011) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36"
    opts = Options()
    opts.add_argument("user-agent=" + user_agent)
    driver = webdriver.Chrome(chrome_options=opts)
    try:
        url = urlunparse(
            ParseResult(scheme='file',
                        netloc='',
                        path=os.path.abspath('test/missing_image.html'),
                        params='',
                        query='',
                        fragment=''))
        driver.get(url)
        driver.save_screenshot('chrome_screenshot.png')
        for line in driver.get_log('browser'):
            print(line['message'])
    finally:
        driver.close()
def setUp(self):
    # skip init function (no external HTTP request)
    def dummy(*args, **kwargs):
        pass

    self.setattr4test(searx.search.processors, 'initialize_processor', dummy)

    from searx import webapp  # pylint: disable=import-outside-toplevel

    webapp.app.config['TESTING'] = True  # to get better error messages
    self.app = webapp.app.test_client()

    # set some defaults
    test_results = [
        {
            'content': 'first test content',
            'title': 'First Test',
            'url': 'http://first.test.xyz',
            'engines': ['youtube', 'startpage'],
            'engine': 'startpage',
            'parsed_url': ParseResult(
                scheme='http', netloc='first.test.xyz', path='/',
                params='', query='', fragment=''),  # noqa
        },
        {
            'content': 'second test content',
            'title': 'Second Test',
            'url': 'http://second.test.xyz',
            'engines': ['youtube', 'startpage'],
            'engine': 'youtube',
            'parsed_url': ParseResult(
                scheme='http', netloc='second.test.xyz', path='/',
                params='', query='', fragment=''),  # noqa
        },
    ]

    timings = [
        {'engine': 'startpage', 'total': 0.8, 'load': 0.7},
        {'engine': 'youtube', 'total': 0.9, 'load': 0.6},
    ]

    def search_mock(search_self, *args):
        search_self.result_container = Mock(
            get_ordered_results=lambda: test_results,
            answers=dict(),
            corrections=set(),
            suggestions=set(),
            infoboxes=[],
            unresponsive_engines=set(),
            results=test_results,
            results_number=lambda: 3,
            results_length=lambda: len(test_results),
            get_timings=lambda: timings,
            redirect_url=None,
            engine_data={})

    self.setattr4test(Search, 'search', search_mock)

    def get_current_theme_name_mock(override=None):
        if override:
            return override
        return 'oscar'

    self.setattr4test(webapp, 'get_current_theme_name', get_current_theme_name_mock)

    self.maxDiff = None  # to see full diffs
def removeUrlScheme(url: str) -> str:
    # rebuild with an empty scheme, then slice off the leading '//'
    return ParseResult('', *urlparse(url)[1:]).geturl()[2:]
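Quick checks of the round trip (the leading '//' left by the empty scheme is what the `[2:]` slice strips):

assert removeUrlScheme('https://example.com/a?b=1') == 'example.com/a?b=1'
assert removeUrlScheme('http://example.com') == 'example.com'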
        call = call_string(function_name, arguments)
        result = webbrowser_result_carver.result(function_name, arguments)
        print("assert", call, "==", call_value(result))
    except Exception:
        continue

from urllib.parse import SplitResult, ParseResult, urlparse, urlsplit

if __name__ == '__main__':
    assert urlparse(url='http://www.example.com', scheme='',
                    allow_fragments=True) == ParseResult(
                        scheme='http', netloc='www.example.com', path='',
                        params='', query='', fragment='')

    assert urlsplit(url='http://www.example.com', scheme='',
                    allow_fragments=True) == SplitResult(
                        scheme='http', netloc='www.example.com', path='',
                        query='', fragment='')

### Exercise 2: Abstracting Arguments

if __name__ == '__main__':
def change_url_scheme(url, scheme):
    return urlunparse(ParseResult(scheme, *urlparse(url)[1:]))
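For example:

assert change_url_scheme('http://example.com/a?b=1#c', 'https') == 'https://example.com/a?b=1#c'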
def host_url(self) -> str:
    return urlunparse(ParseResult(self.scheme, self.host, '', '', '', ''))
def submit_post(v):

    title = request.form.get("title", "")
    url = request.form.get("url", "")

    board = get_guild(request.form.get('board', 'general'), graceful=True)
    if not board:
        board = get_guild('general')

    if re.match(r'^\s*$', title):
        return render_template("submit.html",
                               v=v,
                               error="Please enter a better title.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    # if len(title)<10:
    #     return render_template("submit.html",
    #                            v=v,
    #                            error="Please enter a better title.",
    #                            title=title,
    #                            url=url,
    #                            body=request.form.get("body",""),
    #                            b=board
    #                            )

    elif len(title) > 500:
        return render_template("submit.html",
                               v=v,
                               error="500 character limit for titles.",
                               title=title[0:500],
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc) and not request.form.get(
            "body") and not request.files.get("file", None):
        return render_template("submit.html",
                               v=v,
                               error="Please enter a URL or some text.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    # sanitize title
    title = sanitize(title, linkgen=False)

    # check for duplicate
    dup = g.db.query(Submission).filter_by(title=title,
                                           author_id=v.id,
                                           url=url,
                                           is_deleted=False,
                                           board_id=board.id).first()
    if dup:
        return redirect(dup.permalink)

    # check for domain specific rules
    parsed_url = urlparse(url)
    domain = parsed_url.netloc

    # check ban status
    domain_obj = get_domain(domain)
    if domain_obj:
        if not domain_obj.can_submit:
            return render_template("submit.html",
                                   v=v,
                                   error=BAN_REASONS[domain_obj.reason],
                                   title=title,
                                   url=url,
                                   body=request.form.get("body", ""),
                                   b=get_guild(request.form.get("board", "general"),
                                               graceful=True))

        # check for embeds
        if domain_obj.embed_function:
            try:
                embed = eval(domain_obj.embed_function)(url)
            except:
                embed = ""
        else:
            embed = ""
    else:
        embed = ""

    # board
    board_name = request.form.get("board", "general")
    board_name = board_name.lstrip("+")
    board_name = board_name.rstrip()

    board = get_guild(board_name, graceful=True)
    if not board:
        board = get_guild('general')

    if board.is_banned:
        return render_template("submit.html",
                               v=v,
                               error=f"+{board.name} has been demolished.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=get_guild("general", graceful=True)), 403

    if board.has_ban(v):
        return render_template("submit.html",
                               v=v,
                               error=f"You are exiled from +{board.name}.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=get_guild("general")), 403

    if (board.restricted_posting or board.is_private) and not (board.can_submit(v)):
        return render_template(
            "submit.html",
            v=v,
            error=f"You are not an approved contributor for +{board.name}.",
            title=title,
            url=url,
            body=request.form.get("body", ""),
            b=get_guild(request.form.get("board", "general"), graceful=True))

    user_id = v.id
    user_name = v.username

    # Force https for submitted urls
    if request.form.get("url"):
        new_url = ParseResult(scheme="https",
                              netloc=parsed_url.netloc,
                              path=parsed_url.path,
                              params=parsed_url.params,
                              query=parsed_url.query,
                              fragment=parsed_url.fragment)
        url = urlunparse(new_url)
    else:
        url = ""

    # now make new post
    body = request.form.get("body", "")

    # catch too-long body
    if len(str(body)) > 10000:
        return render_template("submit.html",
                               v=v,
                               error="10000 character limit for text body",
                               title=title,
                               text=str(body)[0:10000],
                               url=url,
                               b=get_guild(request.form.get("board", "general"),
                                           graceful=True)), 400

    if len(url) > 2048:
        return render_template("submit.html",
                               v=v,
                               error="URLs cannot be over 2048 characters",
                               title=title,
                               text=body[0:2000],
                               b=get_guild(request.form.get("board", "general"),
                                           graceful=True)), 400

    with CustomRenderer() as renderer:
        body_md = renderer.render(mistletoe.Document(body))
    body_html = sanitize(body_md, linkgen=True)

    # check for embeddable video
    domain = parsed_url.netloc

    if url:
        repost = g.db.query(Submission).filter(
            Submission.url.ilike(url)).filter_by(
                board_id=board.id,
                is_deleted=False,
                is_banned=False).order_by(Submission.id.asc()).first()
    else:
        repost = None

    if request.files.get('file') and not v.can_submit_image:
        abort(403)

    new_post = Submission(
        title=title,
        url=url,
        author_id=user_id,
        body=body,
        body_html=body_html,
        embed_url=embed,
        domain_ref=domain_obj.id if domain_obj else None,
        board_id=board.id,
        original_board_id=board.id,
        over_18=(bool(request.form.get("over_18", "")) or board.over_18),
        post_public=not board.is_private,
        # author_name=user_name,
        # guild_name=board.name,
        repost_id=repost.id if repost else None)

    new_post.determine_offensive()

    g.db.add(new_post)
    g.db.commit()
    g.db.begin()

    vote = Vote(user_id=user_id,
                vote_type=1,
                submission_id=new_post.id)
    g.db.add(vote)

    # check for uploaded image
    if request.files.get('file'):
        file = request.files['file']
        name = f'post/{new_post.base36id}/{secrets.token_urlsafe(8)}'
        upload_file(name, file)

        # update post data
        new_post.url = f'https://{BUCKET}/{name}'
        new_post.is_image = True
        new_post.domain_ref = 1  # id of i.ruqqus.com domain
        g.db.add(new_post)

    # spin off thumbnail generation and csam detection as new threads
    elif new_post.url:
        new_thread = threading.Thread(target=thumbnail_thread,
                                      args=(new_post.base36id, ))
        new_thread.start()

        csam_thread = threading.Thread(target=check_csam, args=(new_post, ))
        csam_thread.start()

    # expire the relevant caches: front page new, board new
    # cache.delete_memoized(frontlist, sort="new")
    cache.delete_memoized(Board.idlist, board, sort="new")

    # print(f"Content Event: @{new_post.author.username} post {new_post.base36id}")

    return redirect(new_post.permalink)
def submit_post(v):

    title = request.form.get("title", "")
    title = title.lstrip().rstrip()

    url = request.form.get("url", "")

    board = get_guild(request.form.get('board', 'general'), graceful=True)
    if not board:
        board = get_guild('general')

    if not title:
        return render_template("submit.html",
                               v=v,
                               error="Please enter a better title.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    # if len(title)<10:
    #     return render_template("submit.html",
    #                            v=v,
    #                            error="Please enter a better title.",
    #                            title=title,
    #                            url=url,
    #                            body=request.form.get("body",""),
    #                            b=board
    #                            )

    elif len(title) > 500:
        return render_template("submit.html",
                               v=v,
                               error="500 character limit for titles.",
                               title=title[0:500],
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc) and not request.form.get(
            "body") and not request.files.get("file", None):
        return render_template("submit.html",
                               v=v,
                               error="Please enter a URL or some text.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=board)

    # sanitize title
    title = bleach.clean(title)

    # Force https for submitted urls
    if request.form.get("url"):
        new_url = ParseResult(scheme="https",
                              netloc=parsed_url.netloc,
                              path=parsed_url.path,
                              params=parsed_url.params,
                              query=parsed_url.query,
                              fragment=parsed_url.fragment)
        url = urlunparse(new_url)
    else:
        url = ""

    body = request.form.get("body", "")

    # check for duplicate
    dup = g.db.query(Submission).join(Submission.submission_aux).filter(
        Submission.author_id == v.id,
        Submission.is_deleted == False,
        Submission.board_id == board.id,
        SubmissionAux.title == title,
        SubmissionAux.url == url,
        SubmissionAux.body == body
    ).first()

    if dup:
        return redirect(dup.permalink)

    # check for domain specific rules
    parsed_url = urlparse(url)
    domain = parsed_url.netloc

    # check ban status
    domain_obj = get_domain(domain)
    if domain_obj:
        if not domain_obj.can_submit:
            return render_template("submit.html",
                                   v=v,
                                   error=BAN_REASONS[domain_obj.reason],
                                   title=title,
                                   url=url,
                                   body=request.form.get("body", ""),
                                   b=get_guild(request.form.get("board", "general"),
                                               graceful=True))

        # check for embeds
        if domain_obj.embed_function:
            try:
                embed = eval(domain_obj.embed_function)(url)
            except:
                embed = ""
        else:
            embed = ""
    else:
        embed = ""

    # board
    board_name = request.form.get("board", "general")
    board_name = board_name.lstrip("+")
    board_name = board_name.rstrip()

    board = get_guild(board_name, graceful=True)
    if not board:
        board = get_guild('general')

    if board.is_banned:
        return render_template("submit.html",
                               v=v,
                               error=f"+{board.name} has been demolished.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=get_guild("general", graceful=True)), 403

    if board.has_ban(v):
        return render_template("submit.html",
                               v=v,
                               error=f"You are exiled from +{board.name}.",
                               title=title,
                               url=url,
                               body=request.form.get("body", ""),
                               b=get_guild("general")), 403

    if (board.restricted_posting or board.is_private) and not (board.can_submit(v)):
        return render_template(
            "submit.html",
            v=v,
            error=f"You are not an approved contributor for +{board.name}.",
            title=title,
            url=url,
            body=request.form.get("body", ""),
            b=get_guild(request.form.get("board", "general"), graceful=True))

    # similarity check
    now = int(time.time())
    cutoff = now - 60 * 60 * 24

    similar_posts = g.db.query(Submission).options(
        lazyload('*')
    ).join(
        Submission.submission_aux
    ).filter(
        Submission.author_id == v.id,
        SubmissionAux.title.op('<->')(title) < app.config["SPAM_SIMILARITY_THRESHOLD"],
        Submission.created_utc > cutoff
    ).all()

    if url:
        similar_urls = g.db.query(Submission).options(
            lazyload('*')
        ).join(
            Submission.submission_aux
        ).filter(
            Submission.author_id == v.id,
            SubmissionAux.url.op('<->')(url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"],
            Submission.created_utc > cutoff
        ).all()
    else:
        similar_urls = []

    threshold = app.config["SPAM_SIMILAR_COUNT_THRESHOLD"]
    if v.age >= (60 * 60 * 24 * 30):
        threshold *= 4
    elif v.age >= (60 * 60 * 24 * 7):
        threshold *= 3
    elif v.age >= (60 * 60 * 24):
        threshold *= 2

    if max(len(similar_urls), len(similar_posts)) >= threshold:
        text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
        send_notification(v, text)

        v.ban(reason="Spamming.", include_alts=True, days=1)

        for post in similar_posts + similar_urls:
            post.is_banned = True
            post.ban_reason = "Automatic spam removal. This happened because the post's creator submitted too much similar content too quickly."
            g.db.add(post)

        g.db.commit()
        return redirect("/notifications")

    # catch too-long body
    if len(str(body)) > 10000:
        return render_template("submit.html",
                               v=v,
                               error="10000 character limit for text body",
                               title=title,
                               text=str(body)[0:10000],
                               url=url,
                               b=get_guild(request.form.get("board", "general"),
                                           graceful=True)), 400

    if len(url) > 2048:
        return render_template("submit.html",
                               v=v,
                               error="URLs cannot be over 2048 characters",
                               title=title,
                               text=body[0:2000],
                               b=get_guild(request.form.get("board", "general"),
                                           graceful=True)), 400

    # render text
    with CustomRenderer() as renderer:
        body_md = renderer.render(mistletoe.Document(body))
    body_html = sanitize(body_md, linkgen=True)

    # check spam
    soup = BeautifulSoup(body_html, features="html.parser")
    links = [x['href'] for x in soup.find_all('a') if x.get('href')]

    if url:
        links = [url] + links

    check_links = []
    for link in links:
        parse_link = urlparse(link)
        check_url = ParseResult(scheme="https",
                                netloc=parse_link.netloc,
                                path=parse_link.path,
                                params=parse_link.params,
                                query=parse_link.query,
                                fragment='')
        check_links.append(urlunparse(check_url))

    badlink = g.db.query(BadLink).filter(
        BadLink.link.in_(tuple(check_links))).first()
    if badlink:
        if badlink.autoban:
            text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
            send_notification(v, text)
            v.ban(days=1, reason="spam")
            return redirect('/notifications')
        else:
            return render_template(
                "submit.html",
                v=v,
                error=f"The link `{badlink.link}` is not allowed. Reason: {badlink.reason}",
                title=title,
                text=body[0:2000],
                b=get_guild(request.form.get("board", "general"),
                            graceful=True)), 400

    # check for embeddable video
    domain = parsed_url.netloc

    if url:
        repost = g.db.query(Submission).join(Submission.submission_aux).filter(
            SubmissionAux.url.ilike(url),
            Submission.board_id == board.id,
            Submission.is_deleted == False,
            Submission.is_banned == False
        ).order_by(
            Submission.id.asc()
        ).first()
    else:
        repost = None

    if request.files.get('file') and not v.can_submit_image:
        abort(403)

    # offensive
    for x in g.db.query(BadWord).all():
        if (body and x.check(body)) or x.check(title):
            is_offensive = True
            break
    else:
        is_offensive = False

    new_post = Submission(author_id=v.id,
                          domain_ref=domain_obj.id if domain_obj else None,
                          board_id=board.id,
                          original_board_id=board.id,
                          over_18=(bool(request.form.get("over_18", "")) or board.over_18),
                          post_public=not board.is_private,
                          repost_id=repost.id if repost else None,
                          is_offensive=is_offensive)
    g.db.add(new_post)
    g.db.flush()

    new_post_aux = SubmissionAux(id=new_post.id,
                                 url=url,
                                 body=body,
                                 body_html=body_html,
                                 embed_url=embed,
                                 title=title)
    g.db.add(new_post_aux)
    g.db.flush()

    vote = Vote(user_id=v.id,
                vote_type=1,
                submission_id=new_post.id)
    g.db.add(vote)
    g.db.flush()
    g.db.commit()

    g.db.refresh(new_post)

    # check for uploaded image
    if request.files.get('file'):
        file = request.files['file']
        name = f'post/{new_post.base36id}/{secrets.token_urlsafe(8)}'
        upload_file(name, file)

        # thumb_name=f'posts/{new_post.base36id}/thumb.png'
        # upload_file(name, file, resize=(375,227))

        # update post data
        new_post.url = f'https://{BUCKET}/{name}'
        new_post.is_image = True
        new_post.domain_ref = 1  # id of i.ruqqus.com domain
        g.db.add(new_post)
        g.db.commit()

    # spin off thumbnail generation and csam detection as new threads
    if new_post.url or request.files.get('file'):
        new_thread = threading.Thread(target=thumbnail_thread,
                                      args=(new_post.base36id,))
        new_thread.start()

        csam_thread = threading.Thread(target=check_csam, args=(new_post,))
        csam_thread.start()

    # expire the relevant caches: front page new, board new
    # cache.delete_memoized(frontlist, sort="new")
    g.db.commit()
    cache.delete_memoized(Board.idlist, board, sort="new")

    # print(f"Content Event: @{new_post.author.username} post {new_post.base36id}")

    return redirect(new_post.permalink)
def submit_post(v):

    title = request.form.get("title", "").lstrip().rstrip()
    title = title.lstrip().rstrip()
    title = title.replace("\n", "")
    title = title.replace("\r", "")
    title = title.replace("\t", "")

    url = request.form.get("url", "")

    board = get_guild(request.form.get('board', 'general'), graceful=True)
    if not board:
        board = get_guild('general')

    if not title:
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error="Please enter a better title.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 400),
            "api": lambda: ({"error": "Please enter a better title"}, 400)
        }

    # if len(title)<10:
    #     return render_template("submit.html",
    #                            v=v,
    #                            error="Please enter a better title.",
    #                            title=title,
    #                            url=url,
    #                            body=request.form.get("body",""),
    #                            b=board
    #                            )

    elif len(title) > 500:
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error="500 character limit for titles.",
                                             title=title[0:500],
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 400),
            "api": lambda: ({"error": "500 character limit for titles"}, 400)
        }

    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc) and not request.form.get(
            "body") and not request.files.get("file", None):
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error="Please enter a url or some text.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 400),
            "api": lambda: ({"error": "`url` or `body` parameter required."}, 400)
        }

    # sanitize title
    title = bleach.clean(title, tags=[])

    # Force https for submitted urls
    if request.form.get("url"):
        new_url = ParseResult(scheme="https",
                              netloc=parsed_url.netloc,
                              path=parsed_url.path,
                              params=parsed_url.params,
                              query=parsed_url.query,
                              fragment=parsed_url.fragment)
        url = urlunparse(new_url)
    else:
        url = ""

    body = request.form.get("body", "")

    # check for duplicate
    dup = g.db.query(Submission).join(Submission.submission_aux).filter(
        Submission.author_id == v.id,
        Submission.deleted_utc == 0,
        Submission.board_id == board.id,
        SubmissionAux.title == title,
        SubmissionAux.url == url,
        SubmissionAux.body == body).first()

    if dup:
        return redirect(dup.permalink)

    # check for domain specific rules
    parsed_url = urlparse(url)
    domain = parsed_url.netloc

    # check ban status
    domain_obj = get_domain(domain)
    if domain_obj:
        if not domain_obj.can_submit:
            if domain_obj.reason == 4:
                v.ban(days=30, reason="Digitally malicious content")
            elif domain_obj.reason == 7:
                v.ban(reason="Sexualizing minors")

            return {
                "html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=BAN_REASONS[domain_obj.reason],
                                                 title=title,
                                                 url=url,
                                                 body=request.form.get("body", ""),
                                                 b=board), 400),
                "api": lambda: ({"error": BAN_REASONS[domain_obj.reason]}, 400)
            }

        # check for embeds
        if domain_obj.embed_function:
            try:
                embed = eval(domain_obj.embed_function)(url)
            except BaseException:
                embed = ""
        else:
            embed = ""
    else:
        embed = ""

    # board
    board_name = request.form.get("board", "general")
    board_name = board_name.lstrip("+")
    board_name = board_name.rstrip()

    board = get_guild(board_name, graceful=True)
    if not board:
        board = get_guild('general')

    if board.is_banned:
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error=f"+{board.name} has been banned.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=get_guild("general", graceful=True)), 403),
            "api": lambda: (jsonify(
                {"error": f"403 Forbidden - +{board.name} has been banned."}))
        }

    if board.has_ban(v):
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error=f"You are exiled from +{board.name}.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=get_guild("general")), 403),
            "api": lambda: (jsonify({
                "error": f"403 Not Authorized - You are exiled from +{board.name}"
            }), 403)
        }

    if (board.restricted_posting or board.is_private) and not (board.can_submit(v)):
        return {
            "html": lambda: (render_template(
                "submit.html",
                v=v,
                error=f"You are not an approved contributor for +{board.name}.",
                title=title,
                url=url,
                body=request.form.get("body", ""),
                b=get_guild(request.form.get("board", "general"),
                            graceful=True)), 403),
            "api": lambda: (jsonify({
                "error": f"403 Not Authorized - You are not an approved contributor for +{board.name}"
            }), 403)
        }

    # similarity check
    now = int(time.time())
    cutoff = now - 60 * 60 * 24

    similar_posts = g.db.query(Submission).options(lazyload('*')).join(
        Submission.submission_aux
    ).filter(
        # or_(
        #     and_(
        Submission.author_id == v.id,
        SubmissionAux.title.op('<->')(title) < app.config["SPAM_SIMILARITY_THRESHOLD"],
        Submission.created_utc > cutoff
        #     ),
        #     and_(
        #         SubmissionAux.title.op('<->')(title) < app.config["SPAM_SIMILARITY_THRESHOLD"]/2,
        #         Submission.created_utc > cutoff
        #     )
        # )
    ).all()

    if url:
        similar_urls = g.db.query(Submission).options(lazyload('*')).join(
            Submission.submission_aux
        ).filter(
            # or_(
            #     and_(
            Submission.author_id == v.id,
            SubmissionAux.url.op('<->')(url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"],
            Submission.created_utc > cutoff
            #     ),
            #     and_(
            #         SubmissionAux.url.op('<->')(url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"]/2,
            #         Submission.created_utc > cutoff
            #     )
            # )
        ).all()
    else:
        similar_urls = []

    threshold = app.config["SPAM_SIMILAR_COUNT_THRESHOLD"]
    if v.age >= (60 * 60 * 24 * 7):
        threshold *= 3
    elif v.age >= (60 * 60 * 24):
        threshold *= 2

    if max(len(similar_urls), len(similar_posts)) >= threshold:
        text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
        send_notification(v, text)

        v.ban(reason="Spamming.", days=1)
        for alt in v.alts:
            if not alt.is_suspended:
                alt.ban(reason="Spamming.", days=1)

        for post in similar_posts + similar_urls:
            post.is_banned = True
            post.is_pinned = False
            post.ban_reason = "Automatic spam removal. This happened because the post's creator submitted too much similar content too quickly."
            g.db.add(post)

            ma = ModAction(user_id=1,
                           target_submission_id=post.id,
                           kind="ban_post",
                           board_id=post.board_id,
                           note="spam")
            g.db.add(ma)
        g.db.commit()
        return redirect("/notifications")

    # catch too-long body
    if len(str(body)) > 10000:
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error="10000 character limit for text body.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 400),
            "api": lambda: ({"error": "10000 character limit for text body."}, 400)
        }

    if len(url) > 2048:
        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error="2048 character limit for URLs.",
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 400),
            "api": lambda: ({"error": "2048 character limit for URLs."}, 400)
        }

    # render text
    body = preprocess(body)
    with CustomRenderer() as renderer:
        body_md = renderer.render(mistletoe.Document(body))
    body_html = sanitize(body_md, linkgen=True)

    # Run safety filter
    bans = filter_comment_html(body_html)
    if bans:
        ban = bans[0]
        reason = f"Remove the {ban.domain} link from your post and try again."
        if ban.reason:
            reason += f" {ban.reason_text}"

        # auto ban for digitally malicious content
        if any([x.reason == 4 for x in bans]):
            v.ban(days=30, reason="Digitally malicious content is not allowed.")
            abort(403)

        return {
            "html": lambda: (render_template("submit.html",
                                             v=v,
                                             error=reason,
                                             title=title,
                                             url=url,
                                             body=request.form.get("body", ""),
                                             b=board), 403),
            "api": lambda: ({"error": reason}, 403)
        }

    # check spam
    soup = BeautifulSoup(body_html, features="html.parser")
    links = [x['href'] for x in soup.find_all('a') if x.get('href')]

    if url:
        links = [url] + links

    for link in links:
        parse_link = urlparse(link)
        check_url = ParseResult(scheme="https",
                                netloc=parse_link.netloc,
                                path=parse_link.path,
                                params=parse_link.params,
                                query=parse_link.query,
                                fragment='')
        check_url = urlunparse(check_url)

        badlink = g.db.query(BadLink).filter(
            literal(check_url).contains(BadLink.link)).first()
        if badlink:
            if badlink.autoban:
                text = "Your Ruqqus account has been suspended for 1 day for the following reason:\n\n> Too much spam!"
                send_notification(v, text)
                v.ban(days=1, reason="spam")

                return redirect('/notifications')
            else:
                return {
                    "html": lambda: (render_template(
                        "submit.html",
                        v=v,
                        error=f"The link `{badlink.link}` is not allowed. Reason: {badlink.reason}.",
                        title=title,
                        url=url,
                        body=request.form.get("body", ""),
                        b=board), 400),
                    "api": lambda: ({
                        "error": f"The link `{badlink.link}` is not allowed. Reason: {badlink.reason}"
                    }, 400)
                }

    # check for embeddable video
    domain = parsed_url.netloc

    if url:
        repost = g.db.query(Submission).join(Submission.submission_aux).filter(
            SubmissionAux.url.ilike(url),
            Submission.board_id == board.id,
            Submission.deleted_utc == 0,
            Submission.is_banned == False).order_by(
                Submission.id.asc()).first()
    else:
        repost = None

    if repost and request.values.get("no_repost"):
        return redirect(repost.permalink)

    if request.files.get('file') and not v.can_submit_image:
        abort(403)

    # offensive
    is_offensive = False
    for x in g.db.query(BadWord).all():
        if (body and x.check(body)) or x.check(title):
            is_offensive = True
            break

    new_post = Submission(author_id=v.id,
                          domain_ref=domain_obj.id if domain_obj else None,
                          board_id=board.id,
                          original_board_id=board.id,
                          over_18=(bool(request.form.get("over_18", "")) or board.over_18),
                          post_public=not board.is_private,
                          repost_id=repost.id if repost else None,
                          is_offensive=is_offensive,
                          app_id=v.client.application.id if v.client else None,
                          creation_region=request.headers.get("cf-ipcountry"),
                          is_bot=request.headers.get("X-User-Type") == "Bot")

    g.db.add(new_post)
    g.db.flush()

    new_post_aux = SubmissionAux(id=new_post.id,
                                 url=url,
                                 body=body,
                                 body_html=body_html,
                                 embed_url=embed,
                                 title=title)
    g.db.add(new_post_aux)
    g.db.flush()

    vote = Vote(user_id=v.id,
                vote_type=1,
                submission_id=new_post.id)
    g.db.add(vote)
    g.db.flush()

    g.db.refresh(new_post)

    # check for uploaded image
    if request.files.get('file'):

        # check file size
        if request.content_length > 16 * 1024 * 1024 and not v.has_premium:
            g.db.rollback()
            abort(413)

        file = request.files['file']
        if not file.content_type.startswith('image/'):
            return {
                "html": lambda: (render_template("submit.html",
                                                 v=v,
                                                 error=f"Image files only.",
                                                 title=title,
                                                 body=request.form.get("body", ""),
                                                 b=board), 400),
                "api": lambda: ({"error": f"Image files only"}, 400)
            }

        name = f'post/{new_post.base36id}/{secrets.token_urlsafe(8)}'
        upload_file(name, file)

        # thumb_name=f'posts/{new_post.base36id}/thumb.png'
        # upload_file(name, file, resize=(375,227))

        # update post data
        new_post.url = f'https://{BUCKET}/{name}'
        new_post.is_image = True
        new_post.domain_ref = 1  # id of i.ruqqus.com domain
        g.db.add(new_post)

        # csam detection
        def del_function(db):
            delete_file(name)
            new_post.is_banned = True
            db.add(new_post)
            db.commit()
            ma = ModAction(kind="ban_post",
                           user_id=1,
                           note="banned image",
                           target_submission_id=new_post.id)
            db.add(ma)
            db.commit()

        csam_thread = threading.Thread(
            target=check_csam_url,
            args=(f"https://{BUCKET}/{name}", v,
                  lambda: del_function(db=db_session())))
        csam_thread.start()

    g.db.commit()

    # spin off thumbnail generation and csam detection as new threads
    if (new_post.url or request.files.get('file')) and (
            v.is_activated or request.headers.get('cf-ipcountry') != "T1"):
        new_thread = threading.Thread(target=thumbnail_thread,
                                      args=(new_post.base36id, ))
        new_thread.start()

    # expire the relevant caches: front page new, board new
    cache.delete_memoized(frontlist)
    g.db.commit()
    cache.delete_memoized(Board.idlist, board, sort="new")

    # print(f"Content Event: @{new_post.author.username} post {new_post.base36id}")

    return {
        "html": lambda: redirect(new_post.permalink),
        "api": lambda: jsonify(new_post.json)
    }