def webathena_kerberos_login(
    request: HttpRequest, user_profile: UserProfile, cred: Optional[str] = REQ(default=None)
) -> HttpResponse:
    """Accept a Webathena (Kerberos-in-the-browser) credential for the
    logged-in user and set up Zephyr mirroring for them by shipping the
    credential cache to the personal-zmirror server.

    Returns a JSON error if the credential is missing, the realm has
    Webathena disabled, the credential fails validation, or mirroring
    setup fails; otherwise returns json_success().
    """
    global kerberos_alter_egos
    if cred is None:
        return json_error(_("Could not find Kerberos credential"))
    if not user_profile.realm.webathena_enabled:
        return json_error(_("Webathena login not enabled"))

    try:
        parsed_cred = orjson.loads(cred)
        user = parsed_cred["cname"]["nameString"][0]
        if user in kerberos_alter_egos:
            user = kerberos_alter_egos[user]
        # These were previously `assert` statements, but asserts are
        # stripped when Python runs with -O, which would silently disable
        # these security checks.  An explicit raise is caught by the
        # `except Exception` below, preserving the original behavior of
        # returning "Invalid Kerberos cache" on validation failure.
        if user != user_profile.email.split("@")[0]:
            raise ValueError("Kerberos principal does not match user account")
        # Limit characters in usernames to valid MIT usernames.
        # This is important for security since DNS is not secure.
        if re.match(r"^[a-z0-9_.-]+$", user) is None:
            raise ValueError("Invalid characters in Kerberos principal")
        ccache = make_ccache(parsed_cred)

        # 'user' has been verified to contain only benign characters that won't
        # help with shell injection.
        user = mark_sanitized(user)

        # 'ccache' is only written to disk by the script and used as a kerberos
        # credential cache file.
        ccache = mark_sanitized(ccache)
    except Exception:
        return json_error(_("Invalid Kerberos cache"))

    # TODO: Send these data via (say) rabbitmq
    try:
        api_key = get_api_key(user_profile)
        command = [
            "/home/zulip/python-zulip-api/zulip/integrations/zephyr/process_ccache",
            user,
            api_key,
            base64.b64encode(ccache).decode("utf-8"),
        ]
        # shlex.quote each argument since the remote side runs this
        # through a shell; 'user' and 'ccache' were sanitized above.
        subprocess.check_call(
            ["ssh", settings.PERSONAL_ZMIRROR_SERVER, "--", " ".join(map(shlex.quote, command))]
        )
    except subprocess.CalledProcessError:
        logging.exception("Error updating the user's ccache", stack_info=True)
        return json_error(_("We were unable to setup mirroring for you"))

    return json_success()
def _handle_consume_exception(self, events: List[Dict[str, Any]], exception: Exception) -> None:
    """Handle a failure while consuming `events` from this worker's queue:
    report it to Sentry and the log, then append the events to a
    per-queue on-disk error file for later inspection/replay.
    """
    if isinstance(exception, InterruptConsumeException):
        # The exception signals that no further error handling
        # is needed and the worker can proceed.
        return

    # Attach the failing events and queue name to any Sentry event
    # reported by the logging calls below.
    with configure_scope() as scope:
        scope.set_context(
            "events",
            {
                "data": events,
                "queue_name": self.queue_name,
            },
        )

        if isinstance(exception, WorkerTimeoutException):
            with sentry_sdk.push_scope() as scope:
                # Group all timeouts for the same queue together in Sentry.
                scope.fingerprint = ["worker-timeout", self.queue_name]
                logging.exception(exception, stack_info=True)
        else:
            logging.exception("Problem handling data on queue %s", self.queue_name, stack_info=True)
    if not os.path.exists(settings.QUEUE_ERROR_DIR):
        os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage
    # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
    # flow. 'queue_name' is always a constant string.
    fname = mark_sanitized(f"{self.queue_name}.errors")
    fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
    line = f"{time.asctime()}\t{orjson.dumps(events).decode()}\n"
    lock_fn = fn + ".lock"
    # Serialize appends to the error file across worker processes.
    with lockfile(lock_fn):
        with open(fn, "a") as f:
            f.write(line)
    check_and_send_restart_signal()
def get_link_embed_data(
        url: str,
        maxwidth: Optional[int] = 640,
        maxheight: Optional[int] = 480) -> Optional[Dict[str, Any]]:
    """Collect embed-preview metadata (title, description, image) for
    `url`, or return None if the URL is not previewable."""
    if not is_link(url):
        return None
    if not valid_content_type(url):
        return None

    # We are using two different mechanisms to get the embed data
    # 1. Use OEmbed data, if found, for photo and video "type" sites
    # 2. Otherwise, use a combination of Open Graph tags and Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get('oembed'):
        return data

    response = requests.get(mark_sanitized(url), stream=True, headers=HEADERS, timeout=TIMEOUT)
    if response.ok:
        # Open Graph tags take precedence over generic meta tags; each
        # only fills in fields that are still missing.
        og_data = OpenGraphParser(response.text).extract_data()
        for field in ('title', 'description', 'image'):
            if not data.get(field) and og_data.get(field):
                data[field] = og_data[field]

        generic_data = GenericParser(response.text).extract_data() or {}
        for field in ('title', 'description', 'image'):
            if not data.get(field) and generic_data.get(field):
                data[field] = generic_data[field]
    return data
def _handle_consume_exception(self, events: List[Dict[str, Any]], exception: Exception) -> None:
    """Report a consume failure to Sentry and the log, then persist the
    offending events to this queue's on-disk error file."""
    # Attach the failing events to any Sentry report emitted below.
    with configure_scope() as scope:
        scope.set_context(
            "events",
            {
                "data": events,
                "queue_name": self.queue_name,
            },
        )

        if isinstance(exception, WorkerTimeoutException):
            with sentry_sdk.push_scope() as scope:
                # Group all timeouts on the same queue together in Sentry.
                scope.fingerprint = ['worker-timeout', self.queue_name]
                logging.exception("%s in queue %s", str(exception), self.queue_name, stack_info=True)
        else:
            logging.exception("Problem handling data on queue %s", self.queue_name, stack_info=True)

    if not os.path.exists(settings.QUEUE_ERROR_DIR):
        os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage

    # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
    # flow. 'queue_name' is always a constant string.
    error_file_name = mark_sanitized(f'{self.queue_name}.errors')
    error_file_path = os.path.join(settings.QUEUE_ERROR_DIR, error_file_name)
    record = f'{time.asctime()}\t{orjson.dumps(events).decode()}\n'
    # Serialize appends across worker processes via a lockfile.
    with lockfile(error_file_path + '.lock'):
        with open(error_file_path, 'a') as f:
            f.write(record)
    check_and_send_restart_signal()
def _index_for(self, minified_src: str) -> sourcemap.SourceMapDecoder:
    '''Return the source map index for minified_src, loading it if not
    already loaded.

    Raises AssertionError if minified_src attempts path traversal, and
    KeyError if no matching '.map' file exists in any configured
    source-map directory.
    '''
    # Prevent path traversal.  This was previously a bare `assert`, but
    # asserts are stripped when Python runs with -O, which would silently
    # disable this security check; raising AssertionError explicitly
    # keeps the same exception type for callers while surviving -O.
    if ".." in minified_src or "/" in minified_src:
        raise AssertionError(f"suspicious minified_src: {minified_src!r}")
    if minified_src not in self._indices:
        for source_dir in self._dirs:
            filename = os.path.join(source_dir, minified_src + '.map')
            if os.path.isfile(filename):
                # Use 'mark_sanitized' to force Pysa to ignore the fact that
                # 'filename' is user controlled. While putting user
                # controlled data into a filesystem operation is bad, in
                # this case it's benign because 'filename' can't traverse
                # directories outside of the pre-configured 'sourcemap_dirs'
                # (due to the above check) and will always end in
                # '.map'. Additionally, the result of this function is used
                # for error logging and not returned to the user, so
                # controlling the loaded file would not be useful to an
                # attacker.
                with open(mark_sanitized(filename)) as fp:
                    self._indices[minified_src] = sourcemap.load(fp)
                break

    return self._indices[minified_src]
def check_add_realm_emoji(realm: Realm,
                          name: str,
                          author: UserProfile,
                          image_file: IO[bytes]) -> RealmEmoji:
    """Validate, persist, and upload a new custom emoji for `realm`.

    Raises JsonableError if an emoji with this name already exists; the
    database row is rolled back if the image upload fails.
    """
    try:
        realm_emoji = RealmEmoji(realm=realm, name=name, author=author)
        realm_emoji.full_clean()
        realm_emoji.save()
    except django.db.utils.IntegrityError:
        # Match the string in upload_emoji.
        raise JsonableError(_("A custom emoji with this name already exists."))

    emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)

    # The only user-controlled portion of 'emoji_file_name' is an extension,
    # which can not contain '..' or '/' or '\', making it difficult to exploit
    emoji_file_name = mark_sanitized(emoji_file_name)

    upload_succeeded = False
    is_animated = False
    try:
        is_animated = upload_emoji_image(image_file, emoji_file_name, author)
        upload_succeeded = True
    finally:
        # Don't leave an orphaned database row behind on upload failure.
        if not upload_succeeded:
            realm_emoji.delete()

    realm_emoji.file_name = emoji_file_name
    realm_emoji.is_animated = is_animated
    realm_emoji.save(update_fields=["file_name", "is_animated"])
    notify_realm_emoji(realm_emoji.realm)
    return realm_emoji
def get_link_embed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> Optional[Dict[str, Any]]:
    """Collect embed-preview metadata (title, description, image) for
    `url`, or return None if the URL is not previewable."""
    if not is_link(url):
        return None
    if not valid_content_type(url):
        return None

    # We are using two different mechanisms to get the embed data
    # 1. Use OEmbed data, if found, for photo and video "type" sites
    # 2. Otherwise, use a combination of Open Graph tags and Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get("oembed"):
        return data

    response = PreviewSession().get(mark_sanitized(url), stream=True)
    if response.ok:
        # Open Graph tags take precedence; the generic parser only fills
        # in fields that are still missing afterwards.
        og_data = OpenGraphParser(
            response.content, response.headers.get("Content-Type")
        ).extract_data()
        for field in ("title", "description", "image"):
            if not data.get(field) and og_data.get(field):
                data[field] = og_data[field]

        generic_data = GenericParser(
            response.content, response.headers.get("Content-Type")
        ).extract_data() or {}
        for field in ("title", "description", "image"):
            if not data.get(field) and generic_data.get(field):
                data[field] = generic_data[field]
    return data
def get_link_embed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> Optional[UrlEmbedData]:
    """Collect embed-preview metadata for `url`, or return None if the
    URL is not previewable or the fetch fails."""
    if not is_link(url):
        return None
    if not valid_content_type(url):
        return None

    # The oembed data from pyoembed may be complete enough to return
    # as-is; if so, we use it. Otherwise, we use it as a _base_ for
    # the other, less sophisticated techniques which we apply as
    # successive fallbacks.
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    if isinstance(data, UrlOEmbedData):
        return data

    response = PreviewSession().get(mark_sanitized(url), stream=True)
    if not response.ok:
        return None

    if data is None:
        data = UrlEmbedData()

    content_type = response.headers.get("Content-Type")
    for parser_class in (OpenGraphParser, GenericParser):
        data.merge(parser_class(response.content, content_type).extract_data())

    # Resolve a relative image URL against the final (post-redirect) URL.
    if data.image:
        data.image = urljoin(response.url, data.image)
    return data
def get_safe_redirect_to(url: str, redirect_host: str) -> str:
    """Return `url` joined onto `redirect_host` when it is safe to
    redirect there; otherwise fall back to `redirect_host` itself."""
    if url_has_allowed_host_and_scheme(url=url, allowed_hosts=None):
        # Mark as safe to prevent Pysa from surfacing false positives for
        # open redirects. In this branch, we have already checked that the URL
        # points to the specified 'redirect_host', or is relative.
        return urllib.parse.urljoin(redirect_host, mark_sanitized(url))
    return redirect_host
def get_zoom_sid(request: HttpRequest) -> str:
    """Return a session-bound, unpredictable state value for the Zoom
    OAuth flow.

    This is used to prevent CSRF attacks on the Zoom OAuth
    authentication flow. We want this value to be unpredictable and
    tied to the session, but we don't want to expose the main CSRF
    token directly to the Zoom server.
    """
    csrf.get_token(request)
    if getattr(request, "_dont_enforce_csrf_checks", False):
        sid = ""
    else:
        sid = salted_hmac("Zulip Zoom sid", request.META["CSRF_COOKIE"]).hexdigest()
    # Use 'mark_sanitized' to cause Pysa to ignore the flow of user controlled
    # data out of this function. 'request.META' is indeed user controlled, but
    # post-HMAC output is no longer meaningfully controllable.
    return mark_sanitized(sid)
def _handle_consume_exception(self, events: List[Dict[str, Any]]) -> None:
    """Log the consume failure and append the offending events to this
    queue's on-disk error file for later inspection/replay."""
    self._log_problem()
    if not os.path.exists(settings.QUEUE_ERROR_DIR):
        os.mkdir(settings.QUEUE_ERROR_DIR)  # nocoverage

    # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
    # flow. 'queue_name' is always a constant string.
    error_path = os.path.join(
        settings.QUEUE_ERROR_DIR, mark_sanitized(f'{self.queue_name}.errors')
    )
    record = f'{time.asctime()}\t{orjson.dumps(events).decode()}\n'
    # Serialize appends across worker processes via a lockfile.
    with lockfile(error_path + '.lock'):
        with open(error_path, 'ab') as f:
            f.write(record.encode('utf-8'))
    check_and_send_restart_signal()
def login_and_go_to_home(request: HttpRequest, user_profile: UserProfile) -> HttpResponse:
    """Complete authentication for `user_profile` and redirect to the
    app, finishing the mobile/desktop OTP flow if one is in progress."""
    # These session variables were stored (with expiry) at the start of
    # a mobile/desktop registration flow; reading them deletes them.
    mobile_flow_otp = get_expirable_session_var(
        request.session, 'registration_mobile_flow_otp', delete=True)
    desktop_flow_otp = get_expirable_session_var(
        request.session, 'registration_desktop_flow_otp', delete=True)
    if mobile_flow_otp is not None:
        return finish_mobile_flow(request, user_profile, mobile_flow_otp)
    if desktop_flow_otp is not None:
        return finish_desktop_flow(request, user_profile, desktop_flow_otp)

    do_login(request, user_profile)
    # Using 'mark_sanitized' to work around false positive where Pysa thinks
    # that 'user_profile' is user-controlled
    return HttpResponseRedirect(mark_sanitized(user_profile.realm.uri) + reverse('home'))
def add_query_arg_to_redirect_url(original_url: str, query_arg: str) -> str:
    """Append `query_arg` to a redirect URL that already has a query string."""
    assert '?' in original_url
    # Using 'mark_sanitized' because user-controlled data after the '?' is
    # not relevant for open redirects
    return original_url + "&" + mark_sanitized(query_arg)
def maybe_send_to_registration(
        request: HttpRequest,
        email: str,
        full_name: str = '',
        mobile_flow_otp: Optional[str] = None,
        desktop_flow_otp: Optional[str] = None,
        is_signup: bool = False,
        password_required: bool = True,
        multiuse_object_key: str = '',
        full_name_validated: bool = False) -> HttpResponse:
    """Given a successful authentication for an email address (i.e. we've
    confirmed the user controls the email address) that does not
    currently have a Zulip account in the target realm, send them to
    the registration flow or the "continue to registration" flow,
    depending on is_signup, whether the email address can join the
    organization (checked in HomepageForm), and similar details.
    """

    # In the desktop and mobile registration flows, the sign up
    # happens in the browser so the user can use their
    # already-logged-in social accounts. Then at the end, with the
    # user account created, we pass the appropriate data to the app
    # via e.g. a `zulip://` redirect. We store the OTP keys for the
    # mobile/desktop flow in the session with 1-hour expiry, because
    # we want this configuration of having a successful authentication
    # result in being logged into the app to persist if the user makes
    # mistakes while trying to authenticate (E.g. clicks the wrong
    # Google account, hits back, etc.) during a given browser session,
    # rather than just logging into the webapp in the target browser.
    #
    # We can't use our usual pre-account-creation state storage
    # approach of putting something in PreregistrationUser, because
    # that would apply to future registration attempts on other
    # devices, e.g. just creating an account on the web on their laptop.
    assert not (mobile_flow_otp and desktop_flow_otp)
    if mobile_flow_otp:
        set_expirable_session_var(request.session, 'registration_mobile_flow_otp',
                                  mobile_flow_otp, expiry_seconds=3600)
    elif desktop_flow_otp:
        set_expirable_session_var(request.session, 'registration_desktop_flow_otp',
                                  desktop_flow_otp, expiry_seconds=3600)

    if multiuse_object_key:
        # A multiuse invite link was used; it determines the target
        # realm, the initial streams, and the invited-as role.
        from_multiuse_invite = True
        multiuse_obj = Confirmation.objects.get(
            confirmation_key=multiuse_object_key).content_object
        realm = multiuse_obj.realm
        invited_as = multiuse_obj.invited_as
    else:
        from_multiuse_invite = False
        multiuse_obj = None
        try:
            realm = get_realm(get_subdomain(request))
        except Realm.DoesNotExist:
            realm = None
        invited_as = PreregistrationUser.INVITE_AS['MEMBER']

    # HomepageForm checks whether this email address may sign up in `realm`.
    form = HomepageForm({'email': email}, realm=realm,
                        from_multiuse_invite=from_multiuse_invite)
    if form.is_valid():
        # If the email address is allowed to sign up for an account in
        # this organization, construct a PreregistrationUser and
        # Confirmation objects, and then send the user to account
        # creation or confirm-continue-registration depending on
        # is_signup.
        try:
            # Reuse the most recent existing invitation for this email
            # in this realm, if there is one.
            prereg_user = PreregistrationUser.objects.filter(
                email__iexact=email, realm=realm).latest("invited_at")

            # password_required and full_name data passed here as argument should take precedence
            # over the defaults with which the existing PreregistrationUser that we've just fetched
            # was created.
            prereg_user.password_required = password_required
            update_fields = ["password_required"]

            if full_name:
                prereg_user.full_name = full_name
                prereg_user.full_name_validated = full_name_validated
                update_fields.extend(["full_name", "full_name_validated"])
            prereg_user.save(update_fields=update_fields)
        except PreregistrationUser.DoesNotExist:
            prereg_user = create_preregistration_user(
                email, request,
                password_required=password_required,
                full_name=full_name,
                full_name_validated=full_name_validated,
            )

        if multiuse_obj is not None:
            # Carry the invite's stream subscriptions and role over to
            # the new PreregistrationUser.
            request.session.modified = True
            streams_to_subscribe = list(multiuse_obj.streams.all())
            prereg_user.streams.set(streams_to_subscribe)
            prereg_user.invited_as = invited_as
            prereg_user.save()

        # We want to create a confirmation link to create an account
        # in the current realm, i.e. one with a hostname of
        # realm.host. For the Apache REMOTE_USER_SSO auth code path,
        # this is preferable over realm.get_host() because the latter
        # contains the port number of the Apache instance and we want
        # to send the user back to nginx. But if we're in the realm
        # creation code path, there might not be a realm yet, so we
        # have to use request.get_host().
        if realm is not None:
            host = realm.host
        else:
            host = request.get_host()

        # Mark 'host' as safe for use in a redirect. It's pulled from the
        # current request or realm, both of which only allow a limited set of
        # trusted hosts.
        confirmation_link = create_confirmation_link(prereg_user, mark_sanitized(host),
                                                     Confirmation.USER_REGISTRATION)
        if is_signup:
            return redirect(confirmation_link)

        context = {'email': email,
                   'continue_link': confirmation_link,
                   'full_name': full_name}
        return render(request,
                      'zerver/confirm_continue_registration.html',
                      context=context)

    # This email address is not allowed to join this organization, so
    # just send the user back to the registration page.
    url = reverse('register')
    context = login_context(request)
    extra_context: Mapping[str, Any] = {
        'form': form,
        'current_url': lambda: url,
        'from_multiuse_invite': from_multiuse_invite,
        'multiuse_object_key': multiuse_object_key,
        'mobile_flow_otp': mobile_flow_otp,
        'desktop_flow_otp': desktop_flow_otp,
    }
    context.update(extra_context)
    return render(request, 'zerver/accounts_home.html', context=context)