def completeurl(fullurl, partialurl):
    """Return ``partialurl`` completed with the scheme and host of ``fullurl``.

    The scheme and network location are taken from ``fullurl``; the path,
    path parameters, query string and fragment are taken from
    ``partialurl``.  Any scheme or host present in ``partialurl`` is ignored.

    :param fullurl: absolute URL supplying the scheme and host.
    :param partialurl: URL (usually host-less) supplying everything else.
    :returns: the recombined URL as a string.
    """
    # Import the parser directly instead of relying on ``urllib2`` happening
    # to expose ``urlparse`` as an attribute; fall back for Python 2.
    try:
        from urllib.parse import urlparse, urlunparse
    except ImportError:
        from urlparse import urlparse, urlunparse

    base = urlparse(fullurl)
    partial = urlparse(partialurl)
    return urlunparse((
        base.scheme,
        base.netloc,
        partial.path,
        partial.params,
        partial.query,
        partial.fragment,
    ))
def addQuestionUrls(self, parenturi):
    """Queue a cloned task for every question link found in ``self.soup``.

    Each matching anchor bumps ``self.curiter``; once that counter exceeds
    ``self.POSTS_ITERATIONS`` the method stops and returns ``False``.
    Otherwise it returns ``True`` after logging the size of
    ``self.linksOut``.  ``parenturi`` is accepted but not used here.
    """
    href_pattern = re.compile('/question/index;.*')
    anchors = self.soup.findAll('a', {'href': href_pattern})
    for anchor in anchors:
        self.curiter += 1
        if self.curiter > self.POSTS_ITERATIONS:
            return False
        absolute = normalize(self.__base_uri + anchor['href'])
        parts = list(urlparse.urlparse(absolute))
        # Index 3 is the ";params" component: in every ';'-separated piece,
        # everything from the first '=' to the end is replaced by '=0'.
        zeroed = []
        for piece in parts[3].split(';'):
            zeroed.append(re.sub('=.*?$', '=0', piece))
        parts[3] = ';'.join(zeroed)
        question_url = urlparse.urlunparse(parts)
        question_task = self.task.clone()
        question_task.instance_data['uri'] = question_url
        question_task.instance_data['isquestion'] = True
        self.linksOut.append(question_task)
    log.info(self.log_msg("No. of tasks appened %s" % len(self.linksOut)))
    return True
def _getNextLink(self, inc=1, pageNum=None):
    """Build the URL of another results page from ``self.currenturi``.

    A link is only produced when ``self.soup`` contains an anchor whose text
    is ``Next`` or ``Previous``.  The ``page`` query parameter (defaulting to
    ``0`` when absent) is either replaced by ``pageNum`` or advanced by
    ``inc``, and ``lastpagesent`` is set to the same value.

    :param inc: amount added to the current page when ``pageNum`` is falsy.
    :param pageNum: explicit page to jump to.  Because it is tested for
        truthiness, ``0`` is treated the same as "not supplied".
    :returns: the rebuilt URL, or ``''`` when there is no pagination anchor
        or the resulting page number is negative.
    """
    #implements pagination javascript function equivalent in python
    url_unparsed = list(urlparse.urlparse(self.currenturi))
    # ``cgi.parse_qs`` is deprecated; ``urlparse.parse_qs`` is the same parser.
    params = urlparse.parse_qs(url_unparsed[4])
    link = ''
    if self.soup.findAll('a', text='Next') or self.soup.findAll('a', text='Previous'):
        if not params.get('page'):
            params['page'] = ['0']
        if pageNum:
            params['page'][0] = str(pageNum)
        else:
            params['page'][0] = str(int(params['page'][0]) + inc)
        # Shares the same list object as 'page', so both carry one value.
        params['lastpagesent'] = params['page']
        # Only the first value of each parameter is kept in the rebuilt query.
        url_unparsed[4] = '&'.join(['%s=%s' % (k, v[0]) for k, v in params.items()])
        if int(params['page'][0]) >= 0:
            link = urlparse.urlunparse(url_unparsed)
    return link
def getInstance(cls, connectionsUrl):
    """
    Looks for config in connectionsUrl

    Returns the ``FnSignatureFactory`` cached in ``cls._instances`` for
    ``connectionsUrl``, building and caching it on first use.

    When ``connectionsUrl`` starts with a URL scheme, the last path segment
    is replaced by ``fn_signatures`` and the script at that URL is downloaded
    and executed.  Otherwise ``fn_signatures.conf.py`` in the same directory
    as ``connectionsUrl`` is executed.  Either script must define
    ``fn_signatures`` and ``fn_names``.
    """
    instance = cls._instances.get(connectionsUrl)
    if instance is None:
        # http://localhost/harmony/wk.cgi/harmony/connections?_SID_=20080208162931-2e6444a8371cf04261e43057f4b1a071
        loc = {}
        if re.match('(?i)[A-Z]+://', connectionsUrl):
            #'/harmony/wk.cgi/harmony/connections' -> '/harmony/wk.cgi/harmony/fn_signatures'
            scheme, netloc, path, params, query, fragment = urlparse.urlparse(connectionsUrl)
            path = path.rsplit('/', 1)[0] + '/fn_signatures'
            url = urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
            # Close the response explicitly so the connection is not leaked.
            response = urlopen(url)
            try:
                script = response.read()
            finally:
                response.close()
            # SECURITY NOTE(review): this executes code fetched over the
            # network; it is only safe if the server at connectionsUrl is
            # fully trusted.
            exec(script, {}, loc)
        else:
            path = os.path.join(os.path.split(connectionsUrl)[0], 'fn_signatures.conf.py')
            # Portable equivalent of execfile(path, {}, loc).
            with open(path) as conf_file:
                source = conf_file.read()
            exec(compile(source, path, 'exec'), {}, loc)
        cls._instances[connectionsUrl] = instance = FnSignatureFactory(
            loc['fn_signatures'], loc['fn_names'])
    return instance
def convert(protocol, domain, path, query):
    """Fetch the target page and redirect non-HTML responses to their source.

    The request is refused when the target domain resolves to the same
    domain parts as this application's own host, or when ``protocol`` is not
    HTTP/HTTPS.  The UTF-8 conversion and link rewriting this service is
    named for are not part of the code visible here.

    Raises ``ConversationError`` for a refused request, an unexpected HTTP
    status, or a download failure.
    """
    own_parts = get_domain_parts(urlparse.urlparse(request.host_url).netloc)
    target_parts = get_domain_parts(domain)
    if own_parts == target_parts:
        raise ConversationError('space-time continuum interruption')

    if protocol.lower() not in ('http', 'https'):
        raise ConversationError('only HTTP and HTTPS are supported')

    # If we use query string, then the user will have to handle query string
    # escaping by themselves. The current "fragmented" URL is done so that
    # prefixing the target URL and replacing `://` with `/` is all it
    # requires to use the service.
    pageurl = urlparse.urlunparse(
        (protocol, domain, path, None, urlencode(query), None))

    # httplib2 is used so that HTTP requests are properly cached according
    # to the source's header. Any page with embedded CSS or JavaScript that
    # request external resource from relative path will make a hit to this
    # application, thus we redirect out all non-HTML page to the original
    # location.
    #
    # TODO:
    # - Attempt to convert text/plain.
    try:
        headers, body = http.request(pageurl)
        status = headers.status
        if status not in (200, 302, 304):
            raise ConversationError(
                'could not fetch web page, %s %s'
                % (status, httplib.responses[status]))
        baseurl = headers['content-location']
        if 'html' not in headers['content-type']:
            return redirect(baseurl)
    except urlfetch.DownloadError:
        raise ConversationError('could not fetch web page, invalid URL')
except Cookie.URLMismatch, e: self.debug("not sending session cookie, URL mismatch: %s", e) return original_url except Exception, e: self.error("not sending session cookie, unknown error: %s", e) return original_url # O.K. session_cookie is valid to be returned, stash it away where it will will # get included in a HTTP Cookie headed sent to the server. self.log.debug("setting session_cookie into context '%s'", session_cookie.http_cookie()) setattr(context, 'session_cookie', session_cookie.http_cookie()) # Form the session URL by substituting the session path into the original URL scheme, netloc, path, params, query, fragment = urlparse.urlparse(original_url) path = self.session_path session_url = urlparse.urlunparse((scheme, netloc, path, params, query, fragment)) return session_url def create_connection(self, ccache=None, verbose=0, fallback=True, delegate=False, nss_dir=None): try: rpc_uri = self.env[self.env_rpc_uri_key] principal = get_current_principal() setattr(context, 'principal', principal) # We have a session cookie, try using the session URI to see if it # is still valid if not delegate: rpc_uri = self.apply_session_cookie(rpc_uri) except ValueError: # No session key, do full Kerberos auth
def unparseLine(self, parsed):
    """Join a ``(method, url components, protocol)`` triple into one line.

    The URL components are reassembled with ``urlparse.urlunparse`` and the
    three pieces are separated by single spaces.
    """
    verb, url_parts, version = parsed
    url = urlparse.urlunparse(url_parts)
    return ' '.join([verb, url, version])
def sso_authorize(request):
    """
    Authorizes specific web sites to utilize an existing My.jobs account

    Required on HTTP GET:
    :auth_callback: GET parameter - Desired return url when authorization
        succeeds

    Required on HTTP POST:
    :auth_callback: POST parameter, copy of :auth_callback: GET parameter

    Raises Http404 when :auth_callback: is missing or has no network
    location.  On success the user's session key is added to the callback's
    query string as ``key`` and the user is redirected there (or, for AJAX
    posts, the url is returned as JSON).
    """
    # Common between GET and POST, callback is required.
    auth_callback = request.GET.get('auth_callback') or \
        request.POST.get('auth_callback')
    data = {'auth_callback': auth_callback}
    if auth_callback:
        auth_callback = unquote(auth_callback)
        auth_callback = urlparse.urlparse(auth_callback)
        if not auth_callback.netloc:
            # If the base url of the callback is not truthy, the url
            # must be malformed somehow
            raise Http404
    else:
        raise Http404

    if request.method == 'GET':
        # Initial view after being redirected from an external site
        data['auth_callback_short'] = auth_callback.netloc

        if not request.user.is_anonymous():
            # Process logged in users first; Certain criteria may cause the
            # user to be logged out.
            good_key = request.session.get('key')
            test_key = request.GET.get('key')
            if good_key:
                # The current user already has a key available.
                if test_key:
                    # The remote site has provided a key; the user has
                    # potentially already authorized this site.
                    if test_key == good_key:
                        if request.user.authorizedclient_set.filter(
                                site=auth_callback.netloc):
                            # The user has authorized this site; Reset the
                            # current session expiry, add the key to the
                            # callback url, and redirect to it.
                            request.session.set_expiry(None)

                            q = urlparse.parse_qs(auth_callback.query)
                            q.update({'key': good_key})
                            # parse_qs yields lists of values; doseq=True
                            # re-encodes them as repeated parameters instead
                            # of the repr of each list.
                            auth_callback = auth_callback._replace(
                                query=urlencode(q, doseq=True))
                            return redirect(urlparse.urlunparse(auth_callback))
                        else:
                            # The user at one time authorized this site but it
                            # was revoked (potential future functionality?).
                            # Ask for authorization again.
                            return render_to_response('mysignon/sso_auth.html',
                                                      data,
                                                      RequestContext(request))
                    else:
                        # The key provided does not match the user's key; Log
                        # the user out. It may be a different user's key.
                        logout(request)
                else:
                    # No key was provided; Proceed to authorization normally.
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))
            else:
                # The user has no key; Create one.
                request.session['key'] = AuthorizedClient.create_key(
                    request.user)
                if test_key:
                    # A key was provided, but the current user did not have one
                    # until now. Log out the user.
                    logout(request)
                else:
                    # No key was provided; Proceed to authorization.
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))

        # Only anonymous users can reach this point. This is not inside an else
        # block so that it can catch users who were logged out above.
        login_form = CustomAuthForm(auto_id=True)
        login_form.fields.pop('remember_me')
        data['login_form'] = login_form
        return render_to_response('mysignon/sso_auth.html',
                                  data,
                                  RequestContext(request))

    else:
        # Form was posted.
        action = request.POST.get('action')
        if action == 'login':
            login_form = CustomAuthForm(data=request.POST, auto_id=False)
            login_form.fields.pop('remember_me')
            if login_form.is_valid():
                user = authenticate(
                    username=login_form.cleaned_data['username'],
                    password=login_form.cleaned_data['password'])
                login(request, user)
                request.session.set_expiry(None)
                # User was logged in. Fall through to code common to
                # preauthenticated users
            else:
                if request.is_ajax():
                    return HttpResponse(
                        json.dumps({'errors': login_form.errors.items()}))
                else:
                    data['login_form'] = login_form
                    data['auth_callback_short'] = auth_callback.netloc
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))

        # Ensure that an AuthorizedClient instance exists for the current user
        # and the site that is requesting authorization.
        request.user.authorizedclient_set.get_or_create(
            site=auth_callback.netloc)

        # Ensure that the current user has a key.
        if not request.session.get('key'):
            request.session['key'] = AuthorizedClient.create_key(request.user)

        # Add the user's key to the callback url and redirect to it.
        q = urlparse.parse_qs(auth_callback.query)
        q.update({'key': request.session.get('key')})
        # doseq=True keeps the callback's existing parameters intact (see the
        # matching comment in the GET branch).
        auth_callback = auth_callback._replace(
            query=urlencode(q, doseq=True))
        auth_callback = urlparse.urlunparse(auth_callback)

        if request.is_ajax():
            return HttpResponse(json.dumps({'url': auth_callback}))
        else:
            return redirect(auth_callback)
def sso_authorize(request):
    """
    Authorizes specific web sites to utilize an existing My.jobs account

    Required on HTTP GET:
    :auth_callback: GET parameter - Desired return url when authorization
        succeeds

    Required on HTTP POST:
    :auth_callback: POST parameter, copy of :auth_callback: GET parameter

    Raises Http404 (with a message naming this view) when :auth_callback: is
    missing or has no network location.  On success the user's session key is
    added to the callback's query string as ``key`` and the user is
    redirected there (or, for AJAX posts, the url is returned as JSON).
    """
    # Common between GET and POST, callback is required.
    auth_callback = request.GET.get('auth_callback') or \
        request.POST.get('auth_callback')
    data = {'auth_callback': auth_callback}
    if auth_callback:
        auth_callback = unquote(auth_callback)
        auth_callback = urlparse.urlparse(auth_callback)
        if not auth_callback.netloc:
            # If the base url of the callback is not truthy, the url
            # must be malformed somehow
            raise Http404("mysignon.views.sso_authorize: bad callback")
    else:
        raise Http404("mysignon.views.sso_authorize: no callback")

    if request.method == 'GET':
        # Initial view after being redirected from an external site
        data['auth_callback_short'] = auth_callback.netloc

        if not request.user.is_anonymous():
            # Process logged in users first; Certain criteria may cause the
            # user to be logged out.
            good_key = request.session.get('key')
            test_key = request.GET.get('key')
            if good_key:
                # The current user already has a key available.
                if test_key:
                    # The remote site has provided a key; the user has
                    # potentially already authorized this site.
                    if test_key == good_key:
                        if request.user.authorizedclient_set.filter(
                                site=auth_callback.netloc):
                            # The user has authorized this site; Reset the
                            # current session expiry, add the key to the
                            # callback url, and redirect to it.
                            request.session.set_expiry(None)

                            q = urlparse.parse_qs(auth_callback.query)
                            q.update({'key': good_key})
                            # parse_qs yields lists of values; doseq=True
                            # re-encodes them as repeated parameters instead
                            # of the repr of each list.
                            auth_callback = auth_callback._replace(
                                query=urlencode(q, doseq=True))
                            return redirect(urlparse.urlunparse(auth_callback))
                        else:
                            # The user at one time authorized this site but it
                            # was revoked (potential future functionality?).
                            # Ask for authorization again.
                            return render_to_response('mysignon/sso_auth.html',
                                                      data,
                                                      RequestContext(request))
                    else:
                        # The key provided does not match the user's key; Log
                        # the user out. It may be a different user's key.
                        logout(request)
                else:
                    # No key was provided; Proceed to authorization normally.
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))
            else:
                # The user has no key; Create one.
                request.session['key'] = AuthorizedClient.create_key(
                    request.user)
                if test_key:
                    # A key was provided, but the current user did not have one
                    # until now. Log out the user.
                    logout(request)
                else:
                    # No key was provided; Proceed to authorization.
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))

        # Only anonymous users can reach this point. This is not inside an else
        # block so that it can catch users who were logged out above.
        login_form = CustomAuthForm(auto_id=True)
        login_form.fields.pop('remember_me')
        data['login_form'] = login_form
        return render_to_response('mysignon/sso_auth.html',
                                  data,
                                  RequestContext(request))

    else:
        # Form was posted.
        action = request.POST.get('action')
        if action == 'login':
            login_form = CustomAuthForm(data=request.POST, auto_id=False)
            login_form.fields.pop('remember_me')
            if login_form.is_valid():
                user = authenticate(
                    username=login_form.cleaned_data['username'],
                    password=login_form.cleaned_data['password'])
                login(request, user)
                request.session.set_expiry(None)
                # User was logged in. Fall through to code common to
                # preauthenticated users
            else:
                if request.is_ajax():
                    return HttpResponse(json.dumps(
                        {'errors': login_form.errors.items()}))
                else:
                    data['login_form'] = login_form
                    data['auth_callback_short'] = auth_callback.netloc
                    return render_to_response('mysignon/sso_auth.html',
                                              data,
                                              RequestContext(request))

        # Ensure that an AuthorizedClient instance exists for the current user
        # and the site that is requesting authorization.
        request.user.authorizedclient_set.get_or_create(site=auth_callback.netloc)

        # Ensure that the current user has a key.
        if not request.session.get('key'):
            request.session['key'] = AuthorizedClient.create_key(request.user)

        # Add the user's key to the callback url and redirect to it.
        q = urlparse.parse_qs(auth_callback.query)
        q.update({'key': request.session.get('key')})
        # doseq=True keeps the callback's existing parameters intact (see the
        # matching comment in the GET branch).
        auth_callback = auth_callback._replace(
            query=urlencode(q, doseq=True))
        auth_callback = urlparse.urlunparse(auth_callback)

        if request.is_ajax():
            return HttpResponse(json.dumps({'url': auth_callback}))
        else:
            return redirect(auth_callback)
return original_url except Exception, e: self.error("not sending session cookie, unknown error: %s", e) return original_url # O.K. session_cookie is valid to be returned, stash it away where it will will # get included in a HTTP Cookie headed sent to the server. self.log.debug("setting session_cookie into context '%s'", session_cookie.http_cookie()) setattr(context, 'session_cookie', session_cookie.http_cookie()) # Form the session URL by substituting the session path into the original URL scheme, netloc, path, params, query, fragment = urlparse.urlparse( original_url) path = self.session_path session_url = urlparse.urlunparse( (scheme, netloc, path, params, query, fragment)) return session_url def create_connection(self, ccache=None, verbose=0, fallback=True, delegate=False, nss_dir=None): try: rpc_uri = self.env[self.env_rpc_uri_key] principal = get_current_principal() setattr(context, 'principal', principal) # We have a session cookie, try using the session URI to see if it # is still valid
def apply_session_cookie(self, url):
    '''
    Attempt to load a session cookie for the current principal
    from the persistent secure storage. If the cookie is
    successfully loaded adjust the input url's to point to the
    session path and insert the session cookie into the per thread
    context for later insertion into the HTTP request. If the
    cookie is not successfully loaded then the original url is
    returned and the per thread context is not modified.

    Context Dependencies:

    The per thread context is expected to contain:
        principal
            The current principal the HTTP request was issued for.

    The per thread context will be updated with:
        session_cookie
            A cookie string to be inserted into the Cookie header
            of the HTTP request.

    Returns the url with its path replaced by ``self.session_path`` when
    the cookie may be sent, otherwise the url unchanged.
    '''
    original_url = url
    principal = getattr(context, 'principal', None)

    session_cookie = self.get_session_cookie_from_persistent_storage(principal)
    if session_cookie is None:
        self.log.debug("failed to find session_cookie in persistent storage for principal '%s'",
                       principal)
        return original_url
    else:
        self.debug("found session_cookie in persistent storage for principal '%s', cookie: '%s'",
                   principal, session_cookie)

    # Decide if we should send the cookie to the server
    try:
        session_cookie.http_return_ok(original_url)
    except Cookie.Expired as e:
        self.debug("deleting session data for principal '%s': %s", principal, e)
        try:
            delete_persistent_client_session_data(principal)
        except Exception as delete_error:
            # Deletion stays best-effort, but leave a trace instead of
            # silently discarding the failure.
            self.debug("failed to delete session data for principal '%s': %s",
                       principal, delete_error)
        return original_url
    except Cookie.URLMismatch as e:
        self.debug("not sending session cookie, URL mismatch: %s", e)
        return original_url
    except Exception as e:
        self.error("not sending session cookie, unknown error: %s", e)
        return original_url

    # O.K. session_cookie is valid to be returned, stash it away where it
    # will get included in a HTTP Cookie header sent to the server.
    self.log.debug("setting session_cookie into context '%s'", session_cookie.http_cookie())
    setattr(context, 'session_cookie', session_cookie.http_cookie())

    # Form the session URL by substituting the session path into the original URL
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(original_url)
    path = self.session_path
    session_url = urlparse.urlunparse((scheme, netloc, path, params, query, fragment))

    return session_url
def fkey_view(self, request, *args, **kwargs):
    """Dispatch to the admin view named by ``view_name`` in fkey-list mode.

    ``args[0]`` and ``args[1]`` are the foreign-key field name and the
    parent id; remaining positional args go to the wrapped view.  Details
    of the parent are stored on ``request.FKEY_LIST``, the fkey-list
    template names are added to ``extra_context``, and a redirect returned
    by the wrapped view is re-pointed at the matching fkey-list URL when
    one is known.

    Raises ``Exception`` if the view, the field or the parent is missing.
    """
    view_name = kwargs.pop('view_name')
    fkey_name = args[0]
    fkey_id = args[1]

    # try to rescue custom views except change_view
    if view_name == 'change_view':
        value = args[2]
        if not value.isdigit():
            try:
                self.model._default_manager.get(pk=value)
            except (ValueError, self.model.DoesNotExist):
                return HttpResponseRedirect('../../%s' % value)

    if not hasattr(self, view_name):
        raise Exception('FkeyList: view "%s" does not exist' % view_name)

    # check fkey instance
    if not hasattr(self.model, fkey_name):
        raise Exception('FkeyList: field "%s" does not exist in model "%s"'
                        % (fkey_name, self.model._meta.model_name))

    related_model = getattr(self.model, fkey_name).field.rel.to
    parent = related_model.objects.filter(pk=fkey_id).first()
    if not parent:
        raise Exception('FkeyList: fkey "%s" #%s for "%s" does not exist'
                        % (fkey_name, fkey_id, self.model._meta.model_name))

    # default and fkey links dependencies
    link_name_parent = AdminViewName(parent.__class__._meta)
    link_name = AdminViewName(self.model._meta)
    link_args = args[:2]
    link_deps = {
        link_name.add: reverse(link_name.add_fkeylist, None, link_args),
        link_name.changelist: reverse(
            link_name.changelist_fkeylist, None, link_args),
        # True means "compute the target per redirect" (handled below).
        link_name.change: True,
    }

    request.FKEY_LIST = {
        'fkey_name': fkey_name,
        'fkey_opts': getattr(self.model, fkey_name).field.rel.to._meta,
        'id': fkey_id,
        'item': parent,
        'item_link': reverse(link_name_parent.change, None, (args[1],)),
        'list_link': reverse(link_name_parent.changelist, None, ()),
        'link_name': link_name,
    }

    # update context of any view
    template_attrs = (
        'fkey_list_parent_change_list_template',
        'fkey_list_parent_change_form_template',
        'fkey_list_parent_delete_confirmation_template',
        'fkey_list_parent_delete_selected_confirmation_template',
        'fkey_list_parent_object_history_template',
    )
    extra_context = kwargs.get('extra_context', {})
    extra_context.update(
        dict((attr, getattr(self, attr)) for attr in template_attrs))
    kwargs['extra_context'] = extra_context

    # get response
    fkeyargs = tuple(args[:2])
    clearargs = tuple(args[2:])
    response = getattr(self, view_name)(request, *clearargs, **kwargs)

    # try to return fkey location in HttpResponseRedirect instead original
    location = (isinstance(response, HttpResponseRedirect)
                and response['Location'])
    newlocation = None
    if location:
        # parse redirect url to save GET querystring, etc
        parsed = urlparse.urlparse(location)

        # resolve redirect location
        try:
            match = resolve(parsed.path)
        except Resolver404:
            match = None

        if match is not None:
            newlocation = link_deps.get(match.view_name, None)
            # A string value is already the final url ("add new after
            # saving" and "return to changelist" redirects).  Any other
            # truthy value for the change view is the "continue editing"
            # case, whose url depends on the object id in the redirect.
            if (newlocation and not isinstance(newlocation, str)
                    and match.view_name == link_name.change):
                newlocation = reverse(link_name.change_fkeylist, None,
                                      fkeyargs + (match.args[0],))

    if newlocation:
        response['Location'] = urlparse.urlunparse(
            parsed[:2] + (newlocation,) + parsed[3:]
        )

    return response