def _generalize_url(self, url):
    parts = urlsplit(url)
    simplified_url = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
    url = simplified_url
    segments = split_path_into_segments(parts.path)
    parent_is_collection = False
    for segment in segments:
        simplified_url = simplified_url + '/' + (ID_SUBSTITUTE_CHAR if parent_is_collection else segment)
        url = url + '/' + segment
        if url in self and self._is_a_collection(url):
            parent_is_collection = True
        else:
            parent_is_collection = False
    generalized_path = urlsplit(simplified_url).path
    return urlunsplit((parts.scheme, parts.netloc, generalized_path, parts.query, parts.fragment))
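# `split_path_into_segments` is an assumed helper not shown above; a minimal
# sketch of the behaviour _generalize_url relies on:
def split_path_into_segments(path):
    # '/a/b/c' -> ['a', 'b', 'c'], dropping empty segments
    return [segment for segment in path.split('/') if segment]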
def to_python(self, value):

    def split_url(url):
        """
        Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
        ``ValidationError`` exception for certain malformed URLs).
        """
        try:
            return list(urlparse.urlsplit(url))
        except ValueError:
            # urlparse.urlsplit can raise a ValueError with some
            # misformatted URLs.
            raise ValidationError(self.error_messages["invalid"])

    value = super(URLField, self).to_python(value)
    if value:
        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = "http"
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1] = url_fields[2]
            url_fields[2] = ""
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlparse.urlunsplit(url_fields))
        if not url_fields[2]:
            # the path portion may need to be added before query params
            url_fields[2] = "/"
        value = urlparse.urlunsplit(url_fields)
    return value
def _get_robotparser(self, link):
    """Return the proper robots parser for the given url or None if one
    cannot be constructed. Robot parsers are cached per scheme and netloc."""
    # only some schemes have a meaningful robots.txt file
    if link.scheme != 'http' and link.scheme != 'https':
        debugio.debug('crawler._get_robotparser() called with unsupported scheme (%s)' % link.scheme)
        return None
    # split out the key part of the url
    location = urlparse.urlunsplit((link.scheme, link.netloc, '', '', ''))
    # try to create a new robotparser if we don't already have one
    if location not in self._robotparsers:
        import httplib
        debugio.info(' getting robots.txt for %s' % location)
        self._robotparsers[location] = None
        try:
            rp = robotparser.RobotFileParser()
            rp.set_url(urlparse.urlunsplit(
                (link.scheme, link.netloc, '/robots.txt', '', '')))
            rp.read()
            self._robotparsers[location] = rp
        except (TypeError, IOError, httplib.HTTPException):
            # ignore any problems setting up robot parser
            pass
    return self._robotparsers[location]
def __init__(self, baseUri, headers=None, maxClients=None, maxConnections=None):
    self._headers = headers or HTTPHeaders()
    self._user = None
    self._passwd = None
    baseUri = baseUri.rstrip('/')
    self._scheme, loc, self._path, query, frag = urlparse.urlsplit(baseUri)
    userpass, self._hostport = urllib.splituser(loc)
    if userpass:
        self._user, self._passwd = urllib.splitpasswd(userpass)
    self._baseUri = urlparse.urlunsplit(
        (self._scheme, self._hostport, self._path, None, None))
    if self._scheme not in ('http', 'https'):
        raise ValueError(self._scheme)
    self._dispatcher = RequestDispatcher(maxClients=maxClients,
                                         maxConnections=maxConnections)
    self._queryFragment = urlparse.urlunsplit(('', '', '', query, frag))
def key(self, obj):
    """Return a cache key (relative path to file in cache) for an object"""
    if isnumpy(obj):
        # Key is byte view sha1 hash with .h5 extension
        byteview = obj.view(numpy.uint8)
        key = str(hashlib.sha1(byteview).hexdigest()) + '.h5'
    elif isurl(obj):
        # key is URL filename with an appended hash (for uniqueness)
        p = urlparse.urlsplit(obj)
        urlquery = urlparse.urlunsplit([p[0], p[1], p[2], p[3], None])
        urlpath = urlparse.urlunsplit([p[0], p[1], p[2], None, None])
        urlhash = self._hash(obj)
        (filename, ext) = splitextension(path.basename(urlpath))
        key = str(urlhash) + str(ext)
    elif os.path.isfile(obj):
        # within cache?
        filebase = obj.split(self.root(), 1)
        if len(filebase) == 2:
            # key is subpath within cache
            key = filebase[1][1:]
        else:
            # key is filename with unique appended hash
            (head, tail) = os.path.split(obj)
            (filename, ext) = splitextension(tail)
            namehash = hashlib.sha1(tail).hexdigest()
            key = filename + '_' + str(namehash[0:7]) + ext
    elif (path.isfile(self.abspath(obj)) or path.isdir(self.abspath(obj))):
        key = obj  # Already a cache key
    elif isstring(obj):
        key = obj  # Use arbitrary string if not file or url
    else:
        raise CacheError('[bobo.cache][ERROR]: Unsupported object for constructing key')
    return key
def get(self):
    articles = models.Article.all().order('-pubdate').fetch(9)
    items = []
    mostRecentDate = None
    url_parts = list(urlparse.urlsplit(self.request.url)[0:2])
    for article in articles:
        if not mostRecentDate:
            mostRecentDate = article.pubdate
        article.rimages = [db.get(image) for image in article.images]
        url = urlparse.urlunsplit(url_parts + ['/page/%s' % article.slug, '', ''])
        items.append(
            PyRSS2Gen.RSSItem(
                title=article.title,
                link=url,
                description=article.text,
                pubDate=article.pubdate))
    rss = PyRSS2Gen.RSS2(
        title="RSS feed",
        link=urlparse.urlunsplit(url_parts + ['', '', '']),
        description="My RSS feed",
        lastBuildDate=mostRecentDate,
        items=items,
    )
    self.response.headers['Content-Type'] = 'text/xml'
    self.response.out.write(rss.to_xml())
def normalize_url(url, domain_canonical=None):
    """
    Ensure we have a valid url - raise exception if not. If given, we
    convert the domain to a domain_canonical
    """
    url = url.strip()
    rgURL = list(urlparse.urlsplit(url))
    if rgURL[split.scheme] == '':
        url = r"http://%s" % url
        rgURL = list(urlparse.urlsplit(url))
    # Invalid protocol
    if rgURL[split.scheme] != "http" and rgURL[split.scheme] != "https":
        raise reqfilter.Error("Invalid protocol: %s" % rgURL[split.scheme])
    if domain_canonical is not None:
        rgURL[split.domain] = domain_canonical
    if rgURL[split.domain]:
        rgURL[split.domain] = rgURL[split.domain].lower()
    if not rgURL[split.domain] or not regDomain.search(rgURL[split.domain]) \
            or len(rgURL[split.domain]) > 255:
        raise reqfilter.Error("Invalid URL: %s" % urlparse.urlunsplit(rgURL))
    # Always end naked domains with a trailing slash as canonical
    if rgURL[split.path] == '':
        rgURL[split.path] = '/'
    return urlparse.urlunsplit(rgURL)
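# `split` is an assumed module-level holder of indexes into the urlsplit()
# 5-tuple; a minimal sketch consistent with the usage above:
class split(object):
    # indexes into (scheme, netloc, path, query, fragment)
    scheme, domain, path, query, fragment = range(5)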
def run(self):
    """Run listing directive."""
    _fname = self.arguments.pop(0)
    fname = _fname.replace('/', os.sep)
    try:
        lang = self.arguments.pop(0)
        self.options['code'] = lang
    except IndexError:
        self.options['literal'] = True
    if len(self.folders) == 1:
        listings_folder = next(iter(self.folders.keys()))
        if fname.startswith(listings_folder):
            fpath = os.path.join(fname)  # new syntax: specify folder name
        else:
            fpath = os.path.join(listings_folder, fname)  # old syntax: don't specify folder name
    else:
        fpath = os.path.join(fname)  # must be new syntax: specify folder name
    self.arguments.insert(0, fpath)
    if 'linenos' in self.options:
        self.options['number-lines'] = self.options['linenos']
    with io.open(fpath, 'r+', encoding='utf8') as fileobject:
        self.content = fileobject.read().splitlines()
        self.state.document.settings.record_dependencies.add(fpath)
        target = urlunsplit(("link", 'listing', fpath.replace('\\', '/'), '', ''))
        src_target = urlunsplit(("link", 'listing_source', fpath.replace('\\', '/'), '', ''))
        src_label = self.site.MESSAGES('Source')
        generated_nodes = (
            [core.publish_doctree('`{0} <{1}>`_ `({2}) <{3}>`_'.format(
                _fname, target, src_label, src_target))[0]])
        generated_nodes += self.get_code_from_file(fileobject)
    return generated_nodes
def __init__(self, address, name=""):
    """Bind the publisher class to a port."""
    # pylint: disable=E1103
    self.name = name
    self.destination = address
    self.publish = context.socket(zmq.PUB)
    # Check for port 0 (random port)
    u__ = urlsplit(self.destination)
    port = u__.port
    if port == 0:
        dest = urlunsplit((u__.scheme, u__.hostname,
                           u__.path, u__.query, u__.fragment))
        self.port_number = self.publish.bind_to_random_port(dest)
        netloc = u__.hostname + ":" + str(self.port_number)
        self.destination = urlunsplit((u__.scheme, netloc, u__.path,
                                       u__.query, u__.fragment))
    else:
        self.publish.bind(self.destination)
        self.port_number = port
    LOGGER.info("publisher started on port %s", str(self.port_number))
    # Initialize no heartbeat
    self._heartbeat = None
    self._pub_lock = Lock()
def _load_uri(self, base_uri, uri_to_resolve):
    """
    Obtain a remote instruction. Returns the instruction as a python
    object, along with the resolved uri
    """
    resolved_uri = urlparse.urlsplit(urlparse.urljoin(base_uri, uri_to_resolve))
    base_scheme = urlparse.urlsplit(base_uri).scheme
    if base_scheme is not None and base_scheme != resolved_uri.scheme:
        raise SchemeSecurityError("Cannot cross from '%s' to '%s'" % (
            base_scheme, resolved_uri.scheme))
    try:
        if resolved_uri.scheme in ['http', 'https']:
            # requests expects a string URL, not a SplitResult
            instruction = json.loads(requests.get(urlparse.urlunsplit(resolved_uri)).text)
        elif resolved_uri.scheme == '':
            instruction = json.load(open(urlparse.urlunsplit(resolved_uri)))
        else:
            raise InvalidInstructionError("Reference to unsupported scheme '%s'" % (
                resolved_uri.scheme))
        return instruction, urlparse.urlunsplit(resolved_uri)
    except requests.exceptions.RequestException as e:
        raise InvalidInstructionError("Couldn't load '%s': %s" % (resolved_uri, e))
    except IOError as e:
        raise InvalidInstructionError("Couldn't open '%s': %s" % (resolved_uri, e))
    except ValueError:
        raise InvalidInstructionError("Invalid JSON in '%s'" % resolved_uri)
def _split_uri(self, identifier):
    if isinstance(identifier, URIRef):
        scheme, netloc, path, query, fragment = urlsplit(identifier)
        if query:
            namespace, resource_id = split_uri(identifier)
        if fragment:
            # if we have a fragment, we will split there
            namespace, resource_id = urldefrag(identifier)
            namespace += "#"
        elif "/" in path and len(path) > 1:
            splits = path.split("/")
            if path.endswith("/"):
                resource_id = "/".join(splits[-2:])
                path = "/".join(splits[:-2]) + "/"
                namespace = urlunsplit((scheme, netloc, path, "", ""))
            else:
                resource_id = "/".join(splits[-1:])
                path = "/".join(splits[:-1]) + "/"
                namespace = urlunsplit((scheme, netloc, path, "", ""))
        elif path:
            resource_id = path
            namespace = urlunsplit((scheme, netloc, "", "", ""))
        else:
            namespace, resource_id = split_uri(identifier)
        log.debug("Split %s to %s, %s" % (identifier, namespace, resource_id))
        return namespace, resource_id
    else:
        raise ValueError("Unknown identifier type %r" % identifier)
def __init__(self, url):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)
    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    r = requests.get(ddsurl)
    r.raise_for_status()
    dds = r.text.encode('utf-8')
    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    r = requests.get(dasurl)
    r.raise_for_status()
    das = r.text.encode('utf-8')

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.descr)
    for var in walk(self.dataset, SequenceType):
        var.data = SequenceProxy(url, var.id, var.descr)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if index and isinstance(target.data, BaseProxy):
                target.data.slice = fix_slice(index, target.shape)
def login(self):
    """
    Set a cookie and redirect to the url that we tried to
    authenticate against originally.

    FIXME - I don't think we need this any more now that the EULA
    is gone -EAD
    """
    request = self.REQUEST
    response = request["RESPONSE"]

    login = request.get("__ac_name", "")
    password = request.get("__ac_password", "")
    submitted = request.get("submitted", "")
    pas_instance = self._getPAS()

    if pas_instance is not None:
        try:
            pas_instance.updateCredentials(request, response, login, password)
        except (KeyError, POSKeyError):
            # see defect ZEN-2942 If the time changes while the server is running
            # set the session database to a sane state.
            ts = self.unrestrictedTraverse("/temp_folder/session_data")
            ts._reset()
            # try again and if it fails this time there isn't anything we can do
            pas_instance.updateCredentials(request, response, login, password)

    came_from = request.form.get("came_from") or ""
    if came_from:
        parts = urlparse.urlsplit(came_from)
        querydict = parse_qs(parts[3])
        querydict.pop("terms", None)
        if "submitted" not in querydict.keys():
            querydict["submitted"] = submitted
        newqs = urllib.urlencode(querydict, doseq=True)
        parts = parts[:3] + (newqs,) + parts[4:]
        came_from = urlparse.urlunsplit(parts)
    else:
        submittedQs = "submitted=%s" % submitted
        came_from = "/zport/dmd?%s" % submittedQs

    if not self.dmd.acceptedTerms:
        url = "%s/zenoss_terms/?came_from=%s" % (
            self.absolute_url(), urllib.quote(came_from))
    else:
        # get rid of host part of URL (prevents open redirect attacks)
        clean_url = ["", ""] + list(urlparse.urlsplit(came_from))[2:]
        url = urlparse.urlunsplit(clean_url)

    fragment = request.get("fragment", "")
    if fragment:
        fragment = urllib.unquote(fragment)
        if not fragment.startswith("#"):
            fragment = "#" + fragment
        url += fragment

    if self.dmd.uuid is None:
        self.dmd.uuid = str(uuid1())
    return response.redirect(url)
def resolve_links(self, links, pageurl):
    for x in links:
        p = urlparse.urlsplit(x)
        if p.scheme == "http":
            if p.netloc != self.hostname:
                # Remote link
                continue
            # Turn this into a host-relative url
            p = ('', '', p.path, p.query, '')

        if p[4] != "" or p[3] != "":
            # Remove fragments (part of the url past #)
            p = (p[0], p[1], p[2], '', '')

        if p[0] == "":
            if p[2] == "":
                # Nothing in the path, so it's a pure fragment url
                continue
            if p[2][0] == "/":
                # Absolute link on this host, so just return it
                yield urlparse.urlunsplit(p)
            else:
                # Relative link
                yield urlparse.urljoin(pageurl, urlparse.urlunsplit(p))
        else:
            # Ignore unknown url schemes like mailto
            pass
def parse(self, response):
    sel = Selector(response)

    # Extract any cars found
    cars = sel.xpath('//*[contains(@class, "inv-type-used")]')
    for c in cars:
        car = Car()

        # Title and year
        car['title'] = c.xpath('.//div/div/h1/a/text()').extract()[0].strip()
        car['year'] = car['title'][0:4]

        # Price, but remove non-number characters.
        # Examples: '$12,000', 'Please Call', etc.
        price = c.xpath('.//*[contains(@class, "value")]/text()').extract()[0]
        car['price'] = ''.join(d for d in price if d.isdigit())

        # url
        path = c.xpath('.//div/div/h1/a/@href').extract()[0]
        url = urlparse.urlparse(response.url)
        car['url'] = urlparse.urlunsplit([url.scheme, url.netloc, path, None, None])

        # Certain specs are frequently missing, so we need to handle
        # them with try / except
        specs = [
            {'name': 'vin',
             'xpath': './/*/dt[text()="VIN:"]/following-sibling::dd/text()'},
            {'name': 'color',
             'xpath': './/*/dt[text()="Exterior Color:"]/following-sibling::dd/text()'},
            {'name': 'miles',
             'xpath': './/*/dt[text()="Mileage:"]/following-sibling::dd/text()'},
            {'name': 'transmission',
             'xpath': './/*/dt[text()="Transmission:"]/following-sibling::dd/text()'},
        ]
        for s in specs:
            try:
                car[s['name']] = c.xpath(s['xpath']).extract()[0]
            except IndexError:
                car[s['name']] = None

        yield car

    # If there's a next page link, parse it for cars as well
    next_links = sel.xpath('//*[@rel="next"]/@href').extract()
    if len(next_links) > 0:
        query = next_links[0]
        url = urlparse.urlparse(response.url)
        base = urlparse.urlunsplit([url.scheme, url.netloc, url.path, None, None])
        next_url = urlparse.urljoin(base, query)  # Construct url
        yield Request(next_url, callback=self.parse)
def get_relative_url(destination, source):
    """Get relative URL between two sources.

    http://stackoverflow.com/a/7469668/315168

    :param destination:
    :param source:
    :return: tuple (is same domain, relative url)
    """
    u_dest = urlparse.urlsplit(destination)
    u_src = urlparse.urlsplit(source)

    _uc1 = urlparse.urlunsplit(u_dest[:2] + tuple('' for i in range(3)))
    _uc2 = urlparse.urlunsplit(u_src[:2] + tuple('' for i in range(3)))

    if _uc1 != _uc2:
        ## This is a different domain
        return False, destination

    # If there is no / component in url assume it's root path
    src_path = u_src.path or "/"

    _relpath = posixpath.relpath(u_dest.path, posixpath.dirname(src_path))
    return True, _relpath
    # return True, urlparse.urlunsplit(('', '', _relpath, u_dest.query, u_dest.fragment))
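# Illustrative behaviour of the helper above (hypothetical URLs):
# >>> get_relative_url('http://example.com/a/b.html', 'http://example.com/a/c/d.html')
# (True, '../b.html')
# >>> get_relative_url('http://other.com/x', 'http://example.com/')
# (False, 'http://other.com/x')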
def rewrite_urls(origin_url, urls):
    origin_pack = urlparse.urlsplit(origin_url)
    for u in urls:
        # kill breaks
        if u:
            u = re.sub("(\n|\t)", "", u)
        pack = urlparse.urlsplit(u)
        (scheme, netloc, path, query, fragment) = pack

        # try to rewrite scheme
        scheme = rewrite_scheme(pack.scheme)

        # rewrite netloc to include credentials
        if origin_pack.username and pack.hostname == origin_pack.hostname:
            netloc = assemble_netloc(origin_pack.username,
                                     origin_pack.password, pack.hostname, pack.port)

        # reassemble into url
        new_u = urlparse.urlunsplit((scheme, netloc, path, query, None))

        # no scheme or netloc, it's a path on-site
        if not scheme and not netloc and (path or query):
            path_query = urlparse.urlunsplit((None, None, path, query, None))
            new_u = urlparse.urljoin(origin_url, path_query)

        # quote spaces
        new_u = new_u.replace(" ", "%20")

        if new_u:
            yield new_u
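# `rewrite_scheme` and `assemble_netloc` are assumed helpers; a minimal sketch
# of the netloc assembly that the credential rewrite above relies on:
def assemble_netloc(username, password, hostname, port):
    # 'user', 'pw', 'host', 8080 -> 'user:pw@host:8080'
    netloc = hostname or ''
    if username:
        credentials = username if password is None else '%s:%s' % (username, password)
        netloc = '%s@%s' % (credentials, netloc)
    if port:
        netloc = '%s:%s' % (netloc, port)
    return netloc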
def login(self):
    """
    Set a cookie and redirect to the url that we tried to
    authenticate against originally.

    FIXME - I don't think we need this any more now that the EULA
    is gone -EAD
    """
    request = self.REQUEST
    response = request['RESPONSE']

    login = request.get('__ac_name', '')
    password = request.get('__ac_password', '')
    submitted = request.get('submitted', '')
    pas_instance = self._getPAS()

    if pas_instance is not None:
        try:
            pas_instance.updateCredentials(request, response, login, password)
        except (KeyError, POSKeyError):
            # see defect ZEN-2942 If the time changes while the server is running
            # set the session database to a sane state.
            ts = self.unrestrictedTraverse('/temp_folder/session_data')
            ts._reset()
            _originalResetCredentials(self, request, response)

    came_from = request.form.get('came_from') or ''
    if came_from:
        parts = urlparse.urlsplit(came_from)
        querydict = parse_qs(parts[3])
        querydict.pop('terms', None)
        if 'submitted' not in querydict.keys():
            querydict['submitted'] = submitted
        newqs = urllib.urlencode(querydict, doseq=True)
        parts = parts[:3] + (newqs,) + parts[4:]
        came_from = urlparse.urlunsplit(parts)
    else:
        submittedQs = 'submitted=%s' % submitted
        came_from = '/zport/dmd?%s' % submittedQs

    if not self.dmd.acceptedTerms:
        url = "%s/zenoss_terms/?came_from=%s" % (
            self.absolute_url(), urllib.quote(came_from))
    else:
        # get rid of host part of URL (prevents open redirect attacks)
        clean_url = ['', ''] + list(urlparse.urlsplit(came_from))[2:]
        url = urlparse.urlunsplit(clean_url)

    fragment = request.get('fragment', '')
    if fragment:
        fragment = urllib.unquote(fragment)
        if not fragment.startswith('#'):
            fragment = '#' + fragment
        url += fragment

    if self.dmd.uuid is None:
        self.dmd.uuid = str(uuid1())
    return response.redirect(url)
def handler(self, fname, language='text', linenumbers=False, filename=None,
            site=None, data=None, lang=None, post=None):
    """Create HTML for a listing."""
    fname = fname.replace('/', os.sep)
    if len(self.folders) == 1:
        listings_folder = next(iter(self.folders.keys()))
        if fname.startswith(listings_folder):
            fpath = os.path.join(fname)  # new syntax: specify folder name
        else:
            # old syntax: don't specify folder name
            fpath = os.path.join(listings_folder, fname)
    else:
        # must be new syntax: specify folder name
        fpath = os.path.join(fname)
    linenumbers = 'table' if linenumbers else False
    deps = [fpath]
    with open(fpath, 'r') as inf:
        target = urlunsplit(
            ("link", 'listing', fpath.replace('\\', '/'), '', ''))
        src_target = urlunsplit(
            ("link", 'listing_source', fpath.replace('\\', '/'), '', ''))
        src_label = self.site.MESSAGES('Source')
        data = inf.read()
        lexer = pygments.lexers.get_lexer_by_name(language)
        formatter = pygments.formatters.get_formatter_by_name(
            'html', linenos=linenumbers)
        output = '<a href="{1}">{0}</a> <a href="{3}">({2})</a>'.format(
            fname, target, src_label, src_target) + pygments.highlight(
                data, lexer, formatter)
    return output, deps
def video():
    '''
    Video request handler.

    :return: list of available videos in json format.
    '''
    entries = []
    for entry in os.walk(settings.VIDEO_FILES_PATH):
        if not entry[2]:
            # there is no file
            continue
        date = os.path.basename(entry[0])
        for basename in entry[2]:
            filename = os.path.join(entry[0], basename)
            relpath = os.path.relpath(filename, start=settings.VIDEO_FILES_PATH)
            parts = list(urlparse.urlsplit(request.base_url)[:2])
            parts.append(settings.VIDEO_FILES_LOCATION + '/' + relpath)
            parts.extend(['', ''])
            url = urlparse.urlunsplit(parts)
            parts[2] = settings.THUMBNAIL_FILES_LOCATION + '/'
            parts[2] += os.path.splitext(relpath)[0] + '.png'
            thumbnail = urlparse.urlunsplit(parts)
            entries.append({'date': date, 'url': url, 'thumbnail': thumbnail})
    entries.sort(reverse=True, key=lambda x: x['date'])
    response = Response()
    response.headers['Content-Type'] = 'application/json'
    response.data = json.dumps(entries)
    return response
def requestData(self, basepath):
    self.log.info("Attempting to communicate with Nexus server.")
    auth = "Basic " + base64.b64encode(self.user + ':' + self.pasw)
    deppath = self.url[2] + basepath
    delpath = deppath + '/artifactorymigrator'
    runpath = delpath + '/run'
    depurl = urlparse.urlunsplit((self.url[0], self.url[1], deppath, '', ''))
    delurl = urlparse.urlunsplit((self.url[0], self.url[1], delpath, '', ''))
    runurl = urlparse.urlunsplit((self.url[0], self.url[1], runpath, '', ''))
    delheaders = {'User-Agent': 'nex2art', 'Authorization': auth}
    depheaders, runheaders = delheaders.copy(), delheaders.copy()
    depheaders['Content-Type'] = 'application/json'
    runheaders['Content-Type'] = 'text/plain'
    depjson = {'name': 'artifactorymigrator', 'type': 'groovy'}
    depjson['content'] = pkgutil.get_data('nex2art', 'resources/plugin.groovy')
    depbody = json.dumps(depjson)
    res, data = None, None
    self.log.info("Deploying extraction plugin to Nexus.")
    ex, _ = self.dorequest(depurl, depbody, depheaders, 'POST', "deploy")
    if ex is None:
        try:
            self.log.info("Executing Nexus extraction.")
            ex, res = self.dorequest(runurl, None, runheaders, 'POST',
                                     "execute", True)
        finally:
            self.log.info("Deleting extraction plugin from Nexus.")
            self.dorequest(delurl, None, delheaders, 'DELETE', "delete")
    if res is not None and 'result' in res:
        data = json.loads(res['result'])
    if ex is not None:
        self.log.error("Error accessing Nexus instance: %s", ex)
        return "Error accessing Nexus instance."
    self.log.info("Successfully fetched Nexus data.")
    return data
def __init__(self, db, co, logger, host=cereconf.AD_SERVER_HOST,
             port=cereconf.AD_SERVER_PORT, url=None,
             ad_ldap=cereconf.AD_LDAP, mock=False):
    """
    Initialize AD synchronization, i.e. connect to AD service on
    given host.
    """
    self.db = db
    self.co = co
    self.logger = logger

    if mock:
        self.logger.warn("Using mock server")
        from Cerebrum.modules.ad import ADTesting
        self.server = ADTesting.MockADServer(self.logger)
    else:
        parts = urlparse.urlsplit(url or '')
        scheme = parts.scheme or 'https'
        host = parts.hostname or host
        port = parts.port or port
        username = parts.username or cereconf.AD_DOMAIN_ADMIN_USER
        password = read_password(username, host)

        # Log the URL with the password masked out
        netloc = "%s:%s@%s:%d" % (username, '********', host, port)
        url = urlparse.urlunsplit((scheme, netloc) + parts[2:])
        self.logger.debug("Connecting to %s", url)

        # Rebuild the URL with the real password for the actual connection
        netloc = "%s:%s@%s:%d" % (username, password, host, port)
        url = urlparse.urlunsplit((scheme, netloc) + parts[2:])
        self.server = xmlrpclib.Server(url)

    self.ad_ldap = ad_ldap
def get_onedrive_embed_code(self, onedrive_url):
    onedrive_url = onedrive_url.strip()

    # check if it already is an embed code
    embed_code_regex = '<iframe'
    matched = re.match(embed_code_regex, onedrive_url, re.IGNORECASE)
    if matched is not None:
        return onedrive_url

    scheme, netloc, path, query_string, fragment = urlsplit(onedrive_url)
    query_params = parse_qs(query_string)

    # OneDrive for Business
    odb_regex = r'https?:\/\/((\w|-)+)-my.sharepoint.com\/'
    matched = re.match(odb_regex, onedrive_url, re.IGNORECASE)
    if matched is not None:
        query_params['action'] = ['embedview']
        new_query_string = urlencode(query_params, doseq=True)
        document_url = urlunsplit((scheme, netloc, path, new_query_string, fragment))
        return self.EMBED_CODE_TEMPLATE.format(document_url)

    # OneDrive (for consumers)
    onedrive_regex = r'(https?:\/\/(onedrive\.)?)(live\.com)'
    matched = re.match(onedrive_regex, onedrive_url, re.IGNORECASE)
    if matched is not None:
        new_path = path.replace('view.aspx', 'embed').replace('redir', 'embed')
        query_params = parse_qs(query_string)
        query_params['em'] = ['2']
        new_query_string = urlencode(query_params, doseq=True)
        document_url = urlunsplit((scheme, netloc, new_path, new_query_string, fragment))
        return self.EMBED_CODE_TEMPLATE.format(document_url)
def verify_image(self, baseURL, imageURL):
    fullImageURL = imageURL
    if not urlsplit(imageURL).scheme:
        # Resolve relative path
        fullImageURL = urljoin(baseURL, imageURL)
    echo("Checking image: {}".format(fullImageURL))
    urlparts = urlsplit(fullImageURL)
    escapedparts = self.get_escaped_address_parts_minus_host(urlparts)
    if urlparts.netloc and urlparts.path:
        try:
            conn = httplib.HTTPConnection(urlparts.netloc)
            conn.request("HEAD", urlunsplit(escapedparts))
            echo("Going to path: {}\n".format(urlunsplit(escapedparts)))
            res = conn.getresponse()
        except Exception as inst:
            self.fail("While checking image {}, encountered exception: {}".format(
                fullImageURL, inst))
        self.assertEqual(
            res.status, 200,
            'The image at {} is not OK. Looking for it resulted in HTTP code: {}'.format(
                urlunsplit([urlparts.scheme, urlparts.netloc, escapedparts[2],
                            escapedparts[3], escapedparts[4]]),
                res.status))
    else:
        self.fail("The URL for this image is invalid: {}".format(fullImageURL))
def from_url(url, headers=None, allowed=None):
    if headers is None:
        headers = {}
    result = urlparse.urlsplit(url)
    if result.scheme == 'qpid':
        # remove the queue from the url
        queue, query = extract_param('queue', result.query)
        if queue is None:
            raise ApplicationException('No queue provided in qpid url!')
        new_url = urlparse.urlunsplit((result.scheme, result.netloc,
                                       result.path, query, result.fragment))
        return QpidPublisher(new_url, queue, headers, allowed)
    elif result.scheme == 'rabbit':
        queue, query = extract_param('queue', result.query)
        if queue is None:
            raise ApplicationException('No queue provided in rabbit url!')
        new_url = urlparse.urlunsplit(('amqp', result.netloc, result.path,
                                       query, result.fragment))
        return RabbitPublisher(new_url, queue, headers, allowed)
    elif result.scheme == 'log':
        return LogPublisher(allowed)
    elif result.scheme == 'count':
        return CountPublisher(allowed)
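# `extract_param` is an assumed helper that pops one parameter out of a query
# string, returning (value, remaining_query); a minimal sketch of the
# behaviour from_url relies on:
import urllib
import urlparse

def extract_param(name, query):
    value, rest = None, []
    for k, v in urlparse.parse_qsl(query):
        if k == name and value is None:
            value = v
        else:
            rest.append((k, v))
    return value, urllib.urlencode(rest)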
def getComponentUriRel(self, path):
    file_uri = urlparse.urlunsplit(urlparse.urlsplit(str(self.getComponentUri(path))))
    ro_uri = urlparse.urlunsplit(urlparse.urlsplit(str(self.getRoUri())))
    if ro_uri is not None and file_uri.startswith(ro_uri):
        file_uri_rel = file_uri.replace(ro_uri, "", 1)
    else:
        file_uri_rel = path
    return rdflib.URIRef(file_uri_rel)
def redirect_to_login_response(self, request, params):
    parts = list(urlparse.urlsplit(request.path))
    parts[3] = self.urlencode(params)
    query = {'next': urlparse.urlunsplit(parts)}
    parts[2] = self.get_login_uri()
    parts[3] = self.urlencode(query)
    return Response(302, headers={'Location': urlparse.urlunsplit(parts)})
def encode_url(url):
    url = url.strip().encode('utf-8')
    url = url.replace('ENCODED:', '')
    p = urlparse.urlsplit(urllib2.unquote(url))
    query = urlparse.parse_qsl(p.query)
    p2 = p._replace(query=urllib.urlencode(query))
    p2 = p2._replace(path=urllib2.quote(p2.path))
    print "ENCODED:{0}".format(urlparse.urlunsplit(p2))
    return "ENCODED:{0}".format(urlparse.urlunsplit(p2))
def send_request(self, request, test=False):
    """
    Send a HTTP request to server

    >>> replayer = RequestLogReplayer(verbose=False)
    >>> request = {'host': 'www.zuikong.com', 'uri': '/xs/46/ping.json',
    ...            'method': 'POST', 'format': 'JSON',
    ...            'time': datetime(2011, 3, 25, 14, 32, 5),
    ...            'parameters': {'v': 2}}
    >>> result = replayer.send_request(request, test=True)
    Replayed POST "/xs/46/ping.json" on "www.zuikong.com" at 2011-03-25 14:32:05
    Processed as JSON
    Parameters: {'v': 2}
    # Started at 2011-03-27 22:21:42. Completed at 2011-03-27 22:21:42. Taken 2 ms
    <BLANKLINE>
    <BLANKLINE>
    >>> request = {'host': 'www.zuikong.com', 'uri': '/xs/46/ping.json',
    ...            'method': 'GET', 'format': 'JSON',
    ...            'time': datetime(2011, 3, 25, 14, 32, 5),
    ...            'parameters': {'v': 2}}
    >>> result = replayer.send_request(request, test=True)
    Replayed GET "/xs/46/ping.json" on "www.zuikong.com" at 2011-03-25 14:32:05
    Processed as JSON
    Parameters: {'v': 2}
    # Started at 2011-03-27 22:21:42. Completed at 2011-03-27 22:21:42. Taken 0 ms
    <BLANKLINE>
    <BLANKLINE>
    """
    start_time = datetime.now()
    curl = pycurl.Curl()
    data = urlencode(request['parameters'])

    # Setup url and parameters
    if request['method'] == 'GET':
        curl.setopt(pycurl.URL, urlunsplit(('http', request['host'],
                                            request['uri'], data, '')))
    elif request['method'] == 'POST':
        curl.setopt(pycurl.URL, urlunsplit(('http', request['host'],
                                            request['uri'], '', '')))
        curl.setopt(pycurl.POST, 1)
        curl.setopt(pycurl.POSTFIELDS, data)

    # Setup HTTP headers
    if request['format'] == 'HTML':
        content_type = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    elif request['format'] == 'JSON':
        content_type = 'Accept: application/json, text/javascript, */*; q=0.01'
    elif request['format'] == 'MANIFEST':
        content_type = 'Accept: text/cache-manifest;q=0.9'
    curl.setopt(pycurl.HTTPHEADER, [content_type])

    # Setup other options
    response = StringIO()
    curl.setopt(pycurl.USERAGENT, ANDROID_USER_AGENT)
    curl.setopt(pycurl.WRITEFUNCTION, response.write)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    curl.setopt(pycurl.MAXREDIRS, 5)
    curl.setopt(pycurl.TIMEOUT, 300)
    curl.setopt(pycurl.VERBOSE, 1 if self.verbose else 0)

    try:
        if not test:
            curl.perform()
    except Exception, e:
        print("Failed to perform request. Reason: %s" % e)
def output_base(self, tag, attrs):
    """Change the document base if there is a base tag"""
    baseurl = listget(attrs, "href", self.baseurl)
    (scheme, host, path, params, fragment) = urlparse.urlsplit(baseurl)
    lastslash = path.rfind("/")
    if lastslash > -1:
        path = path[:lastslash]
    self.baseurl = urlparse.urlunsplit((scheme, host, path, "", ""))
    self.basehost = urlparse.urlunsplit((scheme, host, "", "", ""))
def urlescape(url):
    scheme, netloc, path, qs, anchor = urlparse.urlsplit(url)
    path = urllib.quote(path, '/%')
    qs = urllib.quote_plus(qs, ':&=')
    return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
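# Illustrative output (hypothetical URL); quote() leaves '/' and '%' in the
# path alone, while quote_plus() turns spaces in the query into '+':
# >>> urlescape('http://example.com/a b?q=x y&r=1')
# 'http://example.com/a%20b?q=x+y&r=1'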
            os.unlink(destination)
            if attempt == 0:
                print >>sys.stderr, "That was our last attempt so giving up."
                exit(1)
    else:
        print >>sys.stderr, "%s does not exist: downloading" % destination
        download(url, destination)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print >>sys.stderr, "Wrong number of arguments. Use %s <url> <destination>" % sys.argv[0]
        exit(1)
    url_string = sys.argv[1]
    destination = sys.argv[2]

    url = urlparse.urlparse(url_string)
    path = url.path.split('/')
    # Avoid relying on github to set the destination filename
    if url.netloc == "github.com" and path[-3] == "archive":
        ext = None
        possible_exts = [".tar", ".tar.gz", ".zip", ".tbz", "tar.bz2"]
        for e in possible_exts:
            if url.path.endswith(e):
                ext = e
                break
        if not ext:
            print >>sys.stderr, "I did not recognise extension of %s. I know about: %s" % (
                url.path, ", ".join(possible_exts))
            exit(1)
        url_path = "/".join(path[0:-2] + [path[-2] + ext])
        url_string = str(urlparse.urlunsplit(
            (url.scheme, url.netloc, url_path, url.query, url.fragment)))

    look_for_it(url_string, destination)
def normalize_feed_url(url):
    """
    Normalize and convert a URL. If the URL cannot be converted
    (invalid or unknown scheme), None is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of, e.g. a ? with an
    empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Leading and trailing whitespace is removed

    >>> normalize_feed_url(' http://example.com/podcast.rss ')
    'http://example.com/podcast.rss'

    Incomplete (too short) URLs are not accepted

    >>> normalize_feed_url('http://') is None
    True

    Unknown protocols are not accepted

    >>> normalize_feed_url('gopher://gopher.hprc.utoronto.ca/file.txt') is None
    True
    """
    url = url.strip()
    if not url or len(url) < 8:
        return None

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # Schemes and domain names are case insensitive
    scheme, netloc = scheme.lower(), netloc.lower()

    # Normalize empty paths to "/"
    if path == '':
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slightly different, but equivalent URL"
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def replace_query_params(self, url, params):
    (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
    query_dict = QueryDict(query).dict()
    query_dict.update(params)
    query = urlencode(query_dict)
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
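# Illustrative behaviour (hypothetical call; the order of parameters in the
# rebuilt query string depends on dict ordering):
# >>> self.replace_query_params('http://example.com/p?a=1&b=2', {'b': '3'})
# 'http://example.com/p?a=1&b=3'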
def test_unparse_parse(self):
    for u in ['Python', './Python', 'x-newscheme://foo.com/stuff',
              'x://y', 'x:/y', 'x:/', '/']:
        self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
        self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
def absurl(index, relpath='', normpath=os.path.normpath):
    if index.lower().startswith('http') or (relpath and relpath.startswith('http')):
        new = urlparse.urlparse(urlparse.urljoin(index, relpath))
        return urlparse.urlunsplit((
            new.scheme,
            (new.port is None) and new.hostname or new.netloc,
            normpath(new.path), new.query, ''))
    else:
        return os.path.normpath(os.path.join(os.path.dirname(index), relpath))
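# Illustrative result (hypothetical URLs; note os.path.normpath makes the
# normalized path platform dependent -- shown here for POSIX):
# >>> absurl('http://example.com/docs/index.html', '../img/x.png')
# 'http://example.com/img/x.png'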
def create_response_uri(self):
    uri = urlsplit(self.params.redirect_uri)
    query_params = parse_qs(uri.query)
    query_fragment = parse_qs(uri.fragment)

    try:
        if self.grant_type == 'authorization_code':
            code = create_code(
                user=self.request.user,
                client=self.client,
                scope=self.params.scope,
                nonce=self.params.nonce,
                is_authentication=self.is_authentication,
                code_challenge=self.params.code_challenge,
                code_challenge_method=self.params.code_challenge_method)
            code.save()
            query_params['code'] = code.code
            query_params['state'] = self.params.state if self.params.state else ''
        elif self.grant_type == 'implicit':
            # We don't need id_token if it's an OAuth2 request.
            if self.is_authentication:
                id_token_dic = create_id_token(user=self.request.user,
                                               aud=self.client.client_id,
                                               nonce=self.params.nonce,
                                               request=self.request)
                query_fragment['id_token'] = encode_id_token(
                    id_token_dic, self.client)
            else:
                id_token_dic = {}

            token = create_token(user=self.request.user,
                                 client=self.client,
                                 id_token_dic=id_token_dic,
                                 scope=self.params.scope)

            # Store the token.
            token.save()

            query_fragment['token_type'] = 'bearer'
            # TODO: Create setting 'OIDC_TOKEN_EXPIRE'.
            query_fragment['expires_in'] = 60 * 10

            # Check if response_type is an OpenID request with value
            # 'id_token token' or it's an OAuth2 Implicit Flow request.
            if self.params.response_type in ['id_token token', 'token']:
                query_fragment['access_token'] = token.access_token

            query_fragment['state'] = self.params.state if self.params.state else ''
    except Exception as error:
        logger.debug(
            '[Authorize] Error when trying to create response uri: %s',
            error)
        raise AuthorizeError(self.params.redirect_uri, 'server_error',
                             self.grant_type)

    uri = uri._replace(query=urlencode(query_params, doseq=True))
    uri = uri._replace(fragment=urlencode(query_fragment, doseq=True))
    return urlunsplit(uri)
in_path = '/net/lxstor/export/infra/sysadmin/snsakala/drupal-perf-har/har_2/%s.har' % name
out_path = '/net/lxstor/export/infra/sysadmin/snsakala/drupal-perf-har/tsung_2/partial/%s.xml' % name

# load json file
with open(in_path, 'r') as f:
    data = json.load(f)

requests = [x[u'request'] for x in data[u'log'][u'entries']]
urls_get = [x[u'url'] for x in requests if x[u'method'] == u'GET']
urls_noget = [x[u'url'] for x in requests if x[u'method'] != u'GET']

if urls_noget:
    print('Found other method than GET')
    print(urls_noget)
print(u'Found %d GET requests' % len(urls_get))

tsung_partial = []
for url in urls_get:
    splited = urlsplit(url)
    # only for drupal server
    if splited.netloc != 'www-prd.luxairtours.lu':
        print('Found request to %s -> %s ' % splited[1:3])
        continue
    relurl = urlunsplit(('', '') + splited[2:])
    tsung_partial.append(
        "<request><http url='%s' version='1.1' method='GET'></http></request>" % relurl)

with open(out_path, 'w') as f:
    f.write('\n'.join(tsung_partial))
def inner(path, protocol="http", query="", fragment=""):
    port = server_config["ports"][protocol][0]
    host = "%s:%s" % (server_config["host"], port)
    return urlparse.urlunsplit((protocol, host, path, query, fragment))
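# Illustrative call, assuming a hypothetical
# server_config = {'host': 'localhost', 'ports': {'http': [8000]}}:
# >>> inner('/echo', query='a=1')
# 'http://localhost:8000/echo?a=1'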
def get(self):
    # TODO(vadimsh): Show some prettier page. This code path is hit when user
    # clicks "Deny" on consent page.
    error = self.request.get('error')
    if error:
        self.abort(400, detail='OpenID login error: %s' % error)

    # Validate inputs.
    code = self.request.get('code')
    if not code:
        self.abort(400, detail='Missing "code" parameter')
    state = self.request.get('state')
    if not state:
        self.abort(400, detail='Missing "state" parameter')
    try:
        state = validate_state(state)
    except OpenIDError as e:
        self.abort(400, detail=str(e))

    # Callback URI is hardcoded in OAuth2 client config and must always point
    # to default version. Yet we want to support logging to non-default
    # versions that have different hostnames. Do some redirect dance here to
    # pass control to required version if necessary (so that it can set cookie
    # on non-default version domain). Same handler with same params, just with
    # different hostname. For most common case of logging in into default
    # version this should not trigger.
    if self.request.host_url != state['host_url']:
        # Replace 'scheme' and 'netloc' of this_url with host_url values.
        host_url = urlparse.urlsplit(state['host_url'])
        this_url = urlparse.urlsplit(self.request.url)
        target_url = urlparse.urlunsplit(host_url[:2] + this_url[2:])
        self.redirect(target_url)
        return

    # Grab user profile from the code.
    try:
        userinfo = handle_authorization_code(get_cached_config(), code)
    except OpenIDError as e:
        self.abort(500 if e.transient else 400, detail=str(e))

    # Strictly speaking dest_url was already validated when put into state.
    # Double check this.
    dest_url = state['dest_url']
    try:
        dest_url = normalize_dest_url(self.request.host_url, dest_url)
    except ValueError as e:
        self.abort(400, detail='Bad redirect URL: %s' % e)

    # Ignore non https:// URLs for pictures. We serve all pages over HTTPS and
    # don't want to break this rule just for a pretty picture. Google userinfo
    # endpoint always returns https:// URL anyway.
    pic = userinfo.get('picture')
    if pic and not pic.startswith('https://'):
        pic = None

    # Google avatars sometimes look weird if used directly. Resized version
    # always looks fine. 's64' is documented, for example, here:
    # https://cloud.google.com/appengine/docs/python/images
    if pic and pic.endswith('/photo.jpg'):
        # Note: str.rstrip('/photo.jpg') would strip any trailing characters
        # from that character set, so slice the suffix off instead.
        pic = pic[:-len('/photo.jpg')] + '/s64/photo.jpg'
    userinfo['picture'] = pic

    # Close previous session (if any), create a new one.
    close_session(self.request.cookies.get(COOKIE_NAME))
    session = make_session(userinfo, SessionCookie.expiration_sec)

    # Make cookie expire a bit earlier than the session, to avoid "bad token"
    # due to minor clock drifts between the server and the client.
    self.response.set_cookie(
        key=COOKIE_NAME,
        value=make_session_cookie(session),
        expires=session.expiration_ts - datetime.timedelta(seconds=300),
        secure=not utils.is_local_dev_server(),
        httponly=True)
    nuke_gae_cookies(self.response)
    self.redirect(dest_url)
def join_URL_frags(base, query):
    split = list(urlparse.urlsplit(base))
    split[2] = (split[2] + query).replace('//', '/')
    return urlparse.urlunsplit(split)
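# Illustrative behaviour (hypothetical URL); the replace() guards against a
# doubled slash when the base path already ends in '/':
# >>> join_URL_frags('http://example.com/api/', '/v1/items')
# 'http://example.com/api/v1/items'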
def update_repodata(bucketName, key, operation):
    if key.rfind("/") > -1:
        fileName = key[key.rfind("/") + 1:]
        repoPath = key[:key.rfind("/")]
    else:
        fileName = key
        repoPath = ""
    (name, version, release, epoch, arch) = splitFilename(fileName)
    logger.debug("fileName={0}".format(fileName))
    logger.debug("repoPath={0}".format(repoPath))

    tmpdir = tempfile.mkdtemp()
    s3base = urlparse.urlunsplit(("s3", bucketName, repoPath, "", ""))
    s3grabber = S3Grabber(s3base)

    # Set up temporary repo that will fetch repodata from s3
    yumbase = yum.YumBase()
    yumbase.preconf.disabled_plugins = '*'
    yumbase.conf.cachedir = os.path.join(tmpdir, 'cache')
    yumbase.repos.disableRepo('*')
    repo = yumbase.add_enable_repo('s3')
    repo._grab = s3grabber
    repo._urls = [os.path.join(s3base, '')]
    # Ensure that missing base path doesn't cause trouble
    repo._sack = yum.sqlitesack.YumSqlitePackageSack(
        createrepo.readMetadata.CreaterepoPkgOld)

    # Create metadata generator
    mdconf = createrepo.MetaDataConfig()
    mdconf.directory = tmpdir
    mdconf.pkglist = yum.packageSack.MetaSack()
    mdgen = createrepo.MetaDataGenerator(mdconf, LoggerCallback())
    mdgen.tempdir = tmpdir
    mdgen._grabber = s3grabber

    new_packages = yum.packageSack.PackageSack()
    if operation == "add":
        # Combine existing package sack with new rpm file list
        newpkg = mdgen.read_in_package(os.path.join(s3base, fileName))
        newpkg._baseurl = ''  # don't leave s3 base urls in primary metadata
        new_packages.addPackage(newpkg)
    else:
        # Remove deleted package
        logger.debug("Delete package {0}".format(key))
        older_pkgs = yumbase.pkgSack.searchNevra(name=name)
        for i, older in enumerate(older_pkgs, 1):
            if older.version == version and older.release == release:
                yumbase.pkgSack.delPackage(older)

    mdconf.pkglist.addSack('existing', yumbase.pkgSack)
    mdconf.pkglist.addSack('new', new_packages)

    # Write out new metadata to tmpdir
    mdgen.doPkgMetadata()
    mdgen.doRepoMetadata()
    mdgen.doFinalMove()

    # Replace metadata on s3
    s3grabber.syncdir(os.path.join(tmpdir, 'repodata'), 'repodata')

    shutil.rmtree(tmpdir)
def no_dynamic(cls, url):
    """Removes the fragment, params and query part of the url"""
    parsed = urlsplit(url)
    return urlunsplit(parsed[:3] + ('', ''))
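# Illustrative behaviour (hypothetical URL, on a hypothetical owning class):
# >>> Crawler.no_dynamic('http://example.com/page?q=1#top')
# 'http://example.com/page'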
try:
    DEFAULT_USERNAME, DEFAULT_PASSWORD = os_environ['CSAUTH'].split(':')
except:
    DEFAULT_USERNAME, DEFAULT_PASSWORD = USERNAME, PASSWORD

# You can override this value by running:
# $ export CSURI='https://zrh.cloudsigma.com/api/2.0/'
URL = 'https://zrh.cloudsigma.com/api/2.0/'
try:
    DEFAULT_URL = os_environ['CSURI']
except:
    DEFAULT_URL = URL

DEFAULT_URL_UPLOAD = list(urlparse.urlsplit(DEFAULT_URL))
DEFAULT_URL_UPLOAD[1] = 'direct.' + DEFAULT_URL_UPLOAD[1]
DEFAULT_URL_UPLOAD = urlparse.urlunsplit(DEFAULT_URL_UPLOAD)


def make_request(a_type, a_url, a_endpoint, a_username, a_password,
                 a_data=None, a_params=None, a_headers=None, a_verify=True,
                 a_verbose=False):
    """
    Makes request by setting appropriate url and auth.
def make_url_with_extraparam(url, k, v):
    scheme, netloc, path, query_string, fragment = urlsplit(url)
    query_params = parse_qs(query_string)
    query_params[k] = [v]
    new_query_string = urlencode(query_params, doseq=True)
    return urlunsplit((scheme, netloc, path, new_query_string, fragment))
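# Illustrative behaviour (hypothetical URL; parameter order in the rebuilt
# query string depends on dict ordering):
# >>> make_url_with_extraparam('http://example.com/p?a=1', 'b', 2)
# 'http://example.com/p?a=1&b=2'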