def process_response(self, request, response, spider):
    if request.meta.get('dont_redirect', False):
        return response

    if request.method == 'HEAD':
        if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)
        else:
            return response

    referer = request.url

    if response.status in [302, 303] and 'Location' in response.headers:
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = self._redirect_request_using_get(request, redirected_url)
        redirected = self._redirect(redirected, request, spider, response.status)
        redirected.headers['Referer'] = referer
        return redirected

    if response.status in [301, 307] and 'Location' in response.headers:
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = request.replace(url=redirected_url)
        redirected = self._redirect(redirected, request, spider, response.status)
        redirected.headers['Referer'] = referer
        return redirected

    return response
def buildDiscover(base_url, out_dir):
    """Convert all files in a directory to apache mod_asis files in
    another directory."""
    test_data = discoverdata.readTests(discoverdata.default_test_file)

    def writeTestFile(test_name):
        template = test_data[test_name]
        data = discoverdata.fillTemplate(
            test_name, template, base_url, discoverdata.example_xrds)
        out_file_name = os.path.join(out_dir, test_name)
        out_file = open(out_file_name, 'w')
        out_file.write(data)
        out_file.close()  # close explicitly so the data is flushed to disk

    manifest = [manifest_header]
    for success, input_name, id_name, result_name in discoverdata.testlist:
        if not success:
            continue
        writeTestFile(input_name)

        input_url = urljoin(base_url, input_name)
        id_url = urljoin(base_url, id_name)
        result_url = urljoin(base_url, result_name)

        manifest.append('\t'.join((input_url, id_url, result_url)))
        manifest.append('\n')

    manifest_file_name = os.path.join(out_dir, 'manifest.txt')
    manifest_file = open(manifest_file_name, 'w')
    for chunk in manifest:
        manifest_file.write(chunk)
    manifest_file.close()
def _legacy_interact(self, location, error_info):
    visit_url = urljoin(location, error_info.info.visit_url)
    wait_url = urljoin(location, error_info.info.wait_url)
    method_urls = {
        "interactive": visit_url
    }
    if (len(self._interaction_methods) > 1 or
            self._interaction_methods[0].kind() != WEB_BROWSER_INTERACTION_KIND):
        # We have several possible methods or we only support a non-window
        # method, so we need to fetch the possible methods supported by
        # the discharger.
        method_urls = _legacy_get_interaction_methods(visit_url)
    for interactor in self._interaction_methods:
        kind = interactor.kind()
        if kind == WEB_BROWSER_INTERACTION_KIND:
            # This is the old name for browser-window interaction.
            kind = "interactive"
        if not isinstance(interactor, LegacyInteractor):
            # Legacy interaction mode isn't supported.
            continue
        visit_url = method_urls.get(kind)
        if visit_url is None:
            continue
        visit_url = urljoin(location, visit_url)
        interactor.legacy_interact(self, location, visit_url)
        return _wait_for_macaroon(wait_url)
    raise InteractionError('no methods supported; supported [{}]; provided [{}]'.format(
        ' '.join([x.kind() for x in self._interaction_methods]),
        ' '.join(method_urls.keys()),
    ))
def _send_batch(self, destination, events):
    '''Makes a single batch API request with the given list of events. The
    `destination` argument contains the write key, API host and dataset
    name used to build the request.'''
    start = time.time()
    status_code = 0
    try:
        url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                      destination.dataset)
        payload = []
        for ev in events:
            event_time = ev.created_at.isoformat()
            if ev.created_at.tzinfo is None:
                event_time += "Z"
            payload.append({
                "time": event_time,
                "samplerate": ev.sample_rate,
                "data": ev.fields()})
        self.log("firing batch, size = %d", len(payload))
        resp = self.session.post(
            url,
            headers={"X-Honeycomb-Team": destination.writekey,
                     "Content-Type": "application/json"},
            data=json.dumps(payload, default=json_default_handler),
            timeout=10.0,
        )
        status_code = resp.status_code
        resp.raise_for_status()

        statuses = [{"status": d.get("status"), "error": d.get("error")}
                    for d in resp.json()]
        for ev, status in zip(events, statuses):
            self._enqueue_response(status.get("status"), "",
                                   status.get("error"), start, ev.metadata)
    except Exception as e:
        # Catch all exceptions and hand them to the responses queue.
        self._enqueue_errors(status_code, e, start, events)
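A minimal sketch of the nested urljoin pattern above (host and dataset names are hypothetical): the inner join with an absolute path forces the URL path to /1/batch/ regardless of any path already on the host, and the trailing slash makes the outer join append the dataset name instead of replacing the last segment.

from urllib.parse import urljoin

api_host = "https://api.honeycomb.io"  # hypothetical API host
dataset = "production-events"          # hypothetical dataset name
url = urljoin(urljoin(api_host, "/1/batch/"), dataset)
print(url)  # https://api.honeycomb.io/1/batch/production-events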
def get_subscriptions(address):
    url = urljoin(MAILMAN_INSTANCE,
                  "3.1/members/find?subscriber={}".format(address))
    response = requests.get(url, auth=MAILMAN_AUTH)
    if response.status_code >= 300:
        log.error("Could not get URL %s: %d %s",
                  url, response.status_code, response.reason)
        return []
    result = response.json()
    subscriptions = []
    for entry in result.get("entries", []):
        subscription = {
            "list_id": entry["list_id"],
            "role": entry["role"],
            "delivery_mode": entry["delivery_mode"],
        }
        # Get the subscription's preferences
        member_id = entry["member_id"]
        pref_url = urljoin(MAILMAN_INSTANCE,
                           "3.1/members/{}/preferences".format(member_id))
        pref_response = requests.get(pref_url, auth=MAILMAN_AUTH)
        pref_result = pref_response.json()
        if pref_response.status_code >= 300:
            log.error("Could not get URL %s: %d %s",
                      pref_url, pref_response.status_code, pref_response.reason)
        else:
            subscription["preferences"] = dict([
                (key, pref_result[key]) for key in pref_result
                if key not in ("http_etag", "self_link")
            ])
        subscriptions.append(subscription)
    return subscriptions
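The relative join above only appends cleanly when MAILMAN_INSTANCE either has no path or ends in a slash; a sketch of the three cases (instance URLs hypothetical):

from urllib.parse import urljoin

urljoin("http://localhost:8001", "3.1/lists")          # http://localhost:8001/3.1/lists
urljoin("http://localhost:8001/api", "3.1/lists")      # http://localhost:8001/3.1/lists  ('api' replaced)
urljoin("http://localhost:8001/api/", "3.1/lists")     # http://localhost:8001/api/3.1/lists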
def test_href_template(self):
    self.headers = {
        'Client-ID': uuidutils.generate_uuid(),
        'X-Project-ID': '8383830383'
    }
    body = self.simulate_get(self.url_prefix, headers=self.headers)
    self.assertEqual(falcon.HTTP_200, self.srmock.status)

    resp = jsonutils.loads(body[0])
    queue_href_template = resp['resources']['rel/queue']['href-template']
    path_1 = 'https://zaqar.example.com' + self.url_prefix
    path_2 = 'https://zaqar.example.com' + self.url_prefix + '/'

    # Verify all the href templates start with the correct version prefix
    def get_href_or_template(resource):
        return resource.get('href-template', '') or resource['href']

    for resource in list(resp['resources']):
        self.assertTrue(
            get_href_or_template(resp['resources'][resource]).
            startswith(self.url_prefix))

    url = urlparse.urljoin(path_1, queue_href_template)
    expected = ('https://zaqar.example.com' + self.url_prefix +
                '/queues/foo')
    self.assertEqual(expected, url.format(queue_name='foo'))

    url = urlparse.urljoin(path_2, queue_href_template)
    self.assertEqual(expected, url.format(queue_name='foo'))
def rel_to_abs(self, base_url):
    """
    Converts relative links from html contents to absolute links
    """
    # Delete target attributes
    strip_attributes(self.tree, "target")

    # Absolute links
    self.tree.rewrite_links(
        lambda link: urljoin(base_url, link)
        if not link.startswith(self.rel_to_abs_excluded_prefixes)
        else link
    )

    # Extra attributes
    onclick_elements = self.tree.xpath("//*[@onclick]")

    for element in onclick_elements:
        # Replace attribute with absolute URL
        element.set(
            "onclick",
            self.javascript_open_re.sub(
                lambda match: "%s%s%s" % (
                    match.group("opening"),
                    urljoin(base_url, match.group("url")),
                    match.group("ending")),
                element.get("onclick"),
            ),
        )
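rewrite_links walks every link-carrying attribute in the tree. A compact demo of the same idea against a bare lxml tree (markup hypothetical, and without the excluded-prefix filtering used above):

import lxml.html
from urllib.parse import urljoin

tree = lxml.html.fromstring(
    '<div><a href="/about">About</a><img src="logo.png"></div>')
# Resolve every link against a hypothetical page URL
tree.rewrite_links(lambda link: urljoin("https://example.com/docs/", link))
print(lxml.html.tostring(tree).decode())
# <div><a href="https://example.com/about">About</a><img src="https://example.com/docs/logo.png"></div>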
def __init__(self, username=None, serverloc=None, userapikey="nokey"):
    # This logic is based on ContinuumModelsClient.__init__ and
    # mpl.PlotClient.__init__. There is some merged functionality here
    # since a Session is meant to capture the little bit of lower-level
    # logic in PlotClient (i.e. avoiding handling of things like
    # _newxyplot()), but also build in the functionality of the
    # ContinuumModelsClient.
    self.username = username
    self.root_url = serverloc
    self.http_session = requests.session()
    self.http_session.headers.update(
        {"content-type": "application/json",
         "BOKEHUSER-API-KEY": userapikey,
         "BOKEHUSER": username})
    if self.root_url:
        url = urljoin(self.root_url, "/bokeh/userinfo/")
        self.userinfo = utils.get_json(self.http_session.get(url, verify=False))
    else:
        logger.info("Not using a server, plots will only work in embedded mode")
        self.userinfo = None

    self.docid = None
    self.plotcontext = None
    self.apikey = None
    self.bbclient = None  # reference to a ContinuumModelsClient
    self.base_url = urljoin(self.root_url, "/bokeh/bb/")
    self.raw_js_objs = []
    super(PlotServerSession, self).__init__()
def export(self, ds, requestor, notify):
    """
    This function exports data as FITS files. To do this, the function
    binds metadata (keywords) to images (arrays) to create FITS files
    and then serves the FITS files at jsoc.stanford.edu.
    Written by Monica Bobra and Art Amezcua
    19 July 2016

    Parameters
    ----------
    requestor: string
        Username of requestor.
    notify   : string
        E-mail address of requestor.
    ds       : string
        Name of the data series.

    Returns
    -------
    supath : list
        List containing paths to all the requested FITS files.
    """
    # test to see if the user's e-mail address is registered with
    # jsoc.stanford.edu
    test_email_query = ('http://jsoc.stanford.edu/cgi-bin/ajax/checkAddress.sh'
                        '?address=' + quote_plus(notify) + '&checkonly=1')
    response = urlopen(test_email_query)
    data = json.loads(response.read())
    if data['status'] == 4:
        raise RuntimeError(
            'User e-mail address is not registered with jsoc.stanford.edu')
    query = '?' + urlencode({'op': 'exp_request', 'protocol': 'fits',
                             'format': 'json', 'method': 'url',
                             'requestor': requestor, 'notify': notify,
                             'ds': ds})
    req = self._json_request(self._url_jsoc_fetch + query)
    # waiting for the request to be ready
    if int(req.data['status']) == 1 or int(req.data['status']) == 2:
        if 'requestid' in req.data:
            query = '?' + urlencode({'op': 'exp_status',
                                     'requestid': req.data['requestid']})
            supath = []
            print('Waiting for the request to be ready. '
                  'Please allow at least 20 seconds.')
            time.sleep(15)
            while True:
                req = self._json_request(self._url_jsoc_fetch + query)
                if (int(req.data['status']) == 1 or
                        int(req.data['status']) == 2 or
                        int(req.data['status']) == 6):
                    time.sleep(5)
                elif int(req.data['status']) == 0:
                    dir = req.data['dir']
                    for dataobj in req.data['data']:
                        supath.append(urljoin(self.baseurl,
                                              os.path.join(req.data['dir'],
                                                           dataobj['filename'])))
                    break
                else:
                    print(type(req.data['status']))
                    if req.data['status'] == 3:
                        raise RuntimeError(
                            'DRMS Query failed, request size is too large, '
                            'status=%s' % req.data['status'])
                    if req.data['status'] == 4:
                        raise RuntimeError(
                            'DRMS Query failed, request not formed correctly, '
                            'status=%s' % req.data['status'])
                    if req.data['status'] == 5:
                        raise RuntimeError(
                            'DRMS Query failed, export request expired, '
                            'status=%s' % req.data['status'])
        else:
            raise RuntimeError(
                'DRMS Query failed, there is no requestid, status=%s'
                % req.data['status'])
    else:
        raise RuntimeError(
            'DRMS Query failed, series is not a valid series, status=%s'
            % req.data['status'])
    print("All the data are available at:")
    print(str(urljoin(self.baseurl, req.data['dir'])))
    return supath
def list_directory(urlpath, filepath):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating a
    wsgi error). In either case, the headers are sent, making the
    interface the same as for send_head().
    """
    path = urlpath.rstrip('/') + '/'
    listdir = os.listdir(filepath)
    dirlist = []
    filelist = []
    for file in listdir:
        # Directories must be detected on the filesystem path, not the URL path
        if os.path.isdir(os.path.join(filepath, file)):
            dirlist.append(file)
        else:
            filelist.append(file)
    dirlist.sort()
    filelist.sort()
    res = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    res += '<html><head><title>{0}</title></head><body>\n'.format(path)
    res += '<big><strong>Listing %s</strong></big><br>\n' % (path)
    if path != '/':
        item = '..'
        res += 'D <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    for item in dirlist:
        res += 'D <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    for item in filelist:
        res += 'F <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    res += '</body></html>'
    return str(res)
def handleSection(self, section, items):
    locales = items['locales']
    if locales == 'all':
        inipath = '/'.join((
            items['repo'], items['mozilla'],
            'raw-file', 'default',
            items['l10n.ini']
        ))
        ini = ConfigParser()
        ini.readfp(urlopen(inipath))
        allpath = urljoin(
            urljoin(inipath, ini.get('general', 'depth')),
            ini.get('general', 'all'))
        locales = urlopen(allpath).read()
        locales = locales.split()
    obs = (Active.objects
           .filter(run__tree__code=section)
           .exclude(run__locale__code__in=locales)
           .order_by('run__locale__code'))
    obslocs = ' '.join(obs.values_list('run__locale__code', flat=True))
    if not obslocs:
        self.stdout.write(' OK\n')
        return
    s = input('Remove %s? [Y/n] ' % obslocs)
    if s.lower() == 'y' or s == '':
        obs.delete()
def process_response(self, request, response, spider):
    if (request.meta.get('dont_redirect', False) or
            response.status in getattr(spider, 'handle_httpstatus_list', []) or
            response.status in request.meta.get('handle_httpstatus_list', []) or
            request.meta.get('handle_httpstatus_all', False)):
        return response

    if request.method == 'HEAD':
        if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)
        else:
            return response

    if response.status in [302, 303] and 'Location' in response.headers:
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = self._redirect_request_using_get(request, redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    if response.status in [301, 307] and 'Location' in response.headers:
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = request.replace(url=redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    return response
def _get_form_url(form, url):
    if url is None:
        action = form.get('action')
        if action is None:
            return form.base_url
        return urljoin(form.base_url, strip_html5_whitespace(action))
    return urljoin(form.base_url, url)
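A sketch of how the form action resolves against the page URL; str.strip() stands in for the strip_html5_whitespace helper here, and the page and action values are hypothetical:

from urllib.parse import urljoin

base_url = "https://example.com/accounts/login"  # hypothetical page URL
action = "  /session  "                          # HTML5 allows surrounding whitespace
print(urljoin(base_url, action.strip()))         # https://example.com/session
print(urljoin(base_url, "confirm"))              # https://example.com/accounts/confirm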
def __init__(self):
    self.verify_https = os.environ.get('OAUTHLIB_INSECURE_TRANSPORT', '') == ""
    if self.verify_https and os.environ.get("REQUESTS_CA_BUNDLE", "").strip() != "":
        self.verify_https = os.environ["REQUESTS_CA_BUNDLE"].strip()

    self.jwt_enable = six.text_type(
        os.environ.get('CKAN_OAUTH2_JWT_ENABLE',
                       toolkit.config.get('ckan.oauth2.jwt.enable', ''))
    ).strip().lower() in ("true", "1", "on")
    self.legacy_idm = six.text_type(
        os.environ.get('CKAN_OAUTH2_LEGACY_IDM',
                       toolkit.config.get('ckan.oauth2.legacy_idm', ''))
    ).strip().lower() in ("true", "1", "on")

    self.authorization_endpoint = six.text_type(
        os.environ.get('CKAN_OAUTH2_AUTHORIZATION_ENDPOINT',
                       toolkit.config.get('ckan.oauth2.authorization_endpoint', ''))).strip()
    self.token_endpoint = six.text_type(
        os.environ.get('CKAN_OAUTH2_TOKEN_ENDPOINT',
                       toolkit.config.get('ckan.oauth2.token_endpoint', ''))).strip()
    self.profile_api_url = six.text_type(
        os.environ.get('CKAN_OAUTH2_PROFILE_API_URL',
                       toolkit.config.get('ckan.oauth2.profile_api_url', ''))).strip()
    self.client_id = six.text_type(
        os.environ.get('CKAN_OAUTH2_CLIENT_ID',
                       toolkit.config.get('ckan.oauth2.client_id', ''))).strip()
    self.client_secret = six.text_type(
        os.environ.get('CKAN_OAUTH2_CLIENT_SECRET',
                       toolkit.config.get('ckan.oauth2.client_secret', ''))).strip()
    self.scope = six.text_type(
        os.environ.get('CKAN_OAUTH2_SCOPE',
                       toolkit.config.get('ckan.oauth2.scope', ''))).strip()
    self.rememberer_name = six.text_type(
        os.environ.get('CKAN_OAUTH2_REMEMBER_NAME',
                       toolkit.config.get('ckan.oauth2.rememberer_name', 'auth_tkt'))).strip()
    self.profile_api_user_field = six.text_type(
        os.environ.get('CKAN_OAUTH2_PROFILE_API_USER_FIELD',
                       toolkit.config.get('ckan.oauth2.profile_api_user_field', ''))).strip()
    self.profile_api_fullname_field = six.text_type(
        os.environ.get('CKAN_OAUTH2_PROFILE_API_FULLNAME_FIELD',
                       toolkit.config.get('ckan.oauth2.profile_api_fullname_field', ''))).strip()
    self.profile_api_mail_field = six.text_type(
        os.environ.get('CKAN_OAUTH2_PROFILE_API_MAIL_FIELD',
                       toolkit.config.get('ckan.oauth2.profile_api_mail_field', ''))).strip()
    self.profile_api_groupmembership_field = six.text_type(
        os.environ.get('CKAN_OAUTH2_PROFILE_API_GROUPMEMBERSHIP_FIELD',
                       toolkit.config.get('ckan.oauth2.profile_api_groupmembership_field', ''))).strip()
    self.sysadmin_group_name = six.text_type(
        os.environ.get('CKAN_OAUTH2_SYSADMIN_GROUP_NAME',
                       toolkit.config.get('ckan.oauth2.sysadmin_group_name', ''))).strip()

    self.redirect_uri = urljoin(
        urljoin(toolkit.config.get('ckan.site_url', 'http://localhost:5000'),
                toolkit.config.get('ckan.root_path')),
        constants.REDIRECT_URL)

    # Init db
    db.init_db(model)

    missing = [key for key in REQUIRED_CONF if getattr(self, key, "") == ""]
    if missing:
        raise ValueError("Missing required oauth2 conf: %s" % ", ".join(missing))
    elif self.scope == "":
        self.scope = None
def startElementNS(self, name, qname, attrs):
    stack = self.stack
    stack.append(ElementHandler())
    current = self.current
    parent = self.parent
    base = attrs.get(BASE, None)
    if base is not None:
        base, frag = urldefrag(base)
        if parent and parent.base:
            base = urljoin(parent.base, base)
        else:
            systemId = self.locator.getPublicId() \
                or self.locator.getSystemId()
            if systemId:
                base = urljoin(systemId, base)
    else:
        if parent:
            base = parent.base
        if base is None:
            systemId = self.locator.getPublicId() \
                or self.locator.getSystemId()
            if systemId:
                base, frag = urldefrag(systemId)
    current.base = base
    language = attrs.get(LANG, None)
    if language is None:
        if parent:
            language = parent.language
    current.language = language
    current.start(name, qname, attrs)
def fix_auth_url_version(auth_url):
    """Fix up the auth url if an invalid or no version prefix was given.

    People still give a v2 auth_url even when they specify that they want
    v3 authentication. Fix the URL to say v3 in this case and add version
    if it is missing entirely. This should be smarter and use discovery.
    """
    # Check for empty path component in endpoint URL and add keystone version
    # to endpoint: as of Kilo, the identity URLs returned by Keystone might no
    # longer contain API versions, leaving the version choice up to the user.
    if urlparse.urlparse(auth_url).path.rstrip('/') == '':
        if get_keystone_version() >= 3:
            auth_url = urlparse.urljoin(auth_url, 'v3')
        else:
            auth_url = urlparse.urljoin(auth_url, 'v2.0')

    if get_keystone_version() >= 3:
        if has_in_url_path(auth_url, "/v2.0"):
            LOG.warning("The settings.py file points to a v2.0 keystone "
                        "endpoint, but v3 is specified as the API version "
                        "to use. Using v3 endpoint for authentication.")
            auth_url = url_path_replace(auth_url, "/v2.0", "/v3", 1)

    return auth_url
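The relative join only behaves as an append here because the code has already verified that the endpoint path is empty; with a non-empty path, urljoin would replace the last segment instead. A sketch with hypothetical endpoints:

from urllib.parse import urljoin

urljoin("http://keystone:5000", "v3")       # http://keystone:5000/v3
urljoin("http://keystone:5000/", "v3")      # http://keystone:5000/v3
urljoin("http://keystone:5000/v2.0", "v3")  # http://keystone:5000/v3  (v2.0 segment replaced)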
def _extract_links(self, selector, response_url, response_encoding, base_url):
    '''
    Pretty much the same function, just added 'ignore' to to_native_str()
    '''
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue
        # added 'ignore' to encoding errors
        url = to_native_str(url, encoding=response_encoding, errors='ignore')
        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=rel_has_nofollow(el.get('rel')))
        links.append(link)
    return self._deduplicate_if_needed(links)
def open_in_browser(config_obj, jql_query, query_log_path, new_tab=False):
    """
    Open browser in JIRA with the retrieved keys from Stash as url-params
    :param config_obj:
    :param jql_query:
    :return:
    """
    jira_url = config_obj.jira_url
    if len(jql_query) < OPEN_IN_BROWSER_BELOW:
        params = {
            'jql': jql_query
        }
        b_url = urljoin(urljoin(jira_url, 'issues/'), '?' + urlencode(params))
        if new_tab:
            webbrowser.open(b_url, new=2)
        else:
            webbrowser.open(b_url, new=0)
    else:
        click.echo("Too much data to open in browser.")
        click.echo("Query saved to " + query_log_path)
        with open(query_log_path, 'a') as f:
            f.write(jql_query + "\n\n")
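Joining a bare "?query" reference keeps the base path and replaces only the query string, which is what the nested join above relies on. A sketch with a hypothetical JIRA host:

from urllib.parse import urljoin, urlencode

base = urljoin("https://jira.example.com/", "issues/")
print(urljoin(base, "?" + urlencode({"jql": "project = FOO"})))
# https://jira.example.com/issues/?jql=project+%3D+FOO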
def request_raw(method, path, params=None, body=None, headers=None,
                handle_errors=True, auto_retry=True):
    kwargs = {
        'params': params,
        'data': body,
        'headers': headers,
        'verify': analyzere.tls_verify,
    }
    username = analyzere.username
    password = analyzere.password
    if username and password:
        kwargs['auth'] = (username, password)

    resp = requests.request(method, urljoin(analyzere.base_url, path), **kwargs)

    # Handle HTTP 503 with the Retry-After header by automatically retrying
    # request after sleeping for the recommended amount of time
    retry_after = resp.headers.get('Retry-After')
    while auto_retry and (resp.status_code == 503 and retry_after):
        time.sleep(float(retry_after))
        # Repeat original request after Retry-After time has elapsed.
        resp = requests.request(method, urljoin(analyzere.base_url, path), **kwargs)
        retry_after = resp.headers.get('Retry-After')

    if handle_errors and (not 200 <= resp.status_code < 300):
        handle_api_error(resp, resp.status_code)
    return resp
def get_journal_about_page_url(about_page_id=0, auth=True):
    """
    Return url to journal about page.

    If auth=True, the url will redirect through the journals service log in
    page which will prevent the "purchase now" button being shown. If
    auth=False, the url will point to the Journal About Page with the
    purchase button shown.

    Arguments:
        about_page_id (int): id of Journal About Page as found in Discovery
        auth (boolean): authorization flag, if true will force login to
            journal service and redirect to last visited page in Journal
            after login. If false, this method will return direct url to
            journal about page.

    Returns:
        url (str): url pointing to Journals Service login, w/ a redirect to
            last visited journal page, or url pointing directly to journal
            about page.
    """
    if not auth:
        return urljoin(get_journals_frontend_url(),
                       '{id}/about'.format(id=about_page_id))

    # by providing just the about_page_id in the url, the user will be
    # redirected to the last page viewed after logging in
    about_page_url = urljoin(get_journals_frontend_url(),
                             '{id}'.format(id=about_page_id))
    login_url = urljoin(get_journals_root_url(), 'require_auth')
    query = 'forward={next_url}'.format(next_url=about_page_url)
    split_url = urlsplit(login_url)
    url = urlunsplit((
        split_url.scheme,
        split_url.netloc,
        split_url.path,
        query,
        split_url.fragment,
    ))
    return url
def open(self, filename=None):
    if filename is None:
        filename = self._base_uri
    else:
        if self._file_type == 's3':
            # urljoin does not understand s3://, so temporarily swap schemes
            filename = urljoin(
                self._base_uri.replace('s3://', 'http://'),
                filename.replace('\\', '/')).replace('http://', 's3://')
        elif self._file_type == 'http':
            filename = urljoin(self._base_uri, filename.replace('\\', '/'))
        else:
            filename = os.path.abspath(os.path.join(
                os.path.dirname(self._base_uri.replace('\\', '/')),
                filename.replace('\\', '/')))
    f = None
    if self._file_type == 's3':
        uri_header, uri_body = filename.split('://', 1)
        us = uri_body.split('/')
        bucketname = us.pop(0)
        key = '/'.join(us)
        logger.info('Opening {}'.format(key))
        f = StringIO(self._s3_bucket.Object(key).get()['Body'].read())
    elif self._file_type == 'http':
        f = request.urlopen(filename)
    else:
        f = open(filename, 'rb')
    yield f
    f.close()
def process_response(self, request, response, spider):
    if request.meta.get('dont_redirect', False):
        return response

    if request.method == 'HEAD':
        if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)
        else:
            return response

    if response.status in [302, 303] and 'Location' in response.headers:
        if (response.headers['Location'] == "http://store.steampowered.com/") or \
                (response.headers['Location'] == "http://store.steampowered.com") or \
                ('video' in response.headers['Location']):
            # log.msg("Ignored home page / video redirect!")
            raise IgnoreRequest()
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = self._redirect_request_using_get(request, redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    if response.status in [301, 307] and 'Location' in response.headers:
        redirected_url = urljoin(request.url, response.headers['location'])
        redirected = request.replace(url=redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    return response
def add_absolute_urls(results, request=None):
    for hit in results:
        base_url = get_template_url(hit['vendor'], hit['name'], hit['version'],
                                    hit['template_uri'], request=request)
        hit['uri'] = "/".join((hit['vendor'], hit['name'], hit['version']))
        hit['image'] = urljoin(base_url, hit['image'])
        hit['smartphoneimage'] = urljoin(base_url, hit['smartphoneimage'])
def _extract_links(self, selector, response_url, response_encoding, base_url):
    '''
    Pretty much the same function, just added 'ignore' to url.encode
    '''
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue
        if isinstance(url, unicode):
            # add 'ignore' to encoding errors
            url = url.encode(response_encoding, 'ignore')
        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=True if el.get('rel') == 'nofollow' else False)
        links.append(link)
    return unique_list(links, key=lambda link: link.url) \
        if self.unique else links
def _post_request(self, route, data, description):
    url = urlparse.urljoin(self.base_url, route)
    extra_logger.info('[%s] %s: POST %s: %s',
                      self.label, description, url, json.dumps(data))
    resp = self.session.post(url,
                             data=json.dumps(data),
                             auth=(self.api_key, ''),
                             timeout=self.request_timeout,
                             )
    return self._handle_resp(resp)
def ci_artifacts(job):
    url = urljoin(urljoin(APPVEYOR_API_JOB_URL, job['jobId'] + '/'), 'artifacts/')
    response = urlopen(url)
    files = json.loads(response.read().decode('utf-8'))  # py3 compat
    response.close()
    for file_ in files:
        file_['url'] = urljoin(url, file_['fileName'])
    return files
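The chained joins above work because each intermediate result ends in a slash, so the next relative segment appends rather than replaces. A sketch with a hypothetical API root and job id:

from urllib.parse import urljoin

base = "https://ci.example.com/api/buildjobs/"  # hypothetical API root
url = urljoin(urljoin(base, "abc123" + "/"), "artifacts/")
print(url)                            # https://ci.example.com/api/buildjobs/abc123/artifacts/
print(urljoin(url, "dist/pkg.whl"))   # https://ci.example.com/api/buildjobs/abc123/artifacts/dist/pkg.whl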
def absolutize(self, uri, defrag=1):
    base = urljoin("file:", pathname2url(os.getcwd()))
    result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
    if defrag:
        result = urldefrag(result)[0]
    if not defrag:
        if uri and uri[-1] == "#" and result[-1] != "#":
            result = "%s#" % result
    return URIRef(result)
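The "%s/" % base step matters: appending a slash to the working-directory base makes a relative URI append to it instead of replacing the last path segment. A sketch with a hypothetical file: base:

from urllib.parse import urljoin

base = "file:///home/user/project"       # hypothetical cwd rendered as a file: URL
print(urljoin(base + "/", "data.ttl"))   # file:///home/user/project/data.ttl
print(urljoin(base, "data.ttl"))         # file:///home/user/data.ttl (segment replaced)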
def browse_directory(dir_url):
    name_dict = {}
    try:
        with closing(urlopen(dir_url)) as urlpath:
            string_from_url = urlpath.read()
    except HTTPError:
        logger.exception("Skipping: %r", dir_url)
        return None

    rhevm_pattern = re.compile(r'<a href="?\'?([^"\']*(?:rhevm\.ova|ovirt)[^"\'>]*)')
    rhevm_image_name = rhevm_pattern.findall(string_from_url)
    rhos_pattern = re.compile(r'<a href="?\'?([^"\']*(?:rhos|openstack|rhelosp)[^"\'>]*)')
    rhos_image_name = rhos_pattern.findall(string_from_url)
    scvmm_pattern = re.compile(r'<a href="?\'?([^"\']*hyperv[^"\'>]*)')
    scvmm_image_name = scvmm_pattern.findall(string_from_url)
    vsphere_pattern = re.compile(r'<a href="?\'?([^"\']*vsphere[^"\'>]*)')
    vsphere_image_name = vsphere_pattern.findall(string_from_url)
    google_pattern = re.compile(r'<a href="?\'?([^"\']*gce[^"\'>]*)')
    google_image_name = google_pattern.findall(string_from_url)
    ec2_pattern = re.compile(r'<a href="?\'?([^"\']*ec2[^"\'>]*)')
    ec2_image_name = ec2_pattern.findall(string_from_url)
    openshift_pattern = re.compile(r'<a href="?\'?(openshift-pods/*)')
    openshift_image_name = openshift_pattern.findall(string_from_url)

    # Note: the original used `len(...) is not 0`, which relies on CPython
    # small-int interning; compare explicitly instead.
    if len(rhevm_image_name) != 0:
        name_dict['template_upload_rhevm'] = rhevm_image_name[0]
    if len(rhos_image_name) != 0:
        name_dict['template_upload_rhos'] = rhos_image_name[0]
    if len(scvmm_image_name) != 0:
        name_dict['template_upload_scvmm'] = scvmm_image_name[0]
    if len(vsphere_image_name) != 0:
        name_dict['template_upload_vsphere'] = vsphere_image_name[0]
    if len(google_image_name) != 0:
        name_dict['template_upload_gce'] = google_image_name[0]
    if len(ec2_image_name) != 0:
        name_dict['template_upload_ec2'] = ec2_image_name[0]
    if len(openshift_image_name) != 0:
        name_dict['template_upload_openshift'] = openshift_image_name[0]

    for key, val in name_dict.items():
        name_dict[key] = urljoin(dir_url, val)

    for key in name_dict.keys():
        if key == 'template_upload_openshift':
            # this is necessary because headers don't contain last-modified date for folders
            # cfme-template is disposed in templates everywhere except 'latest' in 5.9
            # todo: remove this along with refactoring script
            if '5.8' in name_dict[key] or ('5.9' in name_dict[key] and 'latest' in name_dict[key]):
                url = urljoin(name_dict[key], 'cfme-template.yaml')
            else:
                url = urljoin(name_dict[key], 'templates/cfme-template.yaml')
        else:
            url = name_dict[key]
        date = urlopen(url).info().getdate('last-modified')
        name_dict[key + "_date"] = "%02d" % date[1] + "%02d" % date[2]
    return name_dict
def interact(self, ctx, location, ir_err):
    '''Implement Interactor.interact by opening the browser window
    and waiting for the discharge token'''
    p = ir_err.interaction_method(self.kind(), WebBrowserInteractionInfo)
    if not location.endswith('/'):
        location += '/'
    visit_url = urljoin(location, p.visit_url)
    wait_token_url = urljoin(location, p.wait_token_url)
    self._open_web_browser(visit_url)
    return self._wait_for_token(ctx, wait_token_url)
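The trailing-slash check above is what keeps the discharger's own path from being clobbered by the relative join; a sketch with a hypothetical discharger location:

from urllib.parse import urljoin

urljoin("https://idm.example.com/discharge", "visit")   # https://idm.example.com/visit  (path lost)
urljoin("https://idm.example.com/discharge/", "visit")  # https://idm.example.com/discharge/visit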
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    if base_url is None:
        base_url = urljoin(response_url, self.base_url) if self.base_url else response_url

    clean_url = lambda u: urljoin(base_url, replace_entities(clean_link(u.decode(response_encoding))))
    clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()

    links_text = linkre.findall(response_text)
    return [Link(clean_url(url).encode(response_encoding), clean_text(text))
            for url, _, text in links_text]
def absolute_uri(url=None):
    if not url:
        return options.get('system.url-prefix')
    return urljoin(
        options.get('system.url-prefix').rstrip('/') + '/',
        url.lstrip('/'))
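The rstrip/lstrip dance is deliberate: if the configured prefix carries a path and the caller passes an absolute path, a bare urljoin would discard the prefix path. A sketch with a hypothetical prefix:

from urllib.parse import urljoin

prefix = "https://example.com/sentry"          # hypothetical url-prefix with a path
urljoin(prefix, "/organizations/acme/")
# https://example.com/organizations/acme/      (prefix path lost)
urljoin(prefix.rstrip('/') + '/', "/organizations/acme/".lstrip('/'))
# https://example.com/sentry/organizations/acme/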
def expand_frames(self, frames, release):
    last_token = None
    token = None

    cache = self.cache
    sourcemaps = self.sourcemaps
    all_errors = []
    sourcemap_applied = False

    for frame in frames:
        errors = cache.get_errors(frame.abs_path)
        if errors:
            all_errors.extend(errors)

        # can't fetch source if there's no filename present
        if not frame.abs_path:
            continue

        source = self.get_source(frame.abs_path, release)
        if source is None:
            logger.debug('No source found for %s', frame.abs_path)
            continue

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame.abs_path)
        if sourcemap_view and frame.colno is None:
            all_errors.append({
                'type': EventError.JS_NO_COLUMN,
                'url': expose_url(frame.abs_path),
            })
        elif sourcemap_view:
            last_token = token

            if is_data_uri(sourcemap_url):
                sourcemap_label = frame.abs_path
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = expose_url(sourcemap_label)

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame.lineno > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup_token(
                    frame.lineno - 1, frame.colno)
            except Exception:
                token = None
                all_errors.append({
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': frame.colno,
                    'row': frame.lineno,
                    'source': frame.abs_path,
                    'sourcemap': sourcemap_label,
                })

            # Store original data in annotation
            # HACK(dcramer): we stuff things into raw which gets popped off
            # later when adding the raw_stacktrace attribute.
            raw_frame = frame.to_json()
            frame.data = {
                'raw': raw_frame,
                'sourcemap': sourcemap_label,
            }

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug('Mapping compressed source %r to mapping in %r',
                             frame.abs_path, abs_path)
                source = self.get_source(abs_path, release)

                if not source:
                    errors = cache.get_errors(abs_path)
                    if errors:
                        all_errors.extend(errors)
                    else:
                        all_errors.append({
                            'type': EventError.JS_MISSING_SOURCE,
                            'url': expose_url(abs_path),
                        })

            if token is not None:
                # Token's return zero-indexed lineno's
                frame.lineno = token.src_line + 1
                frame.colno = token.src_col
                # The offending function is always the previous function in
                # the stack. Honestly, no idea what the bottom most frame is,
                # so we're ignoring that atm
                if last_token:
                    frame.function = last_token.name or frame.function
                else:
                    frame.function = token.name or frame.function

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above, '~/' means they're coming from
                    # node_modules, so these are not app dependencies
                    if filename.startswith('~/'):
                        frame.in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        frame.in_app = True

                    # Update 'raw' copy to have same in_app status
                    raw_frame['in_app'] = frame.in_app

                    # We want to explicitly generate a webpack module name
                    frame.module = generate_module(filename)

                frame.abs_path = abs_path
                frame.filename = filename
                if not frame.module and abs_path.startswith(
                        ('http:', 'https:', 'webpack:')):
                    frame.module = generate_module(abs_path)

        elif sourcemap_url:
            frame.data = {
                'sourcemap': expose_url(sourcemap_url),
            }

        # TODO: theoretically a minified source could point to another
        # mapped, minified source
        frame.pre_context, frame.context_line, frame.post_context = \
            get_source_context(source=source, lineno=frame.lineno,
                               colno=frame.colno or 0)

        if not frame.context_line and source:
            all_errors.append({
                'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                'column': frame.colno,
                'row': frame.lineno,
                'source': frame.abs_path,
            })

    return all_errors, sourcemap_applied
def _url(target, action):
    return urljoin(target['url'], action)
def discover_sourcemap(result):
    """
    Given a UrlResult object, attempt to discover a sourcemap.
    """
    # When coercing the headers returned by urllib to a dict
    # all keys become lowercase so they're normalized
    sourcemap = result.headers.get('sourcemap', result.headers.get('x-sourcemap'))

    if not sourcemap:
        parsed_body = result.body.split('\n')
        # Source maps are only going to exist at either the top or bottom
        # of the document. Technically, there isn't anything indicating
        # *where* it should exist, so we are generous and assume it's
        # somewhere either in the first or last 5 lines. If it's somewhere
        # else in the document, you're probably doing it wrong.
        if len(parsed_body) > 10:
            possibilities = parsed_body[:5] + parsed_body[-5:]
        else:
            possibilities = parsed_body

        # We want to scan each line sequentially, and the last one found wins
        # This behavior is undocumented, but matches what Chrome and Firefox do.
        for line in possibilities:
            if line[:21] in ('//# sourceMappingURL=', '//@ sourceMappingURL='):
                # We want everything AFTER the indicator, which is 21 chars long
                sourcemap = line[21:].rstrip()

        # If we still haven't found anything, check end of last line AFTER source code.
        # This is not the literal interpretation of the spec, but browsers support it.
        # e.g. {code}//# sourceMappingURL={url}
        if not sourcemap:
            # Only look at last 300 characters to keep search space reasonable
            # (minified JS on a single line could be tens of thousands of
            # chars). This is a totally arbitrary number / best guess; most
            # sourceMappingURLs are relative and not very long.
            search_space = possibilities[-1][-300:].rstrip()
            match = SOURCE_MAPPING_URL_RE.search(search_space)
            if match:
                sourcemap = match.group(1)

    if sourcemap:
        # react-native shoves a comment at the end of the
        # sourceMappingURL line.
        # For example:
        #   sourceMappingURL=app.js.map/*ascii:...*/
        # This comment is completely out of spec and no browser
        # would support this, but we need to strip it to make
        # people happy.
        if '/*' in sourcemap and sourcemap[-2:] == '*/':
            index = sourcemap.index('/*')
            # comment definitely shouldn't be the first character,
            # so let's just make sure of that.
            if index == 0:
                raise AssertionError(
                    'react-native comment found at bad location: %d, %r' %
                    (index, sourcemap))
            sourcemap = sourcemap[:index]
        # fix url so it's absolute
        sourcemap = urljoin(result.url, sourcemap)

    return sourcemap
def UploadFile(request):
    """Handle a file upload (comments translated from Chinese)."""
    if not request.method == "POST":
        return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                            content_type="application/javascript")

    state = "SUCCESS"
    action = request.GET.get("action")
    # Field names used by UEditor for each kind of upload
    upload_field_name = {
        "uploadfile": "fileFieldName",
        "uploadimage": "imageFieldName",
        "uploadscrawl": "scrawlFieldName",
        "catchimage": "catcherFieldName",
        "uploadvideo": "videoFieldName",
    }
    UploadFieldName = request.GET.get(
        upload_field_name[action],
        USettings.UEditorUploadSettings.get(action, "upfile"))

    # Scrawl (doodle) uploads arrive base64-encoded and need special handling
    if action == "uploadscrawl":
        upload_file_name = "scrawl.png"
        upload_file_size = 0
    else:
        # Fetch the uploaded file
        file = request.FILES.get(UploadFieldName, None)
        if file is None:
            return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                                content_type="application/javascript")
        upload_file_name = file.name
        upload_file_size = file.size

    # Original name and extension of the uploaded file
    upload_original_name, upload_original_ext = os.path.splitext(upload_file_name)

    # File type check
    upload_allow_type = {
        "uploadfile": "fileAllowFiles",
        "uploadimage": "imageAllowFiles",
        "uploadvideo": "videoAllowFiles"
    }
    if action in upload_allow_type:
        allow_type = list(request.GET.get(
            upload_allow_type[action],
            USettings.UEditorUploadSettings.get(upload_allow_type[action], "")))
        if upload_original_ext not in allow_type:
            state = u"The server does not allow uploading %s files." % upload_original_ext

    # Size check
    upload_max_size = {
        "uploadfile": "filwMaxSize",
        "uploadimage": "imageMaxSize",
        "uploadscrawl": "scrawlMaxSize",
        "uploadvideo": "videoMaxSize"
    }
    max_size = long(request.GET.get(
        upload_max_size[action],
        USettings.UEditorUploadSettings.get(upload_max_size[action], 0)))
    if max_size != 0:
        from .utils import FileSize
        MF = FileSize(max_size)
        if upload_file_size > MF.size:
            state = u"Uploaded files may not exceed %s." % MF.FriendValue

    # Make sure the save path exists, creating it if necessary
    upload_path_format = {
        "uploadfile": "filePathFormat",
        "uploadimage": "imagePathFormat",
        "uploadscrawl": "scrawlPathFormat",
        "uploadvideo": "videoPathFormat"
    }
    path_format_var = get_path_format_vars()
    path_format_var.update({
        "basename": upload_original_name,
        "extname": upload_original_ext[1:],
        "filename": upload_file_name,
    })
    # Work out the output file path
    OutputPathFormat, OutputPath, OutputFile = get_output_path(
        request, upload_path_format[action], path_format_var)

    # After all checks pass, write the file
    if state == "SUCCESS":
        if action == "uploadscrawl":
            state = save_scrawl_file(request, os.path.join(OutputPath, OutputFile))
        else:
            # Save to a file; on failure ERROR must be returned
            upload_module_name = USettings.UEditorUploadSettings.get(
                "upload_module", None)
            if upload_module_name:
                mod = import_module(upload_module_name)
                state = mod.upload(file, OutputPathFormat)
            else:
                state = save_upload_file(file, os.path.join(OutputPath, OutputFile))

    # Build the response
    return_info = {
        # URL of the saved file
        'url': urljoin(USettings.gSettings.MEDIA_URL, OutputPathFormat),
        'original': upload_file_name,  # original file name
        'type': upload_original_ext,
        # Upload state: SUCCESS on success; any other value is shown
        # verbatim in the upload widget
        'state': state,
        'size': upload_file_size
    }
    return HttpResponse(json.dumps(return_info, ensure_ascii=False),
                        content_type="application/javascript")
def catcher_remote_image(request):
    """Remote image catcher (comments translated from Chinese): when
    catchRemoteImageEnable is true and an image inserted by the frontend is
    not on the same domain as the current site, this function downloads the
    image from the remote host to local storage."""
    if not request.method == "POST":
        return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                            content_type="application/javascript")

    state = "SUCCESS"
    allow_type = list(request.GET.get(
        "catcherAllowFiles",
        USettings.UEditorUploadSettings.get("catcherAllowFiles", "")))
    max_size = long(request.GET.get(
        "catcherMaxSize",
        USettings.UEditorUploadSettings.get("catcherMaxSize", 0)))

    remote_urls = request.POST.getlist("source[]", [])
    catcher_infos = []
    path_format_var = get_path_format_vars()

    for remote_url in remote_urls:
        # Original name and extension of the remote file
        remote_file_name = os.path.basename(remote_url)
        remote_original_name, remote_original_ext = os.path.splitext(remote_file_name)
        # File type check
        if remote_original_ext in allow_type:
            path_format_var.update({
                "basename": remote_original_name,
                "extname": remote_original_ext[1:],
                "filename": remote_original_name
            })
            # Work out the name of the saved file
            o_path_format, o_path, o_file = get_output_path(
                request, "catcherPathFormat", path_format_var)
            o_filename = os.path.join(o_path, o_file).replace("\\", "/")
            # Read the remote image file
            try:
                remote_image = urlopen(remote_url)
                # Write the fetched content to the file
                try:
                    f = open(o_filename, 'wb')
                    f.write(remote_image.read())
                    f.close()
                    state = "SUCCESS"
                except Exception as E:
                    state = u"Error writing fetched image to file: %s" % E.message
            except Exception as E:
                state = u"Error fetching image: %s" % E.message
            catcher_infos.append({
                "state": state,
                "url": urljoin(USettings.gSettings.MEDIA_URL, o_path_format),
                "size": os.path.getsize(o_filename),
                "title": os.path.basename(o_file),
                "original": remote_file_name,
                "source": remote_url
            })

    return_info = {
        "state": "SUCCESS" if len(catcher_infos) > 0 else "ERROR",
        "list": catcher_infos
    }
    return HttpResponse(json.dumps(return_info, ensure_ascii=False),
                        content_type="application/javascript")
def contents(owner, repo):
    r = requests.get(urljoin(GitHubAPI.API_URL,
                             'repos/{}/{}/contents'.format(owner, repo)))
    return r.json()
def __init__(self):
    self.name = "darknet"
    self.source_url = _urlparse.urljoin(MODELS_URL_ROOT, "darknet.mlmodel")
    self.source_md5 = "a06761976a0472cf0553b64ecc15b0fe"
def __init__(self):
    self.source_url = _urlparse.urljoin(
        MODELS_URL_ROOT,
        "drawing_classifier_pre_trained_model_245_classes_v0.mlmodel",
    )
    self.source_md5 = "fc1c04126728514c47991a62b9e66715"
def _get_form_url(form, url):
    if url is None:
        return urljoin(form.base_url, form.action)
    return urljoin(form.base_url, url)
import os
import re

import client
import dom_parser2
from bs4 import BeautifulSoup

buildDirectory = utils.buildDir
# CODE BY NEMZZY AND ECHO
dialog = xbmcgui.Dialog()
translatePath = xbmc.translatePath if PY2 else xbmcvfs.translatePath
filename = os.path.basename(__file__).split('.')[0]
base_domain = 'https://eporner.com'
base_name = base_domain.replace('www.', '')
base_name = re.findall(r'(?:\/\/|\.)([^.]+)\.', base_name)[0].title()
type = 'video'
menu_mode = 200
content_mode = 201
player_mode = 801
search_tag = 1
search_base = urljoin(base_domain, 'search/%s')  # .replace(' ','-')


@utils.url_dispatcher.register('%s' % menu_mode)
def menu():
    lover.checkupdates()
    try:
        url = urljoin(base_domain, 'categories/')
        c = client.request(url)
        soup = BeautifulSoup(c, 'html5lib')
        content = soup.find_all('div', class_={'ctbinner'})
        if not content:
            log_utils.log('Scraping Error in %s:: Content of request: %s'
                          % (base_name.title(), str(c)), log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File',
                        duration=6000, sound=True)
            quit()
# Run tasks in-process, without sending them to the queue (i.e., synchronously).
CELERY_ALWAYS_EAGER = True
# END CELERY

# Use production settings for asset compression so that asset compilation
# can be tested on the CI server.
COMPRESS_ENABLED = True
COMPRESS_OFFLINE = True

# Comprehensive theme settings for testing environment
COMPREHENSIVE_THEME_DIRS = [
    Path(DJANGO_ROOT + "/tests/themes"),
    Path(DJANGO_ROOT + "/tests/themes-dir-2"),
]

DEFAULT_SITE_THEME = "test-theme"

ENTERPRISE_API_URL = urljoin(ENTERPRISE_SERVICE_URL, 'api/v1/')

# Don't bother sending fake events to Segment. Doing so creates unnecessary threads.
SEND_SEGMENT_EVENTS = False

# SPEED
DEBUG = False
TEMPLATE_DEBUG = False

CELERY_EAGER_PROPAGATES_EXCEPTIONS = True
BROKER_BACKEND = 'memory'

# SAILTHRU settings
SAILTHRU_KEY = 'abc123'
SAILTHRU_SECRET = 'top_secret'
def process_frame(self, processable_frame, processing_task):
    frame = processable_frame.frame
    token = None

    cache = self.cache
    sourcemaps = self.sourcemaps
    all_errors = []
    sourcemap_applied = False

    # can't fetch source if there's no filename present or no line
    if not frame.get('abs_path') or not frame.get('lineno'):
        return

    # can't fetch if this is internal node module as well
    # therefore we only process user-land frames (starting with /)
    # or those created by bundle/webpack internals
    if self.data.get('platform') == 'node' and \
            not frame.get('abs_path').startswith(('/', 'app:', 'webpack:')):
        return

    errors = cache.get_errors(frame['abs_path'])
    if errors:
        all_errors.extend(errors)

    # This might fail but that's okay, we try with a different path a
    # bit later down the road.
    source = self.get_sourceview(frame['abs_path'])

    in_app = None
    new_frame = dict(frame)
    raw_frame = dict(frame)

    sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
    self.sourcemaps_touched.add(sourcemap_url)
    if sourcemap_view and frame.get('colno') is None:
        all_errors.append({
            'type': EventError.JS_NO_COLUMN,
            'url': http.expose_url(frame['abs_path']),
        })
    elif sourcemap_view:
        if is_data_uri(sourcemap_url):
            sourcemap_label = frame['abs_path']
        else:
            sourcemap_label = sourcemap_url

        sourcemap_label = http.expose_url(sourcemap_label)

        if frame.get('function'):
            minified_function_name = frame['function']
            minified_source = self.get_sourceview(frame['abs_path'])
        else:
            minified_function_name = minified_source = None

        try:
            # Errors are 1-indexed in the frames, so we need to -1 to get
            # zero-indexed value from tokens.
            assert frame['lineno'] > 0, "line numbers are 1-indexed"
            token = sourcemap_view.lookup(frame['lineno'] - 1,
                                          frame['colno'] - 1,
                                          minified_function_name,
                                          minified_source)
        except Exception:
            token = None
            all_errors.append({
                'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                'column': frame.get('colno'),
                'row': frame.get('lineno'),
                'source': frame['abs_path'],
                'sourcemap': sourcemap_label,
            })

        # persist the token so that we can find it later
        processable_frame.data['token'] = token

        # Store original data in annotation
        new_frame['data'] = dict(frame.get('data') or {},
                                 sourcemap=sourcemap_label)

        sourcemap_applied = True

        if token is not None:
            abs_path = urljoin(sourcemap_url, token.src)

            logger.debug('Mapping compressed source %r to mapping in %r',
                         frame['abs_path'], abs_path)
            source = self.get_sourceview(abs_path)

            if source is None:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append({
                        'type': EventError.JS_MISSING_SOURCE,
                        'url': http.expose_url(abs_path),
                    })

        if token is not None:
            # the tokens are zero indexed, so offset correctly
            new_frame['lineno'] = token.src_line + 1
            new_frame['colno'] = token.src_col + 1

            # Try to use the function name we got from symbolic
            original_function_name = token.function_name

            # In the ideal case we can use the function name from the
            # frame and the location to resolve the original name
            # through the heuristics in our sourcemap library.
            if original_function_name is None:
                last_token = None

                # Find the previous token for function name handling as a
                # fallback.
                if processable_frame.previous_frame and \
                        processable_frame.previous_frame.processor is self:
                    last_token = processable_frame.previous_frame.data.get('token')

                if last_token:
                    original_function_name = last_token.name

            if original_function_name is not None:
                new_frame['function'] = original_function_name

            filename = token.src
            # special case webpack support
            # abs_path will always be the full path with webpack:/// prefix.
            # filename will be relative to that
            if abs_path.startswith('webpack:'):
                filename = abs_path
                # webpack seems to use ~ to imply "relative to resolver root"
                # which is generally seen for third party deps
                # (i.e. node_modules)
                if '/~/' in filename:
                    filename = '~/' + abs_path.split('/~/', 1)[-1]
                else:
                    filename = filename.split('webpack:///', 1)[-1]

                # As noted above:
                # * [js/node] '~/' means they're coming from node_modules,
                #   so these are not app dependencies
                # * [node] same goes for `./node_modules/`, which is used
                #   when bundling node apps
                # * [node] and webpack, which includes its own code to
                #   bootstrap all modules and its internals
                #   eg. webpack:///webpack/bootstrap, webpack:///external
                if filename.startswith('~/') or \
                        filename.startswith('./node_modules/') or \
                        not filename.startswith('./'):
                    in_app = False
                # And conversely, local dependencies start with './'
                elif filename.startswith('./'):
                    in_app = True

                # We want to explicitly generate a webpack module name
                new_frame['module'] = generate_module(filename)

            if abs_path.startswith('app:'):
                if filename and NODE_MODULES_RE.search(filename):
                    in_app = False
                else:
                    in_app = True

            new_frame['abs_path'] = abs_path
            new_frame['filename'] = filename
            if not frame.get('module') and abs_path.startswith(
                    ('http:', 'https:', 'webpack:', 'app:')):
                new_frame['module'] = generate_module(abs_path)

    elif sourcemap_url:
        new_frame['data'] = dict(new_frame.get('data') or {},
                                 sourcemap=http.expose_url(sourcemap_url))

    # TODO: theoretically a minified source could point to
    # another mapped, minified source
    changed_frame = self.expand_frame(new_frame, source=source)

    # If we did not manage to match but we do have a line or column
    # we want to report an error here.
    if not new_frame.get('context_line') \
            and source and \
            new_frame.get('colno') is not None:
        all_errors.append({
            'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
            'column': new_frame['colno'],
            'row': new_frame['lineno'],
            'source': new_frame['abs_path'],
        })

    changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
    if sourcemap_applied or all_errors or changed_frame or \
            changed_raw:
        if in_app is not None:
            new_frame['in_app'] = in_app
            raw_frame['in_app'] = in_app
        return [new_frame], [raw_frame] if changed_raw else None, all_errors
def skeletonize(packages, output_dir=".", version=None, recursive=False,
                all_urls=False, pypi_url='https://pypi.io/pypi/', noprompt=False,
                version_compare=False, python_version=default_python,
                manual_url=False, all_extras=False, noarch_python=False,
                config=None, setup_options=None, extra_specs=[],
                pin_numpy=False):
    package_dicts = {}

    if not setup_options:
        setup_options = []

    if isinstance(setup_options, string_types):
        setup_options = [setup_options]

    if not config:
        config = Config()

    created_recipes = []
    while packages:
        package = packages.pop()
        created_recipes.append(package)

        is_url = ':' in package

        if is_url:
            package_pypi_url = ''
        else:
            package_pypi_url = urljoin(pypi_url, '/'.join((package, 'json')))

        if not is_url:
            dir_path = join(output_dir, package.lower())
            if exists(dir_path) and not version_compare:
                raise RuntimeError("directory already exists: %s" % dir_path)

        d = package_dicts.setdefault(package, {
            'packagename': package.lower(),
            'run_depends': '',
            'build_depends': '',
            'entry_points': '',
            'test_commands': '',
            'tests_require': '',
        })

        if is_url:
            del d['packagename']

        if is_url:
            d['version'] = 'UNKNOWN'
            # Make sure there is always something to pass in for this
            pypi_data = {}
        else:
            sort_by_version = lambda l: sorted(l, key=parse_version)

            pypi_resp = requests.get(package_pypi_url)

            if pypi_resp.status_code != 200:
                sys.exit("Request to fetch %s failed with status: %d"
                         % (package_pypi_url, pypi_resp.status_code))

            pypi_data = pypi_resp.json()
            versions = sort_by_version(pypi_data['releases'].keys())

            if version_compare:
                version_compare(versions)
            if version:
                if version not in versions:
                    sys.exit("Error: Version %s of %s is not available on PyPI."
                             % (version, package))
                d['version'] = version
            else:
                # select the most visible version from PyPI.
                if not versions:
                    sys.exit("Error: Could not find any versions of package %s"
                             % package)
                if len(versions) > 1:
                    print("Warning, the following versions were found for %s"
                          % package)
                    for ver in versions:
                        print(ver)
                    print("Using %s" % versions[-1])
                    print("Use --version to specify a different version.")
                d['version'] = versions[-1]

        data, d['pypiurl'], d['filename'], d['digest'] = get_download_data(
            pypi_data, package, d['version'], is_url, all_urls, noprompt,
            manual_url)

        d['import_tests'] = ''

        # Get summary and description directly from the metadata returned
        # from PyPI. summary will be pulled from package information in
        # get_package_metadata or a default value set if it turns out that
        # data['summary'] is empty.
        d['summary'] = data.get('summary', '')
        d['description'] = data.get('description', '')
        get_package_metadata(package, d, data, output_dir, python_version,
                             all_extras, recursive, created_recipes,
                             noarch_python, noprompt, packages, extra_specs,
                             config=config, setup_options=setup_options)

        # Set these *after* get_package_metadata so that the preferred hash
        # can be calculated from the downloaded file, if necessary.
        d['hash_type'] = d['digest'][0]
        d['hash_value'] = d['digest'][1]

        # Change requirements to use format that guarantees the numpy
        # version will be pinned when the recipe is built and that
        # the version is included in the build string.
        if pin_numpy:
            for depends in ['build_depends', 'run_depends']:
                deps = d[depends]
                numpy_dep = [idx for idx, dep in enumerate(deps)
                             if 'numpy' in dep]
                if numpy_dep:
                    # Turns out this needs to be inserted before the rest
                    # of the numpy spec.
                    deps.insert(numpy_dep[0], 'numpy x.x')
                    d[depends] = deps

    for package in package_dicts:
        d = package_dicts[package]
        name = d['packagename']
        makedirs(join(output_dir, name))
        print("Writing recipe for %s" % package.lower())
        with open(join(output_dir, name, 'meta.yaml'), 'w') as f:
            rendered_recipe = PYPI_META_HEADER.format(**d)

            ordered_recipe = ruamel_yaml.comments.CommentedMap()
            # Create all keys in expected ordered
            for key in EXPECTED_SECTION_ORDER:
                try:
                    ordered_recipe[key] = PYPI_META_STATIC[key]
                except KeyError:
                    ordered_recipe[key] = ruamel_yaml.comments.CommentedMap()

            if d['entry_points']:
                ordered_recipe['build']['entry_points'] = d['entry_points']

            if noarch_python:
                ordered_recipe['build']['noarch'] = 'python'

            ordered_recipe['build']['script'] = ('python setup.py install '
                                                 + ' '.join(setup_options))
            if any(re.match(r'^setuptools(?:\s|$)', req)
                   for req in d['build_depends']):
                ordered_recipe['build']['script'] += (
                    '--single-version-externally-managed --record=record.txt')

            # Always require python as a dependency
            ordered_recipe['requirements'] = ruamel_yaml.comments.CommentedMap()
            ordered_recipe['requirements']['build'] = ['python'] + ensure_list(d['build_depends'])
            ordered_recipe['requirements']['run'] = ['python'] + ensure_list(d['run_depends'])

            if d['import_tests']:
                ordered_recipe['test']['imports'] = d['import_tests']

            if d['test_commands']:
                ordered_recipe['test']['commands'] = d['test_commands']

            if d['tests_require']:
                ordered_recipe['test']['requires'] = d['tests_require']

            ordered_recipe['about'] = ruamel_yaml.comments.CommentedMap()

            for key in ABOUT_ORDER:
                try:
                    ordered_recipe['about'][key] = d[key]
                except KeyError:
                    ordered_recipe['about'][key] = ''

            ordered_recipe['extra']['recipe-maintainers'] = ''

            # Prune any top-level sections that are empty
            for key in EXPECTED_SECTION_ORDER:
                if not ordered_recipe[key]:
                    del ordered_recipe[key]
                else:
                    rendered_recipe += ruamel_yaml.dump(
                        {key: ordered_recipe[key]},
                        Dumper=ruamel_yaml.RoundTripDumper,
                        default_flow_style=False,
                        width=200)
                    rendered_recipe += '\n'

            # make sure that recipe ends with one newline, by god.
            rendered_recipe.rstrip()

            # This hackery is necessary because
            # - the default indentation of lists is not what we would like.
            #   Ideally we'd contact the ruamel.yaml auther to find the right
            #   way to do this. See this PR thread for more:
            #   https://github.com/conda/conda-build/pull/2205#issuecomment-315803714
            #   Brute force fix below.

            # Fix the indents
            recipe_lines = []
            for line in rendered_recipe.splitlines():
                match = re.search(r'^\s+(-) ', line, flags=re.MULTILINE)
                if match:
                    pre, sep, post = line.partition('-')
                    sep = ' ' + sep
                    line = pre + sep + post
                recipe_lines.append(line)
            rendered_recipe = '\n'.join(recipe_lines)

            f.write(rendered_recipe)
def album_url(albumid):
    return urljoin(BASE_URL, 'release/' + albumid)
def track_url(trackid):
    return urljoin(BASE_URL, 'recording/' + trackid)
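Both helpers assume BASE_URL ends in a slash so the relative path appends rather than replacing a segment; a sketch with a hypothetical base and ids:

from urllib.parse import urljoin

BASE_URL = "https://musicbrainz.org/"  # hypothetical; note the trailing slash
print(urljoin(BASE_URL, 'release/' + '3b3d130a'))    # https://musicbrainz.org/release/3b3d130a
print(urljoin(BASE_URL, 'recording/' + 'f970f1e0'))  # https://musicbrainz.org/recording/f970f1e0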
def _spider(url, visited, root, depth, max_depth, raise_on_error):
    """Fetches URL and any pages it links to up to max_depth.

    depth should initially be zero, and max_depth is the max depth of
    links to follow from the root.

    Prints out a warning only if the root can't be fetched; it ignores
    errors with pages that the root links to.

    Returns a tuple of:
    - pages: dict of pages visited (URL) mapped to their full text.
    - links: set of links encountered while visiting the pages.
    """
    pages = {}     # dict from page URL -> text content.
    links = set()  # set of all links seen on visited pages.

    # root may end with index.html -- chop that off.
    if root.endswith('/index.html'):
        root = re.sub('/index.html$', '', root)

    try:
        context = None
        if sys.version_info < (2, 7, 9) or \
                ((3,) < sys.version_info < (3, 4, 3)):
            if not spack.insecure:
                tty.warn("Spack will not check SSL certificates. You need to "
                         "update your Python to enable certificate "
                         "verification.")
        else:
            # We explicitly create default context to avoid error described in
            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
            context = ssl._create_unverified_context() \
                if spack.insecure \
                else ssl.create_default_context()

        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        if "Content-type" not in resp.headers:
            tty.debug("ignoring page " + url)
            return pages, links

        if not resp.headers["Content-type"].startswith('text/html'):
            tty.debug("ignoring page " + url + " with content type " +
                      resp.headers["Content-type"])
            return pages, links

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = _urlopen(req, timeout=_timeout, context=context)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return.
        page = response.read().decode('utf-8')
        pages[response_url] = page

        # Parse out the links in the page.
        link_parser = LinkParser()
        subcalls = []
        link_parser.feed(page)

        while link_parser.links:
            raw_link = link_parser.links.pop()
            abs_link = urljoin(response_url, raw_link.strip())

            links.add(abs_link)

            # Skip stuff that looks like an archive.
            if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                continue

            # Skip things outside the root directory.
            if not abs_link.startswith(root):
                continue

            # Skip already-visited links.
            if abs_link in visited:
                continue

            # If we're not at max depth, follow links.
            if depth < max_depth:
                subcalls.append((abs_link, visited, root, depth + 1,
                                 max_depth, raise_on_error))
                visited.add(abs_link)

        if subcalls:
            pool = NonDaemonPool(processes=len(subcalls))
            try:
                results = pool.map(_spider_wrapper, subcalls)
                for sub_pages, sub_links in results:
                    pages.update(sub_pages)
                    links.update(sub_links)
            finally:
                pool.terminate()
                pool.join()

    except URLError as e:
        tty.debug(e)

        if hasattr(e, 'reason') and isinstance(e.reason, ssl.SSLError):
            tty.warn("Spack was unable to fetch url list due to a certificate "
                     "verification problem. You can try running spack -k, "
                     "which will not check SSL certificates. Use this at your "
                     "own risk.")

        if raise_on_error:
            raise NoNetworkConnectionError(str(e), url)

    except HTMLParseError as e:
        # This error indicates that Python's HTML parser sucks.
        msg = "Got an error parsing HTML."

        # Pre-2.7.3 Pythons in particular have rather prickly HTML parsing.
        if sys.version_info[:3] < (2, 7, 3):
            msg += " Use Python 2.7.3 or newer for better HTML parsing."

        tty.warn(msg, url, "HTMLParseError: " + str(e))

    except Exception as e:
        # Other types of errors are completely ignored, except in debug mode.
        tty.debug("Error in _spider: %s:%s" % (type(e), e),
                  traceback.format_exc())

    return pages, links
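# A hedged driver sketch for _spider. The wrapper name and its defaults are
# assumptions (no public entry point is shown in this snippet), but the
# positional arguments match the signature above: 'visited' starts empty
# and is threaded through the recursive subcalls so that each page is
# fetched at most once across the parallel workers.
def spider(root_url, max_depth=1, raise_on_error=False):
    pages, links = _spider(root_url, set(), root_url, 0, max_depth,
                           raise_on_error)
    return pages, links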
def setup_injection_workflow(workflow, output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None, tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in
    a workflow. It should be possible for this function to support a number
    of different ways/codes that could be used for doing this, however as
    this will presumably stay as a single call to a single code (which need
    not be inspinj) there are currently no subfunctions in this module.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given
        to the code at run time.
    exttrig_file : pycbc.workflow.core.File (optional, default=None)
        A file containing external trigger information, passed to the
        injection job when using the coherent PTF injection methods.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    -------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name,
                                               out_dir=output_dir, ifos='HL',
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            injectionFilePath = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            injectionFilePath = resolve_url(injectionFilePath)
            file_url = urljoin('file:', pathname2url(injectionFilePath))
            inj_file = File('HL', 'PREGEN_inj_file', full_segment, file_url,
                            tags=curr_tags)
            inj_file.PFN(injectionFilePath, site='local')
            inj_files.append(inj_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW",
                                  "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name,
                                               out_dir=output_dir, ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp, 'em_bright_filter', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]
            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp, 'jitter_skyloc', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]
            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp, 'align_total_spin', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = align_job.create_node(inj_file, full_segment,
                                             curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]
            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, AT_RUNTIME, "
            err += "PREGENERATED, IN_COH_PTF_WORKFLOW or AT_COH_PTF_RUNTIME. "
            err += "Got %s." % (injection_method)
            raise ValueError(err)
        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
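# A hedged usage sketch. The [workflow-injections] section, the
# injections-method option, and the sub-sections of [injections] are the
# configuration hooks the function above actually reads; the surrounding
# Workflow construction is assumed and elided here. Each sub-section such
# as [injections-bnsinj] yields the tag 'BNSINJ' and one entry in the
# returned FileList.
inj_files, inj_tags = setup_injection_workflow(workflow,
                                               output_dir='injections',
                                               tags=['FULL_DATA'])
for tag, inj_file in zip(inj_tags, inj_files):
    logging.info("Injection set %s -> %s", tag, inj_file)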
def _get_system_stats(self, server, name, instance, tags):
    url = urljoin(server, "/_node/{0}/_system".format(name))

    # Fetch _system (Erlang) stats.
    return self.agent_check.get(url, instance, tags)
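# Illustrative check (not from the source): because the path handed to
# urljoin starts with '/', any path prefix on 'server' is discarded and
# only its scheme and host survive, so callers should pass the server root.
from urllib.parse import urljoin

assert urljoin('http://localhost:5984/some/prefix',
               '/_node/_local/_system') == \
    'http://localhost:5984/_node/_local/_system'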
def check(self, instance):
    kubelet_conn_info = get_connection_info()
    endpoint = kubelet_conn_info.get('url')
    if endpoint is None:
        raise CheckException(
            "Unable to detect the kubelet URL automatically.")

    self.kube_health_url = urljoin(endpoint, KUBELET_HEALTH_PATH)
    self.node_spec_url = urljoin(endpoint, NODE_SPEC_PATH)
    self.pod_list_url = urljoin(endpoint, POD_LIST_PATH)
    self.stats_url = urljoin(endpoint, STATS_PATH)
    self.instance_tags = instance.get('tags', [])
    self.kubelet_credentials = KubeletCredentials(kubelet_conn_info)

    # Test the kubelet health ASAP
    self._perform_kubelet_check(self.instance_tags)

    if 'cadvisor_metrics_endpoint' in instance:
        self.cadvisor_scraper_config['prometheus_url'] = instance.get(
            'cadvisor_metrics_endpoint',
            urljoin(endpoint, CADVISOR_METRICS_PATH))
    else:
        self.cadvisor_scraper_config['prometheus_url'] = instance.get(
            'metrics_endpoint', urljoin(endpoint, CADVISOR_METRICS_PATH))
        if 'metrics_endpoint' in instance:
            self.log.warning(
                'metrics_endpoint is deprecated, please specify '
                'cadvisor_metrics_endpoint instead.')

    self.kubelet_scraper_config['prometheus_url'] = instance.get(
        'kubelet_metrics_endpoint',
        urljoin(endpoint, KUBELET_METRICS_PATH))

    # Kubelet credentials handling
    self.kubelet_credentials.configure_scraper(self.cadvisor_scraper_config)
    self.kubelet_credentials.configure_scraper(self.kubelet_scraper_config)

    # Legacy cadvisor support
    try:
        self.cadvisor_legacy_url = self.detect_cadvisor(
            endpoint, self.cadvisor_legacy_port)
    except Exception as e:
        self.log.debug(
            'cAdvisor not found, running in prometheus mode: %s', e)

    self.pod_list = self.retrieve_pod_list()
    self.pod_list_utils = PodListUtils(self.pod_list)

    self._report_node_metrics(self.instance_tags)
    self._report_pods_running(self.pod_list, self.instance_tags)
    self._report_container_spec_metrics(self.pod_list, self.instance_tags)
    self._report_container_state_metrics(self.pod_list, self.instance_tags)

    self.stats = self._retrieve_stats()
    self._report_ephemeral_storage_usage(self.pod_list, self.stats,
                                         self.instance_tags)
    self._report_system_container_metrics(self.stats, self.instance_tags)

    if self.cadvisor_legacy_url:  # Legacy cAdvisor
        self.log.debug('processing legacy cadvisor metrics')
        self.process_cadvisor(instance, self.cadvisor_legacy_url,
                              self.pod_list, self.pod_list_utils)
    elif self.cadvisor_scraper_config['prometheus_url']:  # Prometheus
        self.log.debug('processing cadvisor metrics')
        self.process(self.cadvisor_scraper_config,
                     metric_transformers=self.transformers)

    if self.kubelet_scraper_config['prometheus_url']:  # Prometheus
        self.log.debug('processing kubelet metrics')
        self.process(self.kubelet_scraper_config,
                     metric_transformers=self.transformers)

    # Free up memory
    self.pod_list = None
    self.pod_list_utils = None
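# A hedged illustration of how the kubelet URLs above are derived. The
# constant values below are assumptions modelled on their names (only the
# identifiers appear in this snippet); the point is that urljoin against a
# bare host:port endpoint simply appends the absolute path.
from urllib.parse import urljoin

KUBELET_HEALTH_PATH = '/healthz'  # assumed value
POD_LIST_PATH = '/pods'           # assumed value

endpoint = 'https://10.0.0.1:10250'
assert urljoin(endpoint, KUBELET_HEALTH_PATH) == \
    'https://10.0.0.1:10250/healthz'
assert urljoin(endpoint, POD_LIST_PATH) == 'https://10.0.0.1:10250/pods'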