def unquote_folder_paths(state, schema):
    try:
        NodeSettings = state.get_model('addons_googledrive', 'nodesettings')
        targets = NodeSettings.objects.filter(folder_path__isnull=False)
    except LookupError:
        return
    for obj in targets:
        try:
            obj.folder_path = unquote(obj.folder_path).decode('utf-8')
        except UnicodeEncodeError:
            # Python 2: calling .decode() on an already-unicode value first
            # re-encodes it with ASCII, which raises UnicodeEncodeError for
            # non-ASCII paths; fall back to the plain unquoted value.
            obj.folder_path = unquote(obj.folder_path)
    bulk_update(targets, update_fields=['folder_path'])

def from_bytes(cls, bytes):
    """
    Parse a URL from some bytes.
    """
    try:
        # this belongs on the first thing likely to cause a (Type)Error
        scheme, _, rest = bytes.strip().partition(b":")
    except Exception:
        exception = InvalidURL("{!r} is not a valid URL".format(bytes))
        raise_with_traceback(exception)

    if scheme and not rest.startswith(b"//"):
        raise InvalidURL(
            "{!r} is not a valid URL without initial '//'".format(bytes),
        )

    authority, slash, rest = rest[2:].partition(b"/")
    userinfo, _, host_and_port = authority.rpartition(b"@")
    username, _, password = userinfo.partition(b":")

    if host_and_port.startswith(b"["):
        # IPv6 Host
        host, delimiter, port_str = host_and_port.partition(b"]:")
        host += b"]" if delimiter else b""
    else:
        host, _, port_str = host_and_port.partition(b":")

    if not port_str:
        port = None
    else:
        try:
            port = int(unquote(port_str))
        except ValueError:
            raise InvalidURL("{!r} is not a valid port".format(port_str))

    path, _, rest = rest.partition(b"?")
    query, _, fragment = rest.partition(b"#")

    return cls.normalized(
        scheme=scheme,
        username=username,
        password=password,
        host=host,
        port=port,
        path=unquote(slash + path),
        query=parse_qs(query, keep_blank_values=True),
        fragment=unquote_plus(fragment),
        unnormalized=bytes,
        unnormalized_authority=authority,
        unnormalized_userinfo=userinfo,
    )

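# A usage sketch for from_bytes, assuming it is a classmethod on a URL class
# exposing the `normalized` constructor seen above (the class itself is not
# part of this snippet; names here are illustrative):
#
#   url = URL.from_bytes(b"http://bob:hunter2@[::1]:8080/a%20b?q=1#frag")
#   # scheme b"http", username b"bob", password b"hunter2", host b"[::1]",
#   # port 8080, path "/a b" (percent-decoded), query {"q": ["1"]},
#   # fragment "frag"
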
def caidao_decode(data, *args, **kwargs):
    p = PrintCollector()
    data_dict = query_str_2_dict(data.strip())
    d = {}
    for k, v in data_dict.items():
        v = unquote(v)
        try:
            x = force_bytes(v)
            missing_padding = len(v) % 4
            if missing_padding != 0:
                x += b'=' * (4 - missing_padding)
            d[k] = force_text(base64.decodebytes(x))
        except Exception as e:
            print(e)
            d[k] = v
    z0_raw = ''
    if 'z0' in d:
        z0_raw = d['z0']
        d['z0'] = ';\n'.join(d['z0'].split(';'))
    for k, v in d.items():
        value = '{}:\n{}\n'.format(k, v)
        p.print(value)
        if k == 'z0':
            if value != 'z0:\n{}\n'.format(z0_raw):
                p.print('z0_raw:\n{}\n'.format(z0_raw))
    return p.smart_output()

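# The padding repair in caidao_decode above is the standard base64 fix-up; a
# self-contained sketch of just that step (pad_b64 is a hypothetical helper
# name, not from the snippet). Note the snippet measures len(v) on the decoded
# text rather than on the byte string, which only coincides for ASCII input.
import base64

def pad_b64(raw):
    """Right-pad a base64 payload to a multiple of 4 with '='."""
    missing = len(raw) % 4
    return raw + b'=' * (4 - missing) if missing else raw

assert base64.decodebytes(pad_b64(b'aGVsbG8')) == b'hello'
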
def save_file(self, url_parsed, res):
    try:
        md5 = hashlib.md5(res.body).hexdigest()
        netloc = url_parsed.netloc
        site_dir_name = netloc.replace('.', '_').replace(':', '_')
        if url_parsed.path == '':
            path = site_dir_name + '/'
        else:
            path = site_dir_name + url_parsed.path
        path = unquote(path)
        if path.endswith('/'):
            path = path + 'index.html'
        dir_name = os.path.dirname(path)
        if md5 in self.file_md5_dict and self.file_md5_dict[md5] == path:
            return
        logger.warning('saved: {}'.format(path))
        # A directory and a file cannot share the same name under one parent,
        # so the colliding name has to be changed before saving.
        if os.path.exists(path) and os.path.isdir(path):
            path = path + '_' + str(uuid.uuid4())[-3:]
        elif os.path.exists(dir_name) and os.path.isfile(dir_name):
            dir_name = dir_name + '_' + str(uuid.uuid4())[-3:]
            # join with '/' so the renamed directory and the file name don't fuse
            path = dir_name + '/' + path.split('/')[-1]
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        with open(path, 'wb') as f:
            f.write(res.body)
        self.file_md5_dict[md5] = path
    except Exception as e:
        logger.error(e)

def endpointForURI(self, uri):
    if uri.scheme in (b"http", b"https", b"ws", b"wss"):
        defaultport = 443 if uri.scheme in (b"https", b"wss") else 80
        host, port = BaseUrl.parsenetloc(uri.netloc, defaultport)
        endpoint = t_endpoints.HostnameEndpoint(self.reactor, host, port)
        if defaultport == 443:
            ssl_supported = hasattr(t_endpoints, "TLSWrapperClientEndpoint")
            try:
                from twisted.internet.ssl import optionsForClientTLS
            except ImportError:
                ssl_supported = False
            if not ssl_supported:
                raise t_error.SchemeNotSupported(
                    "{} not supported (OpenSSL is not available)".format(
                        uri.scheme.decode("utf_8")))
            options = optionsForClientTLS(host.decode("utf_8"))
            endpoint = t_endpoints.TLSWrapperClientEndpoint(options, endpoint)
        return endpoint
    if uri.scheme == b"unix":
        path = url_parse.unquote(uri.netloc.decode("ascii"))
        uri.netloc = b"localhost"
        return t_endpoints.UNIXClientEndpoint(self.reactor, path)
    raise t_error.SchemeNotSupported(
        "{} not supported (unrecognized)".format(uri.scheme.decode("utf_8")))

def delete(self, session):
    """ Delete seen entries """
    args = seen_delete_parser.parse_args()
    value = args['value']
    is_seen_local = args['is_seen_local']

    if value:
        value = unquote(value)
        value = '%' + value + '%'
    seen_entries_list = seen.search(value=value, status=is_seen_local, session=session)

    if not seen_entries_list.all():
        return {'status': 'error', 'message': 'no results to delete'}, 404

    for entry in seen_entries_list:
        try:
            seen.forget_by_id(entry.id)
        except ValueError:
            return {
                'status': 'error',
                'message': 'Could not delete entry ID {0}'.format(entry.id)
            }, 500
    return {}

def matches(self, entry, regexp, find_from=None, not_regexps=None):
    """
    Check if :entry: has any string fields or strings in a list field that match :regexp:

    :param entry: Entry instance
    :param regexp: Compiled regexp
    :param find_from: None or a list of fields to search from
    :param not_regexps: None or list of regexps that can NOT match
    :return: Field matching
    """
    unquote_fields = ['url']
    for field in find_from or ['title', 'description']:
        # Only evaluate lazy fields if find_from has been explicitly specified
        if not entry.get(field, eval_lazy=find_from):
            continue
        # Make all fields into lists for search purposes
        values = entry[field]
        if not isinstance(values, list):
            values = [values]
        for value in values:
            if not isinstance(value, basestring):
                continue
            if field in unquote_fields:
                value = unquote(value)
            # If none of the not_regexps match
            if regexp.search(value):
                # Make sure the not_regexps do not match for this field
                for not_regexp in not_regexps or []:
                    if self.matches(entry, not_regexp, find_from=[field]):
                        entry.trace('Configured not_regexp %s matched, ignored' % not_regexp)
                        break
                else:
                    # None of the not_regexps matched
                    return field

def on_task_download(self, task, config):
    config = self.prepare_config(config, task)
    for entry in task.accepted:
        ftp_url = urlparse(entry.get('url'))
        ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
        current_path = os.path.dirname(ftp_url.path)
        try:
            ftp = self.ftp_connect(config, ftp_url, current_path)
        except ftplib.all_errors as e:
            entry.fail("Unable to connect to server : %s" % (e))
            break

        if not os.path.isdir(config['ftp_tmp_path']):
            log.debug('creating base path: %s' % config['ftp_tmp_path'])
            os.mkdir(config['ftp_tmp_path'])

        file_name = os.path.basename(ftp_url.path)

        try:
            # Directory
            ftp = self.check_connection(ftp, config, ftp_url, current_path)
            ftp.cwd(file_name)
            self.ftp_walk(ftp, os.path.join(config['ftp_tmp_path'], file_name), config, ftp_url, ftp_url.path)
            ftp = self.check_connection(ftp, config, ftp_url, current_path)
            ftp.cwd('..')
            if config['delete_origin']:
                ftp.rmd(file_name)
        except ftplib.error_perm:
            # File
            self.ftp_down(ftp, file_name, config['ftp_tmp_path'], config, ftp_url, current_path)

        ftp.close()

def BuildToken(request, execution_time):
    """Build an ACLToken from the request."""
    # The request.args dictionary will also be filled on HEAD calls.
    if request.method in ["GET", "HEAD"]:
        reason = request.args.get("reason", "")
    elif request.method in ["POST", "DELETE", "PATCH"]:
        # The header X-GRR-Reason is set in api-service.js.
        reason = utils.SmartUnicode(
            urlparse.unquote(request.headers.get("X-Grr-Reason", "")))

    # We assume that request.user contains the username that we can trust.
    # No matter what authentication method is used, the WebAuthManager is
    # responsible for authenticating the user and setting request.user to
    # a correct value (see gui/webauth.py).
    #
    # The token that's built here will be later used to find an API router,
    # get the ApiCallHandler from the router, and then to call the handler's
    # Handle() method. The API router will be responsible for all the ACL checks.
    token = access_control.ACLToken(
        username=request.user,
        reason=reason,
        process="GRRAdminUI",
        expiry=rdfvalue.RDFDatetime.Now() + execution_time)

    for field in ["Remote_Addr", "X-Forwarded-For"]:
        remote_addr = request.headers.get(field, "")
        if remote_addr:
            token.source_ips.append(remote_addr)
    return token

def matches(self, entry, regexp, find_from=None, not_regexps=None):
    """
    Check if :entry: has any string fields or strings in a list field that match :regexp:

    :param entry: Entry instance
    :param regexp: Compiled regexp
    :param find_from: None or a list of fields to search from
    :param not_regexps: None or list of regexps that can NOT match
    :return: Field matching
    """
    unquote_fields = ['url']
    for field in find_from or ['title', 'description']:
        # Only evaluate lazy fields if find_from has been explicitly specified
        if not entry.get(field, eval_lazy=find_from):
            continue
        # Make all fields into lists for search purposes
        values = entry[field]
        if not isinstance(values, list):
            values = [values]
        for value in values:
            if not isinstance(value, basestring):
                value = str(value)
            if field in unquote_fields:
                value = unquote(value)
            # If none of the not_regexps match
            if regexp.search(value):
                # Make sure the not_regexps do not match for this field
                for not_regexp in not_regexps or []:
                    if self.matches(entry, not_regexp, find_from=[field]):
                        entry.trace('Configured not_regexp %s matched, ignored' % not_regexp)
                        break
                else:
                    # None of the not_regexps matched
                    return field

def get(self, session):
    """ Search for seen entries """
    args = seen_search_parser.parse_args()
    value = args["value"]
    page = args["page"]
    page_size = args["page_size"]
    is_seen_local = args["is_seen_local"]
    sort_by = args["sort_by"]
    order = args["order"]

    # Handle max size limit
    if page_size > 100:
        page_size = 100

    # Handle the sort order default when it is explicitly passed
    if order == "desc":
        order = True
    else:
        order = False

    # Unquotes and prepares value for DB lookup
    if value:
        value = unquote(value)
        value = "%{0}%".format(value)

    start = page_size * (page - 1)
    stop = start + page_size

    kwargs = {
        "value": value,
        "status": is_seen_local,
        "stop": stop,
        "start": start,
        "order_by": sort_by,
        "descending": order,
        "session": session,
    }

    count = seen.search(count=True, **kwargs)
    raw_seen_entries_list = seen.search(**kwargs)

    converted_seen_entry_list = [entry.to_dict() for entry in raw_seen_entries_list.all()]

    pages = int(ceil(count / float(page_size)))
    actual_size = min(count, page_size)

    # Invalid page request
    if page > pages and pages != 0:
        return {"status": "error", "message": "page %s does not exist" % page}, 404

    return jsonify(
        {
            "seen_entries": converted_seen_entry_list,
            "total_number_of_seen_entries": count,
            "page_size": actual_size,
            "page_number": page,
            "total_number_of_pages": pages,
        }
    )

def get(self, session):
    """ Search for seen entries """
    args = seen_search_parser.parse_args()
    value = args['value']
    page = args['page']
    page_size = args['page_size']
    is_seen_local = args['is_seen_local']
    sort_by = args['sort_by']
    order = args['order']

    # Handle max size limit
    if page_size > 100:
        page_size = 100

    # Handle the sort order default when it is explicitly passed
    descending = bool(order == 'desc')

    # Unquotes and prepares value for DB lookup
    if value:
        value = unquote(value)
        value = '%{0}%'.format(value)

    start = page_size * (page - 1)
    stop = start + page_size

    kwargs = {
        'value': value,
        'status': is_seen_local,
        'stop': stop,
        'start': start,
        'order_by': sort_by,
        'descending': descending,
        'session': session
    }

    count = seen.search(count=True, **kwargs)
    raw_seen_entries_list = seen.search(**kwargs)

    converted_seen_entry_list = [
        entry.to_dict() for entry in raw_seen_entries_list.all()
    ]

    pages = int(ceil(count / float(page_size)))
    actual_size = min(count, page_size)

    # Invalid page request
    if page > pages and pages != 0:
        return {
            'status': 'error',
            'message': 'page %s does not exist' % page
        }, 404

    return jsonify({
        'seen_entries': converted_seen_entry_list,
        'total_number_of_seen_entries': count,
        'page_size': actual_size,
        'page_number': page,
        'total_number_of_pages': pages
    })

def folder_name(self):
    if not self.folder_id:
        return None

    if self.folder_id != DEFAULT_ROOT_ID:
        return unquote(os.path.split(self.folder_path)[1])
    else:
        return '/ (Full OneDrive)'

def get(self, session):
    """ Search for seen entries """
    args = seen_search_parser.parse_args()
    value = args['value']
    page = args['page']
    page_size = args['page_size']
    is_seen_local = args['is_seen_local']
    sort_by = args['sort_by']
    order = args['order']

    # Handle max size limit
    if page_size > 100:
        page_size = 100

    # Handle the sort order default when it is explicitly passed
    if order == 'desc':
        order = True
    else:
        order = False

    # Unquotes and prepares value for DB lookup
    if value:
        value = unquote(value)
        value = '%{0}%'.format(value)

    start = page_size * (page - 1)
    stop = start + page_size

    kwargs = {
        'value': value,
        'status': is_seen_local,
        'stop': stop,
        'start': start,
        'order_by': sort_by,
        'descending': order,
        'session': session
    }

    count = seen.search(count=True, **kwargs)
    raw_seen_entries_list = seen.search(**kwargs)

    converted_seen_entry_list = [entry.to_dict() for entry in raw_seen_entries_list.all()]

    pages = int(ceil(count / float(page_size)))
    actual_size = min(count, page_size)

    # Invalid page request
    if page > pages and pages != 0:
        return {'status': 'error', 'message': 'page %s does not exist' % page}, 404

    return jsonify({
        'seen_entries': converted_seen_entry_list,
        'total_number_of_seen_entries': count,
        'page_size': actual_size,
        'page_number': page,
        'total_number_of_pages': pages
    })

def download_entry(self, entry, config, sftp):
    """
    Downloads the file(s) described in entry
    """
    path = unquote(urlparse(entry['url']).path) or '.'
    delete_origin = config['delete_origin']
    recursive = config['recursive']
    to = config['to']

    if to:
        try:
            to = render_from_entry(to, entry)
        except RenderError as e:
            log.error('Could not render path: %s' % to)
            entry.fail(e)
            return

    if not sftp.lexists(path):
        log.error('Remote path does not exist: %s' % path)
        return

    if sftp.isfile(path):
        source_file = remotepath.basename(path)
        source_dir = remotepath.dirname(path)
        try:
            sftp.cwd(source_dir)
            self.download_file(source_file, to, sftp, delete_origin)
        except Exception as e:
            error = 'Failed to download file %s (%s)' % (path, e)
            log.error(error)
            entry.fail(error)
    elif sftp.isdir(path):
        base_path = remotepath.normpath(remotepath.join(path, '..'))
        dir_name = remotepath.basename(path)
        handle_file = partial(self.download_file, dest=to, sftp=sftp, delete_origin=delete_origin)

        try:
            sftp.cwd(base_path)
            sftp.walktree(dir_name, handle_file, self.handle_dir, self.handle_unknown, recursive)
        except Exception as e:
            error = 'Failed to download directory %s (%s)' % (path, e)
            log.error(error)
            entry.fail(error)
            return

        if delete_origin:
            self.remove_dir(sftp, path)
    else:
        log.warning('Skipping unknown file %s' % path)

def from_bytes(cls, bytes):
    """
    Parse a URL from some bytes.
    """
    scheme, _, rest = bytes.strip().partition(b":")
    if scheme and not rest.startswith(b"//"):
        raise InvalidURL(
            "{!r} is not a valid URL without initial '//'".format(bytes),
        )

    authority, slash, rest = rest[2:].partition(b"/")
    userinfo, _, host_and_port = authority.rpartition(b"@")
    username, _, password = userinfo.partition(b":")
    host, _, port_str = host_and_port.partition(b":")

    if not port_str:
        port = None
    else:
        try:
            port = int(unquote(port_str))
        except ValueError:
            raise InvalidURL("{!r} is not a valid port".format(port_str))

    path, _, rest = rest.partition(b"?")
    query, _, fragment = rest.partition(b"#")

    return cls.normalized(
        scheme=scheme,
        username=username,
        password=password,
        host=host,
        port=port,
        path=unquote(slash + path),
        query=parse_qs(query, keep_blank_values=True),
        fragment=unquote_plus(fragment),
        unnormalized=bytes,
        authority=authority,
        userinfo=userinfo,
    )

def folder_name(self):
    if not self.folder_id:
        return None

    if self.folder_id != DEFAULT_ROOT_ID:
        # `urllib` does not properly handle unicode.
        # encode input to `str`, decode output back to `unicode`
        return unquote(os.path.split(self.folder_path)[1].encode('utf-8')).decode('utf-8')
    else:
        return '/ (Full OneDrive)'

def versions(self):
    versions = {}
    for info in self.data.get('derivativeInfo'):
        # each entry is a single colon-separated record; unpack it
        # (loop variable renamed so it no longer shadows the unpacked field)
        (version, width, height, size, mimetype,
         u1, u2, u3, url, filename) = info.split(':')
        versions[version] = {
            'width': width,
            'height': height,
            'size': size,
            'mimetype': mimetype,
            'url': unquote(url),
            'filename': filename,
        }
    return versions

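# The colon-separated derivativeInfo layout above only survives split(':')
# because the url field arrives percent-encoded (':' and '/' appear as %3A and
# %2F), which is exactly why unquote() is applied afterwards. A sketch with
# made-up values (the three u* slots are unidentified in the snippet):
from urllib.parse import unquote

record = '2:640:480:12345:image/jpeg:a:b:c:https%3A%2F%2Fcdn.example%2Fp.jpg:p.jpg'
version, width, height, size, mimetype, u1, u2, u3, url, filename = record.split(':')
assert unquote(url) == 'https://cdn.example/p.jpg'
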
def on_task_download(self, task, config):
    config = self.prepare_config(config, task)
    for entry in task.accepted:
        ftp_url = urlparse(entry.get('url'))
        ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
        current_path = os.path.dirname(ftp_url.path)
        try:
            ftp = self.ftp_connect(config, ftp_url, current_path)
        except ftplib.all_errors as e:
            entry.fail("Unable to connect to server : %s" % (e))
            break

        to_path = config['ftp_tmp_path']

        try:
            to_path = entry.render(to_path)
        except RenderError as err:
            raise plugin.PluginError("Path value replacement `%s` failed: %s" % (to_path, err.args[0]))

        # Clean invalid characters with pathscrub plugin
        to_path = pathscrub(to_path)

        if not os.path.exists(to_path):
            log.debug("Creating base path: %s" % to_path)
            os.makedirs(to_path)
        if not os.path.isdir(to_path):
            raise plugin.PluginWarning("Destination `%s` is not a directory." % to_path)

        file_name = os.path.basename(ftp_url.path)

        try:
            # Directory
            ftp = self.check_connection(ftp, config, ftp_url, current_path)
            ftp.cwd(file_name)
            self.ftp_walk(ftp, os.path.join(to_path, file_name), config, ftp_url, ftp_url.path)
            ftp = self.check_connection(ftp, config, ftp_url, current_path)
            ftp.cwd('..')
            if config['delete_origin']:
                ftp.rmd(file_name)
        except ftplib.error_perm:
            # File
            self.ftp_down(ftp, file_name, to_path, config, ftp_url, current_path)

        ftp.close()

def parse_link_rel(url, fn):
    """
    Read through html file ``fn`` downloaded from ``url``, looking for a
    link tag of the form:

        <link rel="alternate" type="application/sage"
              title="currently ignored" href=".../example.sws" />

    This function reads ``fn`` looking for such tags and returns a list of
    dictionaries of the form

        {'title': from title field in link, 'url': absolute URL to .sws file}

    for the corresponding ``.sws`` files. Naturally if there are no
    appropriate link tags found, the returned list is empty.
    """
    class GetLinkRelWorksheets(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.worksheets = []

        def handle_starttag(self, tag, attrs):
            if (tag == 'link' and
                    ('rel', 'alternate') in attrs and
                    ('type', 'application/sage') in attrs):
                self.worksheets.append({
                    'title': [_ for _ in attrs if _[0] == 'title'][0][1],
                    'url': [_ for _ in attrs if _[0] == 'href'][0][1]
                })

    parser = GetLinkRelWorksheets()
    with open(fn) as f:
        parser.feed(f.read())

    ret = []
    for d in parser.worksheets:
        sws = d['url']
        # is that link a relative URL?
        if not urlparse(sws).netloc:
            # unquote-then-quote to avoid turning %20 into %2520, etc
            ret.append({'url': urljoin(url, quote(unquote(sws))),
                        'title': d['title']})
        else:
            ret.append({'url': sws, 'title': d['title']})
    return ret

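# The quote(unquote(...)) idiom above keeps already-escaped hrefs stable
# instead of double-escaping them; a standalone illustration (stdlib only):
from urllib.parse import quote, unquote, urljoin

href = 'My%20Worksheet.sws'
assert quote(unquote(href)) == href                 # %20 stays %20
assert quote(href) == 'My%2520Worksheet.sws'        # naive re-quoting doubles it
assert urljoin('http://example.org/pub/a.html', quote(unquote(href))) == \
    'http://example.org/pub/My%20Worksheet.sws'
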
def delete(self, session):
    """ Delete seen entries """
    args = seen_base_parser.parse_args()
    value = args['value']
    local = args['local']

    if value:
        value = unquote(value)
        value = '%' + value + '%'
    seen_entries_list = db.search(value=value, status=local, session=session)

    deleted = 0
    for se in seen_entries_list:
        db.forget_by_id(se.id, session=session)
        deleted += 1
    return success_response('successfully deleted %i entries' % deleted)

def lint():
    """Run linter on the provided text and return the results."""
    if 'text' in request.values:
        text = unquote(request.values['text'])
        print(text)
        try:
            job = q.enqueue(worker_function, text)
            print(job)
            return jsonify(job_id=job.id), 202
        except Exception as e:
            print(e)
            # A bare `False` is not a valid Flask response; report the failure.
            return jsonify(status="error", message="Could not enqueue job."), 500
    elif 'job_id' in request.values:
        job = q.fetch_job(request.values['job_id'])
        if not job:
            return jsonify(status="error", message="No job with requested job_id."), 404
        elif job.result is None:
            return jsonify(status="error", message="Job is not yet ready."), 202
        else:
            errors = []
            for i, e in enumerate(job.result):
                app.logger.debug(e)
                errors.append({
                    "check": e[0],
                    "message": e[1],
                    "line": e[2],
                    "column": e[3],
                    "start": e[4],
                    "end": e[5],
                    "extent": e[5] - e[4],
                    "severity": e[7],
                    "replacements": e[8],
                    "source_name": "",
                    "source_url": "",
                })
            return jsonify(status="success", data={"errors": errors})

def delete(self, session):
    """ Delete seen entries """
    args = seen_delete_parser.parse_args()
    value = args["value"]
    is_seen_local = args["is_seen_local"]

    if value:
        value = unquote(value)
        value = "%" + value + "%"
    seen_entries_list = seen.search(value=value, status=is_seen_local, session=session)

    if not seen_entries_list.all():
        return {"status": "error", "message": "no results to delete"}, 404

    for entry in seen_entries_list:
        try:
            seen.forget_by_id(entry.id)
        except ValueError:
            return {"status": "error", "message": "Could not delete entry ID {0}".format(entry.id)}, 500
    return {}

def lint():
    """Run linter on the provided text and return the results."""
    if 'text' in request.values:
        text = unquote(request.values['text'])
        job = q.enqueue(worker_function, text)
        return jsonify(job_id=job.id), 202
    elif 'job_id' in request.values:
        job = q.fetch_job(request.values['job_id'])
        if not job:
            return jsonify(status="error", message="No job with requested job_id."), 404
        elif job.result is None:
            return jsonify(status="error", message="Job is not yet ready."), 202
        else:
            errors = []
            for i, e in enumerate(job.result):
                app.logger.debug(e)
                errors.append({
                    "check": e[0],
                    "message": e[1],
                    "line": e[2],
                    "column": e[3],
                    "start": e[4],
                    "end": e[5],
                    "extent": e[5] - e[4],
                    "severity": e[7],
                    "replacements": e[8],
                    "source_name": "",
                    "source_url": "",
                })
            return jsonify(status="success", data={"errors": errors})

def delete(self, session):
    """ Delete seen entries """
    args = seen_delete_parser.parse_args()
    value = args['value']
    is_seen_local = args['is_seen_local']

    if value:
        value = unquote(value)
        value = '%' + value + '%'
    seen_entries_list = seen.search(value=value, status=is_seen_local, session=session)

    if not seen_entries_list.all():
        return {'status': 'error', 'message': 'no results to delete'}, 404

    for entry in seen_entries_list:
        try:
            seen.forget_by_id(entry.id)
        except ValueError as e:
            return {'status': 'error', 'message': 'Could not delete entry ID {0}'.format(entry.id)}, 500
    return {}

def get_loginform(self, redirect_uri=''):
    from plexpy.webserve import serve_template
    return serve_template(templatename="login.html", title="Login",
                          redirect_uri=unquote(redirect_uri))

def unprocess_payload(data):
    return process_data(
        data, lambda value: unquote(value.encode('utf-8') if value else ''))

def download_entry(self, task, entry, url, tmp_path):
    """Downloads `entry` by using `url`.

    :raises: Several types of exceptions ...
    :raises: PluginWarning
    """
    log.debug('Downloading url \'%s\'', url)

    # get content
    auth = None
    if 'download_auth' in entry:
        auth = entry['download_auth']
        log.debug('Custom auth enabled for %s download: %s', entry['title'], entry['download_auth'])

    try:
        response = task.requests.get(url, auth=auth, raise_status=False)
    except UnicodeError:
        log.error('Unicode error while encoding url %s', url)
        return
    if response.status_code != 200:
        log.debug('Got %s response from server. Saving error page.', response.status_code)
        # Save the error page
        if response.content:
            self.save_error_page(entry, task, response.content)
        # Raise the error
        response.raise_for_status()
        return

    # expand ~ in temp path
    # TODO jinja?
    try:
        tmp_path = os.path.expanduser(tmp_path)
    except RenderError as e:
        entry.fail('Could not set temp path. Error during string replacement: %s' % e)
        return

    # Clean illegal characters from temp path name
    tmp_path = pathscrub(tmp_path)

    # create if missing
    if not os.path.isdir(tmp_path):
        log.debug('creating tmp_path %s' % tmp_path)
        os.mkdir(tmp_path)

    # check for write-access
    if not os.access(tmp_path, os.W_OK):
        raise plugin.PluginError('Not allowed to write to temp directory `%s`' % tmp_path)

    # download and write data into a temp file
    tmp_dir = tempfile.mkdtemp(dir=tmp_path)
    fname = hashlib.md5(url.encode('utf-8', 'replace')).hexdigest()
    datafile = os.path.join(tmp_dir, fname)
    outfile = io.open(datafile, 'wb')
    try:
        for chunk in response.iter_content(chunk_size=150 * 1024, decode_unicode=False):
            outfile.write(chunk)
    except Exception as e:
        # don't leave futile files behind
        # outfile has to be closed before we can delete it on Windows
        outfile.close()
        log.debug('Download interrupted, removing datafile')
        os.remove(datafile)
        if isinstance(e, socket.timeout):
            log.error('Timeout while downloading file')
        else:
            raise
    else:
        outfile.close()
        # Do a sanity check on downloaded file
        if os.path.getsize(datafile) == 0:
            entry.fail('File %s is 0 bytes in size' % datafile)
            os.remove(datafile)
            return
        # store temp filename into entry so other plugins may read and modify content
        # temp file is moved into final destination at self.output
        entry['file'] = datafile
        log.debug('%s field file set to: %s', entry['title'], entry['file'])

    if 'content-type' in response.headers:
        entry['mime-type'] = str(parse_header(response.headers['content-type'])[0])
    else:
        entry['mime-type'] = "unknown/unknown"

    content_encoding = response.headers.get('content-encoding', '')
    decompress = 'gzip' in content_encoding or 'deflate' in content_encoding
    if 'content-length' in response.headers and not decompress:
        entry['content-length'] = int(response.headers['content-length'])

    # prefer content-disposition naming, note: content-disposition can be disabled completely
    # by setting entry field `content-disposition` to False
    if entry.get('content-disposition', True):
        self.filename_from_headers(entry, response)
    else:
        log.info('Content-disposition disabled for %s', entry['title'])
    self.filename_ext_from_mime(entry)

    if not entry.get('filename'):
        filename = unquote(url.rsplit('/', 1)[1])
        log.debug('No filename - setting from url: %s', filename)
        entry['filename'] = filename

    log.debug('Finishing download_entry() with filename %s', entry.get('filename'))

def scrape_image_urls(keywords, number=None, face_only=False, safe_mode=False,
                      proxy=None, proxy_type="http"):
    print("\nScraping From Google Image Search ...\n")
    print("Keywords:\t" + keywords)
    base_url = "https://www.google.com/search?tbm=isch"
    keywords_str = "&q=" + "+".join(keywords.split())
    query_url = base_url + keywords_str

    if number is None:
        print("Number:\t\tNo limit")
    else:
        print("Number:\t\t" + str(number))

    if face_only is True:
        query_url += "&tbs=itp:face"
        print("Face Only:\tYes")
    else:
        print("Face Only:\tNo")

    if safe_mode is True:
        query_url += "&safe=on"
        print("Safe Mode:\tOn")
    else:
        query_url += "&safe=off"
        print("Safe Mode:\tOff")

    print("Query URL:\t" + query_url)

    phantomjs_args = list()
    if proxy is not None:
        phantomjs_args = [
            "--proxy=" + proxy,
            "--proxy-type=" + proxy_type,
        ]
    driver = webdriver.PhantomJS(executable_path="/opt/phantomjs-2.1.1/bin/phantomjs",
                                 service_args=phantomjs_args,
                                 desired_capabilities=dcap)
    driver.set_window_size(10000, 7500)
    driver.get(query_url)

    last_image_count = 0
    retry_times = 0
    time.sleep(3)
    while True:
        img_count = len(driver.find_elements_by_class_name("rg_l"))
        if img_count > last_image_count:
            if retry_times > 5:
                break
            else:
                retry_times += 1
        else:
            last_image_count = img_count
            retry_times = 0
        time.sleep(0.5)

    image_elements = driver.find_elements_by_class_name("rg_l")
    image_urls = list()
    url_pattern = r"imgurl=\S*&imgrefurl"
    for image_element in image_elements:
        outer_html = image_element.get_attribute("outerHTML")
        re_group = re.search(url_pattern, outer_html)
        if re_group is not None:
            image_url = unquote(re_group.group()[7:-14])
            image_urls.append(image_url)

    if number is not None and number > len(image_urls):
        number = len(image_urls)

    print("\nTotal {0} images scraped, {1} will be used.\n".format(len(image_urls), number))
    return image_urls[0:number]

def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        if e.message == 'osfstorageguidfile':
            # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        else:
            raise e
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        # was once GuidStoredObject-like but that relationship has changed, it's
        # possible to have referents that are instances of classes that don't
        # have a deep_url attribute or otherwise don't behave as expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message('Guid resolved to an object with no deep_url', dict(guid=guid))
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)

        # Handle file `/download` shortcut with supported types.
        if suffix and suffix.rstrip('/').lower() == 'download':
            file_referent = None
            if isinstance(referent, Preprint) and referent.primary_file:
                file_referent = referent.primary_file
            elif isinstance(referent, BaseFileNode) and referent.is_file:
                file_referent = referent

            if file_referent:
                if isinstance(file_referent.target, Preprint) and not file_referent.target.is_published:
                    # TODO: Ideally, permissions wouldn't be checked here.
                    # This is necessary to prevent a logical inconsistency with
                    # the routing scheme - if a preprint is not published, only
                    # admins and moderators should be able to know it exists.
                    auth = Auth.from_kwargs(request.args.to_dict(), {})
                    # Check if user isn't a nonetype or that the user has admin/moderator/superuser permissions
                    if auth.user is None or not (
                            auth.user.has_perm('view_submissions', file_referent.target.provider) or
                            file_referent.target.has_permission(auth.user, permissions.ADMIN)):
                        raise HTTPError(http_status.HTTP_404_NOT_FOUND)

                # Extend `request.args` adding `action=download`.
                request.args = request.args.copy()
                request.args.update({'action': 'download'})
                # Do not include the `download` suffix in the url rebuild.
                url = _build_guid_url(unquote(file_referent.deep_url))
                return proxy_url(url)

        # Handle Ember Applications
        if isinstance(referent, Preprint):
            if referent.provider.domain_redirect_enabled:
                # This route should always be intercepted by nginx for the branded domain,
                # w/ the exception of `<guid>/download` handled above.
                return redirect(referent.absolute_url, http_status.HTTP_301_MOVED_PERMANENTLY)

            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['preprints']['server'], stream=True,
                                    timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)

            return send_from_directory(preprints_dir, 'index.html')

        if isinstance(referent, BaseFileNode) and referent.is_file and \
                getattr(referent.target, 'is_quickfiles', False):
            if referent.is_deleted:
                raise HTTPError(http_status.HTTP_410_GONE)
            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['ember_osf_web']['server'], stream=True,
                                    timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()), resp.status_code)
            return send_from_directory(ember_osf_web_dir, 'index.html')

        if isinstance(referent, Registration) and (
                not suffix or suffix.rstrip('/').lower() in ('comments', 'links', 'components')):
            if flag_is_active(request, features.EMBER_REGISTRIES_DETAIL_PAGE):
                # Route only the base detail view to ember
                if PROXY_EMBER_APPS:
                    resp = requests.get(EXTERNAL_EMBER_APPS['ember_osf_web']['server'], stream=True,
                                        timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                    return Response(stream_with_context(resp.iter_content()), resp.status_code)
                return send_from_directory(ember_osf_web_dir, 'index.html')

        url = _build_guid_url(unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(_build_guid_url(guid.lower(), suffix))

    # GUID not found
    raise HTTPError(http_status.HTTP_404_NOT_FOUND)

def _initParameters(self):
    self._owner = self.request['userName']
    # SPL-107168 Need to populate namespace & owner beforehand which are needed
    # to generate context specific URI while initializing conf defaults

    # get namespace/owner
    self._namespace = self.args.get(self.ARG_INPUT_NAMESPACE)
    if self.ARG_INPUT_OWNER in self.args:
        self._owner = self.args.get(self.ARG_INPUT_OWNER)

    self._initArgs()
    self._initWebDefaults()
    self._initLimitsDefaults()
    self._initAlertActionsDefaults()

    # initialize view type
    # the order matters, check dashboard xml first
    if self.ARG_INPUT_DASHBOARD_XML in self.args:
        self._dashboardXml = unquote(self.args.get(self.ARG_INPUT_DASHBOARD_XML))
        self._viewType = self.VIEW_TYPE_DASHBOARD
        self._dashboardName = self.args.get(self.ARG_INPUT_DASHBOARD)
        logger.debug("pdfgen/render xml=%s" % self._dashboardXml)
    elif self.ARG_INPUT_DASHBOARD in self.args:
        self._dashboardName = self.args.get(self.ARG_INPUT_DASHBOARD)
        self._viewType = self.VIEW_TYPE_DASHBOARD
    elif self.ARG_INPUT_REPORT in self.args:
        self._reportName = self.args.get(self.ARG_INPUT_REPORT)
        self._viewType = self.VIEW_TYPE_REPORT
    elif self.ARG_INPUT_SEARCH in self.args:
        self._searchStr = self.args.get(self.ARG_INPUT_SEARCH, "No search query specified")
        self._et = self.args.get(self.ARG_INPUT_ET, 0)
        self._lt = self.args.get(self.ARG_INPUT_LT, '')
        # if et or lt is 0.000 change it to 0
        if float(self._et) == 0.0:
            logger.debug("_et was %s, updating it to '0'" % self._et)
            self._et = '0'
        if self._lt and float(self._lt) == 0.0:
            logger.debug("_lt was %s, updating it to '0'" % self._lt)
            self._lt = '0'
        self._reportName = 'Splunk search results'
        self._viewType = self.VIEW_TYPE_SEARCH

    # initialize papersize
    if self.ARG_INPUT_PAPERSIZE in self.args:
        paperSizeArg = self.args.get(self.ARG_INPUT_PAPERSIZE).lower()
        if paperSizeArg in pdfrenderer.PAPERSIZES:
            self._paperSize = paperSizeArg
        else:
            logger.warn('Invalid paper size "%s"' % paperSizeArg)
            raise ArgError("Paper size is not valid. Please check the pdfgen.log file for more information.")
    logger.debug("pdf-init paper-size=%s" % self._paperSize)

    # initialize include-splunk-logo
    self._includeSplunkLogo = normalizeBoolean(
        self.args.get(self.ARG_INPUT_INCLUDE_SPLUNK_LOGO, self._includeSplunkLogo))
    logger.debug("pdf-init include-splunk-logo=%s" % self._includeSplunkLogo)

    # initialize max-row-per-table
    if self.ARG_INPUT_MAX_ROWS_PER_TABLE in self.args:
        maxRowsPerTableArg = self.args.get(self.ARG_INPUT_MAX_ROWS_PER_TABLE)
        try:
            self._maxRowsPerTable = int(maxRowsPerTableArg)
        except:
            logger.warn('Max-rows-per-table="%s" is invalid, must be an integer' % maxRowsPerTableArg)
            raise ArgError("max-rows-per-table is invalid, must be an integer. "
                           "Please check the pdfgen.log file for more information.")
    logger.debug("pdf-init max-rows-per-table=%s" % (str(self._maxRowsPerTable)))

    # initialize timeout
    if self.ARG_INPUT_TIMEOUT in self.args:
        self._timeoutDuration = int(self.args.get(self.ARG_INPUT_TIMEOUT))
    logger.debug("pdf-init timeoutDuration=%s" % self._timeoutDuration)
    self._startTimeoutClock()

    if self.ARG_INPUT_REPORT_FILE_NAME in self.args:
        self._fileNamePattern = self.args.get(self.ARG_INPUT_REPORT_FILE_NAME)
        logger.debug("pdf-init report-file-name=%s" % self._fileNamePattern)

    # initialize time of report
    self._initTimeOfReport()

    # check for SIDs
    if self._viewType is self.VIEW_TYPE_REPORT:
        if self.ARG_INPUT_SID in self.args:
            self._inputSids[0] = self.args.get(self.ARG_INPUT_SID)
    else:
        for argK, argV in self.args.items():
            if self.ARG_INPUT_SID in argK:
                # we want the panel sequence number which is retrieved from "sid_<seqNum>"
                match = self.sidRE.match(argK)
                if match != None and len(match.groups(0)) > 0:
                    seqNum = match.groups(0)[0]
                    if len(seqNum) > 0:
                        self._inputSids[int(seqNum)] = argV
                        logger.debug("sid seqNum=%s value=%s" % (seqNum, argV))

    # allow override from http arguments
    for validArgs in pdfrenderer.ALL_PDF_SETTINGS:
        v = self.args.get(validArgs) or self._pdfSettings.get(validArgs)
        if v is not None:
            v = v.strip()
            # SPL-98329 convert value into lowercase except logo path
            if validArgs != pdfrenderer.SETTING_LOGO_PATH:
                v = v.lower()
            if validArgs in pdfrenderer.PDF_BOOLEAN_SETTINGS:
                self._pdfSettings[validArgs] = normalizeBoolean(v)
            else:
                self._pdfSettings[validArgs] = v
    logger.debug("pdfSettings=%s" % (str(self._pdfSettings)))

    self._validateParameters()

    self._timestampStr = splunk.search.searchUtils.getFormattedTimeForUser(
        self.sessionKey, now=self._now, timeFormat='%F %T %Z')

    self._locale = self.args.get(self.ARG_INPUT_LOCALE)
    logger.info("pdf-init locale=%s" % self._locale)

    self._server_zoneinfo = self.args.get(self.ARG_INPUT_TIMEZONE)
    if self._server_zoneinfo is None:
        self._server_zoneinfo = toDefaultStrings(
            rest.simpleRequest('/services/search/timeparser/tz', sessionKey=self.sessionKey)[1])
    logger.info("pdf-init server_zoneinfo=%s" % self._server_zoneinfo)

def validate_uri(digest_uri, request_path):
    digest_url_components = urlparse(digest_uri)
    # index 2 of the urlparse() result is the path component
    return unquote(digest_url_components[2]) == request_path

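# For a digest-auth style check like validate_uri, the client-supplied digest
# URI must match the request path once percent-decoded; e.g. (illustrative):
#
#   validate_uri('http://example.com/a%20b', '/a b')    -> True
#   validate_uri('http://example.com/a%20b', '/a%20b')  -> False
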
def get_synced_items(self, machine_id=None, client_id_filter=None, user_id_filter=None,
                     rating_key_filter=None, sync_id_filter=None):
    if not machine_id:
        machine_id = plexpy.CONFIG.PMS_IDENTIFIER

    if isinstance(rating_key_filter, list):
        rating_key_filter = [str(k) for k in rating_key_filter]
    elif rating_key_filter:
        rating_key_filter = [str(rating_key_filter)]

    if isinstance(user_id_filter, list):
        user_id_filter = [str(k) for k in user_id_filter]
    elif user_id_filter:
        user_id_filter = [str(user_id_filter)]

    sync_list = self.get_plextv_sync_lists(machine_id, output_format='xml')
    user_data = users.Users()

    synced_items = []

    try:
        xml_head = sync_list.getElementsByTagName('SyncList')
    except Exception as e:
        logger.warn("Tautulli PlexTV :: Unable to parse XML for get_synced_items: %s." % e)
        return {}

    for a in xml_head:
        client_id = helpers.get_xml_attr(a, 'clientIdentifier')

        # Filter by client_id
        if client_id_filter and str(client_id_filter) != client_id:
            continue

        sync_list_id = helpers.get_xml_attr(a, 'id')
        sync_device = a.getElementsByTagName('Device')

        for device in sync_device:
            device_user_id = helpers.get_xml_attr(device, 'userID')
            try:
                device_username = user_data.get_details(user_id=device_user_id)['username']
                device_friendly_name = user_data.get_details(user_id=device_user_id)['friendly_name']
            except:
                device_username = ''
                device_friendly_name = ''
            device_name = helpers.get_xml_attr(device, 'name')
            device_product = helpers.get_xml_attr(device, 'product')
            device_product_version = helpers.get_xml_attr(device, 'productVersion')
            device_platform = helpers.get_xml_attr(device, 'platform')
            device_platform_version = helpers.get_xml_attr(device, 'platformVersion')
            device_type = helpers.get_xml_attr(device, 'device')
            device_model = helpers.get_xml_attr(device, 'model')
            device_last_seen = helpers.get_xml_attr(device, 'lastSeenAt')

        # Filter by user_id
        if user_id_filter and device_user_id not in user_id_filter:
            continue

        for synced in a.getElementsByTagName('SyncItems'):
            sync_item = synced.getElementsByTagName('SyncItem')
            for item in sync_item:
                sync_media_type = None
                rating_key = None
                for location in item.getElementsByTagName('Location'):
                    location_uri = unquote(helpers.get_xml_attr(location, 'uri'))

                    if location_uri.startswith('library://'):
                        if 'collection' in location_uri:
                            sync_media_type = 'collection'
                        clean_uri = location_uri.split('/')
                        rating_key = next((j for i, j in zip(clean_uri[:-1], clean_uri[1:])
                                           if i in ('metadata', 'collections')), None)

                    elif location_uri.startswith('playlist://'):
                        sync_media_type = 'playlist'
                        tokens = users.Users().get_tokens(user_id=device_user_id)
                        if tokens['server_token']:
                            plex = Plex(token=tokens['server_token'])
                            for playlist in plex.PlexServer.playlists():
                                if location_uri.endswith(playlist.guid):
                                    rating_key = str(playlist.ratingKey)  # String for backwards consistency

                # Filter by rating_key
                if rating_key_filter and rating_key not in rating_key_filter:
                    continue

                sync_id = helpers.get_xml_attr(item, 'id')

                # Filter by sync_id
                if sync_id_filter and str(sync_id_filter) != sync_id:
                    continue

                sync_version = helpers.get_xml_attr(item, 'version')
                sync_root_title = helpers.get_xml_attr(item, 'rootTitle')
                sync_title = helpers.get_xml_attr(item, 'title')
                sync_metadata_type = helpers.get_xml_attr(item, 'metadataType')
                sync_content_type = helpers.get_xml_attr(item, 'contentType')

                for status in item.getElementsByTagName('Status'):
                    status_failure_code = helpers.get_xml_attr(status, 'failureCode')
                    status_failure = helpers.get_xml_attr(status, 'failure')
                    status_state = helpers.get_xml_attr(status, 'state')
                    status_item_count = helpers.get_xml_attr(status, 'itemsCount')
                    status_item_complete_count = helpers.get_xml_attr(status, 'itemsCompleteCount')
                    status_item_downloaded_count = helpers.get_xml_attr(status, 'itemsDownloadedCount')
                    status_item_ready_count = helpers.get_xml_attr(status, 'itemsReadyCount')
                    status_item_successful_count = helpers.get_xml_attr(status, 'itemsSuccessfulCount')
                    status_total_size = helpers.get_xml_attr(status, 'totalSize')
                    status_item_download_percent_complete = helpers.get_percent(
                        status_item_downloaded_count, status_item_count)

                for settings in item.getElementsByTagName('MediaSettings'):
                    settings_video_bitrate = helpers.get_xml_attr(settings, 'maxVideoBitrate')
                    settings_video_quality = helpers.get_xml_attr(settings, 'videoQuality')
                    settings_video_resolution = helpers.get_xml_attr(settings, 'videoResolution')
                    settings_audio_boost = helpers.get_xml_attr(settings, 'audioBoost')
                    settings_audio_bitrate = helpers.get_xml_attr(settings, 'musicBitrate')
                    settings_photo_quality = helpers.get_xml_attr(settings, 'photoQuality')
                    settings_photo_resolution = helpers.get_xml_attr(settings, 'photoResolution')

                sync_details = {"device_name": device_name,
                                "platform": device_platform,
                                "user_id": device_user_id,
                                "user": device_friendly_name,
                                "username": device_username,
                                "root_title": sync_root_title,
                                "sync_title": sync_title,
                                "metadata_type": sync_metadata_type,
                                "content_type": sync_content_type,
                                "rating_key": rating_key,
                                "state": status_state,
                                "item_count": status_item_count,
                                "item_complete_count": status_item_complete_count,
                                "item_downloaded_count": status_item_downloaded_count,
                                "item_downloaded_percent_complete": status_item_download_percent_complete,
                                "video_bitrate": settings_video_bitrate,
                                "audio_bitrate": settings_audio_bitrate,
                                "photo_quality": settings_photo_quality,
                                "video_quality": settings_video_quality,
                                "total_size": status_total_size,
                                "failure": status_failure,
                                "client_id": client_id,
                                "sync_id": sync_id,
                                "sync_media_type": sync_media_type}

                synced_items.append(sync_details)

    return session.filter_session_info(synced_items, filter_key='user_id')

def get(self, session):
    """ Search for seen entries """
    args = seen_search_parser.parse_args()

    # Filter params
    value = args['value']
    local = args['local']

    # Pagination and sorting params
    page = args['page']
    per_page = args['per_page']
    sort_by = args['sort_by']
    sort_order = args['order']

    # Handle max size limit
    if per_page > 100:
        per_page = 100

    descending = sort_order == 'desc'

    # Unquotes and prepares value for DB lookup
    if value:
        value = unquote(value)
        value = '%{0}%'.format(value)

    start = per_page * (page - 1)
    stop = start + per_page

    kwargs = {
        'value': value,
        'status': local,
        'stop': stop,
        'start': start,
        'order_by': sort_by,
        'descending': descending,
        'session': session,
    }

    total_items = db.search(count=True, **kwargs)

    if not total_items:
        return jsonify([])

    raw_seen_entries_list = db.search(**kwargs).all()
    converted_seen_entry_list = [entry.to_dict() for entry in raw_seen_entries_list]

    # Total number of pages
    total_pages = int(ceil(total_items / float(per_page)))

    # Actual results in page
    actual_size = min(len(converted_seen_entry_list), per_page)

    # Invalid page request
    if page > total_pages and total_pages != 0:
        raise NotFoundError('page %s does not exist' % page)

    # Get pagination headers
    pagination = pagination_headers(total_pages, total_items, actual_size, request)

    # Create response
    rsp = jsonify(converted_seen_entry_list)

    # Add link header to response
    rsp.headers.extend(pagination)

    return rsp

def handle_POST(self):
    """
    Install a remote application in response to an HTTP POST.
    """
    self.verifyAllowRemote()

    parts = len(self.pathParts)
    if parts == self.BASE_DEPTH + 2:
        default_version = True
    elif parts == self.BASE_DEPTH + 3:
        default_version = False
    else:
        raise splunk.BadRequest

    if HTTP_AUTH_TOKEN not in self.args:
        raise splunk.BadRequest("Missing argument: %s" % HTTP_AUTH_TOKEN)
    if HTTP_ACTION not in self.args:
        raise splunk.BadRequest("Missing argument: %s" % HTTP_ACTION)
    if self.args[HTTP_ACTION] not in (HTTP_ACTION_INSTALL, HTTP_ACTION_DOWNLOAD):
        raise splunk.BadRequest("Invalid value '%s' for argument '%s'"
                                % (self.args[HTTP_ACTION], HTTP_ACTION))

    # check if this is a cloud stack
    if isCloud(self.sessionKey):
        app_name = self.pathParts[self.BASE_DEPTH + 1]
        # Get all cloud apps and see if the app being installed is vetted for
        # cloud, i.e. install_method == simple
        # TODO: Change to just querying for the app in question when BASE-4074
        # is finished.
        getargs = {'offset': 0, 'limit': 100}
        vetted_apps = []
        while True:
            serverResponse, serverContent = splunk.rest.simpleRequest(
                VETTED_APPS_URI, self.sessionKey, getargs)
            if serverResponse.status != 200:
                raise splunk.BadRequest('Error while querying Splunkbase. Splunkd returned %s' % serverContent)
            vetted_app_data = json.loads(serverContent)
            if not vetted_app_data['results']:
                break
            else:
                getargs['offset'] += 100
                vetted_apps.extend(vetted_app_data['results'])
        for app in vetted_apps:
            if app['appid'] == app_name and app['install_method'] == VETTED_APP_INSTALL_METHOD:
                break
        else:
            raise splunk.BadRequest('App %s is not vetted for Splunk Cloud.' % app_name)

    url = self._native_to_foreign_url()
    root = self._get_feed_root(url)
    if default_version:
        root = self._get_latest_version(root)
    href = self._parse_link(root)

    try:
        # Package up a Request with auth information.
        req = Request(href)
        # XXX: Converting the auth token from a POST arg to a header
        # requires us to unquote() it. If the client did not correctly
        # quote() the token, login will fail.
        req.add_header(HTTP_AUTH_HEADER, unquote(self.args[HTTP_AUTH_TOKEN]))
        # Install using this Request object.
        installer = bundle_paths.BundleInstaller()
        if self.args[HTTP_ACTION] == HTTP_ACTION_INSTALL:
            b, status = installer.install_from_url(req, sslpol=self._sslpol)
            self.response.setStatus(status)
            if ((status == bundle_paths.BundleInstaller.STATUS_INSTALLED) or
                    (status == bundle_paths.BundleInstaller.STATUS_UPGRADED)):
                # Migrate old-style bundles.
                logger.debug("Configuring application contents")
                try:
                    b.migrate()
                except Exception as e:
                    logger.exception(e)
                    self.addMessage("WARN", "Error during configuration: %s" % e)
                # Redirect to local application.
                self.response.setHeader("Location", self._redirect_to_local(b))
                # Let splunkd know about newly-installed app.
                logger.debug("Notifying splunkd that app has been installed")
                splunk.rest.simpleRequest('apps/local/_reload', sessionKey=self.sessionKey)
            if status == bundle_paths.BundleInstaller.STATUS_INSTALLED:
                self.addMessage("INFO", "Installed application: %s" % b.name())
            elif status == bundle_paths.BundleInstaller.STATUS_UPGRADED:
                self.addMessage("INFO", "Upgraded application: %s" % b.name())
            else:
                self.addMessage("WARN", "Could not install application: %s" % b.name())
        else:
            assert self.args[HTTP_ACTION] == HTTP_ACTION_DOWNLOAD
            downloaded = installer.download_from_url(req, sslpol=self._sslpol)
            self.addMessage("INFO", "Downloaded application file: %s" % downloaded)
            self.response.setHeader('content-type', 'application/json')
            response_json = {"downloaded": downloaded}
            self.response.write(json.dumps(response_json))
    except splunk.ResourceNotFound:
        raise
    except splunk.AuthorizationFailed:
        raise
    except splunk.InternalServerError:
        raise
    except Exception as e:
        logger.exception(e)
        raise splunk.InternalServerError(e)

def __init__(self, host='localhost', port=5672, ssl=None, connect_timeout=None,
             userid='guest', password='guest', login_method='AMQPLAIN',
             virtual_host='/', locale='en_US', channel_max=65535,
             frame_max=131072, heartbeat=0, client_properties=None,
             on_blocked=None, on_unblocked=None):
    """Create a connection to the specified host

    If you are using SSL, make sure the correct port number is specified
    (usually 5671), as the default of 5672 is for non-SSL connections.

    You can define an AMQP connection string as the host, this will be used
    to set the `host`, `port`, `userid`, `password` and `virtual_host`. The
    connection string follows this format:

        amqp://[userid:password@]host[:port][/virtual_host]

    :param str host: host or amqp connection string
    :param int port: port
    :param ssl: dict of SSL options passed to :func:`ssl.wrap_socket()`, None to disable SSL
    :param float connect_timeout: connect timeout
    :param str userid: username
    :param str password: password
    :param str login_method: login method (this is server-specific); default is for RabbitMQ
    :param str virtual_host: virtual host
    :param str locale: locale
    :param int channel_max: maximum number of channels
    :param int frame_max: maximum frame payload size in bytes
    :param float heartbeat: heartbeat interval in seconds, 0 disables heartbeat
    :param client_properties: dict of client properties
    :param on_blocked: callback on connection blocked
    :param on_unblocked: callback on connection unblocked
    :type connect_timeout: float or None
    :type client_properties: dict or None
    :type ssl: dict or None
    :type on_blocked: Callable or None
    :type on_unblocked: Callable or None
    """
    log.debug('amqpy {} Connection.__init__()'.format(__version__))
    self.conn_lock = Lock()

    #: Map of `{channel_id: Channel}` for all active channels
    #:
    #: :type: dict[int, Channel]
    self.channels = {}  # dict of {channel_id int: Channel}

    # the connection object itself is treated as channel 0
    super(Connection, self).__init__(self, 0)  # also sets channels[0] = self

    # instance variables
    #: :type: amqpy.transport.Transport
    self.transport = None
    self.method_reader = None
    self.method_writer = None
    self._wait_tune_ok = None

    # properties set in the start method, after a connection is established
    self.version_major = 0
    self.version_minor = 0
    self.server_properties = {}
    self.mechanisms = []
    self.locales = []

    # properties set in the Tune method
    self.channel_max = channel_max
    self.frame_max = frame_max
    if six.PY2:
        self._avail_channel_ids = array(b'H', range(self.channel_max, 0, -1))
    else:
        self._avail_channel_ids = array('H', range(self.channel_max, 0, -1))
    self._heartbeat_final = 0  # final heartbeat interval after negotiation
    self._heartbeat_server = None

    # detect amqp connection string
    if host.startswith('amqp://'):
        parts = urlparse("http://" + host[7:])
        host = unquote(parts.hostname or '') or None
        port = parts.port or 5672
        userid = unquote(parts.username or '') or 'guest'
        password = unquote(parts.password or '') or 'guest'
        virtual_host = unquote(parts.path[1:] or '/')

    # save connection parameters
    self._host = host
    self._port = port
    self._connect_timeout = connect_timeout
    self._ssl = ssl
    self._userid = userid
    self._password = password
    self._login_method = login_method
    self._virtual_host = virtual_host
    self._locale = locale
    self._heartbeat_client = heartbeat  # original heartbeat interval value proposed by client
    self._client_properties = client_properties

    # callbacks
    self.on_blocked = on_blocked
    self.on_unblocked = on_unblocked

    # heartbeat
    self._close_event = Event()
    self._heartbeat_thread = None

    self.connect()

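# The connection-string branch above reuses urlparse by swapping in an http://
# scheme, a habit from Python 2-era parsers that only split the netloc for
# known schemes; a standalone sketch of the same parsing (stdlib only, the
# credentials and host are made up):
from urllib.parse import unquote, urlparse

conn_str = 'amqp://bugs%40acme:s3cret@mq.example.org:5671/staging'
parts = urlparse('http://' + conn_str[7:])
assert unquote(parts.username or '') == 'bugs@acme'   # %40 decodes to '@'
assert unquote(parts.password or '') == 's3cret'
assert (parts.hostname, parts.port) == ('mq.example.org', 5671)
assert unquote(parts.path[1:] or '/') == 'staging'
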