def test_explicit(self): """should support explicit """ result = uploader.explicit("cloudinary", type="twitter_name", eager=[TEST_TRANS_SCALE2_PNG], tags=[UNIQUE_TAG]) params = dict(TEST_TRANS_SCALE2_PNG, type="twitter_name", version=result["version"]) url = utils.cloudinary_url("cloudinary", **params)[0] actual = result["eager"][0]["url"] self.assertEqual(parse_url(actual).path, parse_url(url).path)
def test_explicit(self): """should support explicit """ result = uploader.explicit("cloudinary", type="twitter_name", eager=[dict(crop="scale", width="2.0")], tags=[UNIQUE_TAG]) url = utils.cloudinary_url("cloudinary", type="twitter_name", crop="scale", width="2.0", format="png", version=result["version"])[0] actual = result["eager"][0]["url"] self.assertEqual(parse_url(actual).path, parse_url(url).path)
def get_connection(self, url, proxies=None): """Returns a urllib3 connection for the given URL. This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. :rtype: urllib3.ConnectionPool """ proxy = select_proxy(url, proxies) if proxy: proxy = prepend_scheme_if_needed(proxy, "http") proxy_url = parse_url(proxy) if not proxy_url.host: raise InvalidProxyURL( "Please check proxy URL. It is malformed" " and could be missing the host." ) proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: # Only scheme should be lower case parsed = urlparse(url) url = parsed.geturl() conn = self.poolmanager.connection_from_url(url) return conn
def _parse_connection_properties(self, host, port, username, password, use_ssl): hosts_list = [] if isinstance(host, str): # Force to a list, split on ',' if multiple host = host.split(',') for entity in host: # Loop over the hosts and parse connection properties host_properties = {} parsed_uri = parse_url(entity) host_properties['host'] = parsed_uri.host if parsed_uri.port is not None: host_properties['port'] = parsed_uri.port else: host_properties['port'] = port if parsed_uri.scheme == 'https' or use_ssl is True: host_properties['use_ssl'] = True if parsed_uri.auth is not None: host_properties['http_auth'] = parsed_uri.auth elif username is not None: if password is None or password == 'PROMPT': password = getpass.getpass() host_properties['http_auth'] = (username, password) hosts_list.append(host_properties) return hosts_list
def check_vul(url): """ Test if a GET to a URL is successful :param url: The URL to test :return: A dict with the exploit type as the keys, and the HTTP status code as the value """ if gl_args.mode == 'auto-scan' or gl_args.mode == 'file-scan': timeout = Timeout(connect=1.0, read=3.0) pool = PoolManager(timeout=timeout, retries=1, cert_reqs='CERT_NONE') else: timeout = Timeout(connect=3.0, read=6.0) pool = PoolManager(timeout=timeout, cert_reqs='CERT_NONE') url_check = parse_url(url) if '443' in str(url_check.port) and url_check.scheme != 'https': url = "https://"+str(url_check.host)+":"+str(url_check.port) print(GREEN + "\n ** Checking Host: %s **\n" % url) headers = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Connection": "keep-alive", "User-Agent": user_agents[randint(0, len(user_agents) - 1)]} paths = {"jmx-console": "/jmx-console/HtmlAdaptor?action=inspectMBean&name=jboss.system:type=ServerInfo", "web-console" : "/web-console/ServerInfo.jsp", "JMXInvokerServlet": "/invoker/JMXInvokerServlet", "admin-console" : "/admin-console/"} for i in paths.keys(): if gl_interrupted: break try: print(GREEN + " * Checking %s: \t" % i + ENDC), r = pool.request('HEAD', url +str(paths[i]), redirect=False, headers=headers) paths[i] = r.status # check if it's false positive if len(r.getheaders()) == 0: print(RED + "[ ERROR ]\n * The server %s is not an HTTP server.\n" % url + ENDC) paths = {"jmx-console": 505, "web-console": 505, "JMXInvokerServlet": 505, "admin-console": 505} break if paths[i] in (301, 302, 303, 307, 308): url_redirect = r.get_redirect_location() print(GREEN + "[ REDIRECT ]\n * The server sent a redirect to: %s\n" % url_redirect) elif paths[i] == 200 or paths[i] == 500: if i == "admin-console": print(RED + "[ EXPOSED ]" + ENDC) else: print(RED + "[ VULNERABLE ]" + ENDC) else: print(GREEN + "[ OK ]") except: print(RED + "\n * An error occurred while connecting to the host %s\n" % url + ENDC) paths[i] = 505 return paths
def urlparsing(value): try: loc = parse_url(value) except LocationParseError as error: return None, None, None return is_secure(loc.scheme), loc.host, loc.port
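is_secure() is not shown in the snippet above; a minimal stand-in, under the assumption that it only needs to flag TLS-style schemes (the exact scheme set is an assumption):

# Hypothetical helper assumed by urlparsing(); not part of the original code.
def is_secure(scheme):
    # parse_url() may return None for the scheme, so guard before comparing.
    return (scheme or "").lower() in ("https", "wss")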
def read_articles(): ''' read all articles as dataframe from mongodb collection 'articles' - INPUT: None - OUTPUT: df. columns: title, url, uri, body_text, ''' my_mongo = MyMongo() t0 = time.time() cur_articles = my_mongo.get_article_body_text(testing=0) articles_cleaned = {} # print('%d unique articles ' % len(articles_cleaned)) clean_articles(cur_articles, articles_cleaned) print('%d unique articles with body_text' % len(articles_cleaned)) t1 = time.time() # time it print("finished in %4.4fmin for %s " % ((t1 - t0) / 60, 'read/clean articles')) df = pd.DataFrame([{'url': k, 'body_text': v[1]} for k, v in articles_cleaned.items()]) article_dict, article_dt = MyMongo().get_article_attri() # article_dict_all = dict(article_dict) df['title'] = df['url'].map(lambda x: article_dict.get(x, 'Unknown')) df['uri'] = df['url'].map(lambda x: parse_url(x).host) df['dt'] = df['url'].map(lambda x: article_dt.get(x, '')) my_mongo.close() return df
def test_parse_url(self): url_host_map = { 'http://google.com/mail': Url('http', host='google.com', path='/mail'), 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), 'google.com/mail': Url(host='google.com', path='/mail'), 'http://google.com/': Url('http', host='google.com', path='/'), 'http://google.com': Url('http', host='google.com'), 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), # Path/query/fragment '': Url(), '/': Url(path='/'), '?': Url(path='', query=''), '#': Url(path='', fragment=''), '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), '/foo': Url(path='/foo'), '/foo?bar=baz': Url(path='/foo', query='bar=baz'), '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), # Port 'http://google.com/': Url('http', host='google.com', path='/'), 'http://google.com:80/': Url('http', host='google.com', port=80, path='/'), 'http://google.com:/': Url('http', host='google.com', path='/'), 'http://google.com:80': Url('http', host='google.com', port=80), 'http://google.com:': Url('http', host='google.com'), # Auth 'http://*****:*****@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), 'http://*****:*****@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), 'http://@': Url('http', host=None, auth='') } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url, expected_url)
def uris(rabbit_config): amqp_uri = rabbit_config["AMQP_URI"] scheme, auth, host, port, path, _, _ = parse_url(amqp_uri) bad_port = Url(scheme, auth, host, port + 1, path).url bad_user = Url(scheme, "invalid:invalid", host, port, path).url bad_vhost = Url(scheme, auth, host, port, "/unknown").url return {"good": amqp_uri, "bad_port": bad_port, "bad_user": bad_user, "bad_vhost": bad_vhost}
def test_parse_url(self): url_host_map = { "http://google.com/mail": Url("http", host="google.com", path="/mail"), "http://google.com/mail/": Url("http", host="google.com", path="/mail/"), "google.com/mail": Url(host="google.com", path="/mail"), "http://google.com/": Url("http", host="google.com", path="/"), "http://google.com": Url("http", host="google.com"), "http://google.com?foo": Url("http", host="google.com", path="", query="foo"), # Path/query/fragment "": Url(), "/": Url(path="/"), "?": Url(path="", query=""), "#": Url(path="", fragment=""), "#?/!google.com/?foo#bar": Url(path="", fragment="?/!google.com/?foo#bar"), "/foo": Url(path="/foo"), "/foo?bar=baz": Url(path="/foo", query="bar=baz"), "/foo?bar=baz#banana?apple/orange": Url(path="/foo", query="bar=baz", fragment="banana?apple/orange"), # Port "http://google.com/": Url("http", host="google.com", path="/"), "http://google.com:80/": Url("http", host="google.com", port=80, path="/"), "http://google.com:/": Url("http", host="google.com", path="/"), "http://google.com:80": Url("http", host="google.com", port=80), "http://google.com:": Url("http", host="google.com"), # Auth "http://*****:*****@localhost/": Url("http", auth="foo:bar", host="localhost", path="/"), "http://foo@localhost/": Url("http", auth="foo", host="localhost", path="/"), "http://*****:*****@baz@localhost/": Url("http", auth="foo:bar@baz", host="localhost", path="/"), } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url, expected_url)
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. try: url = unicode(url) except NameError: # We're on Python 3. url = str(url) except UnicodeDecodeError: pass # Support for unicode domain names and paths. scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme: raise MissingSchema("Invalid URL %r: No schema supplied" % url) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def getPPDURL(self, source_url): """ Downloads the source_url, stores it locally and returns the local URL :param source_url: remote PPD URL :return: local URL to the cached PPD """ source = parse_url(source_url) host = source.host if host is None or host == "localhost": # no host: we assume that the PPD can be found on the current active master backend with make_session() as session: # get any other registered backend master_backend = session.query(RegisteredBackend) \ .filter(RegisteredBackend.uuid != self.env.core_uuid, RegisteredBackend.type == BackendTypes.active_master).first() if master_backend is None: self.log.error(C.make_error("NO_MASTER_BACKEND_FOUND")) return source_url # Try to log in with provided credentials url = parse_url(master_backend.url) host = url.host # check if file exists locally rel_path = source.path[1:] if source.path.startswith("/") else source.path local_path = path.join(self.ppd_dir, host, rel_path) if not path.exists(local_path): # cache locally try: r = requests.get(source_url) if r.ok: local_dir = path.dirname(local_path) if not path.exists(local_dir): makedirs(local_dir) with open(local_path, "w") as f: f.write(r.text) else: self.log.error("requesting PPD from %s failed with status code: %s" % (source_url, r.status_code)) return source_url except requests.exceptions.ConnectionError as e: self.log.error("requesting PPD from %s failed with error: %s" % (source_url, str(e))) return source_url return "%s%s/%s" % (self.base_url, host, rel_path)
def test_image_field(self): field = Poll.objects.get(question="with image") self.assertIsNotNone(field) self.assertEqual(field.image.public_id, API_TEST_ID) self.assertEqual( parse_url(field.image.url).path, "/{cloud}/image/upload/v1234/{name}.jpg".format(cloud=cloudinary.config().cloud_name, name=API_TEST_ID) ) self.assertTrue(field.image)
def test_bad_user(rabbit_config): scheme, auth, host, port, path, _, _ = parse_url(rabbit_config['AMQP_URI']) amqp_uri = Url(scheme, 'invalid:invalid', host, port, path).url with pytest.raises(IOError) as exc_info: verify_amqp_uri(amqp_uri) message = str(exc_info.value) assert 'Error connecting to broker' in message assert 'invalid credentials' in message
def test_bad_vhost(rabbit_config): scheme, auth, host, port, path, _, _ = parse_url(rabbit_config['AMQP_URI']) amqp_uri = Url(scheme, auth, host, port, '/unknown').url with pytest.raises(IOError) as exc_info: verify_amqp_uri(amqp_uri) message = str(exc_info.value) assert 'Error connecting to broker' in message assert 'invalid or unauthorized vhost' in message
def test_explicit(self): """should support explicit """ result = uploader.explicit("cloudinary", type="twitter_name", eager=[dict(crop="scale", width="2.0")]) url = utils.cloudinary_url("cloudinary", type="twitter_name", crop="scale", width="2.0", format="png", version=result["version"])[0] if result["eager"][0]["url"].startswith("/res/"): actual = result["eager"][0]["url"][4:] else: actual = result["eager"][0]["url"] self.assertEqual(actual, parse_url(url).path)
def test_netloc(self): url_netloc_map = { 'http://google.com/mail': 'google.com', 'http://google.com:80/mail': 'google.com:80', 'google.com/foobar': 'google.com', 'google.com:12345': 'google.com:12345', } for url, expected_netloc in url_netloc_map.items(): self.assertEqual(parse_url(url).netloc, expected_netloc)
def test_netloc(self): url_netloc_map = { "http://google.com/mail": "google.com", "http://google.com:80/mail": "google.com:80", "google.com/foobar": "google.com", "google.com:12345": "google.com:12345", } for url, expected_netloc in url_netloc_map.items(): self.assertEqual(parse_url(url).netloc, expected_netloc)
def pixel(path_domain=""): seen = {} sites = request.cookies.site sites = sites.replace('"', '') for c in sites.split(' '): if '.' in c: seen[c] = True ref_domain = parse_url(request.get_header('Referer')).host req_domain = parse_url(request.url).host if ref_domain and ref_domain != req_domain: seen[ref_domain] = True try: del(seen['ad.aloodo.com']) except KeyError: pass cdata = ' '.join(seen.keys()) if cdata: response.set_header('Set-Cookie', 'site="%s"; Max-Age=31536000; Path=/' % cdata) response.status=200 response.set_header('Tk', 'D') accept = request.get_header('Accept') if not "image" in accept and "text/html" in accept: response.set_header('Content-Type', 'text/html') return template('info', req_headers=format_headers(request.headers), res_headers=format_headers(response.headers), req_url=request.url) else: response.set_header('Content-Type', 'image/png') if len(seen) >= 3 or path_domain == ref_domain: expdt = datetime.now() + timedelta(days=7) exp = mktime(expdt.timetuple()) response.set_header('Expires', formatdate( timeval=exp, localtime=False, usegmt=True)) return buf
def get_url_data(serviceurl, params=None): """ :param serviceurl: url to retrieve data :param params: http://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls :return: json url_data """ # Get data from the url # Support https without verification of certificate # req = requests.get(serviceurl, verify=False, params=params) cnt = 0 max_retry = 3 purl = parse_url(serviceurl) if purl.auth: username = purl.auth.split(':')[0] password = purl.auth.split(':')[1] else: username = None password = None # Add url like http://host burl = '{}://{}'.format(purl.scheme, purl.host) if purl.port: # Add port like: http://host:8080 burl += ':{}'.format(purl.port) if purl.request_uri: # Add path and query like: http://host:8080/path/uri?query burl += '{}'.format(purl.request_uri) while cnt < max_retry: try: req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password)) if req.json(): return req.json() elif req.from_cache: # Clear cache to retry again requests_cache.clear() req = requests.get(burl, verify=False, params=params, timeout=timeout, auth=(username, password)) if req.json(): return req.json() else: # Raise a custom exception raise ValueError('No data from response') except requests.exceptions.RequestException as e: time.sleep(2 ** cnt) cnt += 1 if cnt >= max_retry: raise e data = req.json() return data
def test_request_uri(self): url_host_map = { 'http://google.com/mail': '/mail', 'http://google.com/mail/': '/mail/', 'http://google.com/': '/', 'http://google.com': '/', '': '/', '/': '/', '?': '/?', '#': '/', '/foo?bar=baz': '/foo?bar=baz', } for url, expected_request_uri in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url.request_uri, expected_request_uri)
def test_request_uri(self): url_host_map = { "http://google.com/mail": "/mail", "http://google.com/mail/": "/mail/", "http://google.com/": "/", "http://google.com": "/", "": "/", "/": "/", "?": "/?", "#": "/", "/foo?bar=baz": "/foo?bar=baz", } for url, expected_request_uri in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url.request_uri, expected_request_uri)
def get_services_and_ips(services, compute_nodes): service_ips = {} for name, data in services.items(): url = data['endpoints']['admin']['url'] if 'admin' in data['endpoints'] \ else data['endpoints']['public']['url'] if 'public' in data['endpoints'] \ else data['endpoints']['internal']['url'] service_ips[name] = socket.gethostbyname(parse_url(url).host) if url is not None else '0.0.0.0' for name, data in compute_nodes.items(): service_ips[name] = data['ip'] if data['ip'] is not None else socket.gethostbyname(data['name']) logging.info('Service IPs:\n{}'.format(pprint.PrettyPrinter(2).pformat(service_ips))) return service_ips
def test_parse_url(self): url_host_map = { "http://google.com/mail": Url("http", host="google.com", path="/mail"), "http://google.com/mail/": Url("http", host="google.com", path="/mail/"), "google.com/mail": Url(host="google.com", path="/mail"), "http://google.com/": Url("http", host="google.com", path="/"), "http://google.com": Url("http", host="google.com"), "http://google.com?foo": Url("http", host="google.com", path="", query="foo"), "": Url(), "/": Url(path="/"), "?": Url(path="", query=""), "#": Url(path="", fragment=""), "#?/!google.com/?foo#bar": Url(path="", fragment="?/!google.com/?foo#bar"), "/foo": Url(path="/foo"), "/foo?bar=baz": Url(path="/foo", query="bar=baz"), "/foo?bar=baz#banana?apple/orange": Url(path="/foo", query="bar=baz", fragment="banana?apple/orange"), } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url, expected_url)
def get_web_title(hyperlink): """Get title of given hyperlink. :param hyperlink: target url :type hyperlink: str :return: title of the website :rtype: str """ web_schemes = ["http", "https"] given_scheme = parse_url(hyperlink).scheme.lower() if given_scheme in web_schemes: resp = requests.get(hyperlink) if resp.status_code == requests.codes.ok: soup = BeautifulSoup(resp.text, 'lxml') title = soup.title.string return title else: return "<Unnamed>" else: return "<Not allowed>"
def test_parse_url(self): url_host_map = { 'http://google.com/mail': Url('http', host='google.com', path='/mail'), 'http://google.com/mail/': Url('http', host='google.com', path='/mail/'), 'google.com/mail': Url(host='google.com', path='/mail'), 'http://google.com/': Url('http', host='google.com', path='/'), 'http://google.com': Url('http', host='google.com'), 'http://google.com?foo': Url('http', host='google.com', path='', query='foo'), '': Url(), '/': Url(path='/'), '?': Url(path='', query=''), '#': Url(path='', fragment=''), '#?/!google.com/?foo#bar': Url(path='', fragment='?/!google.com/?foo#bar'), '/foo': Url(path='/foo'), '/foo?bar=baz': Url(path='/foo', query='bar=baz'), '/foo?bar=baz#banana?apple/orange': Url(path='/foo', query='bar=baz', fragment='banana?apple/orange'), } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) self.assertEqual(returned_url, expected_url)
def run_spider(self, target_ip, target_web, client): # Execute crawling using Scrapy. all_targets_log = [] for target_info in target_web: target_url = target_info[1] + target_ip + ':' + target_info[0] + '/' target_log = [target_url] response_log = target_ip + '_' + target_info[0] + '.log' now_time = self.get_current_date('%Y%m%d%H%M%S') result_file = os.path.join(self.output_base_path, now_time + self.output_filename) option = ' -a target_url=' + target_url + ' -a allow_domain=' + target_ip + \ ' -a delay=' + self.spider_delay_time + ' -a store_path=' + self.store_path + \ ' -a response_log=' + response_log + ' -a msgrpc_host=' + client.host + \ ' -a msgrpc_port=' + str(client.port) + ' -a msgrpc_token=' + client.token.decode('utf-8') + \ ' -a msgrpc_console_id=' + client.console_id.decode('utf-8') + ' -o ' + result_file command = 'scrapy runspider Spider.py' + option proc = Popen(command, shell=True) proc.wait() # Get crawling result. dict_json = {} if os.path.exists(result_file): with codecs.open(result_file, 'r', encoding='utf-8') as fin: target_text = self.delete_ctrl_char(fin.read()) if target_text != '': dict_json = json.loads(target_text) else: self.print_message(WARNING, '[{}] is empty.'.format(result_file)) # Exclude except allowed domains. for idx in range(len(dict_json)): items = dict_json[idx]['urls'] for item in items: try: if target_ip == util.parse_url(item).host: target_log.append(item) except Exception as err: self.print_exception(err, 'Parsed error: {}'.format(item)) all_targets_log.append([target_url, os.path.join(self.store_path, response_log), list(set(target_log))]) return all_targets_log
def preprocess_body(self, body: EntityDefinition) -> EntityDefinition: body = super().preprocess_body(body) if not self.pulp_ctx.has_plugin("core", min_version="3.11.dev"): # proxy_username and proxy_password are separate fields starting with 3.11 # https://pulp.plan.io/issues/8167 proxy_username = body.pop("proxy_username", None) proxy_password = body.pop("proxy_password", None) if proxy_username or proxy_password: if "proxy_url" in body: if proxy_username and proxy_password: parsed_url = parse_url(body["proxy_url"]) body["proxy_url"] = parsed_url._replace(auth=":".join( [proxy_username, proxy_password])).url else: raise click.ClickException( _("Proxy username and password can only be provided in conjunction." )) else: raise click.ClickException( _("Proxy credentials can only be provided with a proxy url." )) return body
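A minimal sketch of the _replace() call used above, in isolation (proxy host and credentials are made up): urllib3's Url is a namedtuple, so _replace(auth=...) returns a copy whose .url property re-serialises with the credentials included.

from urllib3.util import parse_url

parsed = parse_url("http://proxy.example.com:3128")
with_auth = parsed._replace(auth="alice:s3cret")
# with_auth.url == "http://alice:[email protected]:3128"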
def get_host_and_port(url): """Get the host, or the host:port pair if port is explicitly included, for the given URL. Examples: >>> get_host_and_port('example.com') 'example.com' >>> get_host_and_port('example.com:443') 'example.com:443' >>> get_host_and_port('http://example.com') 'example.com' >>> get_host_and_port('https://example.com/') 'example.com' >>> get_host_and_port('https://example.com:8081') 'example.com:8081' >>> get_host_and_port('ssh://example.com') 'example.com' :param url: the URL string to parse :return: a string with the host:port pair if the URL includes port number explicitly; otherwise, returns host only """ url = urllib3_util.parse_url(url) return "{}:{}".format(url.host, url.port) if url.port else url.host
def __init__(self, base_url, token=None, identity=None, default_429_wait_ms=5000, use_authorization_header=True): try: scheme, auth, host, port, path, query, fragment = parse_url( base_url) except LocationParseError: raise MatrixError("Invalid homeserver url %s" % base_url) if not scheme: raise MatrixError("No scheme in homeserver url %s" % base_url) self._base_url = base_url self.token = token self.identity = identity self.txn_id = 0 self.validate_cert = True self.session = Session() self.default_429_wait_ms = default_429_wait_ms self.use_authorization_header = use_authorization_header
def _http_request(method: str, endpoint: str, **kwargs) -> Optional[JT]: url = build_url(endpoint) parsed_url = parse_url(url) pm_args = { "num_pools": constants.HTTP_POOL_MANAGER_COUNT, "host": parsed_url.host, "port": parsed_url.port, "retries": Retry(connect=constants.HTTP_REQUEST_RETRIES_COUNT, read=constants.HTTP_REQUEST_RETRIES_COUNT, redirect=constants.HTTP_REQUEST_RETRIES_COUNT, backoff_factor=constants.HTTP_REQUEST_BACKOFF_FACTOR, method_whitelist=METHODS_WHITELIST), "ssl_context": _ssl_context, } if _ssl_context is not None and url.startswith("https"): pm_args["assert_hostname"] = False http_pool_manager: PoolManager = PoolManager(**pm_args) try: logger.trace("HTTP {0} to {1}", method, url) response = http_pool_manager.request( method=method, url=parsed_url.url, timeout=constants.HTTP_REQUEST_TIMEOUT, **kwargs) raise_for_status(response) except MaxRetryError as e: logger.info("{} to {} failed due to: {}.", method, url, e) return None except Exception as e: # pylint: disable=broad-except logger.error(log_messages.HTTP_REQUEST_RETURNED_ERROR, method, url, e) return None return json.loads(response.data)
def _stage_files_to_be_validated(self): upload_area_id = None file_names = [] for s3_file_url in self.s3_file_urls: url_bits = parse_url(s3_file_url) s3_bucket_name = url_bits.netloc s3_object_key = urllib.parse.unquote(url_bits.path.lstrip('/')) key_parts = s3_object_key.split('/') upload_area_id = key_parts.pop(0) file_name = "/".join(key_parts) file_names.append(file_name) staged_file_path = pathlib.Path(self.staging_folder, s3_object_key) self._log("Staging s3://{bucket}/{key} at {file_path}".format(bucket=s3_bucket_name, key=s3_object_key, file_path=staged_file_path)) staged_file_path.parent.mkdir(parents=True, exist_ok=True) self._download_file_from_bucket_to_filesystem(s3_bucket_name, s3_object_key, staged_file_path) if not staged_file_path.is_file(): raise UploadException(status=500, title="Staged file path is not a file", detail=f"Attempting to stage file path {staged_file_path} failed because it is " f"not a file.") self.staged_file_paths.append(staged_file_path) return upload_area_id, file_names
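A small sketch of how parse_url splits an s3:// URL into bucket and key, as done above (bucket and object key names are made up):

from urllib3.util import parse_url
import urllib.parse

url_bits = parse_url("s3://org-upload-prod/area-1234/some%20file.json")
bucket = url_bits.netloc                               # 'org-upload-prod'
key = urllib.parse.unquote(url_bits.path.lstrip('/'))  # 'area-1234/some file.json'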
def join_proxy_url(address: str, username: Optional[str], password: Optional[str]) -> str: """ Gets a splitted address, username, password returns: Joined URL forming proxy_url """ # http://username:password@address if not address.startswith(("http://", "https://")): address = "http://" + address parsed = parse_url(address) auth_items = [] if username: auth_items.append(str(username)) # password is set only if there is a username # to avoid it being set as e.g: http://:[email protected] if password: auth_items.append(str(password)) return get_parsed_url(parsed, auth_items)
def classifier_signature(self, target_info): product_list = [] for target in target_info: for target_url in target[2]: # Get HTTP response (header + body). response = '' http = urllib3.PoolManager(timeout=self.util.http_timeout) try: self.util.print_message(OK, 'Accessing: {}'.format(target_url)) res = http.request('GET', target_url) for key in res.headers._container.keys(): response += key + ': ' + res.headers[key] + '\r\n' response += '\r\n\r\n' + res.data.decode('utf-8') except Exception as err: self.util.print_message(WARNING, '{}'.format(err)) for category in ['os', 'web', 'framework', 'cms']: prod_info = self.identify_product(category, response) for product in prod_info: parsed = util.parse_url(target_url) product_list.append([product, parsed.scheme, parsed.port, parsed.path]) return product_list
def prepend_scheme_if_needed(url, new_scheme): """Given a URL that may or may not have a scheme, prepend the given scheme. Does not replace a present scheme with the one provided as an argument. :rtype: str """ parsed = parse_url(url) scheme, auth, host, port, path, query, fragment = parsed # A defect in urlparse determines that there isn't a netloc present in some # urls. We previously assumed parsing was overly cautious, and swapped the # netloc and path. Due to a lack of tests on the original defect, this is # maintained with parse_url for backwards compatibility. netloc = parsed.netloc if not netloc: netloc, path = path, netloc if scheme is None: scheme = new_scheme if path is None: path = '' return urlunparse((scheme, netloc, path, '', query, fragment))
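For context on the netloc/path handling above, a quick contrast between the two parsers for a scheme-less URL (results shown as comments):

from urllib.parse import urlparse
from urllib3.util import parse_url

urlparse("example.com/mail")   # scheme='', netloc='', path='example.com/mail'
parse_url("example.com/mail")  # host='example.com', path='/mail', netloc='example.com'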
def is_url(self, url): #: Accept objects that have string representations. try: url = unicode(url) except NameError: # We're on Python 3. url = str(url) except UnicodeDecodeError: pass # Support for unicode domain names and paths. scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme or not host: return False # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: return False return True
def get_github_url(api_url: str, repository_name: str, repository_branch: str) -> str: """ Generates the URL to the location of the pushed DAG :param api_url: url of the GitHub API :param repository_name: name of the GitHub repository in the form [name or org]/[repository name] :param repository_branch: name of the GitHub branch :return: a URL in string format """ parsed_url = parse_url(api_url) scheme = parsed_url.scheme + ":/" host = parsed_url.host port = '' if parsed_url.host.split('.')[0] == 'api': host = ".".join(parsed_url.host.split('.')[1:]) if parsed_url.port: port = ':' + str(parsed_url.port) return "/".join( [scheme, host + port, repository_name, 'tree', repository_branch])
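With the port handled as above, a call like the following should map the API host to the web host (organisation, repository and branch names are made up):

get_github_url("https://api.github.com", "myorg/myrepo", "main")
# -> "https://github.com/myorg/myrepo/tree/main"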
def is_url(url): #: Accept objects that have string representations. try: url = unicode(url) except NameError: # We're on Python 3. url = str(url) except UnicodeDecodeError: pass # Support for unicode domain names and paths. scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme or not host: return False # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: return False return True
def check_url_is_allowed(self, value: str) -> str: """Check if a given URL is allowed or not and return the cleaned URL, e.g. fixed a mal-encoded URL. By default, only the protocol ``javascript`` is denied. Other protocols are allowed (HTTP(S), FTP, inline data (images, files, ...), ..) Fragments are allowed as well (``#my-content``). Query-strings and relative/absolute paths are allowed. :returns: The cleaned URL. :raises ValueError: If the URL is invalid or blacklisted. """ url = parse_url(value.strip()) if url.scheme in self.blacklisted_url_schemes: raise ValueError( "Scheme: {scheme} is blacklisted".format(scheme=url.scheme) ) return url.url
def dnevnik_authorization(self): """ Функция для проведения авторизации, возвращает ответ сервера с токеном и профайлами """ ss = requests.Session() # ss.proxies = { # 'http': '85.26.146.169:80', # 'https': '85.26.146.169:80' # } login_form_request = ss.get(self.OAUTH_URL, timeout=5) sleep(randint(10, 30) / 10) ss.get("https://stats.mos.ru/handler/handler.js?time={time}".format( time=datetime.today().timestamp()), timeout=5) sleep(randint(10, 30) / 10) login_request = ss.post( "https://login.mos.ru/sps/login/methods/password", data={ "isDelayed": False, "login": self._login, "password": self._password, }, allow_redirects=False, timeout=5) sleep(randint(10, 30) / 10) if login_request.status_code in range(300, 400): redirect_uri = login_request.headers["Location"] code = parse_url(redirect_uri).query.split("=")[1] req = ss.get( "https://dnevnik.mos.ru/lms/api/sudir/oauth/te?code={}".format( code), headers={"Accept": "application/vnd.api.v3+json"}, timeout=5) return json.loads(req.content.decode("utf-8")) else: if login_request.status_code == 200: raise Exception( f"Something went wrong! Status code ({login_request.status_code}) is incorrect." f" Maybe you entered incorrect login/password?")
def all_of(resource: str, session: requests.Session) -> List[Dict[str, Any]]: assert resource in ( "consumers", "services", "routes", "plugins", "acls", "key-auths", "basic-auths", ) logger.debug(f"Collecting all entries from `{resource}` ...") data: List[Dict[str, Any]] = [] next_ = f"/{resource}" while next_: resp = session.get(f"{next_}") _check_resp(resp) jresp = resp.json() data += jresp["data"] next_ = jresp.get("next") if next_: u = parse_url(next_) next_ = u.request_uri logger.debug(f"... next page `{next_}`") return data
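The request_uri property used for the next-page link strips scheme and host, so the path can be replayed against the same session; a minimal illustration (the admin URL is made up):

from urllib3.util import parse_url

parse_url("http://kong-admin:8001/consumers?offset=abc123").request_uri
# -> '/consumers?offset=abc123'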
def configure(self, *args, **kwargs): vars = filter(lambda x: x[0].startswith('OS_'), os.environ.iteritems()) conf_keys = self.conf.keys() for k, v in vars: # Try the full var first n = k.lower() cands = (n, n[3:]) for var in cands: if var in conf_keys: self.conf.set_default(name=var, default=v) break self.conf(args[0]) # bail using keystoneauth1 if not available. # FIXME: this is hacky... if self.conf.use_keystoneauth1 and not HAS_KEYSTONEAUTH1: raise Exception('Requested module keystoneauth1 is not available.') # adjust the logging if self.conf.debug: ch = logging.StreamHandler(stream=sys.stderr) ch.setLevel(logging.DEBUG) self.logger.addHandler(ch) # This is questionable... self._logging_handlers['debug'] = ch self.logger.removeHandler(self._logging_handlers['info']) self.logger.setLevel(logging.DEBUG) self.os_service_endpoint = self.conf.os_service_endpoint if self.os_service_endpoint is None: base = {'path': None} url = parse_url(self.conf.auth_url) l = list(url)[:4] + [None] * (len(url._fields) - 4) self.os_service_endpoint = Url(*l).url self.conf.set_default('os_service_endpoint', default=self.os_service_endpoint)
def custom_search(self, query, max_page_count=1, target_fqdn=''): # Google Custom Search API. self.utility.write_log(20, '[In] Execute Google custom search [{}].'.format(self.file_name)) # Setting of Google Custom Search. service = None if self.utility.proxy != '': # Set proxy. self.utility.print_message(WARNING, 'Set proxy server: {}'.format(self.utility.proxy)) parsed = util.parse_url(self.utility.proxy) proxy = None if self.utility.proxy_pass != '': proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host=parsed.host, proxy_port=parsed.port, proxy_user=self.utility.proxy_user, proxy_pass=self.utility.proxy_pass) else: proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host=parsed.host, proxy_port=parsed.port) my_http = httplib2.Http(proxy_info=proxy, disable_ssl_certificate_validation=True) service = build("customsearch", "v1", developerKey=self.api_key, http=my_http) else: # None proxy. service = build("customsearch", "v1", developerKey=self.api_key) # Execute search. urls = [] fqdn_list = [] result_count = 0 start_index = self.start_index try: search_count = 0 while search_count < max_page_count: self.utility.print_message(OK, 'Using query : {}'.format(query)) response = service.cse().list( q=query, cx=self.search_engine_id, num=10, start=start_index, filter='0', safe='off', ).execute() # Get finding counts. result_count = int(response.get('searchInformation').get('totalResults')) is_new_query = False # Get extracted link (url). search_urls = [] if result_count != 0: items = response['items'] for item in items: urls.append(item['link']) search_urls.append(item['link']) # Set new query. if result_count <= 10 or max_page_count == 1: fqdn_list.extend(self.utility.transform_url_hostname_list(search_urls)) break else: # Refine search range using "-inurl" option. tmp_list = self.utility.transform_url_hostname_list(search_urls) for fqdn in tmp_list: if fqdn not in fqdn_list: subdomain = self.utility.extract_subdomain(fqdn, target_fqdn) if target_fqdn != '' and subdomain == target_fqdn: query += ' -inurl:http://' + subdomain + ' -inurl:https://' + subdomain is_new_query = True search_count = -1 elif subdomain != '': query += ' -inurl:' + subdomain is_new_query = True search_count = -1 fqdn_list.append(fqdn) if is_new_query is False: if 'nextPage' in response.get('queries').keys(): start_index = response.get('queries').get('nextPage')[0].get('startIndex') else: self.utility.print_message(WARNING, 'There is not next page.') break search_count += 1 except Exception as e: msg = 'Google custom search is failure : {}'.format(e) self.utility.print_exception(e, msg) self.utility.write_log(30, msg) self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name)) return urls, result_count, fqdn_list self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name)) return urls, result_count, list(set(fqdn_list))
def main(): """ Run interactively. Call when the module is run by itself. :return: Exit code """ # check for Updates updates = check_updates() if updates: print(BLUE + BOLD + "\n\n * An update is available and is recommended update before continuing.\n" + " Do you want to update now?") pick = input(" YES/no ? ").lower() if version_info[0] >= 3 else raw_input(" YES/no ? ").lower() print (ENDC) if pick != "no": updated = auto_update() if updated: print(GREEN + BOLD + "\n * The JexBoss has been successfully updated. Please run again to enjoy the updates.\n" +ENDC) exit(0) else: print(RED + BOLD + "\n\n * An error occurred while updating the JexBoss. Please try again..\n" +ENDC) exit(1) vulnerables = False # check vulnerabilities for standalone mode if gl_args.mode == 'standalone': url = gl_args.host scan_results = check_vul(url) # performs exploitation for i in ["jmx-console", "web-console", "JMXInvokerServlet", "admin-console"]: if scan_results[i] == 200 or scan_results[i] == 500: vulnerables = True if gl_args.auto_exploit: auto_exploit(url, i) else: print(BLUE + "\n\n * Do you want to try to run an automated exploitation via \"" + BOLD + i + NORMAL + "\" ?\n" + " This operation will provide a simple command shell to execute commands on the server..\n" + RED + " Continue only if you have permission!" + ENDC) pick = input(" yes/NO ? ").lower() if version_info[0] >= 3 else raw_input(" yes/NO ? ").lower() if pick == "yes": auto_exploit(url, i) # check vulnerabilities for auto scan mode elif gl_args.mode == 'auto-scan': file_results = open(gl_args.results, 'w') file_results.write("JexBoss Scan Mode Report\n\n") for ip in gl_args.network.hosts(): if gl_interrupted: break for port in gl_args.ports.split(","): if check_connectivity(ip, port): url = "{0}:{1}".format(ip,port) ip_results = check_vul(url) for key in ip_results.keys(): if ip_results[key] == 200 or ip_results[key] == 500: vulnerables = True if gl_args.auto_exploit: result_exploit = auto_exploit(url, key) if result_exploit: file_results.write("{0}:\t[EXPLOITED VIA {1}]\n".format(url, key)) else: file_results.write("{0}:\t[FAILED TO EXPLOITED VIA {1}]\n".format(url, key)) else: file_results.write("{0}:\t[POSSIBLY VULNERABLE TO {1}]\n".format(url, key)) file_results.flush() else: print (RED+"\n * Host %s:%s does not respond."% (ip,port)+ENDC) file_results.close() elif gl_args.mode == 'file-scan': file_results = open(gl_args.out, 'w') file_results.write("JexBoss Scan Mode Report\n\n") file_input = open(gl_args.file, 'r') for url in file_input.readlines(): if gl_interrupted: break url = url.strip() ip = str(parse_url(url)[2]) port = parse_url(url)[3] if parse_url(url)[3] != None else 80 if check_connectivity(ip, port): url_results = check_vul(url) for key in url_results.keys(): if url_results[key] == 200 or url_results[key] == 500: vulnerables = True if gl_args.auto_exploit: result_exploit = auto_exploit(url, key) if result_exploit: file_results.write("{0}:\t[EXPLOITED VIA {1}]\n".format(url, key)) else: file_results.write("{0}:\t[FAILED TO EXPLOITED VIA {1}]\n".format(url, key)) else: file_results.write("{0}:\t[POSSIBLY VULNERABLE TO {1}]\n".format(url, key)) file_results.flush() else: print (RED + "\n * Host %s:%s does not respond." % (ip, port) + ENDC) file_results.close() # resume results if vulnerables: banner() print(RED + BOLD+" Results: potentially compromised server!" + ENDC) if gl_args.mode == 'file-scan': print(RED + BOLD + " ** Check more information on file {0} **".format(gl_args.out) + ENDC) elif gl_args.mode == 'auto-scan': print(RED + BOLD + " ** Check more information on file {0} **".format(gl_args.results) + ENDC) print(GREEN + " * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*\n" +BOLD+ " Recommendations: \n" +ENDC+ GREEN+ " - Remove web consoles and services that are not used, eg:\n" " $ rm web-console.war\n" " $ rm http-invoker.sar\n" " $ rm jmx-console.war\n" " $ rm jmx-invoker-adaptor-server.sar\n" " $ rm admin-console.war\n" " - Use a reverse proxy (eg. nginx, apache, F5)\n" " - Limit access to the server only via reverse proxy (eg. DROP INPUT POLICY)\n" " - Search vestiges of exploitation within the directories \"deploy\" and \"management\".\n\n" " References:\n" " [1] - https://developer.jboss.org/wiki/SecureTheJmxConsole\n" " [2] - https://issues.jboss.org/secure/attachment/12313982/jboss-securejmx.pdf\n" "\n" " - If possible, discard this server!\n" " * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*\n") else: print(GREEN + "\n\n * Results: \n" + " The server is not vulnerable to bugs tested ... :D\n\n" + ENDC) # infos print(ENDC + " * Info: review, suggestions, updates, etc: \n" + " https://github.com/joaomatosf/jexboss\n") print(GREEN + BOLD + " * DONATE: " + ENDC + "Please consider making a donation to help improve this tool,\n" " including research to new versions of JBoss and zero days. \n\n" + GREEN + BOLD + " * Paypal: " + ENDC + " [email protected] \n" + GREEN + BOLD + " * Bitcoin Address: " + ENDC + " 14x4niEpfp7CegBYr3tTzTn4h6DAnDCD9C \n" + GREEN + BOLD + " * URI: " + ENDC + " bitcoin:14x4niEpfp7CegBYr3tTzTn4h6DAnDCD9C?label=jexboss\n")
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/requests/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: url = unicode(url) if is_py2 else str(url) # Remove leading whitespaces from url url = url.lstrip() # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") error = error.format(to_native_string(url, 'utf8')) raise MissingSchema(error) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA # behaviour. For strings containing only ASCII characters, we need to also verify # it doesn't start with a wildcard (*), before allowing the unencoded hostname. if not unicode_is_ascii(host): try: host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL('URL has an invalid label.') elif host.startswith(u'*'): raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') if isinstance(params, (str, bytes)): params = to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): url = url.decode("utf8") else: url = str(url) # Remove leading whitespaces from url url = url.lstrip() # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ":" in url and not url.lower().startswith("http"): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: raise MissingSchema(f"Invalid URL {url!r}: No scheme supplied. " f"Perhaps you meant http://{url}?") if not host: raise InvalidURL(f"Invalid URL {url!r}: No host supplied") # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA # behaviour. For strings containing only ASCII characters, we need to also verify # it doesn't start with a wildcard (*), before allowing the unencoded hostname. if not unicode_is_ascii(host): try: host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL("URL has an invalid label.") elif host.startswith(("*", ".")): raise InvalidURL("URL has an invalid label.") # Carefully reconstruct the network location netloc = auth or "" if netloc: netloc += "@" netloc += host if port: netloc += f":{port}" # Bare domains aren't valid URLs. if not path: path = "/" if isinstance(params, (str, bytes)): params = to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = f"{query}&{enc_params}" else: query = enc_params url = requote_uri( urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def prepare_url(self, url, params): """Prepares the given HTTP URL. Mostly copied from requests lib, removed python2 checks and added checks for https""" from urllib3.util import parse_url from urllib3.exceptions import LocationParseError from urllib.parse import urlunparse from requests.exceptions import InvalidURL from requests.utils import requote_uri if isinstance(url, bytes): url = url.decode('utf8') else: url = str(url) # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: # normally an error is thrown, we assume https scheme = 'https' elif scheme != 'https': raise InvalidURL('Invalid URL %r: must be https' % url) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if isinstance(params, (str, bytes)): params = requests.utils.to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindy call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/kennethreitz/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: url = unicode(url) if is_py2 else str(url) # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: raise MissingSchema("Invalid URL {0!r}: No schema supplied. " "Perhaps you meant http://{0}?".format( to_native_string(url, 'utf8'))) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def get_name(self): parseUrl = parse_url(self.url) return parseUrl
with codecs.open(inventory_list_path, 'r', 'utf-8') as fin: targets = fin.readlines() for target in targets: items = target.replace('\r', '').replace('\n', '').split('\t') if len(items) != 2: utility.print_message(FAIL, 'Invalid inventory target : {}'.format(target)) continue # Check target URL. port_num = '' invent_url = '' keyword = '' try: invent_url = items[0] keyword = items[1] parsed = util.parse_url(invent_url) # Judge port number. if parsed.port is None and parsed.scheme == 'https': port_num = '443' elif parsed.port is None and parsed.scheme == 'http': port_num = '80' elif parsed.port is not None: port_num = str(parsed.port) else: utility.print_message(FAIL, 'Invalid URL : {}'.format(invent_url)) utility.write_log(30, 'Invalid URL : {}'.format(invent_url)) continue except Exception as e: utility.print_exception(e, 'Parsed error : {}'.format(invent_url)) utility.write_log(30, 'Parsed error : {}'.format(invent_url))
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. try: url = unicode(url) except NameError: # We're on Python 3. url = str(url) except UnicodeDecodeError: pass # Don't do any URL preparation for oddball schemes if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme: raise MissingSchema("Invalid URL {0!r}: No schema supplied. " "Perhaps you meant http://{0}?".format(url)) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri( urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def get_url_root(self, url): return util.parse_url(url).hostname
def read_legacy(component_type: str, ip_address: str, port: str, slave_id0: str, slave_id1: Optional[str] = None, slave_id2: Optional[str] = None, slave_id3: Optional[str] = None, batwrsame: Optional[int] = None, extprodakt: Optional[int] = None, zweiterspeicher: Optional[int] = None, subbat: Optional[int] = None, ip2address: Optional[str] = None, num: Optional[int] = None) -> None: def get_bat_state() -> Tuple[List, List]: def create_bat(modbus_id: int) -> bat.SolaredgeBat: component_config = SolaredgeBatSetup( id=num, configuration=SolaredgeBatConfiguration(modbus_id=modbus_id)) return bat.SolaredgeBat(dev.device_config.id, component_config, dev.client) bats = [create_bat(1)] if zweiterspeicher == 1: bats.append(create_bat(2)) soc_bat, power_bat = [], [] for battery in bats: state = battery.read_state() power_bat.append(state.power) soc_bat.append(state.soc) return power_bat, soc_bat def get_external_inverter_state(dev: Device, id: int) -> InverterState: component_config = SolaredgeExternalInverterSetup( id=num, configuration=SolaredgeExternalInverterConfiguration(modbus_id=id)) ext_inverter = external_inverter.SolaredgeExternalInverter( dev.device_config.id, component_config, dev.client) return ext_inverter.read_state() def create_inverter(modbus_id: int) -> inverter.SolaredgeInverter: component_config = SolaredgeInverterSetup( id=num, configuration=SolaredgeInverterConfiguration(modbus_id=modbus_id)) return inverter.SolaredgeInverter(dev.device_config.id, component_config, dev.client) log.debug("Solaredge IP: " + ip_address + ":" + str(port)) log.debug("Solaredge Slave-IDs: [" + str(slave_id0) + ", " + str(slave_id1) + ", " + str(slave_id2) + ", " + str(slave_id3) + "]") log.debug("Solaredge Bat-WR-gleiche IP: " + str(batwrsame) + ", Externer WR: " + str(extprodakt) + ", 2. Speicher: " + str(zweiterspeicher) + ", Speicherleistung subtrahieren: " + str(subbat) + " 2. IP: " + str(ip2address) + ", Num: " + str(num)) if port == "": parsed_url = parse_url(ip_address) ip_address = parsed_url.hostname if parsed_url.port: port = parsed_url.port else: port = 502 dev = Device( Solaredge(configuration=SolaredgeConfiguration(ip_address=ip_address, port=int(port)))) if component_type == "counter": dev.add_component( SolaredgeCounterSetup(id=num, configuration=SolaredgeCounterConfiguration( modbus_id=int(slave_id0)))) log.debug('Solaredge ModbusID: ' + str(slave_id0)) dev.update() elif component_type == "inverter": if ip2address == "none": modbus_ids = list( map( int, filter(lambda id: id.isnumeric(), [slave_id0, slave_id1, slave_id2, slave_id3]))) inverters = [ create_inverter(modbus_id) for modbus_id in modbus_ids ] with SingleComponentUpdateContext(inverters[0].component_info): total_power = 0 total_energy = 0 total_currents = [0.0] * 3 with dev.client: for inv in inverters: state = inv.read_state() total_power += state.power total_energy += state.exported total_currents = list( map(add, total_currents, state.currents)) if extprodakt: state = get_external_inverter_state( dev, int(slave_id0)) total_power -= state.power if batwrsame == 1: bat_power, soc_bat = get_bat_state() if subbat == 1: total_power -= sum(min(p, 0) for p in bat_power) else: total_power -= sum(bat_power) if batwrsame == 1: get_bat_value_store(1).set( BatState(power=sum(bat_power), soc=mean(soc_bat))) get_inverter_value_store(num).set( InverterState(exported=total_energy, power=min(0, total_power), currents=total_currents)) else: inv = create_inverter(int(slave_id0)) with SingleComponentUpdateContext(inv.component_info): with dev.client: state = inv.read_state() total_power = state.power * -1 total_energy = state.exported if batwrsame == 1: zweiterspeicher = 0 bat_power, soc_bat = get_bat_state() total_power -= sum(bat_power) get_bat_value_store(1).set( BatState(power=sum(bat_power), soc=mean(soc_bat))) device_config = Solaredge(configuration=SolaredgeConfiguration( ip_address=ip2address)) dev = Device(device_config) inv = create_inverter(int(slave_id0)) with dev.client: state = inv.read_state() total_power -= state.power total_energy += state.exported if extprodakt: state = get_external_inverter_state( dev, int(slave_id0)) total_power -= state.power get_inverter_value_store(num).set( InverterState(exported=total_energy, power=total_power)) elif component_type == "bat": with SingleComponentUpdateContext( ComponentInfo(0, "Solaredge Speicher", "bat")): power_bat, soc_bat = get_bat_state() get_bat_value_store(1).set( BatState(power=sum(power_bat), soc=mean(soc_bat)))
def from_str_url(cls, url: str): return cls(parse_url(url))
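# Hedged usage sketch for the classmethod above. The enclosing class is not shown,
# so Endpoint is only a stand-in; the point is that parse_url hands the constructor
# a urllib3 Url namedtuple (scheme, auth, host, port, path, query, fragment) rather
# than a plain string.
from urllib3.util import parse_url


class Endpoint:
    def __init__(self, url):
        self.url = url  # a urllib3.util.url.Url instance

    @classmethod
    def from_str_url(cls, url: str):
        return cls(parse_url(url))


# Endpoint.from_str_url("https://example.org:8443/api").url.port == 8443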
def main():
    global config
    df = pd.read_excel(config.table)
    log_df = pd.DataFrame(columns=[
        "dataset_name",
        "resource_name",
        "resource_filename",
        "resource_url",
        "new_resource_url",
        "scraperwiki_name",
        "dir",
        "file",
        "status",
        "update_status",
    ])
    c = urllib3.PoolManager()
    i = 0
    additional_df = pd.DataFrame(columns=["decision", "dataset_name", "resource_name", "resource_url"])
    for resource_index, row in df.iterrows():
        i += 1
        dataset_name = str(row.dataset_name)
        resource_name = str(row.resource_name)
        # print ("%(i)3d %(dataset_name)30s %(resource_name)30s"%locals())
        resource_url = row.resource_url
        if resource_name.endswith(".csv") or resource_name.endswith(".xls") or resource_name.endswith(".xlsx"):
            resource_filename = resource_name
        else:
            resource_filename = os.path.split(ut.parse_url(resource_url).path)[1]
        # localpath = os.path.join(os.path.join(config.target,dataset_name),resource_name)
        localpath = os.path.join(config.target, dataset_name)
        localfile = os.path.join(localpath, resource_filename)
        new_resource_url = None
        update_status = ""
        scraperwiki_resources = []
        if row.decision == config.decision:
            new_resource_url = config.url_prefix
            if new_resource_url[-1] != "/":
                new_resource_url += "/"
            new_resource_url += dataset_name + "/" + resource_filename
            if config.new_url_pattern not in new_resource_url:
                logging.warning("New url '%s' does not contain the new-url-pattern '%s'" %
                                (new_resource_url, config.new_url_pattern))
            dataset = Dataset.read_from_hdx(dataset_name)
            if dataset is None:
                status = "ERROR"
                update_status = "DATASET NOT FOUND"
            else:
                resource_index = resource_number_from_url(dataset, resource_url)
                for i, r in enumerate(dataset.get_resources()):
                    if (config.old_url_pattern in r["url"] or config.new_url_pattern in r["url"]) and i != resource_index:
                        additional_df = additional_df.append(dict(decision=row.decision,
                                                                  dataset_name=dataset_name,
                                                                  resource_name=r["name"],
                                                                  resource_url=r["url"]),
                                                             ignore_index=True)
                        additional_df.to_csv(config.additional, index_label='Index')
                if config.update_url:
                    logging.info("Update url %(dataset_name)s, resource: %(resource_name)s to %(new_resource_url)s" % locals())
                    try:
                        resource = resource_from_name(dataset, resource_name)
                        if resource is None:
                            update_status = "RESOURCE NOT FOUND"
                        else:
                            resource["url"] = new_resource_url
                            resource.update_in_hdx()
                            update_status = "OK"
                    except:
                        logging.error("Update url failed for %(dataset_name)s resource %(resource_name)s" % locals())
                        update_status = "ERROR"
                        traceback.print_exc()
                try:
                    os.makedirs(localpath)
                except:
                    pass
                logging.info("Process dataset %(dataset_name)s" % locals())
                logging.info("Fetch data from url %(dataset_name)s" % locals())
                if config.refresh or not os.path.exists(localfile):
                    try:
                        with c.request('GET', resource_url, preload_content=False) as response, open(localfile, 'wb') as f:
                            shutil.copyfileobj(response, f)
                        status = "OK"
                    except:
                        logging.exception("Download error for dataset %(dataset_name)s" % locals())
                        status = "ERROR"
                else:
                    status = "Ok"
                if config.upload:
                    logging.info("Upload %(dataset_name)s, resource: %(resource_name)s" % locals())
                    try:
                        resource = resource_from_name(dataset, resource_name)
                        if resource is None:
                            update_status = "RESOURCE NOT FOUND"
                        else:
                            try:
                                file_type = os.path.splitext(localfile)[1][1:].lower()
                            except:
                                file_type = "csv"
                            resource.set_file_type(file_type)  # set the file type to eg. csv
                            resource.set_file_to_upload(localfile)
                            resource.update_in_hdx()
                            update_status = "OK"
                    except:
                        logging.error("Uploading %(dataset_name)s resource %(resource_name)s failed" % locals())
                        update_status = "ERROR"
                        traceback.print_exc()
        else:
            status = "SKIPPED"
        log_df = log_df.append(dict(dataset_name=dataset_name,
                                    resource_name=resource_name,
                                    resource_filename=resource_filename,
                                    resource_url=resource_url,
                                    new_resource_url=new_resource_url,
                                    scraperwiki_name=row.scraperwiki_name,
                                    dir=localpath,
                                    file=localfile,
                                    status=status,
                                    update_status=update_status),
                               ignore_index=True)
        log_df.to_csv(config.processed, index_label='Index')
    additional_df["additional"] = 1
    df["additional"] = 0
    df = df.append(additional_df, ignore_index=True)
    writer = pd.ExcelWriter(config.additional_table)
    df.to_excel(writer)
    writer.save()
    writer.close()
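# Hedged illustration of the filename fallback used in main() above ("ut" is assumed
# to be urllib3.util, as in the snippet). The sample URL is made up; note that the
# query string is not part of .path, so it is dropped from the derived filename.
import os
from urllib3 import util as ut

resource_url = "https://example.org/data/latest/figures.xlsx?raw=1"
resource_filename = os.path.split(ut.parse_url(resource_url).path)[1]
# resource_filename == "figures.xlsx"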
def custom_search(self, query, max_page_count=1, target_domain=''):
    # Google Custom Search API.
    self.utility.write_log(20, '[In] Execute Google custom search [{}].'.format(self.file_name))

    # Setting of Google Custom Search.
    service = None
    if self.utility.proxy != '':
        # Set proxy.
        self.utility.print_message(WARNING, 'Set proxy server: {}'.format(self.utility.proxy))
        parsed = util.parse_url(self.utility.proxy)
        proxy = None
        if self.utility.proxy_pass != '':
            proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                       proxy_host=parsed.host,
                                       proxy_port=parsed.port,
                                       proxy_user=self.utility.proxy_user,
                                       proxy_pass=self.utility.proxy_pass)
        else:
            proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                                       proxy_host=parsed.host,
                                       proxy_port=parsed.port)
        my_http = httplib2.Http(proxy_info=proxy, disable_ssl_certificate_validation=True)
        service = build("customsearch", "v1", developerKey=self.api_key, http=my_http)
    else:
        # None proxy.
        service = build("customsearch", "v1", developerKey=self.api_key)

    # Execute search.
    urls = []
    sub_domain_list = []
    result_count = 0
    start_index = self.start_index
    try:
        search_count = 0
        search_urls = []
        while search_count < max_page_count:
            self.utility.print_message(OK, 'Using query : {}'.format(query))
            response = service.cse().list(
                q=query,
                cx=self.search_engine_id,
                num=10,
                start=start_index,
                filter='0',
                safe='off',
            ).execute()

            # Get finding counts.
            result_count = int(response.get('searchInformation').get('totalResults'))

            # Get extracted link (url).
            if result_count != 0:
                items = response['items']
                for item in items:
                    urls.append(item['link'])
                    search_urls.append(item['link'])

            # Set new query.
            if result_count <= 10 or max_page_count == 1:
                tmp_sub_domain_list = self.utility.transform_url_hostname_list(search_urls)
                # Update query for report.
                for sub_domain in tmp_sub_domain_list:
                    if target_domain != sub_domain and sub_domain not in sub_domain_list:
                        query += ' -site:' + sub_domain
                sub_domain_list.extend(tmp_sub_domain_list)
                break
            else:
                # Refine search range using "-site" option.
                tmp_sub_domain_list = self.utility.transform_url_hostname_list(search_urls)
                for sub_domain in tmp_sub_domain_list:
                    if target_domain != sub_domain and sub_domain not in sub_domain_list:
                        query += ' -site:' + sub_domain
                        sub_domain_list.append(sub_domain)

            search_count += 1
            time.sleep(self.delay_time)
    except Exception as e:
        msg = 'Google custom search is failure : {}'.format(e)
        self.utility.print_exception(e, msg)
        self.utility.write_log(30, msg)
        self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name))
        return urls, result_count, query, sub_domain_list

    self.utility.write_log(20, '[Out] Execute Google custom search [{}].'.format(self.file_name))
    return urls, result_count, query, list(set(sub_domain_list))
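# Hedged illustration of the proxy handling in custom_search above: a proxy URL such
# as "http://10.0.0.5:3128" (made-up address) is split with urllib3's parse_url and
# handed to httplib2.ProxyInfo as separate host and port values.
from urllib3.util import parse_url

parsed = parse_url("http://10.0.0.5:3128")
# parsed.host == "10.0.0.5", parsed.port == 3128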
target_list = random.sample(target[2], len(target[2]))
if max_target_url != 0 and max_target_url < len(target_list):
    utility.print_message(WARNING, 'Cutting target list {} to {}.'.format(len(target[2]), max_target_url))
    target_list = target_list[:max_target_url]

for count, target_url in enumerate(target_list):
    utility.print_message(NOTE, '{}/{} Start analyzing: {}'.format(count + 1, len(target_list), target_url))

    # Check target url.
    parsed = None
    try:
        parsed = util.parse_url(target_url)
    except Exception as e:
        utility.print_exception(e, 'Parsed error : {}'.format(target_url))
        utility.write_log(30, 'Parsed error : {}'.format(target_url))
        continue

    # Get HTTP response (header + body).
    date = utility.get_current_date('%Y%m%d%H%M%S%f')[:-3]
    print_date = utility.transform_date_string(utility.transform_date_object(date[:-3], '%Y%m%d%H%M%S'))
    _, server_header, res_header, res_body, _ = utility.send_request('GET', target_url)
def get_url_name(url):
    if not isinstance(url, six.string_types):
        return
    return urllib3_util.parse_url(url).host
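# Hedged usage sketch for get_url_name above (assumes six and urllib3 are importable,
# as in the snippet): only str input is parsed, anything else falls through to None.
assert get_url_name("https://api.example.com/v1/items?id=3") == "api.example.com"
assert get_url_name(42) is None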
def _prepare_api_url(self, url, p):
    scheme, auth, host, port, path, query, fragment = parse_url(url)
    if scheme is None or scheme == "http":
        return f"http://{host}:{p}"
    else:
        raise InvalidSchema("Invalid scheme %r: do not supply a scheme other than http" % scheme)
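# Hedged behaviour notes for _prepare_api_url above; "obj" stands in for whatever
# object exposes the method, and the hostnames are made up. parse_url unpacks into
# the seven fields scheme, auth, host, port, path, query and fragment; only the host
# is kept and the supplied port p always wins.
#
#   obj._prepare_api_url("api.example.com", 8080)           -> "http://api.example.com:8080"
#   obj._prepare_api_url("http://api.example.com/x", 8080)  -> "http://api.example.com:8080"
#   obj._prepare_api_url("https://api.example.com", 8080)   -> raises InvalidSchema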