def do_GET(self):
    request = parse_url(self.path).path.split('/')[1]
    query_vars = dict(parse_query(parse_url(self.path).query))
    try:
        if self.path not in response_cache:
            info_getter = getattr(info, 'get_' + request)
            response_cache[self.path] = info_getter(**query_vars)
        self.respond(response_cache[self.path])
    except AttributeError:
        self.not_found()
    except NotImplementedError:
        self.not_implemented()

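A minimal sketch of the module-level pieces this handler appears to rely on; the names below (parse_url, parse_query, info, response_cache) are assumptions inferred from how they are used above, not the original definitions.

# Assumed context (illustrative only): parse_url/parse_query standing in for
# urllib.parse.urlparse/parse_qsl, a module-level response cache, and an
# `info` object exposing get_<name>() callables the handler dispatches to.
from urllib.parse import urlparse as parse_url, parse_qsl as parse_query

response_cache = {}

class info:
    # e.g. GET /status?verbose=1 -> getattr(info, 'get_status')(verbose='1')
    @staticmethod
    def get_status(verbose='0'):
        return {'ok': True, 'verbose': verbose == '1'}
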
def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding
    """
    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        return maybe_read_encoded_stream(req, encoding)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        conn = boto.connect_s3()
        b = conn.get_bucket(parsed_url.netloc)
        k = boto.s3.key.Key(b)
        k.key = parsed_url.path
        filepath_or_buffer = StringIO(k.get_contents_as_string())
        return filepath_or_buffer, None

    return filepath_or_buffer, None

def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
    # are environment variables
    parsed_url = parse_url(filepath_or_buffer)
    s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')
    try:
        conn = boto.connect_s3(host=s3_host)
    except boto.exception.NoAuthHandlerFound:
        conn = boto.connect_s3(host=s3_host, anon=True)
    b = conn.get_bucket(parsed_url.netloc, validate=False)
    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        k = boto.s3.key.Key(b, parsed_url.path)
        filepath_or_buffer = BytesIO(k.get_contents_as_string(
            encoding=encoding))
    else:
        k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
        k.open('r')  # Expose read errors immediately
        filepath_or_buffer = k
    return filepath_or_buffer, None, compression

def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                compression = 'gzip'
            else:
                compression = None
        # cat on the compression to the tuple returned by the function
        to_return = list(maybe_read_encoded_stream(req, encoding,
                                                   compression)) + [compression]
        return tuple(to_return)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        try:
            conn = boto.connect_s3()
        except boto.exception.NoAuthHandlerFound:
            conn = boto.connect_s3(anon=True)
        b = conn.get_bucket(parsed_url.netloc, validate=False)
        if compat.PY2 and (compression == 'gzip' or
                           (compression == 'infer' and
                            filepath_or_buffer.endswith(".gz"))):
            k = boto.s3.key.Key(b, parsed_url.path)
            filepath_or_buffer = BytesIO(k.get_contents_as_string(
                encoding=encoding))
        else:
            k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
            k.open('r')  # Expose read errors immediately
            filepath_or_buffer = k
        return filepath_or_buffer, None, compression

    return _expand_user(filepath_or_buffer), None, compression

def _parse_host(host, port, resource):
    if not host.startswith('http'):
        host = 'http://' + host
    url_pack = parse_url(host)
    is_secure = url_pack.scheme == 'https'
    port = port or url_pack.port or (443 if is_secure else 80)
    base_url = '%s:%d%s/%s/%s' % (
        url_pack.hostname, port, url_pack.path, resource, PROTOCOL_VERSION)
    return is_secure, base_url

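For illustration, a call under the assumption that parse_url is urllib.parse.urlparse and that PROTOCOL_VERSION is a module constant such as 'v1':

PROTOCOL_VERSION = 'v1'  # assumed module constant

is_secure, base_url = _parse_host('https://example.com', None, 'jobs')
# is_secure -> True
# base_url  -> 'example.com:443/jobs/v1'
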
def __init__(self, base_url, adapter_set="", user="", password=""):
    self._base_url = parse_url(base_url)
    self._adapter_set = adapter_set
    self._user = user
    self._password = password
    self._session = {}
    self._subscriptions = {}
    self._current_subscription_key = 0
    self._stream_connection = None
    self._stream_connection_thread = None

def _set_control_link_url(self, custom_address=None):
    """Set the address to use for the Control Connection
    in such cases where Lightstreamer is behind a Load Balancer.
    """
    if custom_address is None:
        self._control_url = self._base_url
    else:
        parsed_custom_address = parse_url("//" + custom_address)
        self._control_url = parsed_custom_address._replace(
            scheme=self._base_url[0]
        )

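The "//" prefix above is worth noting: assuming parse_url is urllib.parse.urlparse, it makes a bare host:port string parse as a netloc rather than a path, so only the scheme needs to be copied over from the base URL. A small illustration:

from urllib.parse import urlparse

parsed = urlparse("//push.example.com:8080/ls")   # netloc='push.example.com:8080'
rebuilt = parsed._replace(scheme="https")
print(rebuilt.geturl())                           # https://push.example.com:8080/ls
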
def _is_url(url):
    """Check to see if a URL has a valid protocol.

    Parameters
    ----------
    url : str or unicode

    Returns
    -------
    isurl : bool
        If `url` has a valid protocol return True otherwise False.
    """
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False

def _read_json(self, path_or_url, compressed=True, advanced_path=False):
    ''' Load JSON for a path. Allows remote files in addition to local ones. '''
    if parse_url(path_or_url).scheme in ['http', 'https']:
        try:
            req = _urlopen(path_or_url)
            filename_or_buffer = BytesIO(req.read())
        except HTTPError:
            logging.exception("HTTP Error accessing %s" % path_or_url)
            raise
        compressed = False
    else:
        filename_or_buffer = path_or_url
    try:
        if compressed:
            f = bz2.BZ2File(filename_or_buffer)
        else:
            if (type(filename_or_buffer) != BytesIO) and \
                    not isinstance(filename_or_buffer, StringIO):
                f = codecs.open(filename_or_buffer, 'r+', encoding="utf-8")
            else:
                f = filename_or_buffer
        rawjson = f.readline()
        f.close()
    except IOError:
        logging.exception("Can't read %s. Did you pass the incorrect "
                          "'compressed=' argument?", path_or_url)
        raise
    except Exception:
        print(compressed, type(filename_or_buffer))
        logging.exception("Can't open %s", path_or_url)
        raise
    # This is a bandaid for schema version 2.0, not over-engineered
    # since upcoming releases of the extracted features
    # dataset won't keep the basic/advanced split
    try:
        # For Python3 compatibility, decode to str object
        if PY3 and (type(rawjson) != str):
            rawjson = rawjson.decode()
        volumejson = json.loads(rawjson)
    except Exception:
        logging.exception("Problem reading JSON for %s. One common reason"
                          " for this error is an incorrect compressed= "
                          "argument", path_or_url)
        raise
    return volumejson

def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding
    """
    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compat.PY3:  # pragma: no cover
            if encoding:
                errors = 'strict'
            else:
                errors = 'replace'
                encoding = 'utf-8'
            out = StringIO(req.read().decode(encoding, errors))
        else:
            encoding = None
            out = req
        return out, encoding

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        conn = boto.connect_s3()
        b = conn.get_bucket(parsed_url.netloc)
        k = boto.s3.key.Key(b)
        k.key = parsed_url.path
        filepath_or_buffer = StringIO(k.get_contents_as_string())
        return filepath_or_buffer, None

    return filepath_or_buffer, None

def is_gcs_url(url) -> bool:
    """Check for a gcs url"""
    if not isinstance(url, str):
        return False
    return parse_url(url).scheme in ["gcs", "gs"]

def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url"""
    if not isinstance(url, str):
        return False
    return parse_url(url).scheme in ["s3", "s3n", "s3a"]

def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url, allow_fragments=False)
    return result.netloc + result.path

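Illustrative behaviour, assuming parse_url is urllib.parse.urlparse: the scheme is discarded and the netloc plus path are rejoined.

print(_strip_schema('s3://my-bucket/data/file.csv'))  # my-bucket/data/file.csv
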
def _is_s3_url(url):
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ['s3', 's3n', 's3a']
    except Exception:
        return False

def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False

def _is_buffer_url(url: str) -> bool:
    res = parse_url(url)
    return res.scheme == "buffer"

def is_gcs_url(url):
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ['gcs', 'gs']
    except Exception:
        return False

def is_gcs_url(url) -> bool:
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ["gcs", "gs"]
    except Exception:
        return False

def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ["s3", "s3n", "s3a"]
    except Exception:
        return False

def _is_url(url):
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False

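A hypothetical dispatch helper, sketched only to show how scheme checks like the ones above are typically combined; the handler labels are illustrative and not part of the original code.

def _get_handler(path):
    # Purely illustrative routing based on the predicates defined above.
    if is_s3_url(path):
        return 's3'
    if is_gcs_url(path):
        return 'gcs'
    if _is_url(path):
        return 'http'
    return 'local'
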
def get_channel_data(client, channel_url):
    data = None
    retries = 2
    for retry in list(range(retries))[::-1]:
        req = client.get(f"{channel_url}/about")
        source = req.text
        try:
            data = json.loads(
                source.split('var ytInitialData = ')[1].split(';</script>')[0])
        except (KeyError, IndexError):
            if retry == 0:
                return False
            continue
        else:
            break
    handle = data["metadata"]["channelMetadataRenderer"][
        "vanityChannelUrl"].split("/")[-1]
    tabs = [
        x[list(x.keys())[0]]
        for x in data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
    ]
    about_tab = [x for x in tabs if x["title"].lower() == "about"][0]
    channel_details = about_tab["content"]["sectionListRenderer"]["contents"][
        0]["itemSectionRenderer"]["contents"][0][
            "channelAboutFullMetadataRenderer"]
    out = {
        "name": None,
        "description": None,
        "channel_urls": [],
        "email_contact": False,
        "views": None,
        "joined_date": None,
        "primary_links": [],
        "country": None
    }
    out["name"] = data["metadata"]["channelMetadataRenderer"]["title"]
    out["channel_urls"].append(
        data["metadata"]["channelMetadataRenderer"]["channelUrl"])
    out["channel_urls"].append(f"https://www.youtube.com/c/{handle}")
    out["channel_urls"].append(f"https://www.youtube.com/user/{handle}")
    out["email_contact"] = "businessEmailLabel" in channel_details
    out["description"] = channel_details["description"][
        "simpleText"] if "description" in channel_details else None
    out["views"] = channel_details["viewCountText"]["simpleText"].split(
        " ")[0] if "viewCountText" in channel_details else None
    out["joined_date"] = channel_details["joinedDateText"]["runs"][1][
        "text"] if "joinedDateText" in channel_details else None
    out["country"] = channel_details["country"][
        "simpleText"] if "country" in channel_details else None
    if "primaryLinks" in channel_details:
        for primary_link in channel_details["primaryLinks"]:
            title = primary_link["title"]["simpleText"]
            url = parse_url(primary_link["navigationEndpoint"]["urlEndpoint"]
                            ["url"].split("&q=")[-1])
            out["primary_links"].append({"title": title, "url": url})
    return out

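A hypothetical call for illustration only; `client` is assumed to be a requests.Session-like object whose .get() returns a response exposing .text, and the channel URL is made up.

import requests

channel = get_channel_data(requests.Session(),
                           "https://www.youtube.com/c/ExampleChannel")
if channel:
    print(channel["name"], channel["views"], channel["country"])
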
def _is_s3_url(url):
    """Check for an s3 url"""
    try:
        return parse_url(url).scheme == 's3'
    except Exception:
        return False

def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False

def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url)
    return result.netloc + result.path

def is_s3_url(url):
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ['s3', 's3n', 's3a']
    except Exception:
        return False

def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or
                         pathlib.Path), or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                compression = 'gzip'
            else:
                compression = None
        # cat on the compression to the tuple returned by the function
        to_return = (list(maybe_read_encoded_stream(req, encoding,
                                                    compression)) +
                     [compression])
        return tuple(to_return)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')
        try:
            conn = boto.connect_s3(host=s3_host)
        except boto.exception.NoAuthHandlerFound:
            conn = boto.connect_s3(host=s3_host, anon=True)
        b = conn.get_bucket(parsed_url.netloc, validate=False)
        if compat.PY2 and (compression == 'gzip' or
                           (compression == 'infer' and
                            filepath_or_buffer.endswith(".gz"))):
            k = boto.s3.key.Key(b, parsed_url.path)
            filepath_or_buffer = BytesIO(k.get_contents_as_string(
                encoding=encoding))
        else:
            k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
            k.open('r')  # Expose read errors immediately
            filepath_or_buffer = k
        return filepath_or_buffer, None, compression

    # It is a pathlib.Path/py.path.local or string
    filepath_or_buffer = _stringify_path(filepath_or_buffer)
    return _expand_user(filepath_or_buffer), None, compression

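A rough usage sketch (the URL and path are illustrative): plain paths come back user-expanded, while URLs are opened and compression may be inferred from the response headers.

fp, enc, comp = get_filepath_or_buffer('~/data.csv')
# fp -> expanded local path, enc -> None, comp -> None

fp, enc, comp = get_filepath_or_buffer('https://example.com/data.csv.gz',
                                       compression='infer')
# comp -> 'gzip' only if the server sent Content-Encoding: gzip, else None
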
def _is_buffer_url(url):
    res = parse_url(url)
    return res.scheme == 'buffer'