Example #1
    def do_GET(self):
        # Route '/<name>?<query>' to info.get_<name>(**query), caching by full path.
        parsed = parse_url(self.path)
        request = parsed.path.split('/')[1]
        query_vars = dict(parse_query(parsed.query))

        try:
            if self.path not in response_cache:
                info_getter = getattr(info, 'get_' + request)
                response_cache[self.path] = info_getter(**query_vars)
            self.respond(response_cache[self.path])
        except AttributeError:
            # No matching get_<name> handler on the info module.
            self.not_found()
        except NotImplementedError:
            self.not_implemented()
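For reference, a minimal sketch of what the two parsing calls yield for a sample request path, assuming `parse_url` and `parse_query` are aliases for the standard library's `urlparse` and `parse_qsl` (the excerpt does not show its imports):

from urllib.parse import urlparse as parse_url, parse_qsl as parse_query

parsed = parse_url("/status?verbose=1")
request = parsed.path.split('/')[1]           # 'status'
query_vars = dict(parse_query(parsed.query))  # {'verbose': '1'}
# getattr(info, 'get_' + request) would then resolve to a hypothetical
# info.get_status and call it as info.get_status(verbose='1').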
Example #2
def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding
    """

    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        return maybe_read_encoded_stream(req, encoding)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        conn = boto.connect_s3()
        b = conn.get_bucket(parsed_url.netloc)
        k = boto.s3.key.Key(b)
        k.key = parsed_url.path
        filepath_or_buffer = StringIO(k.get_contents_as_string())
        return filepath_or_buffer, None

    return filepath_or_buffer, None
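The S3 branch depends on `parse_url` putting the bucket name in `netloc` and the key in `path`. A quick illustration, assuming `parse_url` is `urllib.parse.urlparse`:

from urllib.parse import urlparse as parse_url

parsed = parse_url("s3://my-bucket/data/file.csv")
print(parsed.netloc)  # 'my-bucket'      -> passed to conn.get_bucket()
print(parsed.path)    # '/data/file.csv' -> assigned to the key name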
Example #3
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):

    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
    # are environment variables
    parsed_url = parse_url(filepath_or_buffer)
    s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

    try:
        conn = boto.connect_s3(host=s3_host)
    except boto.exception.NoAuthHandlerFound:
        conn = boto.connect_s3(host=s3_host, anon=True)

    b = conn.get_bucket(parsed_url.netloc, validate=False)
    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        k = boto.s3.key.Key(b, parsed_url.path)
        filepath_or_buffer = BytesIO(k.get_contents_as_string(
            encoding=encoding))
    else:
        k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
        k.open('r')  # Expose read errors immediately
        filepath_or_buffer = k
    return filepath_or_buffer, None, compression
Example #4
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """

    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                compression = 'gzip'
            else:
                compression = None
        # Append the compression onto the tuple returned by the helper
        to_return = (list(maybe_read_encoded_stream(req, encoding,
                                                    compression)) +
                     [compression])
        return tuple(to_return)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)

        try:
            conn = boto.connect_s3()
        except boto.exception.NoAuthHandlerFound:
            conn = boto.connect_s3(anon=True)

        b = conn.get_bucket(parsed_url.netloc, validate=False)
        if compat.PY2 and (compression == 'gzip' or
                           (compression == 'infer' and
                            filepath_or_buffer.endswith(".gz"))):
            k = boto.s3.key.Key(b, parsed_url.path)
            filepath_or_buffer = BytesIO(k.get_contents_as_string(
                encoding=encoding))
        else:
            k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
            k.open('r')  # Expose read errors immediately
            filepath_or_buffer = k
        return filepath_or_buffer, None, compression

    return _expand_user(filepath_or_buffer), None, compression
Example #5
def _parse_host(host, port, resource):
    if not host.startswith('http'):
        host = 'http://' + host
    url_pack = parse_url(host)
    is_secure = url_pack.scheme == 'https'
    port = port or url_pack.port or (443 if is_secure else 80)
    base_url = '%s:%d%s/%s/%s' % (
        url_pack.hostname, port, url_pack.path, resource, PROTOCOL_VERSION)
    return is_secure, base_url
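A short usage sketch; `PROTOCOL_VERSION` is a module-level constant not shown in the excerpt, so a placeholder value is used here:

PROTOCOL_VERSION = 'v1'  # placeholder; the real constant is defined elsewhere

is_secure, base_url = _parse_host('https://example.com', None, 'items')
print(is_secure)  # True
print(base_url)   # 'example.com:443/items/v1'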
Example #6

    def __init__(self, base_url, adapter_set="", user="", password=""):
        self._base_url = parse_url(base_url)
        self._adapter_set = adapter_set
        self._user = user
        self._password = password
        self._session = {}
        self._subscriptions = {}
        self._current_subscription_key = 0
        self._stream_connection = None
        self._stream_connection_thread = None
Example #7

    def _set_control_link_url(self, custom_address=None):
        """Set the address to use for the Control Connection
        in such cases where Lightstreamer is behind a Load Balancer.
        """
        if custom_address is None:
            self._control_url = self._base_url
        else:
            parsed_custom_address = parse_url("//" + custom_address)
            self._control_url = parsed_custom_address._replace(
                scheme=self._base_url.scheme  # inherit the scheme of the base URL
            )
Example #8
def _is_url(url):
    """Check to see if a URL has a valid protocol.

    Parameters
    ----------
    url : str or unicode

    Returns
    -------
    isurl : bool
        If `url` has a valid protocol return True otherwise False.
    """
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False
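In pandas, where this helper comes from, `_VALID_URLS` is derived from the scheme lists that `urllib.parse` itself maintains; a sketch of that construction, matching the pandas source of this era (illustrative only):

from urllib.parse import uses_netloc, uses_params, uses_relative

_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")  # the empty scheme would make bare paths look like URLs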
Example #9
    def _read_json(self, path_or_url, compressed=True, advanced_path=False):
        ''' Load JSON for a path. Allows remote files in addition to local ones. '''
        if parse_url(path_or_url).scheme in ['http', 'https']:
            try:
                req = _urlopen(path_or_url)
                filename_or_buffer = BytesIO(req.read())
            except HTTPError:
                logging.exception("HTTP Error accessing %s", path_or_url)
                raise
            compressed = False
        else:
            filename_or_buffer = path_or_url
        
        try:
            if compressed:
                f = bz2.BZ2File(filename_or_buffer)
            else:
                if not isinstance(filename_or_buffer, (BytesIO, StringIO)):
                    f = codecs.open(filename_or_buffer, 'r+', encoding="utf-8")
                else:
                    f = filename_or_buffer
            rawjson = f.readline()
            f.close()
        except IOError:
            logging.exception("Can't read %s. Did you pass the incorrect "
                              "'compressed=' argument?", path_or_url)
            raise
        except Exception:
            logging.exception("Can't open %s (compressed=%s, buffer type=%s)",
                              path_or_url, compressed, type(filename_or_buffer))
            raise

        # This is a bandaid for schema version 2.0, not over-engineered
        # since upcoming releases of the extracted features
        # dataset won't keep the basic/advanced split

        try:
            # For Python 3 compatibility, decode bytes to a str object
            if PY3 and not isinstance(rawjson, str):
                rawjson = rawjson.decode()
            volumejson = json.loads(rawjson)
        except Exception:
            logging.exception("Problem reading JSON for %s. One common reason"
                              " for this error is an incorrect compressed= "
                              "argument", path_or_url)
            raise
        return volumejson
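Two hypothetical calls showing both branches (the paths are placeholders, and `reader` stands for whatever object defines `_read_json`):

vol = reader._read_json("features/volume.json.bz2")         # local, bz2-compressed (default)
vol = reader._read_json("https://example.com/volume.json")  # remote; compression is forced off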
Example #10
File: common.py Project: esc/pandas
def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath, or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding
    """

    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compat.PY3:  # pragma: no cover
            if encoding:
                errors = 'strict'
            else:
                errors = 'replace'
                encoding = 'utf-8'
            out = StringIO(req.read().decode(encoding, errors))
        else:
            encoding = None
            out = req
        return out, encoding

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        conn = boto.connect_s3()
        b = conn.get_bucket(parsed_url.netloc)
        k = boto.s3.key.Key(b)
        k.key = parsed_url.path
        filepath_or_buffer = StringIO(k.get_contents_as_string())
        return filepath_or_buffer, None

    return filepath_or_buffer, None
Example #11
def is_gcs_url(url) -> bool:
    """Check for a gcs url"""
    if not isinstance(url, str):
        return False
    return parse_url(url).scheme in ["gcs", "gs"]
Example #12
def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url"""
    if not isinstance(url, str):
        return False
    return parse_url(url).scheme in ["s3", "s3n", "s3a"]
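A few illustrative checks, assuming `parse_url` is `urllib.parse.urlparse`:

assert is_s3_url("s3://bucket/key.parquet")
assert is_s3_url("s3a://bucket/key.parquet")
assert not is_s3_url("https://example.com/data.parquet")
assert not is_s3_url(None)  # non-strings are rejected before parsing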
Example #13
def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url, allow_fragments=False)
    return result.netloc + result.path
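For example, with the standard-library parser behind `parse_url`:

print(_strip_schema("s3://my-bucket/data/file.csv"))
# -> 'my-bucket/data/file.csv'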
Example #14
def _is_s3_url(url):
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ['s3', 's3n', 's3a']
    except Exception:
        return False
Example #15
def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False
Example #16
def _is_buffer_url(url: str) -> bool:
    res = parse_url(url)
    return res.scheme == "buffer"
Example #17
def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url, allow_fragments=False)
    return result.netloc + result.path
Example #18
def is_gcs_url(url):
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ['gcs', 'gs']
    except Exception:
        return False
Example #19
def is_gcs_url(url) -> bool:
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ["gcs", "gs"]
    except Exception:
        return False
Example #20
def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ["s3", "s3n", "s3a"]
    except Exception:
        return False
Example #21
def _is_url(url):
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False
Example #22
def get_channel_data(client, channel_url):
    data = None

    retries = 2
    for retry in reversed(range(retries)):
        req = client.get(f"{channel_url}/about")
        source = req.text
        try:
            data = json.loads(
                source.split('var ytInitialData = ')[1].split(';</script>')[0])
        except (KeyError, IndexError):
            if retry == 0:
                return False
            continue
        else:
            break

    handle = data["metadata"]["channelMetadataRenderer"][
        "vanityChannelUrl"].split("/")[-1]
    tabs = [
        next(iter(tab.values()))
        for tab in data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
    ]
    about_tab = [x for x in tabs if x["title"].lower() == "about"][0]
    channel_details = about_tab["content"]["sectionListRenderer"]["contents"][
        0]["itemSectionRenderer"]["contents"][0][
            "channelAboutFullMetadataRenderer"]

    out = {
        "name": None,
        "description": None,
        "channel_urls": [],
        "email_contact": False,
        "views": None,
        "joined_date": None,
        "primary_links": [],
        "country": None
    }

    out["name"] = data["metadata"]["channelMetadataRenderer"]["title"]

    out["channel_urls"].append(
        data["metadata"]["channelMetadataRenderer"]["channelUrl"])
    out["channel_urls"].append(f"https://www.youtube.com/c/{handle}")
    out["channel_urls"].append(f"https://www.youtube.com/user/{handle}")

    out["email_contact"] = "businessEmailLabel" in channel_details

    out["description"] = channel_details["description"][
        "simpleText"] if "description" in channel_details else None
    out["views"] = channel_details["viewCountText"]["simpleText"].split(
        " ")[0] if "viewCountText" in channel_details else None
    out["joined_date"] = channel_details["joinedDateText"]["runs"][1][
        "text"] if "joinedDateText" in channel_details else None
    out["country"] = channel_details["country"][
        "simpleText"] if "country" in channel_details else None

    if "primaryLinks" in channel_details:
        for primary_link in channel_details["primaryLinks"]:
            title = primary_link["title"]["simpleText"]
            url = parse_url(primary_link["navigationEndpoint"]["urlEndpoint"]
                            ["url"].split("&q=")[-1])
            out["primary_links"].append({"title": title, "url": url})

    return out
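A hypothetical invocation; `client` can be any object with a requests-style `.get()` method, e.g. a `requests.Session`, and the channel URL is a placeholder. Note that the function scrapes YouTube's embedded `ytInitialData`, so it breaks whenever that markup changes:

import requests

client = requests.Session()
channel = get_channel_data(client, "https://www.youtube.com/c/examplechannel")
if channel:  # the function returns False when the page could not be parsed
    print(channel["name"], channel["joined_date"], channel["country"])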
Example #23
def _is_s3_url(url):
    """Check for an s3 url"""
    try:
        return parse_url(url).scheme == 's3'
    except Exception:
        return False
Example #24
def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False
Example #25
def is_gcs_url(url):
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ['gcs', 'gs']
    except Exception:
        return False
Example #26
def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url)
    return result.netloc + result.path
Example #27
def _strip_schema(url):
    """Returns the url without the s3:// part"""
    result = parse_url(url)
    return result.netloc + result.path
Example #28
def is_s3_url(url):
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ['s3', 's3n', 's3a']
    except Exception:
        return False
Example #29
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer
    passthru otherwise.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """

    if _is_url(filepath_or_buffer):
        req = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                compression = 'gzip'
            else:
                compression = None
        # Append the compression onto the tuple returned by the helper
        to_return = (list(maybe_read_encoded_stream(req, encoding,
                                                    compression)) +
                     [compression])
        return tuple(to_return)

    if _is_s3_url(filepath_or_buffer):
        try:
            import boto
        except ImportError:
            raise ImportError("boto is required to handle s3 files")
        # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
        # are environment variables
        parsed_url = parse_url(filepath_or_buffer)
        s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

        try:
            conn = boto.connect_s3(host=s3_host)
        except boto.exception.NoAuthHandlerFound:
            conn = boto.connect_s3(host=s3_host, anon=True)

        b = conn.get_bucket(parsed_url.netloc, validate=False)
        if compat.PY2 and (compression == 'gzip' or
                           (compression == 'infer' and
                            filepath_or_buffer.endswith(".gz"))):
            k = boto.s3.key.Key(b, parsed_url.path)
            filepath_or_buffer = BytesIO(k.get_contents_as_string(
                encoding=encoding))
        else:
            k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
            k.open('r')  # Expose read errors immediately
            filepath_or_buffer = k
        return filepath_or_buffer, None, compression

    # It is a pathlib.Path/py.path.local or string
    filepath_or_buffer = _stringify_path(filepath_or_buffer)
    return _expand_user(filepath_or_buffer), None, compression
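The trailing helpers are not shown in the excerpt; a plausible sketch of `_expand_user` consistent with how it is called here (the real pandas implementation may differ in details):

import os

def _expand_user(filepath_or_buffer):
    # Expand '~' only for plain string paths; buffers pass through unchanged.
    if isinstance(filepath_or_buffer, str):
        return os.path.expanduser(filepath_or_buffer)
    return filepath_or_buffer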
Example #30
def _is_s3_url(url):
    """Check for an s3 url"""
    try:
        return parse_url(url).scheme == 's3'
    except Exception:
        return False
Example #31
def _is_buffer_url(url):
    res = parse_url(url)
    return res.scheme == 'buffer'