Пример #1
0
def get_client_id():
    """Scrape a guest ``client_id`` from the scripts linked on soundcloud.com.

    Fetches the SoundCloud front page, collects the ``src`` attribute of every
    ``<script>`` element whose ``src`` matches ``(.*app.*js)``, then downloads
    those scripts one by one (in the order the XPath query returned them) and
    returns the first value captured by the ``client_id:"..."`` pattern.

    Returns:
        The captured client id, or ``""`` after logging a warning when the
        front page response is not ok or no script yields a match.
    """
    front_page = http_get("https://soundcloud.com")

    if front_page.ok:
        # EXSLT namespace that makes ``re:match`` available inside XPath.
        exslt_ns = {"re": "http://exslt.org/regular-expressions"}
        document = etree.parse(StringIO(front_page.content),
                               etree.HTMLParser())
        matching_scripts = document.xpath(
            "//script[re:match(@src, '(.*app.*js)')]", namespaces=exslt_ns)

        # Collect every candidate URL up front, before any script is fetched.
        candidate_urls = []
        for node in matching_scripts:
            if node is not None:
                candidate_urls.append(node.get('src'))

        for script_url in candidate_urls:
            script = http_get(script_url)
            if not script.ok:
                continue
            found = re.search(r'client_id:"([^"]*)"', script.content,
                              re.M | re.I)
            if found is None:
                continue
            captured = found.groups()
            if len(captured):
                return captured[0]

    logger.warning(
        "Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Пример #2
0
def init(engine_settings=None):
    """Seed the shared ``cookies`` store from an initial request to ``url``.

    The request is made with redirects disabled. When the response is ok, the
    cookies of every entry in ``resp.history`` are merged first, followed by
    the cookies of the response itself. A non-ok response leaves ``cookies``
    untouched.

    Args:
        engine_settings: Accepted for interface compatibility; not read here.
    """
    global cookies
    resp = http_get(url, allow_redirects=False)
    if not resp.ok:
        return

    # History entries come first so the final response's cookies are applied last.
    for hop in list(resp.history) + [resp]:
        cookies.update(hop.cookies)
Пример #3
0
def obtain_token():
    """Try to refresh the shared ``token`` mapping and return it.

    Requests the WolframAlpha code endpoint with a 2 second timeout, parses
    the response text with ``loads`` and stores its ``'code'`` entry under
    ``token['value']``. ``token['last_updated']`` is set to the current time
    rounded down to a multiple of 3600 seconds.

    The refresh is best effort: if the request or the parsing fails, ``token``
    is returned in whatever state it is in at that point.

    Returns:
        The ``token`` mapping defined outside this function.
    """
    # Read the clock once so the rounding is computed from a single instant.
    now = time()
    update_time = now - (now % 3600)
    try:
        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
        token['value'] = loads(token_response.text)['code']
        token['last_updated'] = update_time
    except Exception:
        # Deliberately ignored: a failed refresh must not break the caller.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate.
        pass
    return token
Пример #4
0
def obtain_token():
    """Refresh the shared ``token`` mapping on a best-effort basis.

    Fetches the WolframAlpha code endpoint (2 second timeout), decodes the
    response text with ``loads`` and copies its ``'code'`` entry into
    ``token['value']``; ``token['last_updated']`` receives the current time
    truncated to a multiple of 3600 seconds.

    Any ordinary failure while fetching or decoding is ignored, in which case
    ``token`` is returned as it stands at that point.

    Returns:
        The ``token`` mapping defined outside this function.
    """
    # A single clock read keeps the subtraction and the modulo consistent.
    current = time()
    update_time = current - (current % 3600)
    try:
        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
        token['value'] = loads(token_response.text)['code']
        token['last_updated'] = update_time
    except Exception:
        # Best effort only. Catching ``Exception`` instead of using a bare
        # ``except`` keeps KeyboardInterrupt/SystemExit from being swallowed.
        pass
    return token
Пример #5
0
def get_client_id():
    """Look up a guest ``client_id`` in SoundCloud's ``/assets/app`` scripts.

    Loads the SoundCloud front page, gathers the ``src`` of each ``<script>``
    element whose ``src`` contains ``/assets/app``, and searches the text of
    each downloaded script with ``cid_re``.

    Returns:
        The first capture group of the first match, or ``""`` (with a warning
        logged) when the front page is not ok or nothing matches.
    """
    landing = http_get("https://soundcloud.com")

    if landing.ok:
        dom = html.fromstring(landing.content)

        # Resolve all script URLs before fetching any of them.
        script_urls = []
        for tag in dom.xpath("//script[contains(@src, '/assets/app')]"):
            if tag is not None:
                script_urls.append(tag.get('src'))

        for script_url in script_urls:
            js = http_get(script_url)
            if not js.ok:
                continue
            hit = cid_re.search(js.text)
            if hit is None:
                continue
            groups = hit.groups()
            # A pattern without capture groups yields an empty tuple; skip it.
            if len(groups):
                return groups[0]

    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Пример #6
0
def get_client_id():
    """Extract a guest ``client_id`` from SoundCloud's app JavaScript.

    The front page is parsed with an HTML parser and queried for ``<script>``
    elements whose ``src`` matches ``(.*app.*js)`` (via the EXSLT regular
    expression namespace). Each such script is downloaded in turn and scanned
    for ``client_id:"..."``.

    Returns:
        The first captured id, or ``""`` once a warning has been logged when
        the front page is not ok or no script contains a match.
    """
    failure_note = "Unable to fetch guest client_id from SoundCloud, check parser!"

    home = http_get("https://soundcloud.com")
    if not home.ok:
        logger.warning(failure_note)
        return ""

    ns_map = {"re": "http://exslt.org/regular-expressions"}
    parsed = etree.parse(StringIO(home.content), etree.HTMLParser())
    tags = parsed.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=ns_map)

    # Build the full URL list first; scripts are only fetched afterwards.
    js_urls = [tag.get('src') for tag in tags if tag is not None]

    for js_url in js_urls:
        js_response = http_get(js_url)
        if js_response.ok:
            match = re.search(r'client_id:"([^"]*)"', js_response.content, re.M | re.I)
            if match is not None:
                values = match.groups()
                if len(values):
                    return values[0]

    logger.warning(failure_note)
    return ""
Пример #7
0
def get_client_id():
    """Search SoundCloud's ``/assets/`` scripts for a guest ``client_id``.

    Downloads the SoundCloud front page, lists the ``src`` of every
    ``<script>`` element whose ``src`` contains ``/assets/``, then fetches the
    scripts one after another and applies ``cid_re`` to their UTF-8 decoded
    bodies.

    Returns:
        The first capture group of the first match, or ``""`` (after logging a
        warning) when the front page is not ok or no script matches.
    """
    start_page = http_get("https://soundcloud.com")

    if start_page.ok:
        root = html.fromstring(start_page.content)
        # The scripts used to live under /assets/app/ and now sit directly
        # under /assets/; the client_id was seen in
        # https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
        asset_scripts = root.xpath("//script[contains(@src, '/assets/')]")

        # All URLs are collected before the first script request is made.
        asset_urls = []
        for element in asset_scripts:
            if element is not None:
                asset_urls.append(element.get('src'))

        for asset_url in asset_urls:
            asset = http_get(asset_url)
            if not asset.ok:
                continue
            body = asset.content.decode("utf-8")
            result = cid_re.search(body)
            if result is None:
                continue
            captures = result.groups()
            if len(captures):
                return captures[0]

    logger.warning(
        "Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Пример #8
0
def image_proxy():
    """Fetch a remote image on behalf of the client and relay it.

    The request must carry a ``url`` argument and an ``h`` argument equal to
    the SHA-256 hex digest of ``url`` concatenated with the configured server
    secret key; otherwise a 400 is returned.

    Returns:
        * ``('', 400)`` when ``url`` is missing, the hash does not match, the
          upstream status is neither 200, 304 nor >= 400, or the upstream
          content type does not start with ``image/``.
        * ``('', 304)`` when the upstream answers 304.
        * ``('', <status>)`` when the upstream status is >= 400.
        * ``('', 502)`` when the body needs more than five 1 MiB chunks.
        * Otherwise a ``Response`` carrying the image bytes, the upstream
          content type and a subset of the upstream headers.
    """
    url = request.args.get('url')

    if not url:
        return '', 400

    h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest()

    # NOTE(review): plain ``!=`` is not a constant-time comparison; consider
    # ``hmac.compare_digest`` once a missing ``h`` argument is handled.
    if h != request.args.get('h'):
        return '', 400

    # Forward only the conditional-request headers so upstream may answer 304.
    headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = http_get(url,
                    stream=True,
                    timeout=settings['server'].get('request_timeout', 2),
                    headers=headers)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    content_type = resp.headers.get('content-type', '')
    if not content_type.startswith('image/'):
        # Fixed: the header is read from ``resp.headers``; the previous
        # ``resp.get(...)`` call raised instead of reaching the 400 below.
        logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
        return '', 400

    # Collect chunks and join once instead of repeated concatenation.
    chunks = []

    for chunk in resp.iter_content(1024*1024):
        if len(chunks) >= 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        chunks.append(chunk)

    img = b''.join(chunks)

    headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

    return Response(img, mimetype=resp.headers['content-type'], headers=headers)
Пример #9
0
def get(*args, **kwargs):
    """Call ``http_get`` with a default ``timeout`` when none was supplied.

    All positional and keyword arguments are forwarded unchanged. If the
    caller did not pass ``timeout``, the value of
    ``settings['outgoing']['request_timeout']`` is used.

    Returns:
        Whatever ``http_get`` returns.
    """
    timeout_missing = 'timeout' not in kwargs
    if timeout_missing:
        # The setting is only looked up when it is actually needed.
        outgoing = settings['outgoing']
        kwargs['timeout'] = outgoing['request_timeout']

    return http_get(*args, **kwargs)
Пример #10
0
def get(*args, **kwargs):
    """Forward to ``http_get``, filling in ``timeout`` if the caller omitted it.

    An explicitly passed ``timeout`` is kept as is; otherwise
    ``settings['outgoing']['request_timeout']`` is inserted before the call.

    Returns:
        The result of ``http_get(*args, **kwargs)``.
    """
    if 'timeout' in kwargs:
        # Caller chose a timeout; pass everything through untouched.
        return http_get(*args, **kwargs)

    kwargs['timeout'] = settings['outgoing']['request_timeout']
    return http_get(*args, **kwargs)
Пример #11
0
def get(*args, **kwargs):
    """Call ``http_get`` with a default timeout and ``raise_for_httperror`` set.

    ``timeout`` falls back to ``settings['outgoing']['request_timeout']`` when
    the caller did not pass one. ``raise_for_httperror`` is always set to
    ``True``, replacing any value the caller supplied.

    Returns:
        Whatever ``http_get`` returns.
    """
    timeout_given = 'timeout' in kwargs
    if not timeout_given:
        kwargs['timeout'] = settings['outgoing']['request_timeout']

    # Unconditional: overrides a caller-provided raise_for_httperror.
    kwargs.update(raise_for_httperror=True)
    return http_get(*args, **kwargs)