Exemplo n.º 1
0
def get_http_data(url,
                  header=None,
                  data=None,
                  useragent=FIREFOX_UA,
                  referer=None,
                  cookiejar=None):
    """Fetch ``url`` and return the response body.

    Args:
        url: Address to request.
        header: Optional mapping of extra request headers; entries with a
            falsy value are skipped.
        data: Optional request body. When truthy it is attached to the
            request (which makes urllib issue a POST).
        useragent: Value for the ``User-Agent`` header (skipped if falsy).
        referer: Value for the ``Referer`` header (skipped if falsy).
        cookiejar: Cookie jar used for the request; a fresh ``CookieJar``
            is created when ``None`` is given.

    Returns:
        The response body. On Python 3 it is decoded as UTF-8 when
        possible, otherwise the raw bytes are returned.

    Raises:
        SystemExit: With code 5 when the request fails (HTTP error, URL
            error, malformed URL) or the connection drops while reading.
    """
    # Compare against None explicitly: CookieJar defines __len__, so an
    # empty jar passed in by the caller is falsy and would otherwise be
    # silently replaced, losing every cookie the server sets.
    if cookiejar is None:
        cookiejar = CookieJar()

    log.debug("HTTP getting %r", url)
    starttime = time.time()

    request = Request(url)
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    # Caller-supplied headers are applied last so they can override the
    # standard ones.
    for headers in (standard_header, header or {}):
        for key, value in headers.items():
            if value:
                request.add_header(key, value)
    if data:
        # Python 3 requires the request body to be bytes.
        if is_py3 and isinstance(data, str):
            data = data.encode("utf-8")
        # Request.add_data() was removed in Python 3.4; assigning the
        # attribute works on both Python 2 and Python 3.
        request.data = data

    opener = build_opener(HTTPCookieProcessor(cookiejar))

    try:
        response = opener.open(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.reason)
        sys.exit(5)
    except ValueError:
        log.error("Try adding http:// before the url")
        sys.exit(5)

    try:
        body = response.read()
    except socket.error:
        log.error("Lost the connection to the server")
        sys.exit(5)
    finally:
        # Always release the connection, including on the error path.
        response.close()

    if is_py3:
        try:
            body = body.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8 text (e.g. binary payload): hand back raw bytes.
            pass

    spent_time = time.time() - starttime
    bps = 8 * len(body) / max(spent_time, 0.001)

    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)", len(body), url,
              spent_time, bps)

    return body
Exemplo n.º 2
0
def check_redirect(url):
    """Return the redirect target of ``url``, or ``url`` if there is none.

    The URL is opened through an opener built with ``NoRedirectHandler``
    (presumably it hands back the 3xx response instead of following it --
    confirm against its definition) and a Firefox ``User-Agent`` header.

    Args:
        url: Address to probe.

    Returns:
        The ``location`` response header when the status code is one of
        300, 301, 302, 303 or 307; otherwise ``url`` unchanged.
    """
    opener = build_opener(NoRedirectHandler())
    opener.addheaders += [('User-Agent', FIREFOX_UA)]
    response = opener.open(url)
    try:
        if response.code in (300, 301, 302, 303, 307):
            return response.headers["location"]
        return url
    finally:
        # The body is never read, so close explicitly to release the
        # underlying connection instead of leaking it.
        response.close()
Exemplo n.º 3
0
def check_redirect(url):
    """Return the redirect target of ``url``, or ``url`` if there is none.

    The URL is opened through an opener built with ``NoRedirectHandler``
    (presumably it hands back the 3xx response instead of following it --
    confirm against its definition) and a Firefox ``User-Agent`` header.

    Args:
        url: Address to probe.

    Returns:
        The ``location`` response header when the status code is one of
        300, 301, 302, 303 or 307; otherwise ``url`` unchanged.
    """
    opener = build_opener(NoRedirectHandler())
    opener.addheaders += [('User-Agent', FIREFOX_UA)]
    response = opener.open(url)
    try:
        if response.code in (300, 301, 302, 303, 307):
            return response.headers["location"]
        return url
    finally:
        # The body is never read, so close explicitly to release the
        # underlying connection instead of leaking it.
        response.close()
Exemplo n.º 4
0
def get_http_data(url, header=None, post=None, useragent=FIREFOX_UA,
                  referer=None, cookiejar=None):
    """Fetch ``url`` and return an ``(error, data)`` pair.

    Args:
        url: Address to request. URLs containing ``manifest.f4m`` get an
            ``hdcore=3.3.0`` query parameter appended.
        header: Optional mapping of extra request headers; entries with a
            falsy value are skipped.
        post: Optional request body; when truthy the request carries it
            as data. On Python 3 a ``str`` body is encoded as UTF-8.
        useragent: Value for the ``User-Agent`` header (skipped if falsy).
        referer: Value for the ``Referer`` header (skipped if falsy).
        cookiejar: Cookie jar used for the request; a fresh ``CookieJar``
            is created when ``None`` is given.

    Returns:
        A tuple ``(error, data)``:
        * ``(None, body)`` on success; on Python 3 ``body`` is decoded as
          UTF-8 when possible, otherwise raw bytes.
        * ``(True, body)`` when the server answers with an HTTP error;
          ``body`` is the undecoded error response body.
        * ``(True, "Lost the connection to the server")`` when reading
          the response fails with a socket error.
    """
    # Compare against None explicitly: CookieJar defines __len__, so an
    # empty jar passed in by the caller is falsy and would otherwise be
    # silently replaced, losing every cookie the server sets.
    if cookiejar is None:
        cookiejar = CookieJar()

    if url.find("manifest.f4m") > 0:
        parse = urlparse(url)
        # Avoid emitting "?&hdcore=..." when the URL has no query string.
        if parse.query:
            query = "%s&hdcore=3.3.0" % parse.query
        else:
            query = "hdcore=3.3.0"
        url = "%s://%s%s?%s" % (parse.scheme, parse.netloc, parse.path, query)

    log.debug("HTTP getting %r", url)
    starttime = time.time()
    error = None
    if post:
        # Python 3 needs a bytes body; only encode text so that callers
        # already passing bytes do not hit a TypeError.
        if is_py3 and isinstance(post, str):
            post = post.encode("utf-8")
        request = Request(url, data=post)
    else:
        request = Request(url)
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    # Caller-supplied headers are applied last so they can override the
    # standard ones.
    for headers in (standard_header, header or {}):
        for key, value in headers.items():
            if value:
                request.add_header(key, value)

    opener = build_opener(HTTPCookieProcessor(cookiejar))

    try:
        response = opener.open(request)
    except HTTPError as e:
        error = True
        data = e.read()
        return error, data

    try:
        data = response.read()
    except socket.error:
        return True, "Lost the connection to the server"
    finally:
        # Always release the connection, including on the error path.
        response.close()

    if is_py3:
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8 text (e.g. binary payload): hand back raw bytes.
            pass

    spent_time = time.time() - starttime
    bps = 8 * len(data) / max(spent_time, 0.001)

    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)",
              len(data), url, spent_time, bps)

    return error, data
Exemplo n.º 5
0
def get_http_data(url, header=None, data=None, useragent=FIREFOX_UA,
                  referer=None, cookiejar=None):
    """Fetch ``url`` and return the response body.

    Args:
        url: Address to request.
        header: Optional mapping of extra request headers; entries with a
            falsy value are skipped.
        data: Optional request body. When truthy it is attached to the
            request (which makes urllib issue a POST).
        useragent: Value for the ``User-Agent`` header (skipped if falsy).
        referer: Value for the ``Referer`` header (skipped if falsy).
        cookiejar: Cookie jar used for the request; a fresh ``CookieJar``
            is created when ``None`` is given.

    Returns:
        The response body. On Python 3 it is decoded as UTF-8 when
        possible, otherwise the raw bytes are returned.

    Raises:
        SystemExit: With code 5 when the request fails (HTTP error, URL
            error, malformed URL) or the connection drops while reading.
    """
    # Compare against None explicitly: CookieJar defines __len__, so an
    # empty jar passed in by the caller is falsy and would otherwise be
    # silently replaced, losing every cookie the server sets.
    if cookiejar is None:
        cookiejar = CookieJar()

    log.debug("HTTP getting %r", url)
    starttime = time.time()

    request = Request(url)
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    # Caller-supplied headers are applied last so they can override the
    # standard ones.
    for headers in (standard_header, header or {}):
        for key, value in headers.items():
            if value:
                request.add_header(key, value)
    if data:
        # Python 3 requires the request body to be bytes.
        if is_py3 and isinstance(data, str):
            data = data.encode("utf-8")
        # Request.add_data() was removed in Python 3.4; assigning the
        # attribute works on both Python 2 and Python 3.
        request.data = data

    opener = build_opener(HTTPCookieProcessor(cookiejar))

    try:
        response = opener.open(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s", e.reason)
        sys.exit(5)
    except ValueError:
        log.error("Try adding http:// before the url")
        sys.exit(5)

    try:
        body = response.read()
    except socket.error:
        log.error("Lost the connection to the server")
        sys.exit(5)
    finally:
        # Always release the connection, including on the error path.
        response.close()

    if is_py3:
        try:
            body = body.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8 text (e.g. binary payload): hand back raw bytes.
            pass

    spent_time = time.time() - starttime
    bps = 8 * len(body) / max(spent_time, 0.001)

    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)",
              len(body), url, spent_time, bps)

    return body