예제 #1
0
def get_soup(source: str,
             headers: dict = None,
             verify: bool = True,
             from_encoding: str = None,
             timeout=20) -> BeautifulSoup:
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        source (str): Website url.
        headers (dict, optional): Headers to be used for the request. When None,
                                  the result of `get_headers()` is used.
        verify (bool, optional): Passed to `requests.get` as `verify`. Defaults to True.
        from_encoding (str, optional): Encoding handed to BeautifulSoup. Defaults to None.
        timeout (int, optional): If no response is received after `timeout` seconds,
                                 `requests` raises an exception. Defaults to 20.

    Returns:
        BeautifulSoup: Website soup, parsed with "html.parser".

    Raises:
        HTTPError: If the response status is not OK.
    """
    if headers is None:
        headers = get_headers()
    # Exceptions raised by requests (timeouts, connection errors, ...) propagate unchanged.
    response = requests.get(source,
                            headers=headers,
                            verify=verify,
                            timeout=timeout)
    if not response.ok:
        # The message must be an f-string; otherwise the placeholders are emitted literally.
        raise HTTPError(f"Web {source} not found! {response.content}")
    return BeautifulSoup(response.content,
                         "html.parser",
                         from_encoding=from_encoding)
예제 #2
0
    def test_application_error(self):
        '''
        An OAuth failure delivered with HTTP status 500 must surface as
        OAuthException.

        Facebook reports application-level errors with 5xx status codes, so
        the status alone cannot be trusted; the JSON body carries the real
        error type.
        '''
        from io import StringIO
        from urllib.error import HTTPError

        graph = self.guy.graph()
        error_body = StringIO('''{
              "error": {
                "type": "OAuthException",
                "message": "Error validating access token: USER_ID has not authorized application APP_ID"
              }
            }''')

        # NOTE(review): the patch target is the Python 2 module name while the
        # exception comes from Python 3's urllib.error - confirm this is intended.
        with mock.patch('urllib2.build_opener') as build_opener:
            fake_opener = mock.MagicMock()
            fake_opener.open.side_effect = HTTPError(
                'bla', 500, 'bla', 'bla', error_body)
            build_opener.return_value = fake_opener

            self.assertRaises(facebook_exceptions.OAuthException,
                              graph.get, 'me')
예제 #3
0
    def _http_request(url):
        ''' Open url, retrying once after a BadStatusLine; return the response or None '''
        log('Request URL: {url}', url=url)
        filename = url.split('/')[-1]

        def _report_failure():
            # Failed to retrieve file
            Dialog().ok(localize(30004), localize(30013, filename=filename))

        for is_retry in (False, True):
            try:
                req = urlopen(url)
                log('Response code: {code}', code=req.getcode())
                if 400 <= req.getcode() < 600:
                    raise HTTPError('HTTP %s Error for url: %s' %
                                    (req.getcode(), url),
                                    response=req)
            except HTTPError:
                _report_failure()
                return None
            except BadStatusLine:
                # The first malformed status line is tolerated; only the
                # second one is reported to the user.
                if is_retry:
                    _report_failure()
                    return None
            else:
                break
        return req
    def _http_request(url):
        """Perform an HTTP request and return the response, or None on an HTTP error.

        The URL is opened with a 5 second timeout. When an HTTPError occurs
        (raised by urlopen or by the explicit status check below) a
        "failed to retrieve file" dialog is shown and None is returned.
        Other exceptions propagate to the caller.
        """

        try:  # Python 3
            from urllib.error import HTTPError
            from urllib.request import urlopen
        except ImportError:  # Python 2
            from urllib2 import HTTPError, urlopen

        log('Request URL: {url}', url=url)
        filename = url.split('/')[-1]

        try:
            req = urlopen(url, timeout=5)
            code = req.getcode()
            log('Response code: {code}', code=code)
            if 400 <= code < 600:
                # urllib's HTTPError takes (url, code, msg, hdrs, fp). Calling it
                # with a single message and a `response=` keyword raises
                # TypeError, which would escape the handler below.
                raise HTTPError(url, code,
                                'HTTP %s Error for url: %s' % (code, url),
                                req.info(), req)
        except HTTPError:
            ok_dialog(localize(30004),
                      localize(30013,
                               filename=filename))  # Failed to retrieve file
            return None
        return req
예제 #5
0
def mock_raise_httperror(*args, **kwargs):
    """Stand-in callable: echo the received arguments, then raise a fake HTTPError (code 101)."""
    print("mock_raise_httperror\nargs: %s\nkwargs: %s" % (args, kwargs))
    error_fields = {
        "url": "http://fake.come",
        "code": 101,
        "msg": "Fake HTTPError from Mock",
        "hdrs": "Foo",
        "fp": StringIO("Bar"),
    }
    raise HTTPError(**error_fields)
예제 #6
0
 def side_effect(*args, **kwargs):
     """Raise an HTTP 505 error whose body is an HTML "Facebook | Error" page."""
     raise HTTPError('bla', 505, 'bla', 'bla', StringIO('''
     <title>Facebook | Error</title>
     Sorry, something went wrong.
     '''))
예제 #7
0
def get_url(url, user=None, password=None):
    """
    Fetch a url, retrying with a random back-off when the server answers 429.

    The retry budget and back-off window are read from the environment:
    DOWNLOAD_RETRY (attempts, default 3), DOWNLOAD_MIN_WAIT and
    DOWNLOAD_MAX_WAIT (seconds, defaults 60 and 6000).

    :param url: the url to go and fetch
    :param user: optional http username to apply to the request
    :param password: optional http password to apply to the request
    :return: the successful response object (its ``content`` holds the bytes)
    :raises HTTPError: on any non-429 error status, or when every attempt
        was answered with 429
    """
    max_retry = int(os.getenv("DOWNLOAD_RETRY", "3"))
    min_delay = int(os.getenv("DOWNLOAD_MIN_WAIT", "60"))
    max_delay = int(os.getenv("DOWNLOAD_MAX_WAIT", "6000"))

    # Only send credentials when a user was supplied; an unconditional
    # (None, None) tuple would be sent as bogus basic-auth credentials.
    auth = None
    if user is not None:
        auth = (user, password if password is not None else "")

    for attempt in range(1, max_retry + 1):
        r = requests.get(url, auth=auth)
        if r.ok:
            return r

        # errors="replace" keeps a non-UTF-8 error body from masking the HTTP failure.
        body = r.content.decode("utf-8", errors="replace")
        if r.status_code != 429:
            logging.error(f"could not make request {r.status_code} {body}")
            raise HTTPError(f"could not make request {r.status_code}")

        logging.error(f"Too many requests. {r.status_code} {body}")
        if attempt < max_retry:
            # No point in sleeping after the final attempt.
            delay = randint(min_delay, max_delay)
            logging.info(f"sleeping for {delay} seconds")
            sleep(delay)
            logging.info("trying again...")

    # Previously the function fell off the loop and returned None here,
    # which callers would only notice later as an AttributeError.
    raise HTTPError(
        f"could not make request: still rate-limited after {max_retry} attempts"
    )
예제 #8
0
def mock_raise_503_httperror(*args, **kwargs):
    """Stand-in callable: echo the received arguments, then raise a fake 503 HTTPError."""
    print("mock_raise_httperror\nargs: %s\nkwargs: %s" % (args, kwargs))
    error_fields = {
        "url": "http://fake.come",
        "code": 503,
        "msg": "Service unavailable",
        "hdrs": "Foo",
        "fp": StringIO("Bar"),
    }
    raise HTTPError(**error_fields)
예제 #9
0
def test_client_parse_error_file():
    """parse_error_file() reports each logged error once and records it as a failure."""
    dbbuddy = Db.DbBuddy()
    client = Db.GenericClient(dbbuddy)

    # Nothing has been written to the error file yet.
    assert not client.parse_error_file()
    assert not dbbuddy.failures

    http_error = HTTPError("101", "Fake HTTPError from Mock", "Foo", "Bar",
                           StringIO("Baz"))
    url_error = URLError("Fake URLError from Mock")
    client.http_errors_file.write("Casp9\n%s\n//\n" % http_error)
    client.http_errors_file.write("Inx1\n%s\n//\n" % url_error)

    expected_report = '''Casp9
HTTP Error Fake HTTPError from Mock: Foo

Inx1
<urlopen error Fake URLError from Mock>

'''
    assert client.parse_error_file() == expected_report
    assert len(dbbuddy.failures) == 2

    # Repeat to make sure that the same error is not added again
    repeated_error = URLError("Fake URLError from Mock")
    client.http_errors_file.write("Inx1\n%s\n//\n" % repeated_error)
    assert not client.parse_error_file()
    assert len(dbbuddy.failures) == 2
예제 #10
0
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = 500,
) -> None:
    """POST source locations to the import endpoint in newline-delimited JSON batches.

    Locations are grouped with ``misc.batch`` into chunks of
    ``import_batch_size``; each chunk is serialised one JSON document per
    line and sent in a single request. The first response whose status is
    not 200 aborts the import with an ``HTTPError`` carrying the first 100
    bytes of the response body.
    """
    endpoint = f"/api/importSourceLocations?import_run_id={import_run_id}"

    for batch in misc.batch(import_locations, import_batch_size):
        ndjson = "\n".join(loc.json(exclude_none=True) for loc in batch)
        payload = ndjson.encode("utf-8")

        rsp = vial_http.request(
            "POST",
            endpoint,
            headers={
                **vial_http.headers, "Content-Type": "application/x-ndjson"
            },
            body=payload,
        )
        if rsp.status == 200:
            continue

        raise HTTPError(endpoint, rsp.status, rsp.data[:100],
                        dict(rsp.headers), None)
예제 #11
0
    def test_response_code(self, mocker, test_image_url, code, reason):
        """download_file() must raise RuntimeError when the patched urlopen raises HTTPError."""
        http_error = HTTPError(test_image_url, code, reason, {}, None)
        urlopen_target = make_mock_target("misc", "urlopen")
        mocker.patch(urlopen_target, side_effect=http_error)

        with pytest.raises(RuntimeError):
            misc.download_file(test_image_url)
예제 #12
0
def check_response(response):
    """Parse the XML body of *response* and return it, or raise HTTPError on a non-200 status.

    The body is parsed before the status is checked, because the error
    message is taken from ``ResponseStatus/subStatusCode`` in the body.
    """
    res_json = xmltodict.parse(response.text)
    if response.status_code == 200:
        return res_json

    sub_status = res_json['ResponseStatus']['subStatusCode']
    raise HTTPError(response.url, response.status_code, sub_status, None,
                    None)
예제 #13
0
def download_from_url(url, dest_file):
    """
    Attempt to download file specified by url to 'dest_file'

    The file type is taken from the response's content-type header when
    present, otherwise guessed from the URL's extension. Only JPEG, PNG,
    GIF, WebM and MP4 content is written to disk.

    Raises:

        WrongFileTypeException

            when content-type is not in the supported types or cannot
            be derived from the URL

        FileExistsException

            If 'dest_file' already exists.

        HTTPError

            If the request was redirected to imgur's "removed" placeholder
            image (reported as a 404).
    """
    # Don't download files multiple times!
    if pathexists(dest_file):
        raise FileExistsException('%s already downloaded.' %
                                  dest_file.split('/')[-1])

    response = request(url)
    info = response.info()
    actual_url = response.url
    if actual_url == 'http://i.imgur.com/removed.png':
        raise HTTPError(actual_url, 404,
                        "Imgur suggests the image was removed", None, None)

    # Work out file type either from the response or the url.
    # Ask the headers object itself rather than a list of its keys:
    # header-message objects match names case-insensitively, whereas a
    # plain list of keys would miss a header spelled "Content-Type".
    if 'content-type' in info:
        filetype = info['content-type']
    elif url.endswith(('.jpg', '.jpeg')):
        filetype = 'image/jpeg'
    elif url.endswith('.png'):
        filetype = 'image/png'
    elif url.endswith('.gif'):
        filetype = 'image/gif'
    elif url.endswith('.mp4'):
        filetype = 'video/mp4'
    elif url.endswith('.webm'):
        filetype = 'video/webm'
    else:
        filetype = 'unknown'

    # Only try to download acceptable image types
    if filetype not in ('image/jpeg', 'image/png', 'image/gif', 'video/webm',
                        'video/mp4'):
        raise WrongFileTypeException('WRONG FILE TYPE: %s has type: %s!' %
                                     (url, filetype))

    filedata = response.read()
    # The context manager closes the file even if the write fails.
    with open(dest_file, 'wb') as filehandle:
        filehandle.write(filedata)
예제 #14
0
def test_handler_handles_http_error(mocker, event):
    """The handler returns False when the patched urlopen raises an HTTPError."""
    server_error = HTTPError("http://example.com", 500, "Internal Error", {},
                             None)
    mocker.patch("service.urlopen", side_effect=server_error)

    outcome = handler(event, None)

    assert outcome is False
예제 #15
0
def test_http_error():
    """http.get() reports an HTTPError raised while reading as 'HTTP error <code>: <reason>'."""
    unavailable = HTTPError("https://itunes.apple.com/search", 503,
                            "Service Unavailable", {}, None)
    with patch("urllib.request.urlopen") as urlopen_mock:
        # The error fires when the body of the opened response is read.
        opened = urlopen_mock.return_value.__enter__.return_value
        opened.read.side_effect = unavailable
        with pytest.raises(Exception) as exc:
            http.get("https://itunes.apple.com/search", {"term": "python"})
        assert str(exc.value) == "HTTP error 503: Service Unavailable"
예제 #16
0
async def raise_bytes_exception(scope: Scope, _info: Info,
                                _matches: RouteMatches,
                                _content: Content) -> HttpResponse:
    """Request handler that always fails with a 401 HTTPError whose message is bytes."""
    unauthorized = HTTPError(make_url(scope), 401, b'Unauthorized - bytes',
                             [(b'content-type', b'text/plain')], None)
    raise unauthorized
    # pylint: disable=unreachable
    return 204
예제 #17
0
 def test_401(self):
     """prepare() raises HTTPError when the mocked version endpoint is set to an HTTPError."""
     uploader = BlazeMeterUploader()
     uploader.engine = EngineEmul()

     version_url = 'https://a.blazemeter.com/api/v4/web/version'
     bz_mock = BZMock(uploader._user)
     bz_mock.mock_get.update(
         {version_url: HTTPError(None, None, None, None, None)})

     with self.assertRaises(HTTPError):
         uploader.prepare()
예제 #18
0
def check_response(response):
    """Return True for a 200 response; raise HTTPError for any other status.

    The raised error carries the response's url, status code, body text
    (as the message) and headers.
    """
    if response.status_code != 200:
        raise HTTPError(response.url,
                        response.status_code,
                        msg=response.text,
                        hdrs=response.headers,
                        # fp must be a file-like object or None; an empty
                        # string is neither and breaks read()/close().
                        fp=None)
    return True
 def test_git_get_tag_release_description_fails_must_raise_http_error(self,
                                                                      mock_git_get_tag_release_description,
                                                                      mock_git_create_merge_request,
                                                                      mock_git_accept_merge_request):
     """create_auto_merge_request() raises HTTPError when the tag-description mock raises one."""
     description_error = HTTPError('url', 'cde', 'msg', 'hdrs', 'fp')
     mock_git_get_tag_release_description.side_effect = description_error

     self.assertRaises(HTTPError, create_auto_merge_request,
                       'gitlab_endpoint', 'gitlab_token', 'project_id',
                       'source_branch', 'target_branch', ['user1'], 'tag_name')
    def test_404(self):
        """A 404 HTTPError raised by pandas.read_html renders as 'Page not found (404)'."""
        not_found = HTTPError('http://example.org', 404,
                              "fake error message", None, None)
        with mock.patch('pandas.read_html', side_effect=not_found):
            wf_module = fetch(url='http://example.org')

        expected = ProcessResult(error='Page not found (404)')
        self.assertEqual(render(wf_module), expected)
예제 #21
0
def test_remote_vocab_error(MockGraph):
    """get_vocabulary() raises ValidationError when parsing the remote vocabulary fails with HTTP 503."""
    vocab_url = 'http://example.org/foo/'
    failing_graph = Mock()
    failing_graph.parse.side_effect = HTTPError(vocab_url, 503, '', {}, None)
    MockGraph.return_value = failing_graph

    # failure to retrieve the vocabulary over HTTP should
    # raise a ValidationError
    with pytest.raises(ValidationError):
        get_vocabulary(vocab_url)
예제 #22
0
def test_scrape_raises_http_error():
    """scrape() lets an HTTPError from urlopen propagate with its code and message intact."""
    not_found = HTTPError('http://example.org', 404, 'Not found', {},
                          mock.Mock())
    with mock.patch('scraper.urlopen', side_effect=not_found):
        with pytest.raises(HTTPError) as exc:
            scraper.scrape('http://example.org')

    raised = exc.value
    assert raised.code == 404
    assert raised.msg == 'Not found'
예제 #23
0
 def raise_for_status(self) -> None:
     """Raise :class:`HTTPError` when the status code is 400 or above; otherwise do nothing."""
     if self.status_code < 400:
         return

     from urllib.error import HTTPError

     reason_text = self._get_reason_text()
     # Codes below 500 are labelled client errors, everything else server errors.
     error_kind = "Client" if self.status_code < 500 else "Server"
     http_error_msg = (f'{self.status_code} {error_kind} Error: '
                       f'{reason_text} for url: {self.url}')
     raise HTTPError(self.url, self.status_code, http_error_msg,
                     self.headers, None)
예제 #24
0
    def test_api_req_error(self):
        """An HTTPError from api_req propagates out of set_assignee() and no comment is posted."""
        forbidden = HTTPError(None, 403, None, None, None)
        self.mocks['api_req'].side_effect = forbidden

        pytest.raises(HTTPError, self.set_assignee)

        self.assert_api_req_call()
        self.mocks['post_comment'].assert_not_called()
예제 #25
0
    def test_get_version_from_bad_url_open(self, urlopen_mock):
        """get_version() raises ImageProviderError ("Cannot open ...") when urlopen raises HTTPError."""
        urlopen_mock.side_effect = HTTPError(None, None, None, None, None)
        provider = vmimage.ImageProviderBase(version="[0-9]+",
                                             build=None,
                                             arch=None)

        with self.assertRaises(vmimage.ImageProviderError) as raised:
            provider.get_version()

        message = raised.exception.args[0]
        self.assertIn("Cannot open", message)
예제 #26
0
def test_value_error_raise_http_error(monkeypatch):
    """count_dots_on_i() raises ValueError("Unreachable <url>") when urlopen raises HTTPError."""
    # HTTPError's positional order is (url, code, msg, hdrs, fp). The fake
    # error is built in that order, with fp=None because an empty string is
    # not a file-like object.
    not_found = HTTPError("https://NF", 404, "NF", {}, None)
    monkeypatch.setattr(
        urllib.request,
        "urlopen",
        MagicMock(side_effect=not_found),
    )
    with pytest.raises(ValueError, match="Unreachable https://NF"):
        count_dots_on_i("https://NF")
예제 #27
0
 def test_reload_unexpected_url(self):
     """
     If the error URL is not the root, the error is propagated.
     """
     unavailable = HTTPError(self.api.url, 503, "Service Unavailable", {},
                             None)
     # Point the error at a URL other than the one it was created with.
     unavailable.url = "/foo"
     reload_url = urljoin(self.api.url, "reload")
     self.fakes.jenkins.responses[reload_url] = unavailable

     with self.assertRaises(HTTPError):
         self.api.reload()
예제 #28
0
def login(request, next_page=None, required=False):
    """Forwards to CAS login URL or verifies CAS ticket

    Modified locally: honour next=??? in query string, don't deliver a message, catch HTTPError and IOError,
    generate LogEntry

    Without a ``ticket`` query parameter the user is redirected to the CAS
    login URL. With one, the ticket is authenticated; on success the user
    is logged in, a LogEntry is saved and the user is redirected to
    ``next_page``. On failure the user is sent back to CAS when
    ``settings.CAS_RETRY_LOGIN`` or ``required`` is set, and gets a 403
    response otherwise.
    """
    if not next_page and 'next' in request.GET:
        # NOTE(review): 'next' comes straight from the query string and is
        # used as a redirect target without validation here - confirm it is
        # checked elsewhere, otherwise this is an open redirect.
        next_page = request.GET['next']
    if not next_page:
        next_page = _redirect_url(request)
    if request.user.is_authenticated:
        return HttpResponseRedirect(next_page)
    ticket = request.GET.get('ticket')
    service = _service_url(request, next_page)
    if ticket:
        from django.contrib import auth
        try:
            user = auth.authenticate(ticket=ticket,
                                     service=service,
                                     request=request)
        except IOError as e:
            # Here we want to catch only: connection reset, timeouts, name or service unknown
            if e.errno in [104, 110, 'socket error']:
                user = None
            # HTTPError is a type of OSError, which IOError is an alias for.
            # Sometimes, the CAS server seems to just return a 500 internal server error.  Let's handle that the
            # same way as the above case.
            elif isinstance(e, HTTPError):
                if e.code == 500:
                    user = None
                else:
                    # Any other HTTPError should bubble up and let us know something horrible has happened.
                    # HTTPError needs (url, code, msg, hdrs, fp); building it from a
                    # message alone would raise TypeError and hide the real error.
                    raise HTTPError(
                        e.filename, e.code,
                        "Got an HTTP Error when authenticating. The error is: {0!s}."
                        .format(e), e.hdrs, e.fp)
            else:
                raise IOError("The errno is %r: %s." % (e.errno, str(e)))
        except ParseError:
            user = None

        if user is not None:
            auth.login(request, user)
            #LOG EVENT#
            entry = LogEntry(userid=user.username,
                             description=("logged in as %s from %s") %
                             (user.username, ip.get_ip(request)),
                             related_object=user)
            entry.save()
            return HttpResponseRedirect(next_page)
        elif settings.CAS_RETRY_LOGIN or required:
            return HttpResponseRedirect(_login_url(service))
        else:
            error = "<h1>Forbidden</h1><p>Login failed.</p>"
            return HttpResponseForbidden(error)
    else:
        return HttpResponseRedirect(_login_url(service))
예제 #29
0
    def __init__(self,
                 geokey,
                 geoauth,
                 loc,
                 code,
                 searchtype='R',
                 mset=EH_STANDARD,
                 nset=EN_STANDARD,
                 fuzzy=FUZ_STANDARD,
                 max_return=None):
        """Query ``geonames`` for *loc* and store the chosen hit in ``self.result``.

        ``searchtype`` selects both the query parameters and how a hit is
        picked from the results; ``mset`` and ``nset`` are the feature-class
        sets a hit is checked against. ``self.result`` is None when no
        suitable hit is found.

        Raises:
            TypeError: if *code* is not a ``str``.
            GeoError: if *searchtype* is not in ``SEARCH_TYPES``.
            HTTPError: if the search response status is not 200.
        """
        self.location = loc

        if type(code) != str:
            raise TypeError(typerr_msg)
        if searchtype not in SEARCH_TYPES:
            raise GeoError(searchtype)

        exact_name_search = searchtype in ('E', 'EH', 'EN', 'EH EN', 'EN EH')

        extra_args = {}
        if searchtype == 'RF':
            extra_args['fuzzy'] = fuzzy
        if exact_name_search:
            extra_args['name_equals'] = loc

        # An explicit max_return always wins; otherwise exact-name searches
        # fetch 10 rows and every other search type fetches one.
        if max_return is not None:
            rows = max_return
        else:
            rows = 10 if exact_name_search else 1

        search_res = geonames(loc,
                              key=geokey,
                              auth=geoauth,
                              country=code,
                              maxRows=rows,
                              **extra_args)

        if search_res.status_code != 200:
            raise HTTPError(search_res.url, search_res.status_code,
                            'GeoNames request failed', search_res.headers,
                            None)

        if not search_res:
            self.result = None
            return

        # These search types take the first hit without a feature-class check.
        if searchtype in ('R', 'RF', 'E'):
            self.result = search_res[0]
            return

        fcl = mset if searchtype in ('EH EN', 'EH', 'RIH') else nset

        # 'RIH'/'RIN' only ever look at the first hit.
        if searchtype in ('RIH', 'RIN'):
            top_hit = search_res[0]
            if top_hit.feature_class in fcl and loc in top_hit.address:
                self.result = top_hit
            else:
                self.result = None
            return

        # Remaining types: the first hit whose feature class is in the set.
        for candidate in search_res:
            if candidate.feature_class in fcl:
                self.result = candidate
                return

        # Nothing matched: the combined types fall back to the first hit.
        if searchtype in ('EH EN', 'EN EH'):
            self.result = search_res[0]
        else:
            self.result = None
예제 #30
0
 def size(self, name):
     """Return the ``Content-Length`` header of a HEAD request for *name*.

     The value is returned exactly as ``getheader`` yields it.
     NOTE(review): that is presumably a string (or None when the header is
     missing) rather than an int - confirm what callers expect.

     Raises:
         HTTPError: if the response status is not 200.
     """
     conn = self._get_connection()
     try:
         url = self._location + self._get_name(name)
         conn.request('HEAD', url)
         res = conn.getresponse()
     finally:
         # Release the connection even when the request itself fails.
         conn.close()
     if res.status != 200:
         # Report the URL that was actually requested.
         raise HTTPError(url, res.status, res.reason, res.msg, res.fp)
     return res.getheader('Content-Length')