def test_request_api_for_authentication(context):
    """A GET carrying valid ``auth`` credentials is answered with a 200."""
    browser = SleepyHollow()
    protected_url = context.route_to("/auth/simple")
    credentials = ('lincoln', 'gabriel')

    response = browser.get(protected_url, auth=credentials)

    response.status_code.should.equal(200)
    expect('Very Simple').to.be.within(response.text)
def test_js_confirms_doesnt_disrupt(context):
    """Fetching ``/jsconfirm`` still returns the rendered page with a 200."""
    browser = SleepyHollow()
    page_url = context.route_to("/jsconfirm")

    response = browser.get(page_url)

    response.status_code.should.equal(200)
    expect("Confirmation dialogs don't block").to.be.within(response.html)
def test_requested_resources(context):
    """``requested_resources`` lists the URL and status of each sub-request."""
    browser = SleepyHollow()
    # fewresources.js is expected twice in the list.
    expected_urls = [
        u'http://127.0.0.1:5000/fewresources',
        u'http://127.0.0.1:5000/media/js/jquery-1.8.3.min.js',
        u'http://127.0.0.1:5000/media/js/fewresources.js',
        u'http://127.0.0.1:5000/media/js/fewresources.js',
        u'http://127.0.0.1:5000/media/img/funny.gif',
    ]
    expected = [{'status': 200, 'url': url} for url in expected_urls]

    response = browser.get(context.route_to('/fewresources'))

    response.status_code.should.equal(200)
    response.should.have.property('requested_resources').being.a(list)
    response.requested_resources.should.have.length_of(5)
    # Compare sorted copies so the order of the sub-requests is irrelevant.
    sorted(response.requested_resources).should.equal(sorted(expected))
def test_patch_querystring():
    """``_patch_querystring`` merges extra params into the URL's query."""
    browser = SleepyHollow()
    original_url = "http://foo.com/?wee=waa&foo=bar"
    extra_params = {"name": "Lincoln"}

    url, params = browser._patch_querystring(original_url, extra_params)

    expect(url).to.equal("http://foo.com/?foo=bar&name=Lincoln&wee=waa")
    expect(params).to.equal(
        {"wee": ["waa"], "foo": ["bar"], "name": "Lincoln"}
    )
def test_config_stuff(context):
    """Requesting with ``screenshot`` enabled yields screenshot bytes."""
    browser = SleepyHollow()
    options = {'screenshot': True}

    response = browser.get(context.route_to('/simple'), config=options)

    response.screenshot_bytes.shouldnt.be.empty
def test_save_screenshot(context):
    """``save_screenshot`` raises ValueError when screenshots are disabled."""
    browser = SleepyHollow()
    options = {'screenshot': False}
    expected_message = "Screenshot should be enabled throught the config dict"

    response = browser.get(context.route_to('/simple'), config=options)

    response.save_screenshot.when.called_with('stuff.png').should.throw(
        ValueError, expected_message
    )
def test_follows_meta_redirect(context):
    """A page with a meta redirect resolves to the redirect target."""
    browser = SleepyHollow()
    start_url = context.route_to("/metaredirect")

    response = browser.get(start_url)

    response.status_code.should.equal(200)
    expect("Successfully redirected!").to.be.within(response.html)
    response.url.should.equal('http://localhost:5000/postredirect')
def test_request_api(context):
    """``get(url)`` and ``request('get', url)`` give matching responses."""
    browser = SleepyHollow()
    via_request = browser.request('get', context.route_to("/simple"))
    via_get = browser.get(context.route_to("/simple"))

    # Same fields, in the same order, as the individual comparisons.
    for field in ('status_code', 'reason', 'text', 'content'):
        getattr(via_request, field).should.equal(getattr(via_get, field))
def test_can_authenticate_in_cookie_based_websites(context):
    """After posting the login form the same browser lands on ``/admin``."""
    browser = SleepyHollow()

    # Before logging in, asking for the admin page ends up on the login page.
    anonymous = browser.get(context.route_to('/admin'))
    anonymous.url.should.equal(u'http://127.0.0.1:5000/login')
    anonymous.status_code.should.equal(200)

    # Submitting the login form with the same browser reaches the admin page.
    credentials = {'email': '*****@*****.**'}
    logged_in = browser.post(context.route_to('/login'), credentials)
    logged_in.url.should.equal(u'http://127.0.0.1:5000/admin')
    logged_in.status_code.should.equal(302)
    expect("Hello lincoln, welcome to the admin").to.be.within(logged_in.text)
def test_error_handling(context):
    """Pedantic ``evaluate_javascript`` raises on a JavaScript error."""
    browser = SleepyHollow()
    # NOTE(review): the expected message reports line 2 although the script
    # below sits on a single line -- confirm the script's intended layout.
    broken_script = r'''(function(){ return foo; })()'''
    expected_message = "'ReferenceError: Can't find variable: foo' undefined:2"

    expect(browser.evaluate_javascript).when.called_with(
        broken_script, pedantic=True
    ).to.throw(InvalidJSONError, expected_message)

    # A later evaluation on the same browser is still expected to succeed.
    browser.evaluate_javascript("'SUCCESS!'").should.equal("SUCCESS!")
def test_error_handling(context):
    """A failing script raises ``InvalidJSONError`` when ``pedantic=True``."""
    hollow = SleepyHollow()
    # NOTE(review): the expected message reports line 2 although the script
    # below sits on a single line -- confirm the script's intended layout.
    failing_script = r'''(function(){ return foo; })()'''
    message = "'ReferenceError: Can't find variable: foo' undefined:2"

    pedantic_call = expect(hollow.evaluate_javascript).when.called_with(
        failing_script, pedantic=True)
    pedantic_call.to.throw(InvalidJSONError, message)

    # The same instance is then used for a script that should evaluate fine.
    hollow.evaluate_javascript("'SUCCESS!'").should.equal("SUCCESS!")
def test_patch_querystring():
    """Extra params are folded into the query string by the patch helper."""
    hollow = SleepyHollow()
    expected_params = {
        'wee': ['waa'],
        'foo': ['bar'],
        'name': 'Lincoln',
    }

    patched = hollow._patch_querystring(
        'http://foo.com/?wee=waa&foo=bar', {'name': 'Lincoln'})
    url, params = patched

    expect(url).to.equal('http://foo.com/?foo=bar&name=Lincoln&wee=waa')
    expect(params).to.equal(expected_params)
def test_response_headers(context):
    """Response headers are exposed as a dict with the expected entries."""
    browser = SleepyHollow()
    # Kept as an ordered list so the checks run in a fixed order.
    expected_headers = [
        ('Content-Type', u'text/html; charset=UTF-8'),
        ('Server', u'TornadoServer/2.4.1'),
        ('Content-Length', u'91'),
        ('Etag', u'"917c97d9437cbd1c1192f2f516e7155183b58232"'),
    ]

    response = browser.get(context.route_to('/status-200'))

    response.should.have.property('headers').being.a(dict)
    for header_name, header_value in expected_headers:
        response.headers.should.have.key(header_name).being.equal(
            header_value)
def test_json_response(context):
    """A GET on a JSON endpoint exposes the decoded body as ``json``."""
    browser = SleepyHollow()
    expected_payload = {
        u'success': True,
        u'status': 200,
        u'method': 'GET',
    }

    response = browser.get(context.route_to('/status-200.json'))

    # Type checks first...
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # ...then the decoded payload.
    response.json.should.equal(expected_payload)
def test_getting_js_errors(context):
    """JavaScript errors raised by the page are reported in ``js_errors``."""
    browser = SleepyHollow()
    expected_error = {
        'line_number': 3,
        'message': u'TypeError: \'undefined\' is not a function (evaluating \'window.intentional_error("javascript errors")\')',
        'source_id': u'http://127.0.0.1:5000/media/js/jserror.js'
    }

    response = browser.get(context.route_to('/jserror'))

    response.status_code.should.equal(200)

    # Exactly one error is expected, delivered inside a tuple.
    response.should.have.property('js_errors').being.a(tuple)
    response.js_errors.should.have.length_of(1)
    response.js_errors.should.equal((expected_error,))

    # The page content is still expected in the response.
    expect("IT WORKS").to.be.within(response.html)
def test_response(context):
    """``get`` returns a ``Response`` with typed, populated attributes."""
    browser = SleepyHollow()
    simple_url = context.route_to('/simple')

    response = browser.get(simple_url)

    # Types of the response and its attributes.
    response.should.be.a(Response)
    response.url.should.be.a(unicode)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)
    response.json.should.be.none

    # Values carried by those attributes.
    response.url.should.equal(context.route_to('/simple'))
    response.status_code.should.equal(200)
    expect('Very Simple').to.be.within(response.text)
def test_request_api_for_authentication_failing(context):
    """A GET with wrong ``auth`` credentials raises ``BadCredentialsError``."""
    browser = SleepyHollow()
    bad_credentials = ('wrong', 'credentials')

    attempt = browser.get.when.called_with(
        context.route_to("/auth/simple"),
        auth=bad_credentials,
    )

    attempt.should.throw(BadCredentialsError)
def test_delete_parameters(context):
    """A DELETE with params reports them back through ``X-*`` headers."""
    browser = SleepyHollow()
    # Ordered so the header checks run in a fixed sequence.
    expected_headers = [
        ('X-success', 'true'),
        ('X-method', '"DELETE"'),
        ('X-status', '200'),
        ('X-name', '"Gabriel"'),
    ]

    response = browser.delete(
        context.route_to('/status-200.json'),
        params={'name': 'Gabriel'},
    )

    # Types of the response and its attributes.
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # Values echoed back in the response headers.
    for header_name, header_value in expected_headers:
        response.headers.should.have.key(header_name).being.equal(
            header_value)
def test_response_status_codes(context):
    """Status code, reason and body are reported for 200, 404 and 500."""
    browser = SleepyHollow()
    # (route, status code, reason phrase, text expected in the body)
    cases = [
        ('/status-200', 200, 'OK', 'Status 200'),
        ('/status-404', 404, 'Not Found', 'Status 404'),
        ('/status-500', 500, 'Internal Server Error', 'Status 500'),
    ]

    for route, status_code, reason, body_text in cases:
        response = browser.get(context.route_to(route))
        response.status_code.should.equal(status_code)
        response.reason.should.equal(reason)
        expect(body_text).to.be.within(response.text)
def test_get_sending_headers(context):
    """A custom header sent with GET is echoed in the JSON payload."""
    browser = SleepyHollow()
    custom_headers = {'X-Name': 'Gabriel'}
    expected_payload = {
        u'success': True,
        u'method': 'GET',
        u'status': 200,
        u'X-Name': u'Gabriel',
    }

    response = browser.get(
        context.route_to('/status-200.json'),
        headers=custom_headers
    )

    # Types of the response and its attributes.
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # Decoded payload, including the echoed header.
    response.json.should.equal(expected_payload)
def test_put_parameters(context):
    """Params sent with PUT are echoed in the JSON payload."""
    browser = SleepyHollow()
    put_params = {'name': 'Gabriel'}
    expected_payload = {
        u'success': True,
        u'method': 'PUT',
        u'status': 200,
        u'name': u'Gabriel',
    }

    response = browser.put(
        context.route_to('/status-200.json'),
        params=put_params
    )

    # Types of the response and its attributes.
    response.should.be.a(Response)
    response.status_code.should.be.an(int)
    response.text.should.be.a(unicode)
    response.content.should.be.a(str)

    # Decoded payload, including the echoed parameter.
    response.json.should.equal(expected_payload)
class Scraper(object):
    """Crawl the site at ``base_url`` and print details of each product.

    The crawl goes root links -> sub-category links -> product links ->
    product details; every page is fetched through ``self.http``.

    Progress messages use the parenthesised single-argument ``print`` form,
    which produces the same output whether ``print`` is a statement or a
    function.
    """

    base_url = 'http://m.saks.com'

    # Selector for the navigation anchors; the root page and the category
    # pages are both queried with it.
    _nav_links_selector = '#left-nav-content > div > a'

    def __init__(self):
        self.http = SleepyHollow()

    def path(self, to):
        """Return ``to`` as an absolute URL.

        Strings that already carry an ``http://`` or ``https://`` scheme are
        returned untouched; anything else is joined onto ``base_url``.  The
        scheme is matched in full so that a relative path which merely begins
        with the letters "http" (``httpdocs/page``) is still resolved against
        ``base_url``.
        """
        if to.startswith(('http://', 'https://')):
            return to
        return '/'.join([self.base_url, to.lstrip('/')])

    def get(self, path):
        """Fetch ``path`` (relative or absolute) and return the response."""
        return self.http.get(self.path(path))

    def _links(self, url, selector):
        """Fetch ``url`` and return the ``href`` of each element matching
        the CSS ``selector``."""
        dom = lhtml.fromstring(self.get(url).html)
        return [anchor.attrib['href'] for anchor in dom.cssselect(selector)]

    def get_root_links(self):
        """Return the navigation links found on the entry search page."""
        print("Getting root links...")
        return self._links(
            '/eSearch.jsp?sid=127F38CAD8BC&N_Dim=0&bmSingle=N_Dim&N=1553&Ns=P_0_sort',
            self._nav_links_selector,
        )

    def get_subcategory_links(self, link):
        """Return the navigation links found on the category page ``link``."""
        print("Getting category links on %r..." % link)
        return self._links(link, self._nav_links_selector)

    def get_product_links(self, parent_url):
        """Return the product links listed on ``parent_url``."""
        print("Getting product links on %r..." % parent_url)
        return self._links(parent_url, '.productRow > a')

    def get_product_details(self, product_url):
        """Return a dict with the ``name`` and ``img`` of one product page.

        Raises IndexError when the page lacks the ``#productMainImg`` element
        or a ``form h1`` element.
        """
        print("Getting product details on %r..." % product_url)
        response = self.get(product_url)
        dom = lhtml.fromstring(response.html)
        img = dom.cssselect("#productMainImg")[0]
        name = dom.cssselect('form h1')[0]
        return dict(
            name=name.text.strip(),
            img=img.attrib['src'],
        )

    def scrape(self):
        """Walk the whole hierarchy and print the details of each product."""
        for root in self.get_root_links():
            for subcat in self.get_subcategory_links(root):
                for prod in self.get_product_links(subcat):
                    print(self.get_product_details(prod))
def test_decode_complex_object(context):
    """A JavaScript object with mixed value types is decoded to a dict."""
    browser = SleepyHollow()
    # Adjacent raw literals are concatenated into a single script string.
    script = (
        r'(function(){ '
        r'var data = {}; '
        r'data["name"] = "Gabriel Falcão \"gabrielfalcao\""; '
        r'data["github"] = "http://github.com/gabrielfalcao"; '
        r'data["projects"] = ["cello", "sleepyhollow"]; '
        r'data["coder"] = true; '
        r'data["age"] = 24; '
        r'data["weight"] = 77.5; '
        r'data["more"] = null; '
        r'return data; '
        r'})()'
    )
    expected = {
        'name': 'Gabriel Falcão "gabrielfalcao"',
        'github': u'http://github.com/gabrielfalcao',
        'projects': [u'cello', u'sleepyhollow'],
        'coder': True,
        'age': 24,
        'weight': 77.5,
        'more': "",
    }

    evaluated = browser.evaluate_javascript(script)

    expect(evaluated).to.equal(expected)
class GetASaleProduct(object):
    """Follow sale links on bananarepublic.com down to one product image.

    Progress messages use the parenthesised single-argument ``print`` form,
    which produces the same output whether ``print`` is a statement or a
    function.
    """

    # Matches a meta-refresh tag and captures its target in the ``url``
    # group.  Every fragment is a raw string so that ``\s`` and ``\d`` reach
    # the regex engine instead of relying on Python passing unknown string
    # escapes through.
    meta_redirect_url = re.compile(
        r'meta\s+'
        r'http-equiv="refresh"\s+'
        r'content="\d+;URL=(?P<url>.*?)"',
        re.I,
    )

    def __init__(self):
        self.http = SleepyHollow()

    def get_response_with_dom(self, url):
        """Fetch ``url`` and return the response with a parsed ``dom``.

        A ``url`` without an ``http://`` or ``https://`` scheme is resolved
        against ``http://www.bananarepublic.com/``.  When the page contains a
        meta-refresh tag, its target is fetched instead.
        """
        if not url.startswith(('http://', 'https://')):
            url = 'http://www.bananarepublic.com/%s' % url.lstrip('/')

        response = self.http.get(url, config=dict(screenshot=True))
        meta_refresh = self.meta_redirect_url.search(response.html)
        if meta_refresh is not None:
            # NOTE(review): nothing bounds this recursion, so pages that
            # refresh to each other would recurse until the interpreter
            # limit -- confirm whether a hop limit is wanted.
            return self.get_response_with_dom(meta_refresh.group('url'))

        response.dom = lhtml.fromstring(response.html)
        return response

    def find_sale_links(self):
        """Return hrefs of index links whose text mentions Sale, Clearance
        or Discount."""
        print("Getting sales links...")
        response = self.get_response_with_dom(
            'http://www.bananarepublic.com/products/index.jsp')
        return response.dom.xpath(
            "//ul/li[contains(@class, 'idxBottomCat')]/a["
            "contains(text(), 'Sale') or "
            "contains(text(), 'Clearance') or "
            "contains(text(), 'Discount')]/@href")

    def find_product_links(self, category_link):
        """Return the hrefs of the product-name anchors on a category page."""
        print("Getting product links...")
        response = self.get_response_with_dom(category_link)
        return response.dom.xpath(
            "//a[contains(@class, 'productItemName')]/@href")

    def start(self):
        """Check that the first product of the first sale category shows a
        JPG image.

        Raises AssertionError when the image source does not end in ``jpg``.
        """
        for category_link in self.find_sale_links():
            for product_link in self.find_product_links(category_link):
                response = self.get_response_with_dom(product_link)
                img = response.dom.cssselect("#product_image")[0]
                src = img.attrib['src']
                # Raised explicitly so the check still runs under ``-O``,
                # where ``assert`` statements are stripped.
                if not src.lower().endswith('jpg'):
                    raise AssertionError('Expected %r to be a JPG' % src)
                # Only the first product is inspected...
                break
            # ...of the first category only.
            break
def test_decode_list(context):
    """A JavaScript array literal is decoded to a Python ``list``."""
    browser = SleepyHollow()

    decoded = browser.evaluate_javascript("['gabriel', 'falcao']")

    decoded.should.equal(['gabriel', 'falcao'])
def test_decode_dict(context):
    """A JavaScript object literal is decoded to a Python ``dict``."""
    browser = SleepyHollow()
    script = u'(function(){return {name: "GABRIEL FALCÃO".toLowerCase()}})();'

    decoded = browser.evaluate_javascript(script)

    decoded.should.equal({'name': 'gabriel falcão'})
def test_error_handling_non_strict(context):
    """Without ``pedantic`` a failing script evaluates to ``None``."""
    browser = SleepyHollow()
    failing_script = r'''(foo)'''

    outcome = browser.evaluate_javascript(failing_script)

    expect(outcome).to.be.none
def __init__(self):
    """Create the ``SleepyHollow`` instance stored as ``self.http``."""
    self.http = SleepyHollow()
def test_decode_string_with_double_quotes(context):
    """A JavaScript string containing double quotes keeps them intact."""
    browser = SleepyHollow()
    script = r'"\"NICE\""'

    decoded = browser.evaluate_javascript(script)

    decoded.should.equal('"NICE"')
def test_connection_refused():
    """Requesting ``http://blah`` raises ``ConnectionRefusedError``."""
    browser = SleepyHollow()

    attempt = browser.get.when.called_with('http://blah')

    attempt.should.throw(ConnectionRefusedError)
def test_decode_float(context):
    """A JavaScript decimal literal is decoded to a Python ``float``."""
    browser = SleepyHollow()

    decoded = browser.evaluate_javascript("1.4")

    decoded.should.equal(1.4)
def test_decode_int(context):
    """A JavaScript integer literal is decoded to a Python ``int``."""
    browser = SleepyHollow()

    decoded = browser.evaluate_javascript("1")

    decoded.should.equal(1)
def test_decode_list(context):
    """Evaluating an array literal gives back the equivalent ``list``."""
    hollow = SleepyHollow()
    script = "['gabriel', 'falcao']"
    expected = ['gabriel', 'falcao']

    hollow.evaluate_javascript(script).should.equal(expected)
def test_invalid_url(context):
    """A URL without a scheme is rejected with ``InvalidUrlError``."""
    browser = SleepyHollow()
    expected_message = (
        'The url "invalid url" is not valid: You need to inform a scheme')

    attempt = browser.get.when.called_with('invalid url')

    attempt.should.throw(InvalidUrlError, expected_message)
def test_decode_dict(context):
    """Evaluating an object literal gives back the equivalent ``dict``."""
    hollow = SleepyHollow()
    expected = {'name': 'gabriel falcão'}

    hollow.evaluate_javascript(
        u'(function(){return {name: "GABRIEL FALCÃO".toLowerCase()}})();'
    ).should.equal(expected)
# -*- coding: utf-8 -*- from __future__ import unicode_literals import sys from sleepyhollow import SleepyHollow browser = SleepyHollow() response = browser.get("http://localhost:5000", config={ 'screenshot': True, 'width': 1300, 'height': 600, }) response.save_screenshot("../spec/screenshots/{0}.png".format(sys.argv[1]))
def test_decode_string(context):
    """A JavaScript string expression is decoded to a Python string."""
    browser = SleepyHollow()
    script = r'"GABRIEL\'s".toLowerCase()'

    decoded = browser.evaluate_javascript(script)

    decoded.should.equal("gabriel's")