def test_basic_auth(self):
    # GET the basic_auth endpoint with credentials set through
    # Headers.basic_auth; expects status 200 and a JSON body whose
    # 'method' field is 'GET'.
    auth_headers = simplefetch.Headers()
    auth_headers.basic_auth('simplefetch_username', 'simplefetch_password')
    url = "%sbasic_auth" % testall.test_server_host
    response = simplefetch.get(url, headers=auth_headers.items())
    payload = json.loads(response.content)
    self.assertEqual(response.status, 200)
    self.assertEqual(payload['method'], 'GET')
def test_fragment_query_string(self):
    # GET a URL carrying both a random query string and a '#fragment'
    # suffix; expects status 200, method 'GET', and a reported
    # 'query_string' equal to the encoded query followed by '#fragment'.
    params = testall.randdict(5)
    query_string = simplefetch.urlencode(params)
    url = "%s?%s#fragment" % (testall.test_server_host, query_string)
    response = simplefetch.get(url)
    payload = json.loads(response.content)
    expected_query = "%s#fragment" % query_string
    self.assertEqual(response.status, 200)
    self.assertEqual(payload['method'], 'GET')
    self.assertEqual(payload['query_string'], expected_query)
def test_fragment_basic_auth_query_string(self):
    # GET the basic_auth endpoint with a random query string, a
    # '#fragment' suffix and basic-auth credentials; expects status 200,
    # method 'GET', and a reported 'query_string' equal to the encoded
    # query followed by '#fragment'.
    params = testall.randdict(5)
    query_string = simplefetch.urlencode(params)
    auth_headers = simplefetch.Headers()
    auth_headers.basic_auth('simplefetch_username', 'simplefetch_password')
    url = "%sbasic_auth?%s#fragment" % (testall.test_server_host, query_string)
    response = simplefetch.get(url, headers=auth_headers.items())
    payload = json.loads(response.content)
    expected_query = "%s#fragment" % query_string
    self.assertEqual(response.status, 200)
    self.assertEqual(payload['method'], 'GET')
    self.assertEqual(payload['query_string'], expected_query)
def run(self):
    """Fetch every queued URL, store each successful response and parse it.

    ``self.urls_queue`` is seeded with ``self.start_urls`` and walked in
    order.  URLs already present in ``self._processed_urls`` are skipped.
    A falsy response or a status other than 200 is reported on stdout and
    the URL is skipped without being marked as processed.  A successful
    response is stored in ``self._container`` under the SHA-1 hex digest
    of the URL, committed, handed to ``self.parse`` and then recorded in
    ``self._processed_urls``.  When ``self.fetch_delay`` is truthy the loop
    sleeps after each successful fetch.
    """
    # self.init()
    self.urls_queue.extend(self.start_urls)

    # The queue is iterated in place (not copied), so URLs appended while
    # the loop is running are visited too -- presumably parse() adds them;
    # verify against the subclass implementations.
    for url in self.urls_queue:
        if url in self._processed_urls:
            continue

        response = simplefetch.get(url, headers=self.base_headers)
        if not response:
            # not available
            print('url: %s, status: %d' % (url, -1))
            continue
        if response.status != 200:
            print('url: %s, status: %d' % (url, response.status))
            continue
        print(url)

        # 'headers' is the response's own headers object; the two extra
        # keys below are written into it directly.
        data = {
            'headers': response.headers,
            'content': response.content,
        }
        data['headers']['source_url'] = url
        data['headers']['status_code'] = response.status

        self._container.put(hashlib.sha1(url).hexdigest(), data)
        self._container.commit()
        self.parse(data)
        self._processed_urls.append(url)

        if self.fetch_delay:
            if self.random_fetch_delay:
                # NOTE(review): int() truncates the jittered value, so a
                # fetch_delay of 1 yields a delay of 0 or 1 second.
                delay = int(random.uniform(0.5, 1.5) * self.fetch_delay)
            else:
                delay = self.fetch_delay
            time.sleep(delay)
def run(self):
    """Fetch every queued URL, store each successful response and parse it.

    ``self.urls_queue`` is seeded with ``self.start_urls`` and walked in
    order.  URLs already present in ``self._processed_urls`` are skipped.
    A falsy response or a status other than 200 is reported on stdout and
    the URL is skipped without being marked as processed.  A successful
    response is stored in ``self._container`` under the SHA-1 hex digest
    of the URL, committed, handed to ``self.parse`` and then recorded in
    ``self._processed_urls``.  When ``self.fetch_delay`` is truthy the loop
    sleeps after each successful fetch.
    """
    # self.init()
    self.urls_queue.extend(self.start_urls)

    # The queue is iterated in place (not copied), so URLs appended while
    # the loop is running are visited too -- presumably parse() adds them;
    # verify against the subclass implementations.
    for url in self.urls_queue:
        if url in self._processed_urls:
            continue

        response = simplefetch.get(url, headers=self.base_headers)
        if not response:
            # not available
            print("url: %s, status: %d" % (url, -1))
            continue
        if response.status != 200:
            print("url: %s, status: %d" % (url, response.status))
            continue
        print(url)

        # "headers" is the response's own headers object; the two extra
        # keys below are written into it directly.
        data = {"headers": response.headers, "content": response.content}
        data["headers"]["source_url"] = url
        data["headers"]["status_code"] = response.status

        self._container.put(hashlib.sha1(url).hexdigest(), data)
        self._container.commit()
        self.parse(data)
        self._processed_urls.append(url)

        if self.fetch_delay:
            if self.random_fetch_delay:
                # NOTE(review): int() truncates the jittered value, so a
                # fetch_delay of 1 yields a delay of 0 or 1 second.
                delay = int(random.uniform(0.5, 1.5) * self.fetch_delay)
            else:
                delay = self.fetch_delay
            time.sleep(delay)
def test_fragment(self):
    # GET the test server URL with a '#fragment' suffix; expects status 200.
    url = '%s#fragment' % testall.test_server_host
    response = simplefetch.get(url)
    self.assertEqual(response.status, 200)
def test_get(self):
    # Plain GET against the test server; expects status 200.
    url = testall.test_server_host
    response = simplefetch.get(url)
    self.assertEqual(response.status, 200)
def test_no_env_defined(self):
    # With simplefetch.PROXIES replaced by an empty mapping, a GET to the
    # local server on port 8800 is expected to return status 200.
    simplefetch.PROXIES = {}
    url = 'http://127.0.0.1:8800'
    response = simplefetch.get(url)
    self.assertEqual(response.status, 200)
def test_timeout(self):
    # A GET to the sleep/1 endpoint with a 0.001 timeout is expected to
    # raise simplefetch.ConnectionRequestException.
    url = "%ssleep/1" % testall.test_server_host
    self.assertRaises(
        simplefetch.ConnectionRequestException,
        simplefetch.get,
        url,
        timeout=0.001,
    )
def test_env_defined_but_empty(self):
    # With the 'http' proxy entry present but set to None, a GET to the
    # local server on port 8800 is expected to return status 200.
    simplefetch.PROXIES['http'] = None
    url = 'http://127.0.0.1:8800'
    response = simplefetch.get(url)
    self.assertEqual(response.status, 200)
def test_no_env_defined(self):
    # With simplefetch.PROXIES replaced by an empty mapping, a GET to the
    # local server on port 8800 is expected to return status 200.
    simplefetch.PROXIES = {}
    target = 'http://127.0.0.1:8800'
    response = simplefetch.get(target)
    self.assertEqual(response.status, 200)
def test_ignored_host(self):
    # With an 'http' proxy configured, a GET to a localhost URL is
    # expected to return status 200 (presumably because localhost is
    # excluded from proxying -- confirm against simplefetch).
    proxy_url = 'http://127.0.0.1:8800'
    simplefetch.PROXIES['http'] = proxy_url
    response = simplefetch.get('http://localhost:8800')
    self.assertEqual(response.status, 200)
def test_get_via_proxy(self):
    # With the 'http' proxy entry pointing at the local server on port
    # 8800, a GET to http://www.example.com is expected to return 200.
    proxy_url = 'http://127.0.0.1:8800'
    simplefetch.PROXIES['http'] = proxy_url
    response = simplefetch.get('http://www.example.com')
    self.assertEqual(response.status, 200)
def test_env_defined_but_empty(self):
    # With the 'http' proxy entry present but set to None, a GET to the
    # local server on port 8800 is expected to return status 200.
    simplefetch.PROXIES['http'] = None
    target = 'http://127.0.0.1:8800'
    response = simplefetch.get(target)
    self.assertEqual(response.status, 200)
def test_wrong_basic_auth(self):
    # GET the basic_auth endpoint with credentials that do not match the
    # ones used by the passing tests; expects status 401.
    bad_headers = simplefetch.Headers()
    bad_headers.basic_auth('wrong_username', 'wrong_password')
    url = "%sbasic_auth" % testall.test_server_host
    response = simplefetch.get(url, headers=bad_headers.items())
    self.assertEqual(response.status, 401)
def test_get_via_proxy(self):
    # With the 'http' proxy entry pointing at the local server on port
    # 8800, a GET to http://www.example.com is expected to return 200.
    simplefetch.PROXIES['http'] = 'http://127.0.0.1:8800'
    target = 'http://www.example.com'
    response = simplefetch.get(target)
    self.assertEqual(response.status, 200)
def test_ignored_host(self):
    # With an 'http' proxy configured, a GET to a localhost URL is
    # expected to return status 200 (presumably because localhost is
    # excluded from proxying -- confirm against simplefetch).
    simplefetch.PROXIES['http'] = 'http://127.0.0.1:8800'
    target = 'http://localhost:8800'
    response = simplefetch.get(target)
    self.assertEqual(response.status, 200)
def fetch(url):
    """Fetch *url* with simplefetch and return ``(headers, content)``."""
    response = simplefetch.get(url)
    headers, content = response.headers, response.content
    return (headers, content)