def from_url(self, url, discard=None, data_found_at=None, next_url_generators=None):
    """Set up the URL extractor, data sanitizer and informer for ``url``.

    Args:
        url: Forwarded to ``UrlExtractor`` as its first argument.
        discard: Forwarded to ``DataSanitizer`` as its first argument.
        data_found_at: Forwarded to ``DataSanitizer`` as its second argument.
        next_url_generators: Forwarded to ``UrlExtractor`` as its second
            argument. When omitted (or ``None``), a new empty list is used.

    Returns:
        ``self``.
    """
    # A list literal as the default would be created once and shared by
    # every call that omits the argument; build a fresh one per call instead.
    if next_url_generators is None:
        next_url_generators = []

    self._url_extractor = UrlExtractor(url, next_url_generators)
    self._data_sanitizer = DataSanitizer(discard, data_found_at)
    self._informer = Informer(self._username, self._password)
    return self
def test_get_next_url_should_return_none_in_case_of_error(self):
    """get_next_url returns None when no descriptor path resolves in the data.

    Checked for a single descriptor and for a list of descriptors, none of
    which match the payload.
    """
    unresolvable_descriptors = (
        ['links', 'other', 'url'],
        [['links', 'other', 'url'], ['self', 'other', 'urllib2']],
    )
    for descriptor in unresolvable_descriptors:
        extractor = UrlExtractor(descriptor)
        # Build a fresh payload for each call so the extractors never share data.
        payload = {'links': {'self': {'url': 'http://localhost'}}}
        self.assertIsNone(extractor.get_next_url(payload))
def test_get_next_url_should_return_url_from_data_passed_in_if_descriptor_is_list(
        self):
    """A list descriptor is followed key by key through the data passed in."""
    url_extractor = UrlExtractor(['links', 'self', 'url'])
    data = {'links': {'self': {'url': 'http://localhost'}}}
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual('http://localhost', url_extractor.get_next_url(data))
def test_get_next_url_should_return_url_using_a_list_of_descriptors(self):
    """With several descriptors, the URL comes from one that matches the data.

    The first descriptor does not resolve in the payload; the second does.
    """
    url_extractor = UrlExtractor([['links', 'other', 'url'],
                                  ['links', 'self', 'url']])
    data = {'links': {'self': {'url': 'http://localhost'}}}
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual('http://localhost', url_extractor.get_next_url(data))
def test_get_next_url_should_call_apply_all_rules_on_url(self):
    """Every next-URL generator is applied, each fed the previous one's output.

    The first rule must receive the configured URL, the second rule must
    receive the first rule's result, and get_next_url must return the
    second rule's result.
    """
    def_rule = Mock(spec=DefaultRule)
    def_rule.apply_rule_to.return_value = 'http://localhost/2020'
    generation_rule = Mock(spec=LimitOffset)
    generation_rule.apply_rule_to.return_value = 'http://localhost?limit=5&offset=10'

    url_extractor = UrlExtractor(
        'http://localhost',
        next_url_generators=[def_rule, generation_rule])
    url = url_extractor.get_next_url()

    def_rule.apply_rule_to.assert_called_with('http://localhost')
    generation_rule.apply_rule_to.assert_called_with('http://localhost/2020')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual('http://localhost?limit=5&offset=10', url)
def test_get_next_url_should_return_url_if_set(self):
    """A string descriptor is returned as the next URL when no data is given."""
    url_extractor = UrlExtractor('http://localhost')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual('http://localhost', url_extractor.get_next_url())
def test_shouldnt_accept_anything_other_than_list_or_string_for_url_descriptor_rest_can_be(
        self):
    """Constructing UrlExtractor with a dict descriptor raises AssertionError."""
    invalid_descriptor = {'hello': 'world'}
    self.assertRaises(AssertionError, UrlExtractor, invalid_descriptor)
def test_shouldaccept_string_or_list(self):
    """UrlExtractor stores a string or list descriptor unchanged."""
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    url_extractor = UrlExtractor('http://localhost')
    self.assertEqual('http://localhost', url_extractor._url_descriptor)

    url_extractor = UrlExtractor(['hello', 'world'])
    self.assertEqual(['hello', 'world'], url_extractor._url_descriptor)
def test_get_next_url_should_throw_error_if_data_passed_in_is_none_and_descriptor_is_list(
        self):
    """get_next_url without data raises AssertionError for a list descriptor."""
    extractor = UrlExtractor(['links', 'self', 'url'])
    self.assertRaises(AssertionError, extractor.get_next_url)