Exemplo n.º 1
0
 def download_resources(self, query, directory, filename_model=None, ids=None, index=1,
                        ids_digit_len=None, index_digit_len=0, duplicate_check=False):
     """Download every resource that ``query`` matches in this page.

     The page is opened with ``self._open()``; if ``self.request`` is falsy
     afterwards nothing is downloaded. Otherwise each URL produced by
     ``search_in_html(self.html, query, self.url)`` is wrapped in a
     ``ResourceGrabber`` and its ``download`` method is called.

     Args:
         query: Selector handed to ``search_in_html``.
         directory: Passed as the first positional argument to ``download``.
         filename_model: Forwarded unchanged to ``download``.
         ids: Forwarded to ``download``; a fresh empty list when omitted.
         index: Forwarded unchanged to ``download``.
         ids_digit_len: Forwarded to ``download``; a fresh empty list when
             omitted.
         index_digit_len: Forwarded unchanged to ``download``.
         duplicate_check: Forwarded unchanged to ``download``.

     Returns:
         None.
     """
     # Build the list defaults per call so no list object is shared between
     # separate invocations.
     if ids is None:
         ids = []
     if ids_digit_len is None:
         ids_digit_len = []

     self._open()
     if not self.request:
         return

     # NOTE(review): every resource receives the same ``index`` value;
     # confirm that ``download`` does not need it to be unique per file.
     for url in search_in_html(self.html, query, self.url):
         rg = ResourceGrabber(url)
         # ``index_digit_len`` must receive its own argument; it was
         # previously given ``ids_digit_len`` by mistake.
         rg.download(directory, filename_model=filename_model, ids=ids, index=index,
                     ids_digit_len=ids_digit_len, index_digit_len=index_digit_len,
                     duplicate_check=duplicate_check)
Exemplo n.º 2
0
 def get_internal_links(self, *args, **kwargs):
     """Yield links found by applying the queries in ``args`` level by level.

     The page is opened with ``self._open()``. Nothing is yielded when
     ``self.request`` is falsy or its ``status_code`` is outside 200-299.
     Otherwise ``args[level]`` (``level`` is read from ``kwargs`` and
     defaults to 0) is run against this page through ``search_in_html``.
     While further queries remain in ``args``, each hit is wrapped in a
     ``ResourceGrabber`` and searched recursively with ``level + 1``; at the
     last query the hits themselves are yielded.
     """
     self._open()
     if not self.request:
         return

     depth = kwargs.get('level', 0)
     status = self.request.status_code
     if not (200 <= status < 300):
         return

     has_deeper_query = len(args) > depth + 1
     for found in search_in_html(self.html, args[depth], self.url):
         # A grabber is created for every hit, even when only the hit
         # itself is yielded, exactly as before.
         child = ResourceGrabber(found)
         if not has_deeper_query:
             yield found
             continue
         for nested in child.get_internal_links(*args, level=depth + 1):
             yield nested
Exemplo n.º 3
0
 def test_relative_urls(self):
     """Selector 'div.relLinks a' with a base URL yields the two text-file URLs."""
     found = search_in_html(self.html, 'div.relLinks a', 'http://foofiles.org/')
     wanted = [
         'http://foofiles.org/text1.txt',
         'http://foofiles.org/text2.txt',
     ]
     self.assertEqual(list(found), wanted)
Exemplo n.º 4
0
 def test_multiple_filter(self):
     """Selector 'div.links a' yields both text-file URLs, in order."""
     found = search_in_html(self.html, 'div.links a')
     wanted = [
         'http://foofiles.org/text1.txt',
         'http://foofiles.org/text2.txt',
     ]
     self.assertEqual(list(found), wanted)
Exemplo n.º 5
0
 def test_basic_filter_textual(self):
     """First result for selector 'div:eq(3)' is the image1.png URL."""
     results = list(search_in_html(self.html, 'div:eq(3)'))
     first = results[0]
     self.assertEqual(first, 'http://fooimages.org/image1.png')
Exemplo n.º 6
0
 def test_basic_filter_ahref(self):
     """First result for selector 'div.links a' is the text1.txt URL."""
     results = list(search_in_html(self.html, 'div.links a'))
     first = results[0]
     self.assertEqual(first, 'http://foofiles.org/text1.txt')