Пример #1
0
 def test_url(self):
     """Round-trip a URL and its parameters through encodeurl/parseurl.

     Encodes a URL plus a parameter dict with ``DefaultScraper.encodeurl``
     using the ``'POST'`` method, checks that the encoded string contains
     the ``'<args>'`` marker, and verifies that ``DefaultScraper.parseurl``
     gives back the original URL and parameter dict.
     """
     url = 'http://www.google.com'
     data = {'a': 1, 'b': 2, 'c': 3}
     full = DefaultScraper.encodeurl('POST', url, data)
     # assertIn reports both operands on failure, whereas
     # assertTrue(x in y) only says "False is not true".
     self.assertIn('<args>', full)
     url2, data2 = DefaultScraper.parseurl(full)
     self.assertEqual(url, url2)
     self.assertEqual(data, data2)
Пример #2
0
 def _parsepage(self, page, oriurl):
     """Parse a result-summary string and queue the remaining result pages.

     The summary is the text of *page* between the first ``'> '`` and the
     last five characters, stripped and with commas removed. When it
     starts with ``'1-'``, the number after ``' of '`` is read as the
     total, a ``Phase`` is built from the ``'fromDate'``, ``'toDate'`` and
     ``'KEYWORDS'`` entries decoded from *oriurl*, and one encoded POST
     URL per page from 2 through ``phase.pages`` is handed to
     ``self._spider.addtask``.

     Returns the ``Phase`` in that case, otherwise ``None``.
     Raises ``ValueError`` if *page* does not contain ``'> '``.
     """
     start = page.index('> ') + 2
     # str.replace is a no-op when there is no comma, so no pre-check needed.
     summary = page[start:-5].strip().replace(',', '')
     if not summary.startswith('1-'):
         return None

     base_url, params = DefaultScraper.parseurl(oriurl)
     search_term = params['KEYWORDS']
     hit_count = int(summary.split(' of ')[1])
     phase = Phase(params['fromDate'], params['toDate'],
                   search_term, hit_count)

     last_page = phase.pages
     followups = []
     for page_no in xrange(2, last_page + 1):
         # The same dict is reused; only the page number changes per URL.
         params['page_no'] = page_no
         encoded = DefaultScraper.encodeurl('POST', base_url, params)
         followups.append(encoded)
     self._spider.addtask(followups)
     return phase
Пример #3
0
 def _parsekeyword(self, oriurl):
     """Return the ``'KEYWORDS'`` entry of the data decoded from *oriurl*.

     *oriurl* is decoded with ``DefaultScraper.parseurl``; the URL part of
     the result is discarded. Raises ``KeyError`` if the decoded data has
     no ``'KEYWORDS'`` entry.
     """
     _unused_url, params = DefaultScraper.parseurl(oriurl)
     keyword = params['KEYWORDS']
     return keyword