def parse_index_size(grab):
    """
    Return the number of results reported on a google search results
    page that has been loaded into the `grab` instance.

    The counter text is read from the `#resultStats` node and, when the
    `//div[@id="subform_ctrl"]/div[2]` node is also present, that node's
    text takes precedence.

    Returns 0 when `grab.search` finds the "did not match any documents"
    phrase, and also when neither counter node exists (an error is
    logged in that case).
    """
    if grab.search(u'did not match any documents'):
        return 0

    stats = None
    if len(grab.css_list('#resultStats')) > 0:
        stats = grab.css_text('#resultStats')
    # Checked second on purpose: when both nodes exist this one wins.
    if len(grab.xpath_list('//div[@id="subform_ctrl"]/div[2]')) > 0:
        stats = grab.xpath_text('//div[@id="subform_ctrl"]/div[2]')

    if stats is None:
        logging.error('Unknown google page format')
        return 0

    # Remove thousands separators so the digits form one token.
    stats = stats.replace(',', '').replace('.', '')

    # The total follows the word "about" or, failing that, "of";
    # with neither word present the whole text is scanned.
    for marker in ('about', 'of'):
        if marker in stats:
            stats = stats.split(marker)[1]
            break
    return int(find_number(stats))
def find_node_number(node, ignore_spaces=False, make_int=True):
    """
    Locate a number inside the text content of `node`.

    The node text is obtained with `get_node_text` and then handed to
    `find_number`; `ignore_spaces` and `make_int` are passed through
    to `find_number` unchanged.
    """
    return find_number(
        get_node_text(node),
        ignore_spaces=ignore_spaces,
        make_int=make_int,
    )
def test_find_number(self):
    # Each row is (expected result, input text, keyword options);
    # rows are checked in order and the first mismatch fails the test.
    expectations = [
        (2, '2', {}),
        (2, 'foo 2 4 bar', {}),
        ('2', 'foo 2 4 bar', {'make_int': False}),
        (24, 'foo 2 4 bar', {'ignore_spaces': True}),
        (24, u'бешеный 2 4 барсук', {'ignore_spaces': True}),
    ]
    for expected, source_text, options in expectations:
        self.assertEqual(expected, find_number(source_text, **options))

    # Text without any digits must raise DataNotFound.
    for digitless in ('foo', u'фыва'):
        self.assertRaises(DataNotFound, find_number, digitless)
def number(self, default=NULL, ignore_spaces=False, smart=False,
           make_int=True):
    """
    Find a number in the text returned by `self.text(smart=smart)`.

    `ignore_spaces` and `make_int` are forwarded to `find_number`.
    If an IndexError is raised while getting the text or searching it,
    `default` is returned instead; when `default` is left as NULL the
    error is re-raised.
    """
    try:
        content = self.text(smart=smart)
        return find_number(content, ignore_spaces=ignore_spaces,
                           make_int=make_int)
    except IndexError:
        if default is not NULL:
            return default
        raise
def test_find_number(self):
    # A lone digit, and the first digit group of a longer text.
    ascii_text = 'foo 2 4 bar'
    self.assertEqual(2, find_number('2'))
    self.assertEqual(2, find_number(ascii_text))

    # make_int=False yields the matched digits as a string.
    self.assertEqual('2', find_number(ascii_text, make_int=False))

    # ignore_spaces=True joins digit groups separated by spaces,
    # for both ASCII and non-ASCII surrounding text.
    self.assertEqual(24, find_number(ascii_text, ignore_spaces=True))
    self.assertEqual(
        24, find_number(u'бешеный 2 4 барсук', ignore_spaces=True))

    # Text without any digits must raise DataNotFound.
    self.assertRaises(DataNotFound, find_number, 'foo')
    self.assertRaises(DataNotFound, find_number, u'фыва')
def css_number(self, path, default=NULL, ignore_spaces=False,
               smart=False, make_int=True):
    """
    Find a number in the text of the node matched by the css `path`.

    The text comes from `self.css_text(path, smart=smart)`;
    `ignore_spaces` and `make_int` are forwarded to `find_number`.
    If an IndexError is raised during the lookup or the search,
    `default` is returned instead; when `default` is left as NULL the
    error is re-raised.
    """
    try:
        return find_number(
            self.css_text(path, smart=smart),
            ignore_spaces=ignore_spaces,
            make_int=make_int,
        )
    except IndexError:
        if default is not NULL:
            return default
        raise