def page_hybridflightresults_aspx(request):
    """Fetch the flight results page and parse the itineraries embedded in it.

    The query string is built from ``request`` and handed to the
    ``avsl_parsers/webjet_com_au_get_content.php`` script through
    ``utils.run_php_script``.  The returned markup is searched for the two
    ``jQuery.parseJSON('...')`` payloads that follow the ``devFooter`` span,
    and each payload is passed to ``page_hybridflightresults_aspx_parse_json``.

    Args:
        request: Mapping with the keys ``origin_iata``, ``destination_iata``,
            ``depart_date``, ``return_date``, ``trip_class``, ``adults``,
            ``children`` and ``infants``.  When ``return_date`` is ``None``
            it is overwritten in place with ``depart_date``.

    Returns:
        ``(False, message)`` when the script answers ``timeout`` or the
        expected markup cannot be located.  Otherwise the visible code falls
        off the end of the function and implicitly returns ``None``.
    """
    if request["return_date"] is None:
        request["return_date"] = request["depart_date"]

    get_params = {
        'TripType': 'Return',
        'CityFrom': request['origin_iata'],
        'DateOut': request['depart_date'].replace('-', '%2F'),
        'CityTo': request['destination_iata'].replace(' ', '+'),
        'DateBack': request['return_date'].replace('-', '%2F'),
        'TravelClass': request['trip_class'],
        'NumAdult': request['adults'],
        'NumChild': request['children'],
        'NumInfant': request['infants'],
        'EntryPoint': 'Flight',
        'RequestFrom': 'Outside',
    }
    # %-formatting rather than ``+`` so that non-string values (for example
    # integer passenger counts) do not raise TypeError.
    query = '&'.join('%s=%s' % (k, v) for k, v in get_params.items())

    content = utils.run_php_script(
        'avsl_parsers/webjet_com_au_get_content.php', query)
    if content.strip() == 'timeout':
        return False, "Airline server is not responding"

    try:
        g = Grab()
        g.response.body = content
        # Raw literals: same pattern text as before, without relying on
        # unrecognised backslash escapes in a plain string.
        pt = (
            r'<span id="devFooter".*'
            r"jQuery\.parseJSON\('(.*)'\), "
            r"jQuery\.parseJSON\('(.*)'\)\];"
        )
        m = g.rex(re.compile(pt, re.M | re.S))
    except Exception:
        return False, 'Has Been A Changes in Airline Service'

    r = []
    try:
        r.extend(
            page_hybridflightresults_aspx_parse_json(request, m.group(1), 0))
        # The second payload is the literal text ``null`` when absent.
        if m.group(2) != 'null':
            r.extend(
                page_hybridflightresults_aspx_parse_json(
                    request, m.group(2), 1))
    except Exception as e:
        # Best effort: a parsing failure is reported and otherwise ignored.
        print("Parsing Exception " + str(e))
    # NOTE(review): ``r`` is collected but never returned in the code visible
    # here -- confirm whether a trailing ``return`` is missing.
def test_declaration_bug(self):
    """
    Regression check for XML input that carries an XML declaration.

    1. Build a Grab instance from XML that has an XML declaration
    2. Call a text lookup method (search, then rex)
    3. Call xpath
    4. The bug showed up as: ValueError: Unicode strings with encoding
       declaration are not supported.
    """
    document = (
        '<?xml version="1.0" encoding="UTF-8"?>'
        '<tree><leaf>text</leaf></tree>'
    )

    # The same sequence is run once per lookup method, each time on a
    # freshly built Grab instance.
    for lookup_name in ('search', 'rex'):
        grab = Grab(document)
        lookup = getattr(grab, lookup_name)
        self.assertTrue(lookup(u'text'))
        leaf = grab.xpath_one('//leaf')
        self.assertEqual(leaf.text, u'text')
def page_hybridflightresults_aspx(request):
    """Fetch the flight results page and parse the itineraries embedded in it.

    The query string is built from ``request`` and handed to the
    ``avsl_parsers/webjet_com_au_get_content.php`` script through
    ``utils.run_php_script``.  The returned markup is searched for the two
    ``jQuery.parseJSON('...')`` payloads that follow the ``devFooter`` span,
    and each payload is passed to ``page_hybridflightresults_aspx_parse_json``.

    Args:
        request: Mapping with the keys ``origin_iata``, ``destination_iata``,
            ``depart_date``, ``return_date``, ``trip_class``, ``adults``,
            ``children`` and ``infants``.  When ``return_date`` is ``None``
            it is overwritten in place with ``depart_date``.

    Returns:
        ``(False, message)`` when the script answers ``timeout`` or the
        expected markup cannot be located.  Otherwise the visible code falls
        off the end of the function and implicitly returns ``None``.
    """
    if request["return_date"] is None:
        request["return_date"] = request["depart_date"]

    get_params = {
        'TripType': 'Return',
        'CityFrom': request['origin_iata'],
        'DateOut': request['depart_date'].replace('-', '%2F'),
        'CityTo': request['destination_iata'].replace(' ', '+'),
        'DateBack': request['return_date'].replace('-', '%2F'),
        'TravelClass': request['trip_class'],
        'NumAdult': request['adults'],
        'NumChild': request['children'],
        'NumInfant': request['infants'],
        'EntryPoint': 'Flight',
        'RequestFrom': 'Outside',
    }
    # %-formatting rather than ``+`` so that non-string values (for example
    # integer passenger counts) do not raise TypeError.
    query = '&'.join('%s=%s' % (k, v) for k, v in get_params.items())

    content = utils.run_php_script(
        'avsl_parsers/webjet_com_au_get_content.php', query)
    if content.strip() == 'timeout':
        return False, "Airline server is not responding"

    try:
        g = Grab()
        g.response.body = content
        # Raw literals: same pattern text as before, without relying on
        # unrecognised backslash escapes in a plain string.
        pt = (
            r'<span id="devFooter".*'
            r"jQuery\.parseJSON\('(.*)'\), "
            r"jQuery\.parseJSON\('(.*)'\)\];"
        )
        m = g.rex(re.compile(pt, re.M | re.S))
    except Exception:
        return False, 'Has Been A Changes in Airline Service'

    r = []
    try:
        r.extend(
            page_hybridflightresults_aspx_parse_json(request, m.group(1), 0))
        # The second payload is the literal text ``null`` when absent.
        if m.group(2) != 'null':
            r.extend(
                page_hybridflightresults_aspx_parse_json(
                    request, m.group(2), 1))
    except Exception as e:
        # Best effort: a parsing failure is reported and otherwise ignored.
        print("Parsing Exception " + str(e))
    # NOTE(review): ``r`` is collected but never returned in the code visible
    # here -- confirm whether a trailing ``return`` is missing.
class TextExtensionTest(TestCase):
    """Tests for Grab's text helpers against a fake cp1251 response."""

    def setUp(self):
        SERVER.reset()

        # Build a Grab instance and feed it a fake response, so that no
        # request has to be made.
        self.g = Grab(transport=GRAB_TRANSPORT)
        self.g.fake_response(HTML, charset='cp1251')

    def test_search(self):
        # Byte-string needle with byte=True, unicode needle by default.
        encoded_needle = u'фыва'.encode('cp1251')
        self.assertTrue(self.g.search(encoded_needle, byte=True))
        self.assertTrue(self.g.search(u'фыва'))
        self.assertFalse(self.g.search(u'фыва2'))

    def test_search_usage_errors(self):
        # Needle type and the ``byte`` flag do not agree in either call.
        self.assertRaises(GrabMisuseError,
                          self.g.search, u'фыва', byte=True)
        self.assertRaises(GrabMisuseError,
                          self.g.search, 'фыва')

    def test_rex(self):
        # Search unicode rex in unicode body - default case
        pattern = re.compile(u'(фыва)', re.U)
        found = self.g.rex(pattern)
        self.assertEqual(u'фыва', found.group(1))

        # Search non-unicode rex in byte-string body
        encoded_word = u'фыва'.encode('cp1251')
        pattern = re.compile(u'(фыва)'.encode('cp1251'))
        found = self.g.rex(pattern, byte=True)
        self.assertEqual(encoded_word, found.group(1))

        # Search for non-unicode rex in unicode body should fail
        pattern = re.compile('(фыва)')
        self.assertRaises(DataNotFound, self.g.rex, pattern)

        # Search for unicode rex in byte-string body should fail
        pattern = re.compile(u'фыва', re.U)
        self.assertRaises(DataNotFound, self.g.rex, pattern, byte=True)

        # Search for a non-existent fragment
        pattern = re.compile(u'(фыва2)', re.U)
        self.assertRaises(DataNotFound, self.g.rex, pattern)

    def test_assert_substring(self):
        self.g.assert_substring(u'фыва')
        self.g.assert_substring(u'фыва'.encode('cp1251'), byte=True)
        self.assertRaises(DataNotFound,
                          self.g.assert_substring, u'фыва2')

    def test_assert_substrings(self):
        only_present = (u'фыва',)
        self.g.assert_substrings(only_present)

        absent_then_present = (u'фывы нет', u'фыва')
        self.g.assert_substrings(absent_then_present)

        byte_candidates = (u'фыва'.encode('cp1251'), 'где ты фыва?')
        self.g.assert_substrings(byte_candidates, byte=True)

        all_absent = (u'фыва, вернись', u'фыва-а-а-а')
        self.assertRaises(DataNotFound,
                          self.g.assert_substrings, all_absent)

    def test_assert_rex(self):
        self.g.assert_rex(re.compile(u'фыва'))

        byte_pattern = re.compile(u'фыва'.encode('cp1251'))
        self.g.assert_rex(byte_pattern, byte=True)

        missing_pattern = re.compile(u'фыва2')
        self.assertRaises(DataNotFound,
                          self.g.assert_rex, missing_pattern)

    def test_assert_rex_text(self):
        # rex_text is given the pattern as a plain string here.
        captured = self.g.rex_text('<em id="fly-em">([^<]+)')
        self.assertEqual(u'ха', captured)