def testFileConnector(self):
    """Fetch a live review page, parse it, and save every parsed record.

    Loads the page definitions from ``../config/tripadvisor.json``, selects
    the ``trip_review_detail`` model configuration, downloads one review
    page, runs it through ``parser.parse_page`` and hands each returned item
    to ``database.save_data``.

    The test makes no assertions about the saved data: it passes as long as
    no step raises. It needs network access, because the page is fetched
    over HTTP.
    """
    from contextlib import closing

    model = "trip_review_detail"
    webconfigs = config.load_pages("../config/tripadvisor.json")
    web_config = config.get_model_config(webconfigs, model)
    test_page = "http://www.tripadvisor.com.sg/ShowUserReviews-g294265-d2516429-r165190313-Auld_Alliance-Singapore.html#REVIEWS"

    # urllib2 responses are not context managers on Python 2, so wrap the
    # response in closing() to release the connection even if read() raises.
    with closing(urllib2.urlopen(test_page)) as response:
        html = response.read()

    # parse_page returns a (data, links) pair; the links are not needed here.
    data, _links = parser.parse_page(html, web_config)
    for edata in data:
        database.save_data(edata, web_config["model"], web_config)
def testParser_trip_review_detail(self):
    """Parse a live review page and print the parser output for inspection.

    Loads the page definitions from ``../config/tripadvisor.json``, selects
    the ``trip_review_detail`` model configuration, downloads one review
    page and runs it through ``parser.parse_page``. The parsed data, the
    extracted links and the length of each are printed.

    The test makes no assertions: it passes as long as no step raises. It
    needs network access, because the page is fetched over HTTP.
    """
    from contextlib import closing

    model = "trip_review_detail"
    pages_config = config.load_pages("../config/tripadvisor.json")
    page_config = config.get_model_config(pages_config, model)

    # Alternative sample URL (not fetched):
    #   http://www.tripadvisor.com.sg/ShowUserReviews-g294265-d2516429-r165190313-Auld_Alliance-Singapore.html
    test_page = "http://www.tripadvisor.com.sg/ShowUserReviews-g294265-d2516429-r139369230-Auld_Alliance-Singapore.html#REVIEWS"

    # urllib2 responses are not context managers on Python 2, so wrap the
    # response in closing() to release the connection even if read() raises.
    with closing(urllib2.urlopen(test_page)) as response:
        html = response.read()

    data, links = parser.parse_page(html, page_config)

    # Single-argument print(...) produces the same output as the Python 2
    # print statement and is also valid syntax on Python 3.
    print(data)
    print(links)
    print(len(data))
    print(len(links))