def get(self):
    """Fetch the initial form page and parse it into ``self.form_data``.

    When no cookies are held yet, two preliminary requests are made
    (``Default.aspx``, whose response is discarded, then the report manager
    page) and the ``ASP.NET_SessionId`` cookie from the second response is
    stored in ``self.cookies``.  The start URL is then requested with those
    cookies and its content wrapped in a ``FormData``.

    :returns: Always True.
    :rtype: boolean
    """
    session_key = 'ASP.NET_SessionId'

    if self.cookies is None:
        # NOTE(review): presumably this first hit is needed before the site
        # will hand out a session cookie - confirm against the live site.
        get_or_post_a_url(_make_url('Default.aspx', False))
        manager_page = get_or_post_a_url(
            _make_url('ReportManager.aspx?ReportVisibility=1&ReportCategory=0'))
        self.cookies = {session_key: manager_page.cookies.get(session_key)}

    form_page = get_or_post_a_url(self.start_url, cookies=self.cookies)
    self.form_data = FormData(form_page.content)

    # Only derive the action URL once; later calls keep the first value.
    if self.action_url is None:
        self.action_url = _make_url(self.form_data.action)
    return True
def test_02(self):
    """Parse files/ofgem_certificate_search.html and check ``__ASYNCPOST``.

    The saved HTML page is read from the ``files`` directory next to the
    tests, parsed with ``FormData``, and the ``__ASYNCPOST`` element is
    expected to be present with the value ``'true'``.
    """
    fnn = os.path.join(self.HERE, 'files', 'ofgem_certificate_search.html')
    with open(fnn, 'r') as cfh:
        content = cfh.read()
    self.assertIsNotNone(content)

    ofd = FormData(content)
    self.assertIsInstance(ofd, FormData)
    # assertIn reports the missing key and the container on failure, which
    # assertTrue(x in y) does not.
    self.assertIn('__ASYNCPOST', ofd.elements)
    self.assertEqual(ofd.elements['__ASYNCPOST'], {'value': 'true'})
def get(self):
    """Retrieve the first version of the form from the website.

    If ``self.cookies`` is still None, ``Default.aspx`` and then the report
    manager page are requested, and the ``ASP.NET_SessionId`` cookie of the
    latter response becomes ``self.cookies``.  The start URL is then fetched
    with those cookies, its content is parsed into ``self.form_data`` and,
    if not already set, ``self.action_url`` is built from the form action.

    :returns: Always True.
    :rtype: boolean
    """
    need_session = self.cookies is None
    if need_session:
        landing_url = _make_url('Default.aspx', False)
        get_or_post_a_url(landing_url)  # response intentionally unused

        reports_url = _make_url(
            'ReportManager.aspx?ReportVisibility=1&ReportCategory=0')
        reports_response = get_or_post_a_url(reports_url)
        session_id = reports_response.cookies.get('ASP.NET_SessionId')
        self.cookies = {'ASP.NET_SessionId': session_id}

    start_response = get_or_post_a_url(self.start_url, cookies=self.cookies)
    self.form_data = FormData(start_response.content)

    if self.action_url is None:
        self.action_url = _make_url(self.form_data.action)
    return True
def test_01(self):
    """Parse and test files/ofgem_station_search.html (this will take a while...).

    The saved station search page is parsed with ``FormData``; the test
    checks the total number of elements, the presence of a sample of
    element names and the value of the ``__ASYNCPOST`` element.
    """
    fnn = os.path.join(self.HERE, 'files', 'ofgem_station_search.html')
    with open(fnn, 'r') as cfh:
        content = cfh.read()
    self.assertIsNotNone(content)

    ofd = FormData(content)
    self.assertIsInstance(ofd, FormData)
    self.assertEqual(len(ofd.elements), 117)

    # Check for some elements...
    expected_names = [
        '__VIEWSTATE',
        'ReportViewer$ctl03$ctl00',
        'ReportViewer$ctl11',
        'ReportViewer$AsyncWait$HiddenCancelField',
        'ReportViewer$ctl04$ctl03$ddValue',
        'ReportViewer$ctl04$ctl05$txtValue',
        'ReportViewer$ctl04$ctl25$cbNull',
    ]
    for name in expected_names:
        # assertIn names the missing element in the failure message.
        self.assertIn(name, ofd.elements)

    self.assertIn('__ASYNCPOST', ofd.elements)
    self.assertEqual(ofd.elements['__ASYNCPOST'], {'value': 'true'})
class OfgemForm(object):
    """Class to represent an instance of an Ofgem form.

    Typical use is ``get()`` to download and parse the form, ``set_value()``
    to change fields, ``submit()`` to run the report and download the export,
    and ``save_original()`` to write the downloaded export to disk.

    Methods report failure by returning False (or None for ``_do_post``)
    rather than raising.
    """

    def __init__(self, url):
        """Record the start URL and initialise all state to "not fetched".

        :param url: Form location, passed through ``_make_url``.
        """
        self.start_url = _make_url(url)
        self.cookies = None
        self.action_url = None
        self.form_data = None
        # NOTE(review): export_url is never assigned anywhere in this class;
        # submit() reads the export URL from form_data instead.
        self.export_url = None
        self.raw_data = None
        self.logger = logging.getLogger(__name__)

    def get(self):
        """Attempt to get the initial version of the form from the website.

        When no cookies are held, ``Default.aspx`` and the report manager
        page are requested first and the ``ASP.NET_SessionId`` cookie from
        the latter is stored.  The start URL is then fetched and parsed into
        ``self.form_data``.

        :returns: Always True.
        :rtype: boolean
        """
        if self.cookies is None:
            get_or_post_a_url(_make_url('Default.aspx', False))
            response = get_or_post_a_url(
                _make_url('ReportManager.aspx?ReportVisibility=1&ReportCategory=0'))
            self.cookies = {
                'ASP.NET_SessionId': response.cookies.get('ASP.NET_SessionId')
            }

        response = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(response.content)
        if self.action_url is None:
            self.action_url = _make_url(self.form_data.action)
        return True

    def update(self):
        """Post the current form data and update the form from the response.

        :returns: False if the post could not be made, otherwise whatever
                  ``form_data.update`` returns for the response content.
        """
        response = self._do_post()
        if response is None:
            return False
        return self.form_data.update(response.content)

    def submit(self):
        """Submit the form and download the XML export into ``raw_data``.

        Sets the 'Page Size' field to '25', posts the form as a submit,
        updates the form from the response and then fetches the export URL
        with 'XML' appended.

        :returns: True when the export was downloaded, otherwise False.
        :rtype: boolean
        """
        if self.form_data is None:
            # Without this guard the attribute access below raises
            # AttributeError when get() has not been called.
            self.logger.info("Unable to submit as no form is available. Did you call get()?")
            return False

        is_set, _ = self.form_data.set_value_by_label('Page Size', '25')
        if is_set is False:
            return False

        response = self._do_post(True)
        if response is None:
            return False

        if self.form_data.update(response.content) is False:
            self.logger.warning("Submit failed :-(")
            return False

        if self.form_data.export_url is None:
            self.logger.warning("Unable to find the export url. \nCannot continue.")
            return False

        export_url = _make_url(self.form_data.export_url) + 'XML'
        response = get_or_post_a_url(export_url, cookies=self.cookies)
        self.raw_data = response.content
        return True

    def save_original(self, filename):
        """Save the original, downloaded source into the filename provided.

        The data is written unmodified; text (non-bytes) data is encoded as
        UTF-8 first.

        :param filename: Filename to save the file to.
        :returns: True or False
        :rtype: boolean
        """
        if self.raw_data is None:
            return False
        payload = self.raw_data
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        # The etree modules expose no module-level write(); the raw download
        # is written directly instead.
        with open(filename, 'wb') as handle:
            handle.write(payload)
        return True

    def set_value(self, lbl, value):
        """Set the form field with label ``lbl`` to ``value``.

        If ``form_data.set_value_by_label`` reports that the value was set
        and that a callback is required, the form is posted via ``update()``.

        :param lbl: Label of the field to change.
        :param value: Value to set.
        :returns: The result of ``update()`` when one was needed, otherwise
                  whether the value was set.  False when no form is loaded.
        """
        if self.form_data is None:
            self.logger.info("Unable to set a value as no form is available. Did you call get()?")
            return False

        is_set, cb_rqd = self.form_data.set_value_by_label(lbl, value)
        self.logger.debug("set_value_by_label [%s] -> %s, %s", lbl, is_set, cb_rqd)
        if is_set and cb_rqd:
            return self.update()
        return is_set

    def _do_post(self, submit=False):
        """Post the form data to the form's action URL.

        Given how slow the parsing of a 3M HTML page is, the
        ``X-MicrosoftAjax: Delta=true`` header is sent to try and get smaller
        blocks for processing.

        :param submit: Passed to ``form_data.as_post_data``.
        :returns: The response object, or None when there is no form or no
                  action URL to post to.
        """
        # Check form_data itself first: before get() is called it is None and
        # has no ``action`` attribute.
        if self.form_data is None or self.form_data.action is None:
            self.logger.info("Unable to post due no action URL available. Did you call get()?")
            return None

        action_url = _make_url(self.form_data.action)
        form_hdrs = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla',
            'X-Requested-With': 'XMLHttpRequest',
            'X-MicrosoftAjax': 'Delta=true',
            'Referer': unquote(action_url),
        }

        post_dict = self.form_data.as_post_data(submit=submit)
        # NOTE(review): keys/values are joined as-is - presumably
        # as_post_data already URL-quotes them; confirm.
        post_data = "&".join(
            "{}={}".format(key, value) for key, value in post_dict.items())

        return get_or_post_a_url(action_url,
                                 post=True,
                                 cookies=self.cookies,
                                 headers=form_hdrs,
                                 data=post_data)
class OfgemForm(object):
    """Class to represent an instance of an Ofgem form.

    The form is downloaded and parsed with ``get()``, fields are changed
    with ``set_value()``, ``submit()`` runs the report and downloads the
    export, and ``save_original()`` writes that download to disk.

    Failures are signalled by a False return value (None for ``_do_post``)
    rather than by exceptions.
    """

    def __init__(self, url):
        """Store the start URL and reset all fetched state.

        :param url: Form location, passed through ``_make_url``.
        """
        self.start_url = _make_url(url)
        self.cookies = None
        self.action_url = None
        self.form_data = None
        # NOTE(review): nothing in this class ever assigns export_url;
        # submit() uses form_data.export_url instead.
        self.export_url = None
        self.raw_data = None
        self.logger = logging.getLogger(__name__)

    def get(self):
        """Attempt to get the initial version of the form from the website.

        If no cookies are held, ``Default.aspx`` and the report manager page
        are requested and the ``ASP.NET_SessionId`` cookie of the latter is
        kept.  The start URL is then requested and parsed into
        ``self.form_data``.

        :returns: Always True.
        :rtype: boolean
        """
        if self.cookies is None:
            get_or_post_a_url(_make_url('Default.aspx', False))
            response = get_or_post_a_url(
                _make_url(
                    'ReportManager.aspx?ReportVisibility=1&ReportCategory=0'))
            self.cookies = {
                'ASP.NET_SessionId': response.cookies.get('ASP.NET_SessionId')
            }

        response = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(response.content)
        if self.action_url is None:
            self.action_url = _make_url(self.form_data.action)
        return True

    def update(self):
        """Post the current form data and refresh the form from the response.

        :returns: False when no post could be made, otherwise the result of
                  ``form_data.update`` on the response content.
        """
        response = self._do_post()
        if response is None:
            return False
        return self.form_data.update(response.content)

    def submit(self):
        """Submit the form and store the downloaded XML export in ``raw_data``.

        The 'Page Size' field is set to '25', the form is posted as a
        submit, the form is updated from the response, and the export URL
        with 'XML' appended is then downloaded.

        :returns: True if the export was downloaded, otherwise False.
        :rtype: boolean
        """
        if self.form_data is None:
            # Guard against AttributeError when get() was never called.
            self.logger.info(
                "Unable to submit as no form is available. Did you call get()?"
            )
            return False

        is_set, _ = self.form_data.set_value_by_label('Page Size', '25')
        if is_set is False:
            return False

        response = self._do_post(True)
        if response is None:
            return False

        if self.form_data.update(response.content) is False:
            self.logger.warning("Submit failed :-(")
            return False

        if self.form_data.export_url is None:
            self.logger.warning(
                "Unable to find the export url. \nCannot continue.")
            return False

        export_url = _make_url(self.form_data.export_url) + 'XML'
        response = get_or_post_a_url(export_url, cookies=self.cookies)
        self.raw_data = response.content
        return True

    def save_original(self, filename):
        """Save the original, downloaded source into the filename provided.

        The download is written as-is; if it is text rather than bytes it
        is encoded as UTF-8 before writing.

        :param filename: Filename to save the file to.
        :returns: True or False
        :rtype: boolean
        """
        if self.raw_data is None:
            return False
        payload = self.raw_data
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        # There is no module-level etree.write(); write the bytes directly.
        with open(filename, 'wb') as handle:
            handle.write(payload)
        return True

    def set_value(self, lbl, value):
        """Set the form field labelled ``lbl`` to ``value``.

        When ``form_data.set_value_by_label`` reports both that the value
        was set and that a callback is required, ``update()`` is called.

        :param lbl: Label of the field to change.
        :param value: Value to set.
        :returns: The result of ``update()`` if one was required, otherwise
                  whether the value was set.  False when no form is loaded.
        """
        if self.form_data is None:
            self.logger.info(
                "Unable to set a value as no form is available. Did you call get()?"
            )
            return False

        is_set, cb_rqd = self.form_data.set_value_by_label(lbl, value)
        self.logger.debug("set_value_by_label [%s] -> %s, %s", lbl, is_set,
                          cb_rqd)
        if is_set and cb_rqd:
            return self.update()
        return is_set

    def _do_post(self, submit=False):
        """Post the form data to the form's action URL.

        Given how slow the parsing of a 3M HTML page is, the
        ``X-MicrosoftAjax: Delta=true`` header is sent to try and get smaller
        blocks for processing.

        :param submit: Passed to ``form_data.as_post_data``.
        :returns: The response object, or None if there is no form or no
                  action URL available.
        """
        # form_data is None until get() runs, so test it before ``action``.
        if self.form_data is None or self.form_data.action is None:
            self.logger.info(
                "Unable to post due no action URL available. Did you call get()?"
            )
            return None

        action_url = _make_url(self.form_data.action)
        form_hdrs = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla',
            'X-Requested-With': 'XMLHttpRequest',
            'X-MicrosoftAjax': 'Delta=true',
            'Referer': unquote(action_url),
        }

        post_dict = self.form_data.as_post_data(submit=submit)
        # NOTE(review): no URL-encoding happens here - presumably
        # as_post_data returns already-quoted keys and values; confirm.
        post_data = "&".join(
            "{}={}".format(key, value) for key, value in post_dict.items())

        return get_or_post_a_url(action_url,
                                 post=True,
                                 cookies=self.cookies,
                                 headers=form_hdrs,
                                 data=post_data)