Пример #1
0
    def get(self):
        """ Fetch the initial version of the form from the website.

        When no cookies are held yet, two preliminary requests are made and
        the 'ASP.NET_SessionId' cookie from the second response is kept for
        subsequent requests. The start URL is then requested and its content
        parsed into ``self.form_data``. ``self.action_url`` is filled in from
        the parsed form only if it has not been set already.

        :returns: Always True
        :rtype: boolean
        """
        if self.cookies is None:
            # The response to this first request is discarded.
            # NOTE(review): presumably it primes the server-side session - confirm.
            landing_url = _make_url('Default.aspx', False)
            get_or_post_a_url(landing_url)

            manager_url = _make_url(
                'ReportManager.aspx?ReportVisibility=1&ReportCategory=0')
            manager_response = get_or_post_a_url(manager_url)
            session_id = manager_response.cookies.get('ASP.NET_SessionId')
            self.cookies = {'ASP.NET_SessionId': session_id}

        form_page = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(form_page.content)

        if self.action_url is None:
            form_action = self.form_data.action
            self.action_url = _make_url(form_action)
        return True
Пример #2
0
    def test_02(self):
        """ Parse and test files/ofgem_certificate_search.html

        Reads the saved page from the 'files' directory under ``self.HERE``,
        builds a FormData from its content and checks that the
        '__ASYNCPOST' element is present with the value 'true'.
        """
        fnn = os.path.join(self.HERE, 'files', 'ofgem_certificate_search.html')
        with open(fnn, 'r') as cfh:
            content = cfh.read()
        self.assertIsNotNone(content)
        ofd = FormData(content)
        self.assertIsInstance(ofd, FormData)

        # assertIn names the missing key and the container when it fails,
        # where assertTrue would only report "False is not true".
        self.assertIn('__ASYNCPOST', ofd.elements)
        self.assertEqual(ofd.elements['__ASYNCPOST'], {'value': 'true'})
Пример #3
0
    def get(self):
        """ Retrieve the starting form page and parse it.

        If ``self.cookies`` is still None, 'Default.aspx' and then the
        report manager page are requested, and the 'ASP.NET_SessionId' cookie
        of the latter response becomes the cookie set used from then on.
        The content returned for ``self.start_url`` is wrapped in a FormData
        and stored as ``self.form_data``; ``self.action_url`` is derived from
        the form's action unless it already has a value.

        :returns: Always True
        :rtype: boolean
        """
        cookie_name = 'ASP.NET_SessionId'
        if self.cookies is None:
            # Result of the first request is intentionally not used.
            get_or_post_a_url(_make_url('Default.aspx', False))
            report_manager = _make_url('ReportManager.aspx?ReportVisibility=1&ReportCategory=0')
            reply = get_or_post_a_url(report_manager)
            self.cookies = {cookie_name: reply.cookies.get(cookie_name)}

        page = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(page.content)
        if self.action_url is not None:
            return True
        self.action_url = _make_url(self.form_data.action)
        return True
Пример #4
0
    def test_01(self):
        """ Parse and test files/ofgem_station_search.html (this will take a while...)

        Reads the saved page from the 'files' directory under ``self.HERE``,
        builds a FormData from it, then checks the total element count, the
        presence of a sample of element names and the '__ASYNCPOST' value.
        """
        fnn = os.path.join(self.HERE, 'files', 'ofgem_station_search.html')
        with open(fnn, 'r') as cfh:
            content = cfh.read()
        self.assertIsNotNone(content)

        ofd = FormData(content)
        self.assertIsInstance(ofd, FormData)
        self.assertEqual(len(ofd.elements), 117)
        # Check for some elements...
        expected_names = ('__VIEWSTATE',
                          'ReportViewer$ctl03$ctl00',
                          'ReportViewer$ctl11',
                          'ReportViewer$AsyncWait$HiddenCancelField',
                          'ReportViewer$ctl04$ctl03$ddValue',
                          'ReportViewer$ctl04$ctl05$txtValue',
                          'ReportViewer$ctl04$ctl25$cbNull')
        for name in expected_names:
            # assertIn names the missing element in the failure message.
            self.assertIn(name, ofd.elements)

        self.assertIn('__ASYNCPOST', ofd.elements)
        self.assertEqual(ofd.elements['__ASYNCPOST'], {'value': 'true'})
Пример #5
0
class OfgemForm(object):
    """ Class to represent an instance of an Ofgem form.

    As used within this class: :meth:`get` downloads and parses the form,
    :meth:`set_value` changes a field (posting an update when required),
    :meth:`submit` posts the form and downloads the export into
    ``raw_data``, and :meth:`save_original` writes that download to a file.
    """

    def __init__(self, url):
        """ Record where the form lives; the form is only fetched by get().

        :param url: Location of the form, passed through ``_make_url``.
        """
        self.start_url = _make_url(url)
        # Cookie dict sent with requests; filled in by get() on first use.
        self.cookies = None
        # URL derived from the parsed form's action; set by get().
        self.action_url = None
        # FormData built from the downloaded page; None until get() is called.
        self.form_data = None
        # Not assigned anywhere in this class after construction.
        self.export_url = None
        # Content of the export download; set by a successful submit().
        self.raw_data = None
        self.logger = logging.getLogger(__name__)

    def get(self):
        """ Attempt to get the initial version of the form from the website.

        When no cookies are held, 'Default.aspx' and the report manager page
        are requested first and the 'ASP.NET_SessionId' cookie from the
        latter response is kept for later requests.

        :returns: Always True
        :rtype: boolean
        """
        if self.cookies is None:
            # The response to this first request is discarded.
            # NOTE(review): presumably it primes the server-side session - confirm.
            get_or_post_a_url(_make_url('Default.aspx', False))
            response = get_or_post_a_url(_make_url('ReportManager.aspx?ReportVisibility=1&ReportCategory=0'))
            self.cookies = {'ASP.NET_SessionId': response.cookies.get('ASP.NET_SessionId')}

        response = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(response.content)
        if self.action_url is None:
            self.action_url = _make_url(self.form_data.action)
        return True

    def update(self):
        """ Post the current form data and update the form from the response.
            Given how slow the parsing of a 3M HTML page is, try and use the
            X-MicrosoftAjax: Delta=true header to get smaller blocks for processing.

        :returns: False if the post could not be made, otherwise whatever
                  ``form_data.update()`` returns for the response content.
        """
        response = self._do_post()
        if response is None:
            return False
        return self.form_data.update(response.content)

    def submit(self):
        """ Submit the form and download the resulting export.

        Sets the 'Page Size' field to '25', posts the form with
        ``submit=True`` and updates the form data from the response. If the
        updated form exposes an export URL, that URL with 'XML' appended is
        downloaded and the content stored in ``self.raw_data``.

        :returns: True if the export was downloaded, False if the page size
                  could not be set, the post could not be made, the update
                  failed or no export URL was found.
        :rtype: boolean
        """
        # The second element of the result is not needed here.
        is_set, _ = self.form_data.set_value_by_label('Page Size', '25')
        if is_set is False:
            return False
        response = self._do_post(True)
        if response is None:
            return False
        if self.form_data.update(response.content) is False:
            self.logger.warning("Submit failed :-(")
            return False

        if self.form_data.export_url is None:
            self.logger.warning("Unable to find the export url. Cannot continue.")
            return False

        export_url = _make_url(self.form_data.export_url) + 'XML'
        response = get_or_post_a_url(export_url, cookies=self.cookies)
        self.raw_data = response.content
        return True

    def save_original(self, filename):
        """ Save the original, downloaded source into the filename provided.

        :param filename: Filename to save the file to.
        :returns: True if data was written, False if nothing has been
                  downloaded yet.
        :rtype: boolean
        """
        if self.raw_data is None:
            return False
        payload = self.raw_data
        # Write the download verbatim; text (if that is what was stored) is
        # encoded as UTF-8 so the file can always be opened in binary mode.
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        with open(filename, 'wb') as ofh:
            ofh.write(payload)
        return True

    def set_value(self, lbl, value):
        """ Set the value of the form field identified by its label.

        When ``set_value_by_label`` reports both that the value was set and
        that a follow-up is required, the form is posted via :meth:`update`.

        :param lbl: Label of the field to change.
        :param value: Value to set.
        :returns: Result of update() when an update was triggered, otherwise
                  the "was set" flag from ``set_value_by_label``.
        """
        is_set, cb_rqd = self.form_data.set_value_by_label(lbl, value)
        self.logger.debug("set_value_by_label [%s] -> %s, %s", lbl, is_set, cb_rqd)
        if is_set and cb_rqd:
            return self.update()
        return is_set

    def _do_post(self, submit=False):
        """ Post the form data to the form's action URL.
            Given how slow the parsing of a 3M HTML page is, try and use the
            X-MicrosoftAjax: Delta=true header to get smaller blocks for processing.

        :param submit: Passed through to ``form_data.as_post_data()``.
        :returns: The response from the post, or None when there is no form
                  data or no action URL to post to.
        """
        # form_data stays None until get() has run, so check it before
        # looking at its action.
        if self.form_data is None or self.form_data.action is None:
            self.logger.info("Unable to post due no action URL available. Did you call get()?")
            return None

        action_url = _make_url(self.form_data.action)
        form_hdrs = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                     'User-Agent': 'Mozilla',
                     'X-Requested-With': 'XMLHttpRequest',
                     'X-MicrosoftAjax': 'Delta=true',
                     'Referer': unquote(action_url)
                     }
        post_dict = self.form_data.as_post_data(submit=submit)
        # Keys and values are joined as-is.
        # NOTE(review): presumably as_post_data() already URL-quotes them - confirm.
        post_data = "&".join(["{}={}".format(key, val) for key, val in post_dict.items()])

        response = get_or_post_a_url(action_url,
                                     post=True,
                                     cookies=self.cookies,
                                     headers=form_hdrs,
                                     data=post_data)
        return response
Пример #6
0
class OfgemForm(object):
    """ Class to represent an instance of an Ofgem form.

    Within this class, get() downloads and parses the form, set_value()
    changes a field, submit() posts the form and stores the export download
    in ``raw_data``, and save_original() writes that download to a file.
    """
    def __init__(self, url):
        """ Store the form location; nothing is downloaded until get().

        :param url: Location of the form, passed through ``_make_url``.
        """
        self.start_url = _make_url(url)
        # Cookies sent with each request; populated by get() when still None.
        self.cookies = None
        # Set by get() from the parsed form's action.
        self.action_url = None
        # Parsed form (FormData); None until get() has been called.
        self.form_data = None
        # Never reassigned by any method of this class.
        self.export_url = None
        # Export content downloaded by submit().
        self.raw_data = None
        self.logger = logging.getLogger(__name__)

    def get(self):
        """ Attempt to get the initial version of the form from the website.

        If no cookies are held yet, two preliminary requests are made and
        the 'ASP.NET_SessionId' cookie of the second response is retained.

        :returns: Always True
        :rtype: boolean
        """
        if self.cookies is None:
            # First response is not used.
            # NOTE(review): looks like it only establishes a session - confirm.
            get_or_post_a_url(_make_url('Default.aspx', False))
            response = get_or_post_a_url(
                _make_url(
                    'ReportManager.aspx?ReportVisibility=1&ReportCategory=0'))
            self.cookies = {
                'ASP.NET_SessionId': response.cookies.get('ASP.NET_SessionId')
            }

        response = get_or_post_a_url(self.start_url, cookies=self.cookies)
        self.form_data = FormData(response.content)
        if self.action_url is None:
            self.action_url = _make_url(self.form_data.action)
        return True

    def update(self):
        """ Post the current form data and update the form from the response.
            Given how slow the parsing of a 3M HTML page is, try and use the
            X-MicrosoftAjax: Delta=true header to get smaller blocks for processing.

        :returns: False when no post could be made, otherwise the result of
                  ``form_data.update()`` for the response content.
        """
        response = self._do_post()
        if response is None:
            return False
        return self.form_data.update(response.content)

    def submit(self):
        """ Submit the form and fetch the export it produces.

        The 'Page Size' field is set to '25' before the form is posted with
        ``submit=True``. After the form data has been updated from the
        response, the export URL it exposes (with 'XML' appended) is
        downloaded into ``self.raw_data``.

        :returns: True once the export has been downloaded; False if the
                  page size could not be set, the post was not possible,
                  the update failed or there is no export URL.
        :rtype: boolean
        """
        # Only the "was set" flag matters here.
        is_set, _ = self.form_data.set_value_by_label('Page Size', '25')
        if is_set is False:
            return False
        response = self._do_post(True)
        if response is None:
            return False
        if self.form_data.update(response.content) is False:
            self.logger.warning("Submit failed :-(")
            return False

        if self.form_data.export_url is None:
            self.logger.warning(
                "Unable to find the export url. Cannot continue.")
            return False

        export_url = _make_url(self.form_data.export_url) + 'XML'
        response = get_or_post_a_url(export_url, cookies=self.cookies)
        self.raw_data = response.content
        return True

    def save_original(self, filename):
        """ Save the original, downloaded source into the filename provided.

        :param filename: Filename to save the file to.
        :returns: True when the data was written, False when there is no
                  downloaded data to save.
        :rtype: boolean
        """
        if self.raw_data is None:
            return False
        content = self.raw_data
        # Stored data is written unchanged; anything that is not already
        # bytes is encoded as UTF-8 first so binary mode always works.
        if not isinstance(content, bytes):
            content = content.encode('utf-8')
        with open(filename, 'wb') as out_fh:
            out_fh.write(content)
        return True

    def set_value(self, lbl, value):
        """ Set a form field, identified by label, to the given value.

        If ``set_value_by_label`` reports that the value was set and that a
        follow-up is required, update() is called and its result returned.

        :param lbl: Label of the field to change.
        :param value: Value to set.
        :returns: Result of update() when one was triggered, otherwise the
                  "was set" flag.
        """
        is_set, cb_rqd = self.form_data.set_value_by_label(lbl, value)
        self.logger.debug("set_value_by_label [%s] -> %s, %s", lbl, is_set,
                          cb_rqd)
        if is_set and cb_rqd:
            return self.update()
        return is_set

    def _do_post(self, submit=False):
        """ Post the form data to the form's action URL.
            Given how slow the parsing of a 3M HTML page is, try and use the
            X-MicrosoftAjax: Delta=true header to get smaller blocks for processing.

        :param submit: Forwarded to ``form_data.as_post_data()``.
        :returns: The response to the post, or None if there is no form data
                  or no action URL available.
        """
        # Before get() has run form_data is None, so it must be checked
        # ahead of its action attribute.
        if self.form_data is None or self.form_data.action is None:
            self.logger.info(
                "Unable to post due no action URL available. Did you call get()?"
            )
            return None

        action_url = _make_url(self.form_data.action)
        form_hdrs = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'User-Agent': 'Mozilla',
            'X-Requested-With': 'XMLHttpRequest',
            'X-MicrosoftAjax': 'Delta=true',
            'Referer': unquote(action_url)
        }
        post_dict = self.form_data.as_post_data(submit=submit)
        # Pairs are joined without further quoting.
        # NOTE(review): presumably as_post_data() returns quoted values - confirm.
        post_data = "&".join(
            ["{}={}".format(key, val) for key, val in post_dict.items()])

        response = get_or_post_a_url(action_url,
                                     post=True,
                                     cookies=self.cookies,
                                     headers=form_hdrs,
                                     data=post_data)
        return response