Пример #1
0
def main():
    """Fetch the PG&E billing history page and download the linked bill PDFs.

    The credential form is handed to ``GetResponseSoup`` together with the
    login URL, the session is then pointed at the billing/payment history
    page, and every row of the ``transaction-history-table`` that offers a
    "Download" link is turned into a ``{'filename': ..., 'url': ...}`` record
    which is passed to ``download_invoices`` along with ``SAVE_PATH``.
    """
    login_form = {
        'USER': username,
        'PASSWORD': password,
        'REALMOID': '06-40da262c-b9d7-00ef-0000-28ff000028ff',
        'TARGET': 'https://www.pge.com/myenergyweb/appmanager/pge/customer',
        'SMAUTHREASON': 0,
        'FCC': 'DEFAULT',
        'PROTOCOL': 'DEFAULT'
    }
    session = GetResponseSoup('https://www.pge.com/eum/login', login_form)

    # Navigate to the statements (billing & payment history) page.
    session.update('https://www.pge.com/myenergyweb/appmanager/pge/customer?_nfpb=true&_pageLabel=BillingPaymentHistory&_nfls=false')

    history_table = session.soup.find('table', {'id': 'transaction-history-table'})
    # Rows carry their date in a <span> whose text looks like NN/NN/NN.
    date_pattern = re.compile(r'\d{2}/\d{2}/\d{2}')

    invoices = []
    for tr in history_table.findAll('tr'):
        # Only rows exposing a "Download" link have a PDF to fetch.
        if not tr.find('a', {'class': 'download-pdf-lft'}, text='Download'):
            continue
        date_span = tr.find('span', text=date_pattern)
        pdf_anchor = tr.find('a', {'class': 'download-pdf-lft'})
        invoices.append({
            'filename': format_datestring(date_span),
            'url': pdf_anchor['href'],
        })

    session.download_invoices(invoices, SAVE_PATH)
Пример #2
0
def main():
    """Log in to myaccount.mtnwater.com and download the billing-history invoices.

    The login page is loaded once without credentials so that the ASP.NET
    hidden inputs named in ``field_names`` can be copied into the form. The
    completed form is then submitted to ``login_url``, the account home page
    is opened, and every non-header row of the billing-history grid becomes a
    ``{'filename': ..., 'url': ...}`` record handed to ``download_invoices``.
    """
    # Hidden inputs whose current values must be echoed back with the login
    # form. Each name is listed once; a duplicate would only repeat the same
    # lookup and overwrite the same key.
    field_names = [
        r'__EVENTARGUMENT',
        r'__VIEWSTATE',
        r'__EVENTVALIDATION',
        r'__VIEWSTATEGENERATOR',
        r'__VIEWSTATEENCRYPTED',
    ]

    fields = {
        r'dnn$ctr487$Login$Login_DNN$txtUsername': username,
        r'dnn$ctr487$Login$Login_DNN$txtPassword': password,
        r'__ASYNCPOST': 'true',
        r'ScrollTop': '301',
        r'ScriptManager_TSM' : ';;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en:f319b152-218f-4c14-829d-050a68bb1a61:ea597d4b:b25378d2;Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en:3fe22950-1961-4f26-b9d4-df0df7356bf6:16e4e7cd:f7645509:ed16cbdc',
        r'StylesheetManager_TSSM': ';Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:3fe22950-1961-4f26-b9d4-df0df7356bf6:45085116:27c5704c',
        r'ScriptManager': r'dnn$ctr487$dnn$ctr487$Login_UPPanel|dnn$ctr487$Login$Login_DNN$cmdLogin',
        r'__EVENTTARGET': 'dnn$ctr487$Login$Login_DNN$cmdLogin',
        # NOTE(review): this value is a dict rather than a string; how it is
        # serialized depends on GetResponseSoup - confirm that is intended.
        r'__dnnVariable': {"__scdoff":"1","containerid_dnn_ctr395_ModuleContent":"395","cookieid_dnn_ctr395_ModuleContent":"_Module395_Visible","min_icon_395":"/Portals/_default/Containers/eCAReDefault/ar_closemenu.gif","max_icon_395":"/Portals/_default/Containers/eCAReDefault/ar_openmenu.gif","max_text":"Maximize","min_text":"Minimize"},
        r'RadAJAXControlID': r'dnn_ctr487_Login_UP',
    }

    # Load the login page (without logging in yet) to read the hidden inputs.
    mw = GetResponseSoup(login_url)

    # Copy each hidden input's value into the form; missing inputs become ''.
    for field in field_names:
        find_field = mw.soup.find(id=field)
        fields[field] = find_field['value'] if find_field else ''

    # Actually log in
    mw.update(login_url, fields)

    mw.update('http://myaccount.mtnwater.com/Home.aspx')

    transaction_table = mw.soup.find('table', {'id': 'dnn_ctr479_BillingHistory_GridView1'})
    # Rows containing a <th> are header rows and carry no invoice.
    transaction_rows = [row for row in transaction_table.findAll('tr') if not row.find('th')]

    downloadable_transactions = [
        dict(
            # The text of the "javascript:" link is fed to format_datestring.
            filename=format_datestring(row.find('a', attrs={'href': re.compile(r'^javascript')}).text),
            url=row.find('a', {'href': re.compile(r'onlinebiller')})['href']
        )
        for row in transaction_rows
    ]

    mw.download_invoices(downloadable_transactions, SAVE_PATH)
Пример #3
0
def main():
    """Log in to myaccount.mtnwater.com and download the billing-history invoices.

    The login page is first fetched without credentials so the ASP.NET hidden
    inputs listed in ``field_names`` can be copied into the login form. The
    form is then submitted to ``login_url``, the account home page is loaded,
    and each non-header row of the billing-history grid is converted to a
    ``{'filename': ..., 'url': ...}`` record passed to ``download_invoices``.
    """
    # Hidden inputs to echo back with the login form. Every name appears
    # exactly once; repeating one only repeats the lookup for the same key.
    field_names = [
        r'__EVENTARGUMENT',
        r'__VIEWSTATE',
        r'__EVENTVALIDATION',
        r'__VIEWSTATEGENERATOR',
        r'__VIEWSTATEENCRYPTED',
    ]

    fields = {
        r'dnn$ctr487$Login$Login_DNN$txtUsername': username,
        r'dnn$ctr487$Login$Login_DNN$txtPassword': password,
        r'__ASYNCPOST': 'true',
        r'ScrollTop': '301',
        r'ScriptManager_TSM':
        ';;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en:f319b152-218f-4c14-829d-050a68bb1a61:ea597d4b:b25378d2;Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en:3fe22950-1961-4f26-b9d4-df0df7356bf6:16e4e7cd:f7645509:ed16cbdc',
        r'StylesheetManager_TSSM':
        ';Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:3fe22950-1961-4f26-b9d4-df0df7356bf6:45085116:27c5704c',
        r'ScriptManager':
        r'dnn$ctr487$dnn$ctr487$Login_UPPanel|dnn$ctr487$Login$Login_DNN$cmdLogin',
        r'__EVENTTARGET': 'dnn$ctr487$Login$Login_DNN$cmdLogin',
        # NOTE(review): a dict, not a string, is sent for this field; its
        # serialization depends on GetResponseSoup - confirm this is intended.
        r'__dnnVariable': {
            "__scdoff": "1",
            "containerid_dnn_ctr395_ModuleContent": "395",
            "cookieid_dnn_ctr395_ModuleContent": "_Module395_Visible",
            "min_icon_395":
            "/Portals/_default/Containers/eCAReDefault/ar_closemenu.gif",
            "max_icon_395":
            "/Portals/_default/Containers/eCAReDefault/ar_openmenu.gif",
            "max_text": "Maximize",
            "min_text": "Minimize"
        },
        r'RadAJAXControlID': r'dnn_ctr487_Login_UP',
    }

    # Login page (but not logging in)
    mw = GetResponseSoup(login_url)

    # Copy each hidden input's current value into the form ('' when absent).
    for field in field_names:
        find_field = mw.soup.find(id=field)
        fields[field] = find_field['value'] if find_field else ''

    # Actually log in
    mw.update(login_url, fields)

    mw.update('http://myaccount.mtnwater.com/Home.aspx')

    transaction_table = mw.soup.find(
        'table', {'id': 'dnn_ctr479_BillingHistory_GridView1'})
    # Skip header rows (those containing a <th>).
    transaction_rows = [
        row for row in transaction_table.findAll('tr') if not row.find('th')
    ]

    downloadable_transactions = [
        dict(
            # The "javascript:" link's text is passed to format_datestring.
            filename=format_datestring(
                row.find('a', attrs={
                    'href': re.compile(r'^javascript')
                }).text),
            url=row.find('a', {'href': re.compile(r'onlinebiller')})['href'])
        for row in transaction_rows
    ]

    mw.download_invoices(downloadable_transactions, SAVE_PATH)
def main():
    """Log in to the NWE account site and download bills for each account.

    The login page is loaded, its ASP.NET hidden inputs are merged into the
    login form by ``extract_fields``, and the form is submitted. The bill
    history page is then processed with ``prepare_and_download``; afterwards
    each remaining entry of the account selector is submitted in turn and its
    bill history is processed the same way.
    """
    field_names = [
        "__EVENTTARGET",
        "__VIEWSTATE",
        "__EVENTARGUMENT",
        "__VIEWSTATEGENERATOR",
        "__EVENTVALIDATION",
        "__LASTFOCUS",
    ]

    fields = {
        "ctl00$StaticContentWebView1$staticContentLocation":
        "StaticContent/NWEHeaderIndex.htm",
        "ctl00$body_content$txtUsername": username,
        "ctl00$body_content$txtPassword": password,
        "ctl00$StaticContentWebView2$staticContentLocation":
        "StaticContent/NWEFooterIndex.htm",
        "ctl00$body_content$btnLogin.x": "25",
        "ctl00$body_content$btnLogin.y": "11",
    }

    nwe = GetResponseSoup(login_url)

    # Extract the field names from the site
    extract_fields(nwe, fields, field_names)

    # Logging in
    nwe.update(login_url, fields)

    # Go to Payments page where the invoices are
    nwe.update(billhistory_url)
    prepare_and_download(nwe)

    # Get all the account numbers on my account
    nwe.update(billhistory_url)
    account_selector = nwe.soup.find(
        'select',
        attrs={
            'name': 'ctl00$AccountSummaryHeaderControl1$headerAccountSelector'
        })
    # NOTE(review): '3168266-9' is skipped - presumably the account already
    # handled by the prepare_and_download call above; confirm.
    account_numbers = [
        o['value'] for o in account_selector.findAll('option')
        if o['value'] != '3168266-9'
    ]

    # The account-switch form needs two more hidden inputs echoed back.
    field_names.append(
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayAccountNumber"
    )
    field_names.append(
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayState")

    for account_number in account_numbers:
        if nwe.url != billhistory_url:
            nwe.update(billhistory_url)

        fields = {
            "ctl00$StaticContentWebView1$staticContentLocation":
            "StaticContent/NWEHeader.htm",
            "TC08BCDB1053_ctl00_ctl00_siteMapControl_customnavigation_ClientState":
            "",
            "ctl00$AccountSummaryHeaderControl1$headerAccountSelector":
            account_number,
            # The field is "...hdSpeedPayURL" and its value is the full
            # https:// URL. The pair used to be split at the colon inside
            # "https://", which left ":https" glued to the key and stripped
            # the scheme from the value.
            "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayURL":
            "https://paynow7.speedpay.com/northwestern/index.asp",
            "ctl00$body_content$StaticContentWebView1$staticContentLocation":
            "StaticContent/BillViewRequirements.htm",
            "ctl00$StaticContentWebView2$staticContentLocation":
            "StaticContent/NWEFooter.htm"
        }

        # Extract the field names from the site
        extract_fields(nwe, fields, field_names)

        nwe.update(billhistory_url, fields)
        prepare_and_download(nwe)
def main():
    """Download every invoice PDF listed on the billerweb invoice page.

    Steps, as performed below:
      1. Create a ``GetResponseSoup`` session from ``login_url`` and the
         credential form.
      2. Load the single-sign-on page and read its ``authTkn`` / ``keyTkn``
         inputs.
      3. Submit those tokens to the billerweb ``inetSrv`` endpoint; the
         resulting page lists the invoices as ``viewDocument`` links.
      4. For each invoice, request its viewer page, read the query string of
         the first ``<frame>`` and build a ``document.pdf`` URL from it.
      5. Hand the finished records to ``download_invoices`` with ``SAVE_PATH``.
    """
    fields = {
        r'username': username,
        r'password': password,
        r'login-form-type': r'pwd',
    }

    rg = GetResponseSoup(login_url, fields)
    rg.update(
        'https://secure.republiconline.com/secureeportal/_layouts/fissinglesignon.aspx'
    )

    authTkn = rg.soup.find('input', {'name': 'authTkn'})['value']
    keyTkn = rg.soup.find('input', {'name': 'keyTkn'})['value']

    fields = {
        r'slchannel': 'ALWRSA',
        r'client': '701122300',
        r'unitCode': 'ALW',
        r'type': 'ApplicationMenu',
        r'enc': 'web',
        r'authTkn': authTkn,
        r'keyTkn': keyTkn
    }

    rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

    # At this point, we're at the page where the invoices are listed.
    # Let's find the invoice IDs

    # Compiled once here instead of on every invoice_info() call.
    doc_id_search = re.compile(r'viewDocument\(\'(\d+)\'')

    def clean_due_date(s):
        """Turn a 'Due: MM/DD/YYYY' fragment into 'YYYY-MM-DD'."""
        cleaned_up = s.strip().replace('/', '-').replace('Due: ', '')
        # now looks like MM-DD-YYYY
        month, day, year = cleaned_up.split('-')
        return "{0}-{1}-{2}".format(year, month, day)

    def invoice_info(link):
        """Build the ``{'filename': ..., 'id': ...}`` record for one link."""
        find_id = doc_id_search.search(link['href'])
        doc_name = "{} {}".format(clean_due_date(link.contents[-1]),
                                  link.contents[0].strip().replace(': ', '-'))
        return dict(filename=doc_name, id=find_id.group(1))

    invoice_links = rg.soup.findAll('a', href=re.compile('viewDocument'))
    # A real list is required: the records are mutated in the loop below and
    # then iterated again by download_invoices, so a lazy map object would
    # already be exhausted by then.
    doc_ids = [invoice_info(link) for link in invoice_links]

    sessionHandle = rg.soup.find('input', {'name': 'sessionHandle'})['value']
    client = rg.soup.find('input', {'name': 'client'})['value']

    # Loop-invariant pieces of the PDF URL.
    pdf_endpoint = urlparse(
        'https://secure3.billerweb.com/alw/inetSrv/document.pdf')

    for invoice in doc_ids:
        fields = {
            r'sessionHandle': sessionHandle,
            r'type': 'UserService',
            r'action': 'ViewDocument',
            r'client': client,
            r'logoutUrl': '',
            r'operation': "search",
            r'startPos': '0',
            r'newBean': '',
            r'mode': '',
            r'documentId': invoice['id'],
            r'accountServiceId': '4',
            r'documentSetId': '',
            r'invoiceNumber': '',
            r'selectedAccount': '',
            # Listed once: an earlier '' entry for this key was always
            # overwritten by this value.
            r'accountSelection': 'allActiveAccounts',
            r'payDocumentId': '',
        }

        rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

        # The viewer page's first frame carries the parameters needed for
        # the PDF request in its query string.
        src = rg.soup.find('frame')['src']
        parsed_query = parse_qs(urlparse(src).query)

        fields = {
            'action': 'ShowPdf',
            'type': parsed_query['type'][0],
            'client': parsed_query['client'][0],
            'sessionHandle': parsed_query['sessionHandle'][0],
            'hasToc': 'false',
            'beginPage': '1',
            'endPage': '2',
            'docId': invoice['id']
        }

        # NOTE(review): urllib.urlencode is the Python 2 location of this
        # function; on Python 3 it lives in urllib.parse.
        invoice['url'] = urlunparse((
            pdf_endpoint.scheme,
            pdf_endpoint.netloc,
            pdf_endpoint.path,
            pdf_endpoint.params,
            urllib.urlencode(fields),
            '',
        ))

    rg.download_invoices(doc_ids, SAVE_PATH)
def main():
    """Log in to the NWE account site and download bills for each account.

    After loading the login page, ``extract_fields`` merges the page's
    ASP.NET hidden inputs into the login form, which is then submitted. The
    bill history page is processed with ``prepare_and_download``, and the
    same is repeated for every other entry of the account selector.
    """
    field_names = [
        "__EVENTTARGET",
        "__VIEWSTATE",
        "__EVENTARGUMENT",
        "__VIEWSTATEGENERATOR",
        "__EVENTVALIDATION",
        "__LASTFOCUS",
    ]

    fields = {
        "ctl00$StaticContentWebView1$staticContentLocation": "StaticContent/NWEHeaderIndex.htm",
        "ctl00$body_content$txtUsername": username,
        "ctl00$body_content$txtPassword": password,
        "ctl00$StaticContentWebView2$staticContentLocation": "StaticContent/NWEFooterIndex.htm",
        "ctl00$body_content$btnLogin.x": "25",
        "ctl00$body_content$btnLogin.y": "11",
    }

    nwe = GetResponseSoup(login_url)

    # Extract the field names from the site
    extract_fields(nwe, fields, field_names)

    # Logging in
    nwe.update(login_url, fields)

    # Go to Payments page where the invoices are
    nwe.update(billhistory_url)
    prepare_and_download(nwe)

    # Get all the account numbers on my account
    nwe.update(billhistory_url)
    account_selector = nwe.soup.find('select', attrs={'name': 'ctl00$AccountSummaryHeaderControl1$headerAccountSelector'})
    # NOTE(review): '3168266-9' is excluded - presumably the account that the
    # prepare_and_download call above already covered; confirm.
    account_numbers = [o['value'] for o in account_selector.findAll('option') if o['value'] != '3168266-9']

    # Two further hidden inputs must be echoed back when switching accounts.
    field_names.append("ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayAccountNumber")
    field_names.append("ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayState")

    for account_number in account_numbers:
        if nwe.url != billhistory_url:
            nwe.update(billhistory_url)

        fields = {
            "ctl00$StaticContentWebView1$staticContentLocation": "StaticContent/NWEHeader.htm",
            "TC08BCDB1053_ctl00_ctl00_siteMapControl_customnavigation_ClientState": "",
            "ctl00$AccountSummaryHeaderControl1$headerAccountSelector": account_number,
            # Key and value were previously split at the colon inside
            # "https://", leaving ":https" on the key and a scheme-less value.
            "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayURL": "https://paynow7.speedpay.com/northwestern/index.asp",
            "ctl00$body_content$StaticContentWebView1$staticContentLocation": "StaticContent/BillViewRequirements.htm",
            "ctl00$StaticContentWebView2$staticContentLocation": "StaticContent/NWEFooter.htm"
        }

        # Extract the field names from the site
        extract_fields(nwe, fields, field_names)

        nwe.update(billhistory_url, fields)
        prepare_and_download(nwe)
def main():
    """Download every invoice PDF listed on the billerweb invoice page.

    A ``GetResponseSoup`` session is created from ``login_url`` and the
    credential form, the single-sign-on page supplies ``authTkn`` / ``keyTkn``,
    and submitting those to the billerweb ``inetSrv`` endpoint produces the
    page that lists invoices as ``viewDocument`` links. For each invoice the
    viewer page is requested, the first ``<frame>``'s query string is read,
    and a ``document.pdf`` URL is built. The finished records go to
    ``download_invoices`` together with ``SAVE_PATH``.
    """
    fields = {
        r'username': username,
        r'password': password,
        r'login-form-type': r'pwd',
    }

    rg = GetResponseSoup(login_url, fields)
    rg.update('https://secure.republiconline.com/secureeportal/_layouts/fissinglesignon.aspx')

    authTkn = rg.soup.find('input', {'name': 'authTkn'})['value']
    keyTkn = rg.soup.find('input', {'name': 'keyTkn'})['value']

    fields = {
        r'slchannel': 'ALWRSA',
        r'client': '701122300',
        r'unitCode': 'ALW',
        r'type': 'ApplicationMenu',
        r'enc': 'web',
        r'authTkn': authTkn,
        r'keyTkn': keyTkn
    }

    rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

    # At this point, we're at the page where the invoices are listed.
    # Let's find the invoice IDs

    # Hoisted so the pattern is built once rather than per link.
    doc_id_search = re.compile(r'viewDocument\(\'(\d+)\'')

    def clean_due_date(s):
        """Turn a 'Due: MM/DD/YYYY' fragment into 'YYYY-MM-DD'."""
        cleaned_up = s.strip().replace('/', '-').replace('Due: ', '')
        # now looks like MM-DD-YYYY
        month, day, year = cleaned_up.split('-')
        return "{0}-{1}-{2}".format(year, month, day)

    def invoice_info(link):
        """Build the ``{'filename': ..., 'id': ...}`` record for one link."""
        find_id = doc_id_search.search(link['href'])
        doc_name = "{} {}".format(clean_due_date(link.contents[-1]),
                                  link.contents[0].strip().replace(': ', '-'))
        return dict(
            filename=doc_name,
            id=find_id.group(1)
        )

    invoice_links = rg.soup.findAll('a', href=re.compile('viewDocument'))
    # Materialized as a list because the records are updated in the loop
    # below and iterated a second time by download_invoices; a lazy map
    # object would be empty on that second pass.
    doc_ids = [invoice_info(link) for link in invoice_links]

    sessionHandle = rg.soup.find('input', {'name': 'sessionHandle'})['value']
    client = rg.soup.find('input', {'name': 'client'})['value']

    # Parsed once; only the query component changes per invoice.
    pdf_endpoint = urlparse('https://secure3.billerweb.com/alw/inetSrv/document.pdf')

    for invoice in doc_ids:
        fields = {
            r'sessionHandle': sessionHandle,
            r'type': 'UserService',
            r'action': 'ViewDocument',
            r'client': client,
            r'logoutUrl': '',
            r'operation': "search",
            r'startPos': '0',
            r'newBean': '',
            r'mode': '',
            r'documentId': invoice['id'],
            r'accountServiceId': '4',
            r'documentSetId': '',
            r'invoiceNumber': '',
            r'selectedAccount': '',
            # Single entry: a preceding '' value for the same key was dead,
            # since the later value always replaced it.
            r'accountSelection': 'allActiveAccounts',
            r'payDocumentId': '',
        }

        rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

        # The first frame's query string supplies type/client/sessionHandle
        # for the PDF request.
        src = rg.soup.find('frame')['src']
        parsed_query = parse_qs(urlparse(src).query)

        fields = {
            'action': 'ShowPdf',
            'type': parsed_query['type'][0],
            'client': parsed_query['client'][0],
            'sessionHandle': parsed_query['sessionHandle'][0],
            'hasToc': 'false',
            'beginPage': '1',
            'endPage': '2',
            'docId': invoice['id']
        }

        # NOTE(review): urllib.urlencode is the Python 2 location of this
        # function; on Python 3 it lives in urllib.parse.
        invoice['url'] = urlunparse((
            pdf_endpoint.scheme,
            pdf_endpoint.netloc,
            pdf_endpoint.path,
            pdf_endpoint.params,
            urllib.urlencode(fields),
            '',
        ))

    rg.download_invoices(doc_ids, SAVE_PATH)