# NOTE(review): tail of a "sesiones.extended.1418" scraper script, collapsed onto a
# single line by whitespace mangling. The visible code starts mid-dict-literal
# (presumably `acuerdo = {` inside a loop over `tds`, inside a per-session `try:`
# whose header is not visible here) — the missing head must be restored from the
# original file before this can run.
# What the visible tokens show: each scraped `acuerdo` dict (prmid / titulo / estado
# from table cells) is appended to sesion['acuerdos']; on success the session is
# appended to `data` and `saved` is set; four Selenium exception types are logged via
# scraperhelper.pt; the `finally:` logs the session prmid and records it in `errors`
# when `saved` is falsy; finally the browser is closed and results are saved to file.
# HACK(review): the inline `# scraperhelper.pt('Agreenments Scraped')` comment, once
# collapsed onto one line, comments out EVERYTHING after it — this line is dead code
# past that point until the original line breaks are restored.
"acuerdo_prmid": scraperhelper.getQueryParametersElementChild(tds[1])[0], "titulo": tds[2].text, "estado": tds[3].text } sesion['acuerdos'].append(acuerdo) # scraperhelper.pt('Agreenments Scraped') data.append(sesion) saved = True except TimeoutException as ex: scraperhelper.pt('PAGE TimeoutException ERROR') except NoSuchElementException as ex: scraperhelper.pt('PAGE NoSuchElementException ERROR') except StaleElementReferenceException as ex: scraperhelper.pt('PAGE StaleElementReferenceException ERROR') except WebDriverException as ex: scraperhelper.pt('PAGE WebDriverException ERROR') finally: scraperhelper.pt('Loaded Session ' + session['prmid']) if not saved: errors.append(session['prmid']) print('----------- WITH ERROR! -------------') scraperhelper.closeSeleniumBrowser(browser) scraperhelper.saveToFile('sesiones.extended.1418', data, errors)
# NOTE(review): tail of a "sesiones.simple.1418" scraper script, collapsed onto a
# single line. The visible code starts mid-pagination-loop (`if len(next_buttons) > 0:`)
# — the enclosing loop, the `try:` matching the `except` clauses below, and the setup
# of `browser` / `data` / `errors` / `page_number` / `option_selected` are all outside
# this view and must be restored from the original file.
# What the visible tokens show: pagination advances by executing the next-button's
# `javascript:` href and waiting for the `.pages ul li.current` text to change;
# when no next button exists it breaks out, then selects the previous legislature
# option in the `ctl00_mainPlaceHolder_ddlLegislaturas` <select> (value ==
# option_selected - 1) and waits for the selected option's `value` attribute to
# change; four Selenium exception types are logged; the browser is closed and
# results saved to 'sesiones.simple.1418'.
# HACK(review): the inline `# Get next option` comment, once collapsed onto one
# line, comments out everything after it — the remainder of this line is dead code
# until the original line breaks are restored.
if len(next_buttons) > 0: browser.execute_script(next_buttons[0].get_attribute('href').replace('javascript:','')) page_number = scraperhelper.waitForChangesInAttribute(browser, '#detail .pages ul li.current', page_number, text = True) else: break # Get next option select = browser.find_element_by_id('ctl00_mainPlaceHolder_ddlLegislaturas') for option in select.find_elements_by_tag_name('option'): if int(option.get_attribute('value')) == int(option_selected) - 1: option.click() break scraperhelper.pt('New option clicked') # Wait till loaded option_selected = scraperhelper.waitForChangesInAttribute(browser, '#ctl00_mainPlaceHolder_ddlLegislaturas option[selected]', option_selected, attribute = 'value') scraperhelper.pt('New page loaded') except TimeoutException as ex: scraperhelper.pt('PAGE TimeoutException ERROR') except NoSuchElementException as ex: scraperhelper.pt('PAGE NoSuchElementException ERROR') except StaleElementReferenceException as ex: scraperhelper.pt('PAGE StaleElementReferenceException ERROR') except WebDriverException as ex: scraperhelper.pt('PAGE WebDriverException ERROR') scraperhelper.closeSeleniumBrowser(browser) scraperhelper.saveToFile('sesiones.simple.1418', data, errors)
# Scrape the current representatives ("diputados") listing and save a simple record
# (prmid, nombre, periodo) per representative.
# NOTE(review): this restores the original multi-line layout of a script that had
# been collapsed onto one line (where `# main script GO!` commented out the rest).
# Assumes `browser` (Selenium driver), `data` (list), `scraperhelper`, and the
# Selenium exception classes are already in scope from the non-visible script head
# — TODO confirm against the original file.
errors = []
# main script GO!
try:
    browser.get('https://www.camara.cl/camara/diputados.aspx')
    scraperhelper.pt('Get Current Reps Site')
    # One anchor per representative; its href carries the prmid query parameter.
    rep_links = browser.find_elements_by_css_selector('li.alturaDiputado h4 a')
    for link in rep_links:
        record = {
            "prmid": scraperhelper.getQueryParametersFromUrl(link.get_attribute('href'))[0],
            # Strip the honorific prefix from the displayed name.
            "nombre": str(link.text.replace('SR. ', '').replace('SRA. ', '')),
            "periodo": "2014-2018",
        }
        data.append(record)
except TimeoutException as ex:
    scraperhelper.pt('PAGE TimeoutException ERROR')
except NoSuchElementException as ex:
    scraperhelper.pt('PAGE NoSuchElementException ERROR')
except StaleElementReferenceException as ex:
    scraperhelper.pt('PAGE StaleElementReferenceException ERROR')
except WebDriverException as ex:
    scraperhelper.pt('PAGE WebDriverException ERROR')
# Always release the browser and persist whatever was collected.
scraperhelper.closeSeleniumBrowser(browser)
scraperhelper.saveToFile('diputados.simple.1418', data, errors)
# NOTE(review): tail of a "resoluciones.simple.1418" scraper script, collapsed onto
# a single line. The visible code starts mid-dict-literal (presumably `res = {`
# inside a row loop over `cols`, inside a `try:` matching the `except` clauses
# below) — the missing head, plus the setup of `browser` / `data` / `errors` /
# `page` / `page_number`, must be restored from the original file before this runs.
# What the visible tokens show: each resolution record (link from column 4, prmid
# from column 5's query parameters) is appended to `data`; pagination advances by
# executing the `.pages ul li.next a` button's `javascript:` href and waiting for
# the `.pages ul li.current` text to change, logging 'Loading Page N'; when no next
# button exists the loop breaks; four Selenium exception types are logged; the
# browser is closed and results saved to 'resoluciones.simple.1418'.
scraperhelper.getLinkFromElementChild(cols[4]), "prmid": scraperhelper.getQueryParametersElementChild(cols[5])[0] } data.append(res) next_buttons = browser.find_elements_by_css_selector( '.pages ul li.next a') if len(next_buttons) > 0: page = page + 1 browser.execute_script( next_buttons[0].get_attribute('href').replace( 'javascript:', '')) page_number = scraperhelper.waitForChangesInAttribute( browser, '.pages ul li.current', page_number, text=True) scraperhelper.pt('Loading Page ' + str(page)) else: break except TimeoutException as ex: scraperhelper.pt('PAGE TimeoutException ERROR') except NoSuchElementException as ex: scraperhelper.pt('PAGE NoSuchElementException ERROR') except StaleElementReferenceException as ex: scraperhelper.pt('PAGE StaleElementReferenceException ERROR') except WebDriverException as ex: scraperhelper.pt('PAGE WebDriverException ERROR') scraperhelper.closeSeleniumBrowser(browser) scraperhelper.saveToFile('resoluciones.simple.1418', data, errors)
# NOTE(review): tail of a "diputados.extended.1418" scraper script, collapsed onto
# a single line. The visible code starts mid-per-representative block — the `try:`
# matching the `except` clauses below, the loop over representatives (`rep`), and
# the construction of `rep_extended` / `summary` / `ficha` are outside this view
# and must be restored from the original file.
# What the visible tokens show: concatenates the text of every <p> in the third
# summary element into rep_extended['comite_parlamentario']; reads the phone
# (stripping the 'Teléfono: ' prefix) and e-mail from the profile page; appends the
# record to `data` and sets `saved`; four Selenium exception types are logged; the
# `finally:` logs the representative's prmid and records it in `errors` when
# `saved` is falsy; finally the browser is closed and results saved to file.
commitees = summary[2].find_elements_by_css_selector('p') for co in commitees: rep_extended['comite_parlamentario'] = rep_extended[ 'comite_parlamentario'] + co.text rep_extended['telefono'] = ficha.find_element_by_css_selector( 'div.phones p').text.replace('Teléfono: ', '') rep_extended['correo'] = ficha.find_element_by_css_selector( 'li.email a').text data.append(rep_extended) saved = True except TimeoutException as ex: scraperhelper.pt('PAGE TimeoutException ERROR') except NoSuchElementException as ex: scraperhelper.pt('PAGE NoSuchElementException ERROR') except StaleElementReferenceException as ex: scraperhelper.pt('PAGE StaleElementReferenceException ERROR') except WebDriverException as ex: scraperhelper.pt('PAGE WebDriverException ERROR') finally: scraperhelper.pt('Loaded Representative ' + rep['prmid']) if not saved: errors.append(rep['prmid']) print('----------- WITH ERROR! -------------') scraperhelper.closeSeleniumBrowser(browser) scraperhelper.saveToFile('diputados.extended.1418', data, errors)
# NOTE(review): tail of an "acuerdos.extended.1418" scraper script, collapsed onto
# a single line. The visible code starts mid-dict-literal (presumably `res = {`
# inside a row loop over `cols`); BOTH try headers are missing — the inner `try:`
# whose `except TimeoutException` resets ac['resumen'] to [], and the outer
# per-acuerdo `try:` matching the exception chain below — as are the definitions of
# `ac` / `acuerdo` / `counting` / `all_count` / `data` / `errors` / `browser`.
# Restore the head from the original file before this can run.
# What the visible tokens show: each summary row (text of column 3, document link
# from column 4) is appended to ac['resumen']; on an inner timeout the resumen list
# is cleared; the completed `ac` is appended to `data` and `saved` is set; four
# Selenium exception types are logged; the `finally:` logs progress as
# 'Loaded Agreenment <prmid> <counting>/<all_count>' and records the prmid in
# `errors` when `saved` is falsy; finally the browser is closed and results saved.
cols[3].text, "documento_link": scraperhelper.getLinkFromElementChild(cols[4]) } ac['resumen'].append(res) except TimeoutException as ex: ac['resumen'] = [] data.append(ac) saved = True except TimeoutException as ex: scraperhelper.pt('PAGE TimeoutException ERROR') except NoSuchElementException as ex: scraperhelper.pt('PAGE NoSuchElementException ERROR') except StaleElementReferenceException as ex: scraperhelper.pt('PAGE StaleElementReferenceException ERROR') except WebDriverException as ex: scraperhelper.pt('PAGE WebDriverException ERROR') finally: scraperhelper.pt('Loaded Agreenment ' + acuerdo['acuerdo_prmid'] + ' ' + str(counting) + '/' + str(all_count)) if not saved: errors.append(acuerdo['acuerdo_prmid']) print('----------- WITH ERROR! -------------') scraperhelper.closeSeleniumBrowser(browser) scraperhelper.saveToFile('acuerdos.extended.1418', data, errors)