"asistencia": tds[2].text, "observacion": tds[3].text, "ingreso": tds[4].text } sesion['asistencia'].append(asistencia) # scraperhelper.pt('Assistance Scraped') """ browser.get( 'https://www.camara.cl/trabajamos/sesion_asistencia.aspx?prmid=' + session['prmid']) js_script = "return_scrap_array = []; $('#detail .col.detalle table.tabla tbody tr').each(function(index) { return_scrap_array.push($(this).find('td').get().map( (td) => [td.textContent, td.getElementsByTagName('a')[0] ? td.getElementsByTagName('a')[0].href : ''] ) )}); return JSON.stringify(return_scrap_array);" asist_array = json.loads(browser.execute_script(js_script)) for asist in asist_array: asistencia = { "diputado_prmid": scraperhelper.getQueryParametersFromUrl(asist[0][1])[0], "partido_prmid": asist[1][0], "asistencia": asist[2][0], "observacion": asist[3][0], "ingreso": asist[4][0] } sesion['asistencia'].append(asistencia) # Go to 'Intervenciones' browser.get( 'https://www.camara.cl/trabajamos/sesion_intervenciones.aspx?prmid=' + session['prmid'])
scraperhelper.pt('Get Handy Elements')

# Loop over the legislature-year option ids (down to 46), paging through each
# year's session table until there is no "next" link left
while int(option_selected) > 45:
    scraperhelper.pt('Get year id: ' + str(option_selected) + ' ----------')
    page_number = browser.find_element_by_css_selector(
        '#detail .pages ul li.current').text
    subcount = 1
    while True:
        scraperhelper.pt('Get Sessions: Page ' + str(subcount))
        subcount += 1
        rows = browser.find_elements_by_css_selector('#detail table.tabla tbody tr')
        for row in rows:
            try:
                columns = row.find_elements_by_css_selector('td')
                prmid = scraperhelper.getQueryParametersFromUrl(
                    columns[1].find_element_by_tag_name('a').get_attribute('href'))
                sesion = {
                    "fecha": columns[0].text,
                    "sesion": columns[1].text,
                    "estado": columns[2].text,
                    "prmid": prmid[0]
                }
                data.append(sesion)
            except StaleElementReferenceException:
                print('ERROR!! -----')
        next_buttons = browser.find_elements_by_css_selector('#detail .pages ul li.next a')
        if len(next_buttons) > 0:
            # The "next" link is a javascript: postback; run it directly, then wait
            # for the current-page indicator to change before scraping again
            browser.execute_script(
                next_buttons[0].get_attribute('href').replace('javascript:', ''))
            page_number = scraperhelper.waitForChangesInAttribute(
                browser, '#detail .pages ul li.current', page_number, text=True)
        else:
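
# A minimal sketch of the scraperhelper.waitForChangesInAttribute helper used above,
# assuming it blocks until the watched element's text (or an attribute) differs from
# the previous value and returns the new one; signature and internals are guesses
# from the call site, not the real helper.
from selenium.webdriver.support.ui import WebDriverWait

def waitForChangesInAttribute(browser, selector, old_value, text=False, timeout=30):
    def changed(driver):
        el = driver.find_element_by_css_selector(selector)
        value = el.text if text else el.get_attribute('value')
        # A truthy return stops the polling and is passed back to the caller
        return value if value != old_value else False
    return WebDriverWait(browser, timeout).until(changed)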
"abstencion": [], "articulo_quinto": [], "pareos": [] } for el in browser.find_elements_by_css_selector('#detail .stress'): try: h2 = el.find_element_by_tag_name('h2').text except NoSuchElementException as ex: h2 = 'SIN TITULO' if 'A favor' in h2: for a in el.find_elements_by_css_selector('#ctl00_mainPlaceHolder_dtlAFavor td a'): vote['favor'].append(scraperhelper.getQueryParametersFromUrl(a.get_attribute('href'))[0]) elif 'En contra' in h2: for a in el.find_elements_by_css_selector('#ctl00_mainPlaceHolder_dtlEncontra td a'): vote['contra'].append(scraperhelper.getQueryParametersFromUrl(a.get_attribute('href'))[0]) elif 'Abstención' in h2: for a in el.find_elements_by_css_selector('#ctl00_mainPlaceHolder_dtlAbstencion td a'): vote['abstencion'].append(scraperhelper.getQueryParametersFromUrl(a.get_attribute('href'))[0]) elif 'Artículo 5°' in h2: for a in el.find_elements_by_css_selector('table td a'): vote['articulo_quinto'].append(scraperhelper.getQueryParametersFromUrl(a.get_attribute('href'))[0]) elif 'Pareos' in h2: for a in el.find_elements_by_css_selector('#ctl00_mainPlaceHolder_dtlPareos td a'): vote['pareos'].append(scraperhelper.getQueryParametersFromUrl(a.get_attribute('href'))[0]) else: vote['boletin'] = h2.replace('Boletín ','') vote['fecha'] = scraperhelper.getRestOfTheTextForElementWith(el, './p', 'Fecha:').strip()
import scraperhelper
from selenium.common.exceptions import (
    TimeoutException, NoSuchElementException,
    StaleElementReferenceException, WebDriverException)

browser = scraperhelper.initBrowser()

# output lists
data = []
errors = []

# main script GO!
try:
    browser.get('https://www.camara.cl/camara/diputados.aspx')
    scraperhelper.pt('Get Current Reps Site')
    content = browser.find_elements_by_css_selector('li.alturaDiputado h4 a')
    for el in content:
        data.append({
            "prmid": scraperhelper.getQueryParametersFromUrl(
                el.get_attribute('href'))[0],
            "nombre": str(el.text.replace('SR. ', '').replace('SRA. ', '')),
            "periodo": "2014-2018"
        })
except TimeoutException as ex:
    scraperhelper.pt('PAGE TimeoutException ERROR')
except NoSuchElementException as ex:
    scraperhelper.pt('PAGE NoSuchElementException ERROR')
except StaleElementReferenceException as ex:
    scraperhelper.pt('PAGE StaleElementReferenceException ERROR')
except WebDriverException as ex:
    scraperhelper.pt('PAGE WebDriverException ERROR')
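
# For context, minimal sketches of the two scraperhelper utilities this script leans
# on; both are assumptions inferred from the call sites, not the real implementations:
# initBrowser() presumably returns a configured Selenium driver, and pt() looks like
# a timestamped progress printer.
import datetime
from selenium import webdriver

def initBrowser():
    # Headless Chrome keeps the scraper runnable on a server
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    return webdriver.Chrome(chrome_options=options)

def pt(message):
    print(datetime.datetime.now().isoformat() + ' | ' + message)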