def download_tj(self, termo='a'):
    """Run a free-text search and store the text of every result page.

    Opens Chrome at ``self.link_inicial``, types ``termo`` into the field
    located by ``self.pesquisa_livre``, submits the search and inserts the
    extracted text of each page into ``self.tabela_colunas``.  The first
    page is advanced with ``self.botao_proximo_ini``; later pages with
    ``self.botao_proximo``.  Paging ends after more than three consecutive
    failed iterations.

    Args:
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_pagina():
        # Double quotes are removed because the INSERT wraps the text in
        # "..." through plain string formatting.
        # NOTE(review): a parameterised query would be safer -- confirm the
        # cursor's paramstyle before switching.
        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source).replace('"', '')
        cursor.execute('INSERT INTO %s value ("%s");' %
                       (self.tabela_colunas, texto))

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_xpath(self.botao_pesquisar).click()
        salva_pagina()
        driver.find_element_by_xpath(self.botao_proximo_ini).click()

        falhas = 0
        while falhas <= 3:
            try:
                time.sleep(1)
                salva_pagina()
                driver.find_element_by_xpath(self.botao_proximo).click()
                falhas = 0  # only consecutive failures count
            except Exception as e:
                print(e)
                falhas += 1
                time.sleep(5)
    finally:
        # Release the browser on every exit path, including errors raised
        # before the retry loop starts.
        driver.quit()
def download_tj(self, termo='ementa'):
    """Run a free-text search and store the text of every result page.

    The first two pages are advanced with ``self.botao_proximo_iniXP``
    (which is overwritten with a fixed XPath after the second page, as in
    the original code); later pages use ``self.botao_proximoXP``.  When a
    page fails, the browser goes back one history entry and retries the
    "next" button until it succeeds.

    NOTE(review): the paging loop has no exit condition of its own; it only
    ends when an exception that is not an ``Exception`` subclass (e.g.
    ``KeyboardInterrupt``) escapes.  Confirm how the crawl is expected to
    stop.

    Args:
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_pagina():
        # Quotes are stripped from the page source (as before) and also
        # from the extracted text, so the "..." literal of the INSERT
        # cannot be terminated early by a quote the extractor yields.
        # NOTE(review): a parameterised query would be safer -- confirm the
        # cursor's paramstyle before switching.
        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source.replace('"', '')).replace('"', '')
        cursor.execute('INSERT INTO %s value ("%s");' %
                       (self.tabela_colunas, texto))

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_id(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_xpath(self.botao_pesquisar).click()
        time.sleep(1)

        salva_pagina()
        driver.find_element_by_xpath(self.botao_proximo_iniXP).click()
        time.sleep(1)

        salva_pagina()
        self.botao_proximo_iniXP = '//*[@id="navigator"]/div[1]/a[6]'
        driver.find_element_by_xpath(self.botao_proximo_iniXP).click()

        while True:
            try:
                salva_pagina()
                driver.find_element_by_xpath(self.botao_proximoXP).click()
                time.sleep(2)
            except Exception:
                # Recovery: step back in history and press "next" again
                # until it works.  Catching Exception (not a bare except)
                # keeps Ctrl+C able to interrupt this retry loop.
                sucesso = False
                while not sucesso:
                    try:
                        time.sleep(1)
                        driver.execute_script("window.history.go(-1)")
                        driver.find_element_by_xpath(
                            self.botao_proximoXP).click()
                        sucesso = True
                    except Exception:
                        continue
    finally:
        # The loop above never breaks, so cleanup has to live here.
        driver.quit()
def download_tj(self, termo='acordam'):
    """Run a free-text search and walk the results by typing page numbers.

    After storing the first page, each iteration writes
    ``self.contador_paginas`` into the field located by
    ``self.botao_proximoXP``, clicks ``self.botao_proximo``, increments
    ``self.contador_paginas`` and stores the page text in
    ``self.tabela_colunas``.  The crawl stops at the fourth failure in
    total (the failure counter is never reset).

    Args:
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_pagina():
        # Quotes are stripped from the page source (as before) and also
        # from the extracted text, so the "..." literal of the INSERT
        # cannot be terminated early by a quote the extractor yields.
        # NOTE(review): a parameterised query would be safer -- confirm the
        # cursor's paramstyle before switching.
        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source.replace('"', '')).replace('"', '')
        cursor.execute('INSERT INTO %s value ("%s");' %
                       (self.tabela_colunas, texto))

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_id(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_id(self.botao_pesquisar).click()
        salva_pagina()

        falhas = 0
        while True:
            try:
                campo_pagina = driver.find_element_by_xpath(
                    self.botao_proximoXP)
                campo_pagina.clear()
                campo_pagina.send_keys(str(self.contador_paginas))
                driver.find_element_by_xpath(self.botao_proximo).click()
                time.sleep(2)
                self.contador_paginas += 1
                salva_pagina()
            except Exception:
                falhas += 1
                if falhas > 3:
                    break
                time.sleep(5)
                # Return to the previous page before retrying.
                driver.execute_script("window.history.go(-1)")
    finally:
        # Also covers errors raised before the loop or inside the handler.
        driver.quit()
def download_tj(self, data_julg_ini, data_julg_fim, termo='acordam'):
    """Search within a judgment-date range and store every result page.

    Each page's extracted text is inserted into
    ``justica_estadual.jurisprudencia_ma (ementas)``.  After saving a page,
    the crawl stops if the text of the last ``linkQuery`` element parses as
    an integer; otherwise the element with id ``pagination`` is clicked.
    Any error inside the paging loop is printed and ends the crawl.

    Args:
        data_julg_ini: Value typed into the initial judgment-date field.
        data_julg_fim: Value typed into the final judgment-date field.
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_name(
            self.data_julgamento_inicial).send_keys(data_julg_ini)
        driver.find_element_by_name(
            self.data_julgamento_final).send_keys(data_julg_fim)
        # Several elements share this name; the third one is clicked.
        driver.find_elements_by_name(self.botao_pesquisar)[2].click()

        while True:
            try:
                time.sleep(3)
                links_proximos = driver.find_elements_by_class_name(
                    'linkQuery')
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self, driver.page_source)
                # NOTE(review): string-built SQL; quotes are stripped so
                # the text cannot close the "..." literal.  A parameterised
                # query would be safer -- confirm the cursor's paramstyle.
                cursor.execute(
                    'INSERT INTO justica_estadual.jurisprudencia_ma (ementas) value("%s")'
                    % texto.replace('"', ''))
                # Only the integer check is guarded, so cleanup can no
                # longer fail inside it and be retried by the handler.
                try:
                    int(links_proximos[-1].text)
                    ultimo_link_numerico = True
                except Exception:
                    ultimo_link_numerico = False
                if ultimo_link_numerico:
                    break
                driver.find_element_by_id('pagination').click()
            except Exception as e:
                print(e)
                break
    finally:
        # Single cleanup point: the browser is released exactly once.
        driver.quit()
def download_1_inst(self, data_ini, data_fim, termo='a'):
    """Search https://esaj.tjsp.jus.br/cjpg/pesquisar.do and store each page.

    Fills the free-text and start/end date fields, submits with
    ``self.botao_pesquisar`` and inserts the extracted text of every result
    page into ``self.tabela_colunas_1_inst``.  The first page is advanced
    with a different "next" XPath than the following ones.  Paging ends
    after three consecutive failures.

    Args:
        data_ini: Value typed into the start-date field.
        data_fim: Value typed into the end-date field.
        termo: Text typed into the free-search field.
    """
    link = 'https://esaj.tjsp.jus.br/cjpg/pesquisar.do'
    pesquisa_xpath = '//*[@id="iddadosConsulta.pesquisaLivre"]'
    data_ini_xpath = '//*[@id="iddadosConsulta.dtInicio"]'
    data_fim_xpath = '//*[@id="iddadosConsulta.dtFim"]'
    botao_proximo_ini = '//*[@id="resultados"]/table[1]/tbody/tr[1]/td[2]/div/a[5]'
    botao_proximo = '//*[@id="resultados"]/table[1]/tbody/tr[1]/td[2]/div/a[6]'

    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_pagina():
        # Double quotes are removed because the INSERT wraps the text in
        # "..." through plain string formatting.
        # NOTE(review): a parameterised query would be safer -- confirm the
        # cursor's paramstyle before switching.
        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source).replace('"', '')
        cursor.execute('INSERT INTO %s value ("%s");' %
                       (self.tabela_colunas_1_inst, texto))

    try:
        driver.get(link)
        driver.find_element_by_xpath(pesquisa_xpath).send_keys(termo)
        driver.find_element_by_xpath(data_ini_xpath).send_keys(data_ini)
        driver.find_element_by_xpath(data_fim_xpath).send_keys(data_fim)
        driver.find_element_by_xpath(self.botao_pesquisar).click()
        time.sleep(1)
        salva_pagina()

        driver.find_element_by_xpath(botao_proximo_ini).click()
        time.sleep(1)
        salva_pagina()

        tentativas_restantes = 3
        while tentativas_restantes:
            try:
                driver.find_element_by_xpath(botao_proximo).click()
                time.sleep(2.5)
                salva_pagina()
                tentativas_restantes = 3  # success restores the budget
            except Exception:
                time.sleep(2)
                tentativas_restantes -= 1
    finally:
        # Release the browser even when a setup step raises.
        driver.quit()
def download_tj(self, data_ini, data_fim):
    """Search within a date range and store every cached decision.

    Uses the advanced search form with the fixed term ``'a'``.  On each
    result page, every link whose ``href`` matches ``?q=cache:`` is opened,
    its page text is inserted into ``self.tabela_colunas`` and the opened
    window is closed.  The first page is advanced with
    ``self.botao_proximo_iniXP``; later pages with ``self.botao_proximoXP``.
    The crawl ends when a "next" button cannot be clicked.

    Args:
        data_ini: Value typed into the initial judgment-date field.
        data_fim: Value typed into the final judgment-date field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_decisoes_da_pagina():
        # Visit every cache link of the current result page.
        for link in driver.find_elements_by_partial_link_text(''):
            try:
                href = link.get_attribute('href')
                if not href or not re.search(r'\?q\=cache\:', href):
                    continue
                link.click()
                driver.switch_to.window(driver.window_handles[-1])
                # Quotes are stripped from the source (as before) and from
                # the extracted text, so the "..." literal of the INSERT
                # cannot be terminated early.
                # NOTE(review): a parameterised query would be safer --
                # confirm the cursor's paramstyle before switching.
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self,
                    driver.page_source.replace('"', '')).replace('"', '')
                cursor.execute('INSERT INTO %s value ("%s");' %
                               (self.tabela_colunas, texto))
                # Close through WebDriver instead of an OS-level Ctrl+W,
                # which depends on the browser having keyboard focus.
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
            except Exception as e:
                # Best effort per link: report the error and return to the
                # results window so the remaining links are still usable.
                print(e)
                driver.switch_to.window(driver.window_handles[0])

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys('a')
        driver.find_element_by_xpath(
            self.botao_mostrar_pesquisa_avancada).click()
        driver.find_element_by_xpath(
            self.data_julgamento_inicialXP).send_keys(data_ini)
        driver.find_element_by_xpath(
            self.data_julgamento_finalXP).send_keys(data_fim)
        driver.find_element_by_xpath(self.botao_pesquisar_avancado).click()

        salva_decisoes_da_pagina()
        try:
            driver.find_element_by_xpath(self.botao_proximo_iniXP).click()
        except Exception:
            return

        while True:
            try:
                time.sleep(2)
                salva_decisoes_da_pagina()
                driver.find_element_by_xpath(self.botao_proximoXP).click()
            except Exception:
                break
    finally:
        # Ends the session and closes any window that was left open.
        driver.quit()
def download_tj(self, data_ini, data_fim):
    """Search for ``'direito'`` within a date range, solving captchas.

    After submitting the search, the captcha is answered with the value
    returned by ``crawler_jurisprudencia_tj.captcha``.  Each iteration then
    clicks the ``linkListaEspelhoAcordaos`` element, stores the page text
    in ``self.tabela_colunas`` and clicks ``self.botao_proximo_XP``.  Any
    failure is printed and followed by a new captcha attempt; the crawl
    ends when that attempt fails too.

    Args:
        data_ini: Value typed into the initial judgment-date field.
        data_fim: Value typed into the final judgment-date field.
    """
    crawler_jurisprudencia_tj.delete_audios(self)
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def resolve_captcha():
        # Request the captcha download, wait, type the answer and clean up.
        driver.find_element_by_xpath(self.link_download_captcha).click()
        time.sleep(5)
        driver.find_element_by_xpath(self.link_captcha).send_keys(
            crawler_jurisprudencia_tj.captcha(self))
        crawler_jurisprudencia_tj.delete_audios(self)

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys('direito')
        driver.find_element_by_xpath(
            self.data_julgamento_inicialXP).send_keys(data_ini)
        driver.find_element_by_xpath(
            self.data_julgamento_finalXP).send_keys(data_fim)
        driver.find_element_by_xpath(self.botao_pesquisar).click()
        time.sleep(5)
        resolve_captcha()

        while True:
            try:
                time.sleep(2)
                driver.find_element_by_class_name(
                    'linkListaEspelhoAcordaos').click()
                # Quotes are stripped from the source (as before) and from
                # the extracted text, so the "..." literal of the INSERT
                # cannot be terminated early.
                # NOTE(review): a parameterised query would be safer --
                # confirm the cursor's paramstyle before switching.
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self,
                    driver.page_source.replace('"', '')).replace('"', '')
                cursor.execute('INSERT INTO %s value ("%s");' %
                               (self.tabela_colunas, texto))
                driver.find_element_by_xpath(self.botao_proximo_XP).click()
            except Exception as e:
                print(e)
                try:
                    resolve_captcha()
                    time.sleep(5)
                except Exception:
                    # No captcha to solve either: end the crawl.
                    return
    finally:
        # The loop only exits through `return` or an exception, so the
        # browser is released here.
        driver.quit()
def download_tj(self, data_inicial='', data_final='', termo='a'):
    """Run a search and print the extracted text of the first result page.

    The extraction is attempted up to four times, waiting between
    attempts; each failure is printed.

    TODO: persistence is disabled in this method.  The original INSERT into
    ``justica_federal.jurisprudencia_trf1 (ementas)`` was commented out, so
    the text is only printed.

    Args:
        data_inicial: Value typed into the field at ``self.data_iniXP``.
        data_final: Value typed into the field at ``self.data_fimXP``.
        termo: Text typed into the free-search field.
    """
    # Kept although unused for now: it is needed once the INSERT is restored.
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_xpath(self.data_iniXP).send_keys(data_inicial)
        driver.find_element_by_xpath(self.data_fimXP).send_keys(data_final)
        time.sleep(2)
        driver.find_element_by_xpath(self.botao_pesquisar).click()

        for _ in range(4):
            try:
                time.sleep(2)
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self, driver.page_source)
                print(texto)
                break
            except Exception as e:
                print(e)
                time.sleep(3)
    finally:
        # The success path used to leave the browser open.
        driver.quit()
def download_tj(self, ano):
    """Search for ``'ementa'`` in a given year and store each full decision.

    On every result page the ``botaoLink`` elements from the third one
    onwards are clicked; the text of the window each one opens is inserted
    into ``self.tabela_colunas`` and that window is closed.  The crawl ends
    at the first error while listing links or advancing with
    ``self.botao_proximoXP``.

    Args:
        ano: Value typed into the judgment-year field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livreXP).send_keys('ementa')
        driver.find_element_by_xpath(self.ano_julgamentoXP).send_keys(ano)
        driver.find_element_by_xpath(self.botao_pesquisarXP).click()
        driver.find_element_by_xpath(self.link_decisoesXP).click()

        while True:
            try:
                time.sleep(1)
                links_inteiro_teor = driver.find_elements_by_class_name(
                    'botaoLink')
                # The first two matches are skipped, as in the original.
                for link in links_inteiro_teor[2:]:
                    try:
                        link.click()
                        driver.switch_to.window(driver.window_handles[-1])
                        time.sleep(1)
                        # NOTE(review): string-built SQL; quotes are
                        # stripped so the text cannot close the "..."
                        # literal.  A parameterised query would be safer.
                        texto = crawler_jurisprudencia_tj.extrai_texto_html(
                            self, driver.page_source).replace('"', '')
                        cursor.execute('INSERT INTO %s value ("%s");' %
                                       (self.tabela_colunas, texto))
                        time.sleep(1)
                        # Close through WebDriver instead of an OS-level
                        # Ctrl+W, which depends on keyboard focus.
                        driver.close()
                        driver.switch_to.window(driver.window_handles[0])
                    except Exception:
                        # Best effort per decision: go back to the results
                        # window and continue with the next link.
                        driver.switch_to.window(driver.window_handles[0])
                driver.find_element_by_xpath(self.botao_proximoXP).click()
                time.sleep(2)
            except Exception as e:
                print(e)
                break
    finally:
        # Ends the session and closes any window that was left open.
        driver.quit()
def download_tj(self, termo='processo'):
    """Store every result page of a search that needs a manual captcha.

    After typing ``termo``, the method blocks on a console prompt so the
    operator can solve the captcha in the browser; whatever is typed at the
    prompt is ignored.  Each page's text is then inserted into
    ``self.tabela_colunas`` and ``self.botao_proximoXP`` is clicked.  The
    crawl stops at the fourth failure in total (the counter is never
    reset).

    Args:
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        time.sleep(1)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
        # Wait for the operator; the answer itself is not used.
        input(
            'Resolva o captcha do Google e digite um número diferente de zero:\n'
        )

        contador_loop = 0
        while True:
            try:
                time.sleep(1)
                # NOTE(review): string-built SQL; quotes are stripped so
                # the text cannot close the "..." literal.  A parameterised
                # query would be safer -- confirm the cursor's paramstyle.
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self, driver.page_source).replace('"', '')
                cursor.execute('INSERT INTO %s value ("%s");' %
                               (self.tabela_colunas, texto))
                driver.find_element_by_xpath(self.botao_proximoXP).click()
            except Exception as e:
                print(e)
                time.sleep(2)
                if contador_loop > 2:
                    break
                contador_loop += 1
    finally:
        # Release the browser even when setup or the prompt is interrupted.
        driver.quit()
    time.sleep(1)
def download_tj(self, data_ini, data_fim, termo):
    """Search within a date range and store each decision page.

    After the search, the first link whose ``href`` matches
    ``pcjoDecisao.jsp?`` is opened and the driver switches to the second
    window.  Each page's text is inserted into ``self.tabela_colunas`` and
    ``self.botao_proximo`` is clicked, until the URL stops changing or the
    third failure occurs.

    Args:
        data_ini: Value typed into the initial judgment-date field.
        data_fim: Value typed into the final judgment-date field.
        termo: Text typed into the free-search field.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)

    def salva_pagina():
        # Double quotes are removed because the INSERT wraps the text in
        # "..." through plain string formatting.
        # NOTE(review): a parameterised query would be safer -- confirm the
        # cursor's paramstyle before switching.
        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source).replace('"', '')
        cursor.execute('INSERT INTO %s value ("%s");' %
                       (self.tabela_colunas, texto))

    try:
        driver.get(self.link_inicial)
        driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
        driver.find_element_by_xpath(
            self.data_julgamento_inicialXP).send_keys(data_ini)
        driver.find_element_by_xpath(
            self.data_julgamento_finalXP).send_keys(data_fim)
        driver.find_element_by_xpath(self.botao_pesquisar).click()
        time.sleep(4)

        for link in driver.find_elements_by_partial_link_text(''):
            try:
                href = link.get_attribute('href')
                if href and re.search(r'pcjoDecisao.jsp\?', href):
                    link.click()
                    driver.switch_to.window(driver.window_handles[1])
                    break
            except Exception:
                # Best effort: a link that cannot be inspected or opened is
                # skipped and the next one is tried.
                continue

        salva_pagina()
        ult_pag = driver.current_url
        driver.find_element_by_xpath(self.botao_proximo).click()

        falhas = 0
        while falhas <= 2:
            try:
                # An unchanged URL means the last click did not navigate.
                if ult_pag == driver.current_url:
                    break
                time.sleep(2)
                salva_pagina()
                ult_pag = driver.current_url
                driver.find_element_by_xpath(self.botao_proximo).click()
            except Exception as e:
                print(e)
                falhas += 1
    finally:
        # One cleanup point: the original closed the driver twice once the
        # retries were exhausted.  quit() also closes the search window.
        driver.quit()
def download_tj(self):
    """Search for ``'acordam'``, expand all results and append them to a file.

    The element with id ``btMaisResultados`` is clicked repeatedly until
    the click fails; the extracted text of the resulting page is then
    appended to ``jurisprudencia_tjap.txt`` in the working directory.
    """
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        time.sleep(2)
        driver.find_element_by_id(self.pesquisa_livre).send_keys('acordam')
        driver.find_element_by_id(self.botao_pesquisar).click()

        while True:
            try:
                time.sleep(4)
                driver.find_element_by_id('btMaisResultados').click()
            except Exception:
                # The button can no longer be clicked: stop expanding.
                break

        texto = crawler_jurisprudencia_tj.extrai_texto_html(
            self, driver.page_source)
        # The context manager closes (and flushes) the file, which the
        # original never did.
        with open('jurisprudencia_tjap.txt', 'a') as arq:
            arq.write(texto)
    finally:
        driver.quit()
def download_tj(self):
    """Search for ``'ementa'`` and store every result page, with manual help.

    Each iteration inserts the current page's text into
    ``self.tabela_colunas`` and clicks ``self.botao_proximoXP``.  When that
    fails, the operator is prompted on the console; whatever is typed is
    ignored and the iteration is retried.

    NOTE(review): the loop has no exit condition of its own; it ends only
    when an exception that is not an ``Exception`` subclass (e.g.
    ``KeyboardInterrupt``) or an error at the prompt escapes.  Confirm how
    the crawl is expected to stop.
    """
    cursor = cursorConexao()
    driver = webdriver.Chrome(self.chromedriver)
    try:
        driver.get(self.link_inicial)
        time.sleep(5)
        driver.find_element_by_id(self.pesquisa_livre).send_keys('ementa')
        driver.find_element_by_id(self.botao_pesquisar).click()

        while True:
            try:
                # Quotes are stripped from the source (as before) and from
                # the extracted text, so the "..." literal of the INSERT
                # cannot be terminated early.
                # NOTE(review): a parameterised query would be safer --
                # confirm the cursor's paramstyle before switching.
                texto = crawler_jurisprudencia_tj.extrai_texto_html(
                    self,
                    driver.page_source.replace('"', '')).replace('"', '')
                cursor.execute('INSERT INTO %s value ("%s");' %
                               (self.tabela_colunas, texto))
                driver.find_element_by_xpath(self.botao_proximoXP).click()
                time.sleep(2)
            except Exception:
                # Catching Exception (not a bare except) lets Ctrl+C stop
                # the crawl instead of triggering the prompt.
                input('ajude-me')
    finally:
        # The loop never breaks, so cleanup has to live here.
        driver.quit()