Пример #1
0
 def download_tj(self, termo='a'):
     """Crawl the search results for ``termo`` and store each page's text.

     Opens Chrome through ``self.chromedriver``, loads
     ``self.link_inicial``, types ``termo`` into the free-search field and
     submits the search. The text extracted from every result page is
     inserted into ``self.tabela_colunas``. Paging ends after more than
     three consecutive failures.

     Args:
         termo: Text typed into the free-search field.
     """
     cursor = cursorConexao()
     driver = webdriver.Chrome(self.chromedriver)

     def salva_pagina():
         # Double quotes are stripped because the text is embedded in a
         # double-quoted SQL literal.
         # NOTE(review): the statement is built by string formatting;
         # consider a parameterized query once the DB driver's placeholder
         # style is confirmed.
         texto = crawler_jurisprudencia_tj.extrai_texto_html(
             self, driver.page_source).replace('"', '')
         cursor.execute('INSERT INTO %s value ("%s");' %
                        (self.tabela_colunas, texto))

     try:
         driver.get(self.link_inicial)
         driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
         driver.find_element_by_xpath(self.botao_pesquisar).click()
         salva_pagina()
         # The first "next" click uses its own locator (botao_proximo_ini).
         driver.find_element_by_xpath(self.botao_proximo_ini).click()
         falhas_seguidas = 0
         while True:
             try:
                 time.sleep(1)
                 salva_pagina()
                 driver.find_element_by_xpath(self.botao_proximo).click()
                 # A successful page resets the failure streak.
                 falhas_seguidas = 0
             except Exception as e:
                 print(e)
                 falhas_seguidas += 1
                 time.sleep(5)
                 if falhas_seguidas > 3:
                     break
     finally:
         # Close the browser on every exit path, including errors raised
         # before the paging loop starts.
         driver.close()
Пример #2
0
	def download_tj(self, termo='ementa'):
		"""Crawl the search results for ``termo`` and store each page's text.

		Opens Chrome through ``self.chromedriver``, loads
		``self.link_inicial``, types ``termo`` into the free-search field and
		submits the search. The text extracted from every result page is
		inserted into ``self.tabela_colunas``.

		The paging loop has no stop condition: it runs until an exception
		that is not an ``Exception`` subclass (for example
		``KeyboardInterrupt``) propagates. The browser is closed on the way
		out.

		Args:
			termo: Text typed into the free-search field.
		"""
		cursor = cursorConexao()
		driver = webdriver.Chrome(self.chromedriver)

		def salva_pagina():
			# Double quotes are removed from the raw page source before the
			# text is extracted, because the value is embedded in a
			# double-quoted SQL literal.
			# NOTE(review): the statement is built by string formatting;
			# consider a parameterized query once the DB driver's
			# placeholder style is confirmed.
			texto = crawler_jurisprudencia_tj.extrai_texto_html(self,(driver.page_source).replace('"',''))
			cursor.execute('INSERT INTO %s value ("%s");' % (self.tabela_colunas,texto))

		try:
			driver.get(self.link_inicial)
			driver.find_element_by_id(self.pesquisa_livre).send_keys(termo)
			driver.find_element_by_xpath(self.botao_pesquisar).click()
			time.sleep(1)
			salva_pagina()
			driver.find_element_by_xpath(self.botao_proximo_iniXP).click()
			time.sleep(1)
			salva_pagina()
			# NOTE(review): this overwrites the instance attribute, so a
			# second call on the same object starts with this locator
			# instead of the originally configured one - confirm intended.
			self.botao_proximo_iniXP = '//*[@id="navigator"]/div[1]/a[6]'
			driver.find_element_by_xpath(self.botao_proximo_iniXP).click()
			while True:
				try:
					salva_pagina()
					driver.find_element_by_xpath(self.botao_proximoXP).click()
					time.sleep(2)
				except Exception:
					# Recovery: go back one history entry and retry the
					# "next" click until it works.
					recuperado = False
					while not recuperado:
						try:
							time.sleep(1)
							driver.execute_script("window.history.go(-1)")
							driver.find_element_by_xpath(self.botao_proximoXP).click()
							recuperado = True
						except Exception:
							# Keep retrying; only Exception subclasses are
							# swallowed so Ctrl+C can still stop the crawl.
							continue
		finally:
			driver.close()
    def download_tj(self, termo='acordam'):
        """Crawl the search results for ``termo`` and store each page's text.

        Opens Chrome through ``self.chromedriver``, loads
        ``self.link_inicial``, types ``termo`` into the free-search field and
        submits the search. Pages are visited by writing
        ``self.contador_paginas`` into the element located by
        ``self.botao_proximoXP`` and clicking ``self.botao_proximo``; the
        counter is incremented on the instance after each navigation. The
        crawl stops after more than three failures in total (the failure
        counter is never reset).

        Args:
            termo: Text typed into the free-search field.
        """
        cursor = cursorConexao()
        driver = webdriver.Chrome(self.chromedriver)

        def salva_pagina():
            # Double quotes are removed from the raw page source before the
            # text is extracted, because the value is embedded in a
            # double-quoted SQL literal.
            # NOTE(review): the statement is built by string formatting;
            # consider a parameterized query once the DB driver's
            # placeholder style is confirmed.
            texto = crawler_jurisprudencia_tj.extrai_texto_html(
                self, (driver.page_source).replace('"', ''))
            cursor.execute('INSERT INTO %s value ("%s");' %
                           (self.tabela_colunas, texto))

        try:
            driver.get(self.link_inicial)
            driver.find_element_by_id(self.pesquisa_livre).send_keys(termo)
            driver.find_element_by_id(self.botao_pesquisar).click()
            salva_pagina()
            falhas = 0
            while True:
                try:
                    driver.find_element_by_xpath(self.botao_proximoXP).clear()
                    driver.find_element_by_xpath(
                        self.botao_proximoXP).send_keys(
                            str(self.contador_paginas))
                    driver.find_element_by_xpath(self.botao_proximo).click()
                    time.sleep(2)
                    self.contador_paginas += 1
                    salva_pagina()
                except Exception:
                    falhas += 1
                    if falhas > 3:
                        break
                    time.sleep(5)
                    # Step back one history entry before the next attempt.
                    driver.execute_script("window.history.go(-1)")
        finally:
            # Also reached when setup or the history step above raises, so
            # the browser never stays open after an error.
            driver.close()
Пример #4
0
 def download_tj(self, data_julg_ini, data_julg_fim, termo='acordam'):
     """Crawl the results for ``termo`` within a judgment-date range.

     Opens Chrome through ``self.chromedriver``, loads
     ``self.link_inicial``, fills the free-search field and both date
     fields, and clicks the third element named ``self.botao_pesquisar``.
     The text extracted from each result page is inserted into
     ``justica_estadual.jurisprudencia_ma``.

     Args:
         data_julg_ini: Value typed into the initial judgment-date field.
         data_julg_fim: Value typed into the final judgment-date field.
         termo: Text typed into the free-search field.
     """
     cursor = cursorConexao()
     driver = webdriver.Chrome(self.chromedriver)
     try:
         driver.get(self.link_inicial)
         driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
         driver.find_element_by_name(
             self.data_julgamento_inicial).send_keys(data_julg_ini)
         driver.find_element_by_name(
             self.data_julgamento_final).send_keys(data_julg_fim)
         driver.find_elements_by_name(self.botao_pesquisar)[2].click()
         while True:
             try:
                 time.sleep(3)
                 links_proximos = driver.find_elements_by_class_name(
                     'linkQuery')
                 texto = crawler_jurisprudencia_tj.extrai_texto_html(
                     self, driver.page_source)
                 # Double quotes are stripped because the text is embedded
                 # in a double-quoted SQL literal.
                 # NOTE(review): string-built SQL; consider a
                 # parameterized query once the DB driver's placeholder
                 # style is confirmed.
                 cursor.execute(
                     'INSERT INTO justica_estadual.jurisprudencia_ma (ementas) value("%s")'
                     % texto.replace('"', ''))
                 try:
                     # The crawl ends when the last 'linkQuery' element's
                     # text parses as an integer; presumably a non-numeric
                     # last link means another page exists - confirm.
                     int(links_proximos[-1].text)
                 except Exception:
                     driver.find_element_by_id('pagination').click()
                 else:
                     break
             except Exception as e:
                 print(e)
                 break
     finally:
         # Single close for every exit path, including setup failures.
         driver.close()
Пример #5
0
	def download_1_inst(self,data_ini, data_fim, termo = 'a'):
		"""Crawl first-instance results from the TJSP ``cjpg`` search page.

		Opens Chrome through ``self.chromedriver``, loads the search page,
		fills the free-search field and the date range, and submits the
		search. The text extracted from each result page is inserted into
		``self.tabela_colunas_1_inst``. Paging stops after three consecutive
		failed attempts to advance.

		Args:
			data_ini: Value typed into the start-date field.
			data_fim: Value typed into the end-date field.
			termo: Text typed into the free-search field.
		"""
		botao_proximo = '//*[@id="resultados"]/table[1]/tbody/tr[1]/td[2]/div/a[6]'
		botao_proximo_ini = '//*[@id="resultados"]/table[1]/tbody/tr[1]/td[2]/div/a[5]'
		data_ini_xpath = '//*[@id="iddadosConsulta.dtInicio"]'
		data_fim_xpath = '//*[@id="iddadosConsulta.dtFim"]'
		link = 'https://esaj.tjsp.jus.br/cjpg/pesquisar.do'
		pesquisa_xpath = '//*[@id="iddadosConsulta.pesquisaLivre"]'
		cursor = cursorConexao()
		driver = webdriver.Chrome(self.chromedriver)

		def salva_pagina():
			# Double quotes are stripped because the text is embedded in a
			# double-quoted SQL literal.
			# NOTE(review): string-built SQL; consider a parameterized
			# query once the DB driver's placeholder style is confirmed.
			texto = crawler_jurisprudencia_tj.extrai_texto_html(self,(driver.page_source)).replace('"','')
			cursor.execute('INSERT INTO %s value ("%s");' % (self.tabela_colunas_1_inst,texto))

		try:
			driver.get(link)
			driver.find_element_by_xpath(pesquisa_xpath).send_keys(termo)
			driver.find_element_by_xpath(data_ini_xpath).send_keys(data_ini)
			driver.find_element_by_xpath(data_fim_xpath).send_keys(data_fim)
			driver.find_element_by_xpath(self.botao_pesquisar).click()
			time.sleep(1)
			salva_pagina()
			# The first "next" click uses a different link position (a[5]).
			driver.find_element_by_xpath(botao_proximo_ini).click()
			time.sleep(1)
			salva_pagina()
			tentativas_restantes = 3
			while tentativas_restantes:
				try:
					driver.find_element_by_xpath(botao_proximo).click()
					time.sleep(2.5)
					salva_pagina()
					# A successful page restores the full retry budget.
					tentativas_restantes = 3
				except Exception:
					time.sleep(2)
					tentativas_restantes -= 1
		finally:
			# Close the browser even when setup fails before the loop.
			driver.close()
 def download_tj(self, data_ini, data_fim):
     """Crawl decisions in a judgment-date range via their cache links.

     Opens Chrome through ``self.chromedriver``, loads
     ``self.link_inicial``, types ``'a'`` into the free-search field, opens
     the advanced search, fills the date range and submits. On every result
     page each link whose ``href`` matches ``?q=cache:`` is opened, its text
     is inserted into ``self.tabela_colunas``, and the opened tab is closed.
     The crawl ends when a "next" click fails.

     Args:
         data_ini: Value typed into the initial judgment-date field.
         data_fim: Value typed into the final judgment-date field.
     """
     cursor = cursorConexao()
     driver = webdriver.Chrome(self.chromedriver)

     def salva_decisoes_da_pagina():
         """Open every cache link on the current page and store its text."""
         for link in driver.find_elements_by_partial_link_text(''):
             try:
                 href = link.get_attribute('href')
                 # Anchors without an href cannot match; skip them instead
                 # of relying on a swallowed TypeError.
                 if not href or not re.search(r'\?q\=cache\:', href):
                     continue
                 link.click()
                 driver.switch_to.window(driver.window_handles[-1])
                 # Double quotes are removed from the raw page source
                 # because the text is embedded in a double-quoted SQL
                 # literal.
                 # NOTE(review): string-built SQL; consider a
                 # parameterized query once the DB driver's placeholder
                 # style is confirmed.
                 texto = crawler_jurisprudencia_tj.extrai_texto_html(
                     self, (driver.page_source).replace('"', ''))
                 cursor.execute('INSERT INTO %s value ("%s");' %
                                (self.tabela_colunas, texto))
                 # NOTE(review): the tab is closed with a simulated Ctrl+W
                 # keystroke, which presumably needs the browser window to
                 # have keyboard focus - confirm.
                 pyautogui.hotkey('ctrl', 'w')
                 driver.switch_to.window(driver.window_handles[0])
             except Exception as e:
                 # Best effort per link: report the problem and return to
                 # the results window so the remaining links and the
                 # "next" button are looked up in the right place.
                 print(e)
                 driver.switch_to.window(driver.window_handles[0])

     try:
         driver.get(self.link_inicial)
         driver.find_element_by_xpath(self.pesquisa_livre).send_keys('a')
         driver.find_element_by_xpath(
             self.botao_mostrar_pesquisa_avancada).click()
         driver.find_element_by_xpath(
             self.data_julgamento_inicialXP).send_keys(data_ini)
         driver.find_element_by_xpath(
             self.data_julgamento_finalXP).send_keys(data_fim)
         driver.find_element_by_xpath(self.botao_pesquisar_avancado).click()
         salva_decisoes_da_pagina()
         try:
             # The first "next" click uses its own locator.
             driver.find_element_by_xpath(self.botao_proximo_iniXP).click()
         except Exception:
             return
         while True:
             try:
                 time.sleep(2)
                 salva_decisoes_da_pagina()
                 driver.find_element_by_xpath(self.botao_proximoXP).click()
             except Exception:
                 break
     finally:
         # Runs for the early return, the loop exit and setup failures.
         driver.close()
	def download_tj(self,data_ini,data_fim):
		"""Crawl decisions in a judgment-date range on a captcha-gated site.

		Calls ``delete_audios`` first, then opens Chrome through
		``self.chromedriver``, loads ``self.link_inicial``, searches for
		``'direito'`` within the date range and submits the captcha answer
		returned by ``crawler_jurisprudencia_tj.captcha``. Each result page's
		text is inserted into ``self.tabela_colunas``. When a page fails,
		the captcha step is repeated; if that also fails the crawl ends.

		Args:
			data_ini: Value typed into the initial judgment-date field.
			data_fim: Value typed into the final judgment-date field.
		"""
		crawler_jurisprudencia_tj.delete_audios(self)
		cursor = cursorConexao()
		driver = webdriver.Chrome(self.chromedriver)

		def resolve_captcha():
			# Presumably the click downloads the audio captcha that
			# captcha() then solves - confirm against those helpers.
			driver.find_element_by_xpath(self.link_download_captcha).click()
			time.sleep(5)
			driver.find_element_by_xpath(self.link_captcha).send_keys(crawler_jurisprudencia_tj.captcha(self))
			crawler_jurisprudencia_tj.delete_audios(self)

		try:
			driver.get(self.link_inicial)
			driver.find_element_by_xpath(self.pesquisa_livre).send_keys('direito')
			driver.find_element_by_xpath(self.data_julgamento_inicialXP).send_keys(data_ini)
			driver.find_element_by_xpath(self.data_julgamento_finalXP).send_keys(data_fim)
			driver.find_element_by_xpath(self.botao_pesquisar).click()
			time.sleep(5)
			resolve_captcha()
			while True:
				try:
					time.sleep(2)
					driver.find_element_by_class_name('linkListaEspelhoAcordaos').click()
					# Double quotes are removed from the raw page source
					# because the text is embedded in a double-quoted SQL
					# literal.
					# NOTE(review): string-built SQL; consider a
					# parameterized query once the DB driver's placeholder
					# style is confirmed.
					texto = crawler_jurisprudencia_tj.extrai_texto_html(self,(driver.page_source).replace('"',''))
					cursor.execute('INSERT INTO %s value ("%s");' % (self.tabela_colunas,texto))
					driver.find_element_by_xpath(self.botao_proximo_XP).click()
				except Exception as e:
					print(e)
					try:
						resolve_captcha()
						time.sleep(5)
					except Exception:
						# The captcha step failed as well: stop the crawl.
						return
		finally:
			# The only place the browser is closed; it covers the return
			# above and any error raised during setup.
			driver.close()
    def download_tj(self, data_inicial='', data_final='', termo='a'):
        """Run one search and print the extracted text of the results page.

        Opens Chrome through ``self.chromedriver``, loads
        ``self.link_inicial``, fills the free-search field and the date
        range, and submits the search. The first successfully extracted page
        is printed and the method stops; extraction is retried after a
        failure and abandoned after more than three failures.

        Args:
            data_inicial: Value typed into the start-date field.
            data_final: Value typed into the end-date field.
            termo: Text typed into the free-search field.
        """
        # The cursor is opened but not used yet; see the TODO below.
        cursor = cursorConexao()
        driver = webdriver.Chrome(self.chromedriver)
        try:
            driver.get(self.link_inicial)
            driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
            driver.find_element_by_xpath(
                self.data_iniXP).send_keys(data_inicial)
            driver.find_element_by_xpath(
                self.data_fimXP).send_keys(data_final)
            time.sleep(2)
            driver.find_element_by_xpath(self.botao_pesquisar).click()
            falhas = 0
            while True:
                try:
                    time.sleep(2)
                    texto = crawler_jurisprudencia_tj.extrai_texto_html(
                        self, driver.page_source)
                    print(texto)
                    # TODO: persistence is still disabled. The intended
                    # statement was:
                    # cursor.execute('INSERT INTO justica_federal.jurisprudencia_trf1 (ementas) value("%s")' % texto.replace('"',''))
                    break
                except Exception as e:
                    print(e)
                    time.sleep(3)
                    falhas += 1
                    if falhas > 3:
                        break
        finally:
            # Close the browser on success as well as on failure.
            driver.close()
Пример #9
0
	def download_tj(self, ano):
		"""Crawl the full-text decisions for a judgment year.

		Opens Chrome through ``self.chromedriver``, loads
		``self.link_inicial``, searches for ``'ementa'`` with ``ano`` in the
		judgment-year field and opens the decisions link. On each result
		page every ``botaoLink`` element from the third one onwards is
		clicked, the text of the window it opens is inserted into
		``self.tabela_colunas``, and the tab is closed. The crawl ends on
		the first error outside a single link.

		Args:
			ano: Value typed into the judgment-year field.
		"""
		cursor = cursorConexao()
		driver = webdriver.Chrome(self.chromedriver)
		try:
			driver.get(self.link_inicial)
			driver.find_element_by_xpath(self.pesquisa_livreXP).send_keys('ementa')
			driver.find_element_by_xpath(self.ano_julgamentoXP).send_keys(ano)
			driver.find_element_by_xpath(self.botao_pesquisarXP).click()
			driver.find_element_by_xpath(self.link_decisoesXP).click()
			while True:
				try:
					time.sleep(1)
					links_inteiro_teor = driver.find_elements_by_class_name('botaoLink')
					# The first two 'botaoLink' elements are skipped.
					for link in links_inteiro_teor[2:]:
						try:
							link.click()
							driver.switch_to.window(driver.window_handles[-1])
							time.sleep(1)
							# Double quotes are stripped because the text is
							# embedded in a double-quoted SQL literal.
							# NOTE(review): string-built SQL; consider a
							# parameterized query once the DB driver's
							# placeholder style is confirmed.
							texto = crawler_jurisprudencia_tj.extrai_texto_html(self,driver.page_source).replace('"','')
							cursor.execute('INSERT INTO %s value ("%s");' % (self.tabela_colunas,texto))
							time.sleep(1)
							# NOTE(review): the tab is closed with a
							# simulated Ctrl+W keystroke, which presumably
							# needs the browser window to have keyboard
							# focus - confirm.
							pyautogui.hotkey('ctrl','w')
							driver.switch_to.window(driver.window_handles[0])
						except Exception:
							# Best effort per link: return to the results
							# window and move on to the next link.
							driver.switch_to.window(driver.window_handles[0])
					driver.find_element_by_xpath(self.botao_proximoXP).click()
					time.sleep(2)
				except Exception as e:
					print(e)
					break
		finally:
			# Close the browser even when setup fails before the loop.
			driver.close()
Пример #10
0
 def download_tj(self, termo='processo'):
     """Crawl results for ``termo`` after a manually solved captcha.

     Opens Chrome through ``self.chromedriver``, loads
     ``self.link_inicial`` and types ``termo`` into the free-search field.
     It then blocks on a console prompt so the operator can solve the
     captcha in the browser. After that, each page's text is inserted into
     ``self.tabela_colunas`` and the "next" button is clicked. The crawl
     stops on the fourth failure (the failure counter is never reset).

     Args:
         termo: Text typed into the free-search field.
     """
     cursor = cursorConexao()
     driver = webdriver.Chrome(self.chromedriver)
     try:
         driver.get(self.link_inicial)
         time.sleep(1)
         driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
         # The prompt only pauses execution: the typed value is ignored, so
         # any input (even an empty line) resumes the crawl.
         input(
             'Resolva o captcha do Google e digite um número diferente de zero:\n'
         )
         falhas = 0
         while True:
             try:
                 time.sleep(1)
                 # Double quotes are stripped because the text is embedded
                 # in a double-quoted SQL literal.
                 # NOTE(review): string-built SQL; consider a
                 # parameterized query once the DB driver's placeholder
                 # style is confirmed.
                 texto = crawler_jurisprudencia_tj.extrai_texto_html(
                     self, driver.page_source).replace('"', '')
                 cursor.execute('INSERT INTO %s value ("%s");' %
                                (self.tabela_colunas, texto))
                 driver.find_element_by_xpath(self.botao_proximoXP).click()
             except Exception as e:
                 print(e)
                 time.sleep(2)
                 if falhas > 2:
                     break
                 falhas += 1
     finally:
         # Close the browser even when setup or the prompt raises.
         driver.close()
     time.sleep(1)
 def download_tj(self, data_ini, data_fim, termo):
     """Crawl decisions for ``termo`` within a judgment-date range.

     Opens Chrome through ``self.chromedriver``, loads
     ``self.link_inicial``, fills the free-search field and the date range,
     and submits the search. The first link whose ``href`` matches
     ``pcjoDecisao.jsp?`` is clicked and the driver switches to the second
     window. From there each page's text is inserted into
     ``self.tabela_colunas`` and ``self.botao_proximo`` is clicked until
     the URL stops changing or more than two failures occur.

     Args:
         data_ini: Value typed into the initial judgment-date field.
         data_fim: Value typed into the final judgment-date field.
         termo: Text typed into the free-search field.
     """
     cursor = cursorConexao()
     driver = webdriver.Chrome(self.chromedriver)

     def salva_pagina():
         # Double quotes are stripped because the text is embedded in a
         # double-quoted SQL literal.
         # NOTE(review): string-built SQL; consider a parameterized query
         # once the DB driver's placeholder style is confirmed.
         texto = crawler_jurisprudencia_tj.extrai_texto_html(
             self, (driver.page_source)).replace('"', '')
         cursor.execute('INSERT INTO %s value ("%s");' %
                        (self.tabela_colunas, texto))

     try:
         driver.get(self.link_inicial)
         driver.find_element_by_xpath(self.pesquisa_livre).send_keys(termo)
         driver.find_element_by_xpath(
             self.data_julgamento_inicialXP).send_keys(data_ini)
         driver.find_element_by_xpath(
             self.data_julgamento_finalXP).send_keys(data_fim)
         driver.find_element_by_xpath(self.botao_pesquisar).click()
         time.sleep(4)
         for link in driver.find_elements_by_partial_link_text(''):
             try:
                 href = link.get_attribute('href')
                 # Anchors without an href cannot match; skip them instead
                 # of relying on a swallowed TypeError.
                 if href and re.search(r'pcjoDecisao.jsp\?', href):
                     link.click()
                     driver.switch_to.window(driver.window_handles[1])
                     break
             except Exception as e:
                 # Best effort: report the problem and try the next link.
                 print(e)
         salva_pagina()
         ult_pag = driver.current_url
         driver.find_element_by_xpath(self.botao_proximo).click()
         falhas = 0
         while True:
             try:
                 # An unchanged URL after clicking "next" ends the crawl.
                 if ult_pag == driver.current_url:
                     break
                 time.sleep(2)
                 salva_pagina()
                 ult_pag = driver.current_url
                 driver.find_element_by_xpath(self.botao_proximo).click()
             except Exception as e:
                 print(e)
                 falhas += 1
                 if falhas > 2:
                     break
     finally:
         # Single shutdown point. The original closed the window inside the
         # loop and again after it, and the second close() fails on a
         # window that is already gone. quit() is used because two windows
         # may be open here and close() only closes the current one.
         driver.quit()
	def download_tj(self):
		"""Search for ``'acordam'`` and append the results to a text file.

		Opens Chrome through ``self.chromedriver``, loads
		``self.link_inicial``, submits the search and keeps clicking the
		``btMaisResultados`` element until the click fails. The text
		extracted from the final page is appended to
		``jurisprudencia_tjap.txt`` in the current working directory.
		"""
		driver = webdriver.Chrome(self.chromedriver)
		try:
			driver.get(self.link_inicial)
			time.sleep(2)
			driver.find_element_by_id(self.pesquisa_livre).send_keys('acordam')
			driver.find_element_by_id(self.botao_pesquisar).click()
			while True:
				try:
					time.sleep(4)
					driver.find_element_by_id('btMaisResultados').click()
				except Exception:
					# A failed click ends the loop; presumably no more
					# results remain to load - confirm.
					break
			# The context manager flushes and closes the file handle, which
			# the original left open.
			with open('jurisprudencia_tjap.txt','a') as arq:
				arq.write(crawler_jurisprudencia_tj.extrai_texto_html(self,driver.page_source))
		finally:
			driver.close()
Пример #13
0
	def download_tj(self):
		"""Crawl the results for ``'ementa'`` with operator help on errors.

		Opens Chrome through ``self.chromedriver``, loads
		``self.link_inicial`` and submits the search. Each page's text is
		inserted into ``self.tabela_colunas`` and the "next" button is
		clicked. When a page fails, the method blocks on an ``'ajude-me'``
		console prompt and then tries again.

		The loop has no stop condition: it runs until an exception that is
		not an ``Exception`` subclass (for example ``KeyboardInterrupt``),
		or an error raised by the prompt itself, propagates. The browser is
		closed on the way out.
		"""
		cursor = cursorConexao()
		driver = webdriver.Chrome(self.chromedriver)
		try:
			driver.get(self.link_inicial)
			time.sleep(5)
			driver.find_element_by_id(self.pesquisa_livre).send_keys('ementa')
			driver.find_element_by_id(self.botao_pesquisar).click()
			while True:
				try:
					# Double quotes are removed from the raw page source
					# because the text is embedded in a double-quoted SQL
					# literal.
					# NOTE(review): string-built SQL; consider a
					# parameterized query once the DB driver's placeholder
					# style is confirmed.
					texto = crawler_jurisprudencia_tj.extrai_texto_html(self,(driver.page_source).replace('"',''))
					cursor.execute('INSERT INTO %s value ("%s");' % (self.tabela_colunas,texto))
					driver.find_element_by_xpath(self.botao_proximoXP).click()
					time.sleep(2)
				except Exception:
					# Pause until the operator presses Enter; the typed
					# value is ignored.
					input('ajude-me')
		finally:
			# The original close() sat after an endless loop and could
			# never run; this one runs whenever the loop is interrupted.
			driver.close()