def getDecisoes(cls):
    """Fetch everything stored in the class-level collection.

    Returns:
        The value produced by ``cls.collection.find()``, or ``None`` when
        that call raises (the exception is reported through ``createLog``).
    """
    try:
        found = cls.collection.find()
    except Exception as e:
        createLog('error', e)
        return None
    else:
        return found
def getNews(cls):
    """Fetch every document from the ``news`` collection of ``cls.db``.

    Returns:
        The value produced by ``cls.db.news.find()``, or ``None`` when the
        query raises (the exception is reported through ``createLog``).
    """
    try:
        return cls.db.news.find()
    except Exception as e:
        createLog('error', e)
    # Only reached after a logged failure.
    return None
def createCSV(file, headers, data):
    """Write a header row followed by the data rows to a CSV file.

    Args:
        file: Path of the file to create or overwrite.
        headers: Sequence written as the first row.
        data: Iterable of rows; each row is handed to the CSV writer as-is.

    Returns:
        ``False`` when the file cannot be opened (the error is reported
        through ``createLog``); ``None`` once every row has been written.
    """
    try:
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation recommends for files passed to csv.writer.
        handle = open(file, 'w', newline='')
    except Exception as e:
        createLog('error', e)
        return False

    # The context manager guarantees the handle is flushed and closed, even
    # if writing a row raises; previously the file object was never closed.
    with handle:
        output = csv.writer(handle)
        output.writerow(headers)
        output.writerows(data)
def setNewsBody(cls, link, body):
    """Store the article body on the news document identified by ``link``.

    Uses ``update_one`` instead of the legacy ``Collection.update`` call,
    which PyMongo deprecated in 3.x and removed in 4.0. The rest of this
    file already relies on the 3.x-style ``insert_one`` API.

    Args:
        link: Value matched against the ``link`` field of the document.
        body: Value stored in the document's ``body`` field via ``$set``.

    Returns:
        The raw server result document of the update (the same kind of dict
        the legacy ``update`` call returned), or ``False`` when the update
        raises (the exception is reported through ``createLog``).
    """
    try:
        outcome = cls.db.news.update_one({"link": link}, {'$set': {'body': body}})
        # raw_result keeps the dict-shaped return value callers received
        # from the legacy update() call.
        return outcome.raw_result
    except Exception as e:
        createLog('error', e)
        return False
def saveAtoProcurador(cls, data):
    """Insert one ``DTODorjAtosProcurador`` into the class-level collection.

    Args:
        data: Object to persist; its ``__dict__`` is what gets inserted.

    Returns:
        The value returned by ``cls.collection.insert_one`` on success.
        ``False`` when ``data`` is not a ``DTODorjAtosProcurador`` or when
        the insert raises; both cases are reported through ``createLog``.
    """
    if isinstance(data, DTODorjAtosProcurador):
        try:
            return cls.collection.insert_one(data.__dict__)
        except Exception as e:
            createLog('error', e)
            return False

    # Wrong payload type: log what was received and refuse to store it.
    message = "saveAtoProcurador recebeu objeto do tipo {}".format(type(data))
    createLog('error', message)
    return False
def saveDecisoes(cls, data):
    """Insert one ``DTODorjDecisoesConselho`` into the class-level collection.

    Args:
        data: Object to persist; its ``__dict__`` is what gets inserted.

    Returns:
        The value returned by ``cls.collection.insert_one`` on success.
        ``False`` when ``data`` is not a ``DTODorjDecisoesConselho`` or when
        the insert raises; both cases are reported through ``createLog``.
    """
    if isinstance(data, DTODorjDecisoesConselho):
        try:
            return cls.collection.insert_one(data.__dict__)
        except Exception as e:
            createLog('error', e)
            return False

    # Wrong payload type: log what was received and refuse to store it.
    message = "saveDecisoes recebeu objeto do tipo {}".format(type(data))
    createLog('error', message)
    return False
def saveNewsHeaders(cls, data):
    """Insert one ``DTOJornalJuridHeaders`` into the ``news`` collection.

    Args:
        data: Object to persist; its ``__dict__`` is what gets inserted.

    Returns:
        The value returned by ``cls.db.news.insert_one`` on success.
        ``False`` when ``data`` is not a ``DTOJornalJuridHeaders`` or when
        the insert raises; both cases are reported through ``createLog``.
    """
    if isinstance(data, DTOJornalJuridHeaders):
        try:
            return cls.db.news.insert_one(data.__dict__)
        except Exception as e:
            createLog('error', e)
            return False

    # Wrong payload type: log what was received and refuse to store it.
    message = "saveNewsHeaders recebeu objeto do tipo {}".format(type(data))
    createLog('error', message)
    return False
def __init__(self, category, subcategory = None):
    """Download ``{baseUrl}/{category}/{subcategory}`` and parse it.

    On success the parsed document is stored in ``self.soup``. When the
    request raises, or answers with status 404 or 500, the problem is
    reported through ``createLog`` and ``self.soup`` is left unassigned.

    Args:
        category: First path segment appended to ``JornalJurid.baseUrl``.
        subcategory: Optional second path segment; a falsy value yields an
            empty segment (the URL then ends with a trailing slash).
    """
    try:
        url = "{}/{}/{}".format(JornalJurid.baseUrl, category, subcategory or "")
        r = requests.get(url)
        if r.status_code in (404, 500):
            createLog('error', 'O endereço {} não foi encontrado'.format(url))
            return
    except Exception as e:
        createLog('error', e)
        return

    # NOTE(review): r.text is already decoded text, while Beautiful Soup
    # documents from_encoding as a hint for byte input - confirm whether the
    # ISO-8859-1 setting takes effect here or r.content was intended.
    self.soup = BeautifulSoup(r.text, 'html.parser', from_encoding="ISO-8859-1")
def getDecisoes(self):
    """Return the list of decisions taken by the Council.

    Starts at the last element of ``self.soup`` whose class is ``ft19`` (the
    anchor) and walks the elements that follow it. An element whose text
    contains both "Processo" and "nº" opens a new
    ``DTODorjDecisoesConselho``; an element whose class is one of the
    continuation classes has its text appended to the ``content`` of the
    most recent decision. When a page runs out of elements the walk resumes
    at the first ``<p>`` of the next ``page{N}-div`` container.

    Returns:
        list: The collected ``DTODorjDecisoesConselho`` objects (possibly
            empty).
        bool: ``False`` when no ``ft19`` element exists or the text of the
            last one is not exactly 39 characters long.
    """
    def _hop(element):
        # The walk advances two siblings at a time - presumably to skip the
        # whitespace text node between tags (TODO confirm). Guarding the
        # first hop avoids an AttributeError at the end of a sibling chain.
        between = element.next_sibling
        return None if between is None else between.next_sibling

    candidates = self.soup.find_all(attrs={"class": "ft19"})
    # An empty result used to raise IndexError; treat it like any other
    # "anchor not found" situation.
    if not candidates or len(candidates[-1].get_text()) != 39:
        print("Não foi possível encontrar a ancora")
        return False
    anchor = candidates[-1]

    # Classes (as the single-item lists found in ``element['class']``) that
    # mark a line continuing the previous decision.
    lineBreakIndicators = [['ft119'], ['ft120'], ['ft121'],
                           ['ft219'], ['ft220'], ['ft221'], ['ft222'],
                           ['ft319'], ['ft320'], ['ft321'], ['ft322']]

    currentEl = _hop(anchor)
    currentPage = 1
    DecisoesList = []

    while currentEl:
        text = currentEl.get_text()
        if "Processo" in text and "nº" in text:
            numberPos = text.find('nº')
            # The 14 characters after "nº" are taken as the process number;
            # everything from offset +17 on is the decision text.
            number = text[numberPos+2:numberPos+16]
            DecisoesList.append(DTODorjDecisoesConselho(number, text[numberPos+17:]))
            createLog('debug', "Novo Processo || {}".format(text))
        elif currentEl['class'] in lineBreakIndicators:
            DecisoesList[-1].content += text
            createLog('debug', "Continuação || {}".format(text))

        currentEl = _hop(currentEl)
        if currentEl is None:
            # Current page exhausted: continue on the next page container.
            print("Final da página")
            currentPage += 1
            page = self.soup.find(id="page{}-div".format(str(currentPage)))
            if page is None:
                print("Final do documento")
                return DecisoesList
            currentEl = page.find('p')

    # Reached when nothing follows the anchor or a page holds no <p>;
    # return what was collected instead of falling through to None.
    return DecisoesList
def getNewsBody(cls, link):
    """Return the full article text found at ``cls.baseUrl + link``.

    Args:
        link: Path appended to ``cls.baseUrl`` to build the article URL.

    Returns:
        The text of the element with class ``text-article`` when present;
        otherwise the text of the first ``<p>`` inside the element with
        class ``header-view``. ``None`` when neither element exists, when
        the request raises, or when it answers with status 404 or 500
        (the last two are reported through ``createLog``).
    """
    url = cls.baseUrl + link
    try:
        r = requests.get(url)
        if r.status_code in (404, 500):
            createLog('error', 'O endereço {} não foi encontrado'.format(url))
            return None
    except Exception as e:
        createLog('error', e)
        return None

    # NOTE(review): r.text is already decoded text, while Beautiful Soup
    # documents from_encoding as a hint for byte input - confirm whether the
    # ISO-8859-1 setting takes effect here or r.content was intended.
    soup = BeautifulSoup(r.text, 'html.parser', from_encoding="ISO-8859-1")

    article = soup.find(class_="text-article")
    if article is not None:
        return article.text

    header = soup.find(class_="header-view")
    if header is not None:
        return header.find('p').text

    return None
def getAtosProcurador(self):
    """Return a list with the DTOs for the acts of the Procurador-Geral.

    The anchor is the last ``ft110`` element of ``self.soup`` whose text is
    24 characters long and contains "PROCURADOR-GERAL". The elements that
    follow it are classified by their ``class`` attribute:

    * ``ft110`` with 13 characters of text: a date line; its last 10
      characters are parsed as ``%d.%m.%Y`` and apply to the acts after it.
    * ``ft13``: starts a new ``DTODorjAtosProcurador`` with the current date.
    * one of the continuation classes: text appended to the ``content`` of
      the most recent act.
    * anything else: ends the walk.

    Returns:
        list: The ``DTODorjAtosProcurador`` objects collected, in order.

    Raises:
        Exception: When no anchor element is found.
    """
    def _hop(element):
        # The walk advances two siblings at a time - presumably to skip the
        # whitespace text node between tags (TODO confirm). Guarding the
        # first hop avoids an AttributeError at the end of a sibling chain.
        between = element.next_sibling
        return None if between is None else between.next_sibling

    # Must start as None: without this, a document with no matching element
    # failed with UnboundLocalError instead of the intended exception below.
    anchor = None
    for item in self.soup.find_all(attrs={"class": "ft110"}):
        label = item.get_text()
        if "PROCURADOR-GERAL" in label and len(label) == 24:
            anchor = item
            print("Ancore found: " + label)
    if anchor is None:
        raise Exception("Atos do Procurador-Geral não encontrados")

    # Classes (as the single-item lists found in ``element['class']``) that
    # mark a line continuing the previous act.
    lineBreakIndicators = [['ft116'], ['ft117'], ['ft118'], ['ft119'], ['ft120']]

    currentData = None
    AtosList = []
    currentEl = _hop(anchor)

    # Stop cleanly when the sibling chain ends; this used to raise TypeError
    # on ``None['class']``.
    while currentEl is not None:
        elClass = currentEl['class']
        text = currentEl.get_text()

        if elClass == ['ft110'] and len(text) == 13:
            createLog('debug', 'data' + '||' + text)
            currentData = datetime.strptime(text[-10:], "%d.%m.%Y")
        elif elClass in lineBreakIndicators:
            createLog('debug', 'text' + '||' + text)
            AtosList[-1].content += text
        elif elClass == ['ft13']:
            createLog('debug', 'newLine' + '||' + text)
            AtosList.append(DTODorjAtosProcurador(currentData, text))
        else:
            # First element that fits none of the known shapes ends the walk.
            break

        currentEl = _hop(currentEl)

    return AtosList
def getAtosProcurador(cls):
    """Fetch everything stored in the class-level collection.

    Returns:
        The value produced by ``cls.collection.find()``, or ``None`` when
        that call raises (the exception is reported through ``createLog``).
    """
    try:
        found = cls.collection.find()
    except Exception as e:
        createLog('error', e)
        return None
    else:
        return found