def test_content():
    """A freshly built deal exposes the text it was constructed from as ``filetext``."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    assert parsed.filetext == test_deal
def test_value():
    """After ``get_value()`` the deal should carry the expected value string."""
    deal = salvador_extractor.gazetteDeal(test_deal)
    deal.get_value()
    expected = "4.375.000,00"
    # Read the attribute populated by get_value(), not ``deal.date``: comparing
    # the date attribute against a monetary string can never validate the value.
    # NOTE(review): assumes get_value() stores its result on ``deal.value``,
    # following the get_X -> deal.X pattern of the sibling tests — confirm
    # against salvador_extractor.
    result = deal.value
    assert result == expected
def test_date():
    """After ``get_date()`` the deal's ``date`` attribute holds the expected string."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    parsed.get_date()
    assert parsed.date == "25/05/2020"
def test_company_id():
    """After ``get_company_id()`` the deal's ``company_id`` holds the expected identifier."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    parsed.get_company_id()
    assert parsed.company_id == "00.740.696/0001-92"
def test_company():
    """After ``get_company()`` the deal's ``company`` holds the expected company name."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    parsed.get_company()
    assert parsed.company == "PMH PRODUTOS MÉDICOS HOSPITALARES LTDA"
def test_process():
    """After ``get_process()`` the deal's ``process`` holds the expected process number."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    parsed.get_process()
    assert parsed.process == "6567/2020"
def test_object():
    """After ``get_object()`` the deal's ``object`` holds the expected description text."""
    parsed = salvador_extractor.gazetteDeal(test_deal)
    parsed.get_object()
    # The expected text is compared verbatim, including its stray " -" fragments.
    wanted = """Aquisição de Kit para Laboratório: Teste rápido IGM/IGG para coronavirus, para garantir -o atendimento do Laboratório Central / SMS, no combate ao COVID-19, conforme CI DGAS / LAB. -CENTRAL Nº 033/2020 -"""
    assert parsed.object == wanted
# Script: pull every "DISPENSA DE LICITAÇÃO" notice out of one gazette text
# file, run the salvador_extractor field getters on each, and dump the
# resulting attributes to temp.csv.
import re  # used by re.findall below; harmless if already imported earlier

import pandas as pd

deals = {}  # not used in this section; kept in case other code refers to it

with open("../data/teste_rapido_linear/1306.txt") as f:
    gazette = f.read()

# A notice starts at its "DISPENSA DE LICITAÇÃO" heading and runs (lazily, across
# newlines thanks to [\w\W]) up to the first following "DATA DO ATO: <date>".
pattern = r'(DISPENSA DE LICITAÇÃO [\w\W]*? DATA DO ATO: [0-9/]+)'
text = gazette
match = re.findall(pattern, text)

# Collect one dict per successfully parsed notice and build the frame once.
# DataFrame.append was removed in pandas 2.0; with it, every iteration raised
# AttributeError, which the broad except below swallowed, leaving an empty CSV.
rows = []
for i in match:
    try:
        deal = salvador_extractor.gazetteDeal(i)
        deal.get_process()
        print(deal.process)
        deal.get_company()
        print(deal.company)
        deal.get_company_id()
        print(deal.company_id)
        deal.get_object()
        print(deal.object)
        deal.get_date()
        deal_dict = deal.__dict__
        # Drop the raw notice text so only the extracted fields reach the CSV.
        del deal_dict['filetext']
        rows.append(deal_dict)
    except Exception as e:
        # Best-effort extraction: report the failure and skip this notice.
        print(e)

df = pd.DataFrame(rows)
df.to_csv("temp.csv")