def catcher():
    """Fetch the Paraná state COVID-19 spreadsheets, tagged with the date of
    the most recent published epidemiological report.

    Walks backwards one day at a time from today until the report PDF for
    that date exists on the state health department site, then downloads
    each sheet (one per gid) of the public Google spreadsheet.

    Returns:
        list[pandas.DataFrame]: one DataFrame per sheet, each with
        ``insert_date`` and ``data_boletim`` columns appended.
    """
    sheet_gids = [
        '1593460334', '618041857', '1431072159', '1317012264',
        '354728218', '355601818', '1342035615',
    ]

    def _report_url(d):
        # BUGFIX: the original hard-coded '2020-0{}', which breaks for
        # months >= 10 ('2020-010') and for any year other than 2020.
        # Zero-padding the month and using the real year produces the same
        # URL for the original's working inputs (2020, Jan-Sep).
        return ('http://www.saude.pr.gov.br/sites/default/'
                'arquivos_restritos/files/documento/{}-{:02d}/'
                'INFORME_EPIDEMIOLOGICO_{}.pdf'
                .format(d.year, d.month, format_date(0, d)))

    date = now().date()
    r = requests.get(_report_url(date))
    # NOTE: the original wrote ``r.raise_for_status`` without calling it — a
    # no-op attribute access. Calling it would raise on 404 and defeat the
    # retry loop below, so the probe is intentionally just ``r.ok``.
    while not r.ok:
        date = previous_date(date)
        r = requests.get(_report_url(date))

    datasets = []
    for gid in sheet_gids:
        url = ("https://docs.google.com/spreadsheets/d/1mw17ZXJaRML5QKcZPACVE-"
               "j7gJoqyv-TnOyG5ZCKINM/export?gid={}&format=csv".format(gid))
        dataset = pd.read_csv(url, encoding='utf-8', engine='python',
                              error_bad_lines=False)
        dataset.insert(len(dataset.columns), "insert_date", now())
        dataset.insert(len(dataset.columns), "data_boletim", date)
        datasets.append(dataset)
    return datasets
def catcher(date):
    """Collect daily Brazil-wide reports from covid19-brazil-api.

    Args:
        date: starting date (inclusive); the loop advances one day at a
            time via ``next_date`` up to today.

    Returns:
        pandas.DataFrame: one row per API payload plus an ``insert_date``
        column. Empty DataFrame when the start date is in the future.
    """
    frames = []
    today = now()
    while date <= today:
        url = ('https://covid19-brazil-api.now.sh/api/report/v1/brazil/{}'
               .format(format_date(1, date)))
        content = get_api(url)
        # DataFrame.append was deprecated and removed in pandas 2.0.
        # Reproduce its semantics explicitly: a dict becomes one row, a
        # list/other iterable becomes multiple rows, a frame passes through.
        if isinstance(content, pd.DataFrame):
            frames.append(content)
        elif isinstance(content, dict):
            frames.append(pd.DataFrame([content]))
        else:
            frames.append(pd.DataFrame(content))
        date = next_date(date)
    df = (pd.concat(frames, ignore_index=True) if frames else pd.DataFrame())
    df.insert(len(df.columns), "insert_date", now())
    return df
def catcher():
    """Download the per-country COVID-19 report from covid19-brazil-api.

    Returns:
        pandas.DataFrame: the API payload with an ``insert_date`` column
        appended as the last column.
    """
    endpoint = 'https://covid19-brazil-api.now.sh/api/report/v1/countries'
    payload = get_api(endpoint)
    frame = pd.DataFrame(payload)
    frame.insert(len(frame.columns), "insert_date", now())
    return frame
def catcher():
    """Fetch the JHU CSSE global time series (confirmed / deaths /
    recovered) through the HDX HXL proxy and merge them into one frame.

    Returns:
        pandas.DataFrame: columns from the "confirmed" file
        (Country/Region, Date, Value) plus one Value column per remaining
        metric, sorted by Date descending, with ``insert_date`` appended.
    """
    # One download per metric; each CSV is the "narrow" (long-format)
    # HXL-tagged export of the corresponding JHU time-series file.
    data = ["confirmed", "deaths", "recovered"]
    dataset = pd.DataFrame()
    for word in data:
        # The query string encodes the entire HXL-proxy filter chain
        # (merges with ISO-code lookup sheets, explode to long format,
        # renames, date clean, sorts). Both {} placeholders receive the
        # metric name.
        url = ("https://data.humdata.org/hxlproxy/data/download/"
               "time_series_covid19_{}_global_narrow.csv?dest=data_edit&"
               "filter01=merge&merge-url01=https%3A%2F%2Fdocs.google."
               "com%2Fspreadsheets%2Fd%2Fe%2F2PACX-1vTglKQRXpkKSErDiWG6ycqEth"
               "32MY0reMuVGhaslImLjfuLU0EUgyyu2e-3vKDArjqGX7dXEBV8FJ4f%2Fpub"
               "%3Fgid%3D1326629740%26single%3Dtrue%26output%3Dcsv&merge-keys0"
               "1=%23country%2Bname&merge-tags01=%23country%2Bcode%2C%23region"
               "%2Bmain%2Bcode%2C%23region%2Bsub%2Bcode%2C%23region%"
               "2Bintermediate%2Bcode&filter02=merge&merge-url02=https%"
               "3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2Fe%2F2PACX-"
               "1vTglKQRXpkKSErDiWG6ycqEth32MY0reMuVGhaslImLjfuLU0EUgyyu2e-"
               "3vKDArjqGX7dXEBV8FJ4f%2Fpub%3Fgid%3D398158223%26single%"
               "3Dtrue%26output%3Dcsv&merge-keys02=%23adm1%2Bname&"
               "merge-tags02=%23country%2Bcode%2C%23region%2Bmain%2Bcode%2"
               "C%23region%2Bsub%2Bcode%2C%23region%2Bintermediate%2Bcode&"
               "merge-replace02=on&merge-overwrite02=on&filter03=explode&"
               "explode-header-att03=date&explode-value-att03=value&filter04"
               "=rename&rename-oldtag04=%23affected%2Bdate&rename-newtag04="
               "%23date&rename-header04=Date&filter05=rename&rename-oldtag05="
               "%23affected%2Bvalue&rename-newtag05=%23affected%2Binfected"
               "%2Bvalue%2Bnum&rename-header05=Value&filter06=clean&clean-date"
               "-tags06=%23date&filter07=sort&sort-tags07=%23date&sort"
               "-reverse07=on&filter08=sort&sort-tags08=%23country%2Bname%2C"
               "%23adm1%2Bname&tagger-match-all=on&tagger-default-tag="
               "%23affected%2Blabel&tagger-01-header=province%2Fstate&tagger"
               "-01-tag=%23adm1%2Bname&tagger-02-header=country%2Fregion"
               "&tagger-02-tag=%23country%2Bname&tagger-03-header=lat&tagger"
               "-03-tag=%23geo%2Blat&tagger-04-header=long&tagger-04-tag="
               "%23geo%2Blon&header-row=1&url=https%3A%2F%2Fraw"
               ".githubusercontent.com%2FCSSEGISandData%2FCOVID-19"
               "%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series"
               "%2Ftime_series_covid19_{}_global.csv".format(word, word))
        if word == "confirmed":
            # The first metric keeps the key columns; subsequent metrics
            # contribute only their Value column.
            temp_dataset = pd.read_csv(
                url, encoding='ISO-8859-1', engine='python',
                error_bad_lines=False,
                usecols=["Country/Region", "Date", "Value"])
        else:
            temp_dataset = pd.read_csv(url, encoding='ISO-8859-1',
                                       engine='python',
                                       error_bad_lines=False,
                                       usecols=["Value"])
        temp_dataset = cleaner(temp_dataset, word)
        # Column-wise concat: assumes the three files are row-aligned
        # (same sort order) after cleaning — TODO confirm in `cleaner`.
        dataset = pd.concat([dataset, temp_dataset], axis=1)
    dataset = dataset.sort_values(by='Date', ascending=False)
    dataset.reset_index(drop=True, inplace=True)
    dataset.insert(len(dataset.columns), "insert_date", now())
    return dataset
def catcher():
    """Download the shared Google-Sheets dataset, clean it, and stamp it.

    Returns:
        pandas.DataFrame: the cleaned sheet with an ``insert_date`` column
        appended as the last column.
    """
    sheet_url = ("https://docs.google.com/spreadsheets/d/"
                 "1MWQE3s4ef6dxJosyqvsFaV4fDyElxnBUB6gMGvs3rEc"
                 "/export?gid=1503196283&format=csv")
    frame = pd.read_csv(sheet_url, encoding='utf-8', engine='python',
                        error_bad_lines=False)
    frame = cleaner(frame)
    frame.insert(len(frame.columns), "insert_date", now())
    return frame
def catcher():
    """Download wcota's per-city Brazilian case time series, clean it,
    and stamp it.

    Returns:
        pandas.DataFrame: the cleaned CSV with an ``insert_date`` column
        appended as the last column.
    """
    csv_url = ("https://raw.githubusercontent.com/wcota/"
               "covid19br/master/cases-brazil-cities-time.csv")
    frame = pd.read_csv(csv_url, encoding='utf-8', engine='python',
                        error_bad_lines=False)
    frame = cleaner(frame)
    frame.insert(len(frame.columns), "insert_date", now())
    return frame
def catcher():
    """Download brasil.io's full per-city case dump (gzipped CSV).

    Returns:
        pandas.DataFrame: the decompressed CSV with an ``insert_date``
        column appended as the last column.
    """
    req = Request(
        'https://data.brasil.io/dataset/covid19/caso_full.csv.gz',
        headers={
            # brasil.io rejects requests without a browser-like User-Agent.
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu;Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0',
            'Accept-Encoding': 'gzip'
        })
    # BUGFIX: the original never closed the HTTP response (resource leak).
    # HTTPResponse is a context manager in Python 3, so `with` closes it
    # deterministically even if decompression or decoding raises.
    with urlopen(req) as response:
        content = gzip.decompress(response.read())
    dataset = pd.read_csv(io.StringIO(content.decode('utf-8')))
    dataset.insert(len(dataset.columns), "insert_date", now())
    return dataset
def Brasilapi_mundo(self, data):
    """Persist one world-level Brasil-API row into its ORM table.

    Args:
        data: indexable ordered as (country, cases, confirmed, deaths,
            recovered, updated_at).
    """
    fields = {
        'country': data[0],
        'cases': data[1],
        'confirmed': data[2],
        'deaths': data[3],
        'recovered': data[4],
        'updated_at': format_date(1, data[5]),
        'insert_date': now(),
    }
    record = table_class.Brasilapi_mundo(**fields)
    self.session.add(record)
    self.session.commit()
def Brasilapi_nacional(self, data):
    """Persist one state-level Brasil-API row into its ORM table.

    Args:
        data: indexable ordered as (uid, uf, state, cases, deaths,
            suspects, refuses, datetime).
    """
    fields = {
        'uid': data[0],
        'uf': data[1],
        'state': data[2],
        'cases': data[3],
        'deaths': data[4],
        'suspects': data[5],
        'refuses': data[6],
        'datetime': data[7],
        'insert_date': now(),
    }
    record = table_class.Brasilapi_nacional(**fields)
    self.session.add(record)
    self.session.commit()
def job():
    """Run the full insertion pipeline and log completion to stdout.

    Returns None (like the original, which returned ``print(...)``).
    """
    main.insert_all()
    print("Dados inseridos com sucesso. Datetime {}".format(now()))