Exemplo n.º 1
0
def file_upload(request):
    """Handle the dataset upload form.

    On a valid POST the uploaded file is handed to ``handle_uploaded_file``,
    a ``Data`` record is created from the posted fields and saved, and the
    client is redirected to ``/PPDP/upload_gh/<id>``.  A GET request, or a
    POST whose form does not validate, renders ``PPDP/upload.html`` with the
    (possibly bound) form.
    """
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            title = request.POST['title']
            # Split on the LAST dot only.  The previous two-name unpacking of
            # title.split('.') raised ValueError for titles such as
            # "my.table.data" (several dots) or "table" (no dot at all).
            name, dot, postfix = title.rpartition('.')
            if dot:
                suffix = '.' + postfix
            else:
                # No extension: keep the whole title as the name.
                name, suffix = title, ''
            # NOTE(review): `title` is user-controlled and is used to build
            # the "tmp/..." path below (and presumably the path inside
            # handle_uploaded_file) - confirm that path separators / ".."
            # are rejected somewhere.
            # upload *.data
            handle_uploaded_file(request.FILES['file_content'], name, suffix)
            data = Data.create(name, request.POST['title'], "tmp/" + str(title), request.POST['sa_index'],
                               request.POST['qid_index'], request.POST['is_cat'], request.POST['task_cat'])
            data.save()
            return HttpResponseRedirect('/PPDP/upload_gh/' + str(data.id))

    else:
        form = UploadFileForm()
    return render(request, 'PPDP/upload.html', {'form': form})
Exemplo n.º 2
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Sulawesi Selatan.

    Downloads https://covid19.sulselprov.go.id, reads the rows of the
    ``table table-striped`` table and, for each data row, makes sure the
    matching ``KabupatenKota`` exists and stores a ``Data`` record unless one
    with the same kabupaten and ``last_update`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per scraped row (an empty list
        when the table is not found).
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Selatan", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select()
            .join(Province)
            .where(fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://covid19.sulselprov.go.id"
    output = {"result": []}

    with requests.session() as s:
        # NOTE(review): verify=False turns off TLS certificate verification.
        r = s.get(link, verify=False)
        # Remove the HTML comment delimiters so that commented-out markup is
        # parsed like regular markup.
        data = re.sub(r"<!--", "", r.text)
        data = re.sub(r"-->", "", data)
        url = soup(data, "lxml")

        # The update date is whatever follows the last "-" in the heading.
        title = url.find("h4", attrs={"class": "text-danger"}).text
        pos = str(title).rfind("-")
        _last_update = str(title)[pos + 1 :]

        table = url.find("table", attrs={"class": "table table-striped"})

        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            # Parse the date once instead of twice per row.
            last_update_dt = dateparser.parse(_last_update)

            for i, tr in enumerate(table_rows):
                # The first and the last <tr> are not data rows.
                if i < 1 or i >= num_rows - 1:
                    continue
                # Empty cells are dropped, so the indexes used below refer to
                # the non-empty cells only.
                row = [cell.text.strip() for cell in tr.find_all("td") if cell.text.strip()]
                if not row:
                    # A <tr> without usable <td> cells used to raise
                    # IndexError; skip it like the sibling scrapers do.
                    continue

                n_odp = int(str(row[2]).rstrip())
                n_pdp = int(str(row[3]).rstrip())
                n_confirm = int(str(row[4]).rstrip())

                list_item = {
                    "provinsi": "Sulawesi Selatan",
                    "kode_kab_kota": "N/A",
                    "kab_kota": row[1],
                    "kecamatan": "N/A",
                    "populasi": "N/A",
                    "lat_kab_kota": "N/A",
                    "long_kab_kota": "N/A",
                    "n_odr": "N/A",
                    "n_otg": "N/A",
                    "n_odp": n_odp,
                    "n_pdp": n_pdp,
                    "n_confirm": n_confirm,
                    "n_meninggal": "N/A",
                    "n_sembuh": "N/A",
                    "last_update": _last_update,
                }
                output["result"].append(list_item)

                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi, KabupatenKota.nama == row[1]
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(prov_id=propinsi, nama=row[1])
                else:
                    kabkota = kabkota.get()

                # Insert only when no record exists for this kabupaten/date.
                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == last_update_dt,
                )
                if datum.count() < 1:
                    Data.create(
                        kabupaten=kabkota,
                        n_odp=n_odp,
                        n_pdp=n_pdp,
                        n_confirm=n_confirm,
                        last_update=last_update_dt,
                    )

    return output
Exemplo n.º 3
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Kalimantan Selatan.

    Reads the JSON list served at https://corona.kalselprov.go.id/cov_map
    and, for each entry, makes sure the matching ``KabupatenKota`` exists and
    stores a ``Data`` record stamped with the time of this run.

    Returns:
        ``{"result": [...]}`` with one dict per JSON entry.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Kalimantan Selatan")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Kalimantan Selatan", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result

    link = "https://corona.kalselprov.go.id/cov_map"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        json_data = json.loads(r.text)
        # One timestamp for the whole run.  Previously datetime.now() was
        # called separately for the existence check and for the insert, so
        # the check could never match and every row of a single scrape got a
        # slightly different last_update.
        scraped_at = datetime.datetime.now()
        for entry in json_data:
            list_item = {
                "provinsi": "Kalimantan Selatan",
                "kode_kab_kota": entry["code"],
                "kab_kota": entry["name"],
                "kecamatan": "N/A",
                "populasi": "N/A",
                "lat_kab_kota": "N/A",
                "long_kab_kota": "N/A",
                "n_odr": "N/A",
                "n_otg": "N/A",
                "n_odp": entry["cov_odp_count"],
                "n_pdp": entry["cov_pdp_count"],
                "n_confirm": entry["cov_positive_count"],
                "n_meninggal": entry["cov_died_count"],
                "n_sembuh": entry["cov_recovered_count"],
                "last_update": "N/A",
            }
            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == entry["name"])

            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(prov_id=propinsi,
                                               nama=entry["name"],
                                               kode=entry["code"])
            else:
                kabkota = kabkota.get()
            # Only matches a record written earlier in this same run (e.g. a
            # kabupaten listed twice in the response).
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == scraped_at)
            if datum.count() < 1:
                Data.create(
                    kabupaten=kabkota,
                    n_odp=entry["cov_odp_count"],
                    n_pdp=entry["cov_pdp_count"],
                    n_confirm=entry["cov_positive_count"],
                    n_meninggal=entry["cov_died_count"],
                    n_sembuh=entry["cov_recovered_count"],
                    last_update=scraped_at,
                )
            output["result"].append(list_item)

    return output
Exemplo n.º 4
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Jawa Tengah.

    Downloads https://corona.jatengprov.go.id/data, extracts the update date
    and the data table from the fifth ``<section>`` via XPath, loads the table
    with pandas and, for each record, makes sure the matching
    ``KabupatenKota`` exists and stores a ``Data`` record unless one with the
    same kabupaten and ``last_update`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per table record.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Jawa Tengah")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Jawa Tengah", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://corona.jatengprov.go.id/data"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        tree = html.fromstring(r.text)
        _last_update = tree.xpath(
            "//section[5]/div/div/div[1]/div/p/text()")[0].strip()

        table_elem = tree.xpath("//section[5]/div/div/div[2]/div/div/table")[0]
        table_str = etree.tostring(table_elem)

        # The serialized table is compared by truthiness: the previous
        # `!= ""` test compared it against a str and, with the bytes that
        # etree.tostring returns by default, could never be false.
        if table_str:
            # Parse the date once instead of twice per record.
            last_update_dt = dateparser.parse(_last_update)

            df = pd.read_html(table_str)[0]
            # Confirmed cases = recovered + deceased + hospitalised.
            df["positif"] = (df["Positif: Sembuh"]
                             + df["Positif: Meninggal"]
                             + df["Positif: Dirawat"])

            for row in df.to_dict("records"):
                list_item = {
                    "provinsi": "Jawa Tengah",
                    "kode_kab_kota": "N/A",
                    "kab_kota": row["Kabupaten/Kota"],
                    "kecamatan": "N/A",
                    "populasi": "N/A",
                    "lat_kab_kota": "N/A",
                    "long_kab_kota": "N/A",
                    "n_odr": "N/A",
                    "n_otg": "N/A",
                    "n_odp": row["ODP: Proses"],
                    "n_pdp": row["PDP: Dirawat"],
                    "n_confirm": row["positif"],
                    "n_meninggal": row["Positif: Meninggal"],
                    "n_sembuh": row["Positif: Sembuh"],
                    "last_update": _last_update,
                }

                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi,
                    KabupatenKota.nama == list_item["kab_kota"],
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(prov_id=propinsi,
                                                   nama=list_item["kab_kota"])
                else:
                    kabkota = kabkota.get()

                # Insert only when no record exists for this kabupaten/date.
                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == last_update_dt,
                )
                if datum.count() < 1:
                    Data.create(
                        kabupaten=kabkota,
                        # n_odp was scraped and returned but never persisted.
                        # NOTE(review): confirm the omission was not deliberate.
                        n_odp=list_item["n_odp"],
                        n_pdp=list_item["n_pdp"],
                        n_confirm=list_item["n_confirm"],
                        n_meninggal=list_item["n_meninggal"],
                        n_sembuh=list_item["n_sembuh"],
                        last_update=last_update_dt,
                    )
                output["result"].append(list_item)

    return output
Exemplo n.º 5
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Aceh.

    Reads the JSON list served at https://covid.bravo.siat.web.id/json/peta
    and, for each entry, makes sure the matching ``KabupatenKota`` exists and
    stores a ``Data`` record unless one with the same kabupaten and parsed
    ``updateDate`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per JSON entry.
    """
    prov = __name__.split(".")[-1]
    province_query = Province.select().where(Province.nama_prov == "Aceh")
    if province_query.count() >= 1:
        propinsi = province_query.get()
    else:
        propinsi = Province.create(nama_prov="Aceh", alias=prov)

    today = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError; the handler below turns that into an empty list, so
    # the early return never fires.  Presumably the alias filter was meant to
    # go inside .where(...) - confirm before changing.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == today),
            Province.alias == prov,
        )
    except:
        result = []
    if result:
        return result

    link = "https://covid.bravo.siat.web.id/json/peta"
    output = {"result": []}
    with requests.session() as s:
        response = s.get(link, verify=True)
        for entry in json.loads(response.text):
            nama = entry["namaKabupaten"]
            list_item = {
                "provinsi": "Aceh",
                "kode_kab_kota": "N/A",
                "kab_kota": nama,
                "kecamatan": "N/A",
                "populasi": "N/A",
                "lat_kab_kota": entry["latitude"],
                "long_kab_kota": entry["longitude"],
                "n_odr": "N/A",
                "n_otg": "N/A",
                "n_odp": entry["odp"],
                "n_pdp": entry["pdp"],
                "n_confirm": entry["positif"],
                "n_meninggal": entry["positifMeninggal"],
                "n_sembuh": entry["positifSembuh"],
                "last_update": entry["updateDate"],
            }

            # Look the kabupaten up by province and name; create it (with its
            # coordinates) when it is not known yet.
            kab_query = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == nama,
            )
            if kab_query.count() >= 1:
                kabkota = kab_query.get()
            else:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=nama,
                    lat=entry["latitude"],
                    lon=entry["longitude"],
                )

            # Insert only when no record exists for this kabupaten/date.
            existing = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == dateparser.parse(entry["updateDate"]),
            )
            if not existing.count() >= 1:
                Data.create(
                    kabupaten=kabkota,
                    n_odp=entry["odp"],
                    n_pdp=entry["pdp"],
                    n_confirm=entry["positif"],
                    n_meninggal=entry["positifMeninggal"],
                    n_sembuh=entry["positifSembuh"],
                    last_update=dateparser.parse(entry["updateDate"]),
                )
            output["result"].append(list_item)

    return output
Exemplo n.º 6
0
def scrape():
    """Scrape per-kecamatan COVID-19 counts for Daerah Istimewa Yogyakarta.

    For every row of ``data/Data_KodePos_Kecamatan_DIY.csv`` (resolved
    against the current working directory) a headless Chrome submits the
    postcode on https://sebaran-covid19.jogjaprov.go.id/kodepos, the
    resulting page is parsed, the matching ``KabupatenKota`` is looked up or
    created, and a ``Data`` record is stored unless one with the same
    kabupaten and ``last_update`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per CSV row.
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Daerah Istimewa Yogyakarta"
    )
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Daerah Istimewa Yogyakarta", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select()
            .join(Province)
            .where(fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result
    # chromedriver configuration
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1420,1080")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")

    browser = webdriver.Chrome(chrome_options=chrome_options)

    hidden = "/html/body/div[2]/div[2]/div/div/form/input[1]"
    kodepos = '//*[@id="fname"]'
    button = "/html/body/div[2]/div[2]/div/div/form/button"
    # base URL of the postcode lookup form
    link = "https://sebaran-covid19.jogjaprov.go.id/kodepos"

    output = {"result": []}
    # Everything that can fail after the driver was started runs inside
    # try/finally, so the Chrome process is shut down even when the CSV is
    # missing or a page does not have the expected structure.
    try:
        directory = Path().absolute()
        kodepos_df = pd.read_csv(str(directory)+"/data/Data_KodePos_Kecamatan_DIY.csv", delimiter=";")

        for index, row in kodepos_df.iterrows():
            browser.get(link)
            kode_pos = str(row["kode_pos"])

            # NOTE(review): the hidden input's value is read but never used;
            # the lookup is kept so behaviour on a missing element does not
            # change - confirm whether it is still needed.
            browser.find_element_by_xpath(hidden).get_attribute("value")
            browser.find_element_by_xpath(kodepos).send_keys(kode_pos)
            browser.find_element_by_xpath(button).click()

            url = soup(browser.page_source, "lxml")

            odp = url.find("b", {"id": "odp"})
            pdp = url.find("b", {"id": "pdp"})
            positif = url.find("b", {"id": "positif"})
            last_update_blok = url.find("div", {"class": "dataupdate"})
            populasi = url.find("b", {"id": "populasi"})
            if populasi is None:
                # Fall back to the <strong id="populasi"> variant of the markup.
                populasi = url.find("strong", {"id": "populasi"})

            # The update date is the text of a <p> without a style attribute.
            # NOTE(review): when no such <p> exists, _last_update keeps the
            # value from the previous postcode (or is unbound on the first
            # one) - confirm how that should be handled.
            for item in last_update_blok.contents:
                if item.name == "p" and not item.has_attr("style"):
                    _last_update = item.text.replace("Data Update ", "").rstrip()

            populasi_text = str(populasi.text).rstrip()
            n_odp = int(str(odp.text).rstrip())
            n_pdp = int(str(pdp.text).rstrip())
            n_confirm = int(str(positif.text).rstrip())

            list_item = {
                "provinsi": "Daerah Istimewa Yogyakarta",
                "kode_kab_kota": str(row["kode_wilayah"]),
                "kab_kota": str(row["kabupaten_kota"]),
                "kecamatan": str(row["nama_kecamatan"]),
                "populasi": populasi_text,
                "lat_kab_kota": "N/A",
                "long_kab_kota": "N/A",
                "n_odr": "N/A",
                "n_otg": "N/A",
                "n_odp": n_odp,
                "n_pdp": n_pdp,
                "n_confirm": n_confirm,
                "n_meninggal": "N/A",
                "n_sembuh": "N/A",
                "last_update": _last_update,
            }

            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == str(row["kabupaten_kota"]),
            )
            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=str(row["kabupaten_kota"]),
                    kode=str(row["kode_wilayah"]),
                )
            else:
                kabkota = kabkota.get()

            # Insert only when no record exists for this kabupaten/date.
            last_update_dt = dateparser.parse(_last_update)
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == last_update_dt,
            )
            if datum.count() < 1:
                Data.create(
                    kabupaten=kabkota,
                    n_odp=n_odp,
                    n_pdp=n_pdp,
                    n_confirm=n_confirm,
                    last_update=last_update_dt,
                )
            output["result"].append(list_item)
    finally:
        browser.stop_client()
        browser.close()
        browser.quit()

    return output
Exemplo n.º 7
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Bali.

    Downloads https://pendataan.baliprov.go.id/, takes the update date from
    the heading of the seventh ``card-header`` block and reads the rows of
    the first ``table``.  For each data row the matching ``KabupatenKota`` is
    looked up or created and a ``Data`` record is stored unless one with the
    same kabupaten and ``last_update`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per scraped row (an empty list
        when the table is not found).
    """
    prov = __name__.split(".")[-1]
    province_query = Province.select().where(Province.nama_prov == "Bali")
    if province_query.count() >= 1:
        propinsi = province_query.get()
    else:
        propinsi = Province.create(nama_prov="Bali", alias=prov)

    today = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError; the handler below turns that into an empty list, so
    # the early return never fires.  Presumably the alias filter was meant to
    # go inside .where(...) - confirm before changing.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == today),
            Province.alias == prov,
        )
    except:
        result = []
    if result:
        return result

    link = "https://pendataan.baliprov.go.id/"
    output = {"result": []}
    with requests.session() as s:
        response = s.get(link, verify=True)
        url = soup(response.text, "lxml")

        # The update date is whatever follows the last "Dengan " in the
        # heading ("Dengan " is 7 characters long).
        con = url.find_all("div", attrs={"card-header"})
        title = con[6].find("h3").text
        pos = str(title).rfind("Dengan ")
        _last_update = str(title)[pos + 7:]

        table = url.find("table", attrs={"class": "table"})
        if table is None:
            return output

        table_rows = table.find_all("tr")
        last_index = len(table_rows) - 1
        for position, tr in enumerate(table_rows):
            # Empty cells are dropped, so the indexes used below refer to the
            # non-empty cells only.
            row = [cell.text.strip() for cell in tr.find_all("td") if cell.text.strip()]
            # The first and the last <tr> are not data rows; rows without
            # usable cells are skipped as well.
            if position < 1 or position >= last_index or not row:
                continue

            nama = row[0]
            n_pdp = int(str(row[7]).rstrip())
            n_confirm = int(str(row[6]).rstrip())
            n_meninggal = int(str(row[9]).rstrip())
            n_sembuh = int(str(row[8]).rstrip())

            list_item = {
                "provinsi": "Bali",
                "kode_kab_kota": "N/A",
                "kab_kota": nama,
                "kecamatan": "N/A",
                "populasi": "N/A",
                "lat_kab_kota": "N/A",
                "long_kab_kota": "N/A",
                "n_odr": "N/A",
                "n_otg": "N/A",
                "n_odp": "N/A",
                "n_pdp": n_pdp,
                "n_confirm": n_confirm,
                "n_meninggal": n_meninggal,
                "n_sembuh": n_sembuh,
                "last_update": _last_update,
            }

            kab_query = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == nama,
            )
            if kab_query.count() >= 1:
                kabkota = kab_query.get()
            else:
                kabkota = KabupatenKota.create(prov_id=propinsi, nama=nama)

            # Insert only when no record exists for this kabupaten/date.
            existing = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == dateparser.parse(_last_update),
            )
            if not existing.count() >= 1:
                Data.create(
                    kabupaten=kabkota,
                    n_pdp=n_pdp,
                    n_confirm=n_confirm,
                    n_meninggal=n_meninggal,
                    n_sembuh=n_sembuh,
                    last_update=dateparser.parse(_last_update),
                )
            output["result"].append(list_item)
    return output
Exemplo n.º 8
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Banten.

    Downloads https://infocorona.bantenprov.go.id/, finds the inline script
    that assigns ``pieSeries.data`` and decodes the assigned JSON.  For each
    entry the matching ``KabupatenKota`` is looked up or created and a
    ``Data`` record stamped with the time of this run is stored.

    Returns:
        ``{"result": [...]}`` with one dict per JSON entry (an empty list
        when no script matches).
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Banten")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Banten", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://infocorona.bantenprov.go.id/"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        url = soup(r.text, "lxml")

        # Compile once and match once per script (previously the same
        # pattern was run twice, via search and findall).  As before, when
        # several scripts match, the last one wins.
        pie_data_re = re.compile(r"pieSeries.data\s\=\s(.*)\;")
        json_data = []
        for item in url.find_all("script"):
            match = pie_data_re.search(str(item))
            if match:
                json_data = json.loads(match.group(1))

        # One timestamp for the whole run.  Previously datetime.now() was
        # called separately for the existence check and for the insert, so
        # the check could never match and every row of a single scrape got a
        # slightly different last_update.
        scraped_at = datetime.datetime.now()
        for entry in json_data:
            # pieData is read positionally: [0] -> ODP, [1] -> PDP,
            # [2] -> confirmed, matching the keys they are stored under.
            n_odp = entry["pieData"][0]["value"]
            n_pdp = entry["pieData"][1]["value"]
            n_confirm = entry["pieData"][2]["value"]
            list_item = {
                "provinsi": "Banten",
                "kode_kab_kota": "N/A",
                "kab_kota": entry["title"],
                "kecamatan": "N/A",
                "populasi": "N/A",
                "lat_kab_kota": entry["latitude"],
                "long_kab_kota": entry["longitude"],
                "n_odr": "N/A",
                "n_otg": "N/A",
                "n_odp": n_odp,
                "n_pdp": n_pdp,
                "n_confirm": n_confirm,
                "n_meninggal": "N/A",
                "n_sembuh": "N/A",
                "last_update": "N/A",
            }

            kabkota = KabupatenKota.select().where(
                KabupatenKota.prov_id == propinsi,
                KabupatenKota.nama == entry["title"])

            if kabkota.count() < 1:
                kabkota = KabupatenKota.create(
                    prov_id=propinsi,
                    nama=entry["title"],
                    lat=entry["latitude"],
                    lon=entry["longitude"],
                    populasi="",
                )
            else:
                kabkota = kabkota.get()

            # Only matches a record written earlier in this same run.
            datum = Data.select().where(
                Data.kabupaten == kabkota,
                Data.last_update == scraped_at)
            if datum.count() < 1:
                Data.create(
                    kabupaten=kabkota,
                    n_odp=n_odp,
                    n_pdp=n_pdp,
                    n_confirm=n_confirm,
                    last_update=scraped_at,
                )

            output["result"].append(list_item)

    return output
Exemplo n.º 9
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Nusa Tenggara Barat.

    Downloads https://corona.ntbprov.go.id/list-data with browser-like
    headers, takes the update date from the first ``<th>`` of the
    ``table table-bordered table-striped`` table and reads its rows.  For
    each data row the matching ``KabupatenKota`` is looked up or created and
    a ``Data`` record is stored unless one with the same kabupaten and
    ``last_update`` is already present.

    Returns:
        ``{"result": [...]}`` with one dict per scraped row (an empty list
        when the table is not found).
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(
        Province.nama_prov == "Nusa Tenggara Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Nusa Tenggara Barat", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result

    # NOTE(review): the "cookie" header hard-codes an XSRF token and a
    # laravel_session value captured from a browser; these look like session
    # credentials and will presumably expire - consider dropping them or
    # loading them from configuration.
    headers = {
        "accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding":
        "gzip, deflate, br",
        "accept-language":
        "en-US,en;q=0.9",
        "cache-control":
        "max-age=0",
        "cookie":
        "XSRF-TOKEN=eyJpdiI6IjJGSjNJWmxJS1AzNExHQ1poVDZPeWc9PSIsInZhbHVlIjoiTVEvWTVSWHZJSUtyY0RaalFPa2tzZW1hWmJYN0ZucGtEMXFtNGRRN3RLQXloVkxwNC90VEZMZHozYk1kV1cvLyIsIm1hYyI6ImVlM2NjOTg4YTA2YzMxZjllZGE3MGM0Njk1YTJmZGU1Nzc3ZGE4MmM1MWRlNTg4YWFjZWQ4MWQxZmUzMzkyNzEifQ%3D%3D; laravel_session=eyJpdiI6InN3a2JkdGJPcWMvNmVxbmxBZGxCK2c9PSIsInZhbHVlIjoiM1dwZmdmUHdNY3RwWG9oVXJqM2dYQmZSWnlEakY3TkVNZ2Mra21RY3hLN3V0UGMwQWxVbzhSbU5NNjR0aHdyeiIsIm1hYyI6ImQxNzYyMWI2MjhkMDRlYTY1Mjc4NDFhMTRkMzZiNDliNjdkY2NiNDkxZTY1NTRjZTIxZGVjZGE1YjkzZmUyZWYifQ%3D%3D",
        "referer":
        "https://corona.ntbprov.go.id/",
        "sec-fetch-dest":
        "document",
        "sec-fetch-mode":
        "navigate",
        "sec-fetch-site":
        "same-origin",
        "sec-fetch-user":
        "******",
        "upgrade-insecure-requests":
        "1",
        "user-agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }

    link = "https://corona.ntbprov.go.id/list-data"
    output = {"result": []}
    with requests.session() as s:
        # NOTE(review): verify=False turns off TLS certificate verification.
        r = s.get(link, verify=False, headers=headers)
        url = soup(r.text, "lxml")

        table = url.find("table",
                         attrs={"class": "table table-bordered table-striped"})

        if table is not None:
            # The update date is the part of the first header cell between
            # the first "," and the last "Pukul".
            th = table.find("th")
            info_date = th.text.replace("\n", "").replace("  ", "")
            pos_l = info_date.find(",")
            pos_r = info_date.rfind("Pukul")
            _last_update = info_date[pos_l + 1:pos_r]
            # Parse the date once instead of twice per row.
            last_update_dt = dateparser.parse(_last_update)

            table_rows = table.find_all("tr")
            num_rows = len(table_rows)

            for i, tr in enumerate(table_rows):
                # The first and the last <tr> are not data rows.
                if i < 1 or i >= num_rows - 1:
                    continue
                # Empty cells are dropped, so the indexes used below refer to
                # the non-empty cells only.
                row = [cell.text.strip() for cell in tr.find_all("td") if cell.text.strip()]
                if not row:
                    continue

                # Collapse the line break / indentation inside the name cell.
                nama = (str(row[0]).replace(
                    "\n", "").replace("               ", " "))
                n_otg = int(str(row[5]).rstrip())
                n_odp = int(str(row[8]).rstrip())
                n_pdp = int(str(row[11]).rstrip())
                n_confirm = int(str(row[14]).rstrip())
                n_meninggal = int(str(row[16]).rstrip())
                n_sembuh = int(str(row[17]).rstrip())

                list_item = {
                    "provinsi": "Nusa Tenggara Barat",
                    "kode_kab_kota": "N/A",
                    "kab_kota": nama,
                    "kecamatan": "N/A",
                    "populasi": "N/A",
                    "lat_kab_kota": "N/A",
                    "long_kab_kota": "N/A",
                    "n_odr": "N/A",
                    "n_otg": n_otg,
                    "n_odp": n_odp,
                    "n_pdp": n_pdp,
                    "n_confirm": n_confirm,
                    "n_meninggal": n_meninggal,
                    "n_sembuh": n_sembuh,
                    "last_update": _last_update,
                }

                # Look up by the same cleaned name that is stored on create.
                # The lookup used to compare against row[1] while the record
                # was created from row[0], so it never found the existing
                # kabupaten and a duplicate was created on every run.
                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi,
                    KabupatenKota.nama == nama,
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(prov_id=propinsi, nama=nama)
                else:
                    kabkota = kabkota.get()

                # Insert only when no record exists for this kabupaten/date.
                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == last_update_dt,
                )
                if datum.count() < 1:
                    Data.create(
                        kabupaten=kabkota,
                        n_otg=n_otg,
                        n_odp=n_odp,
                        n_pdp=n_pdp,
                        n_confirm=n_confirm,
                        n_meninggal=n_meninggal,
                        n_sembuh=n_sembuh,
                        last_update=last_update_dt,
                    )
                output["result"].append(list_item)
    return output
Exemplo n.º 10
0
def scrape():
    """Scrape per-kabupaten COVID-19 counts for Sulawesi Barat.

    Downloads https://covid19.sulbarprov.go.id/utama/data and reads the rows
    of the ``table-responsive`` table.  For each data row the matching
    ``KabupatenKota`` is looked up or created and a ``Data`` record stamped
    with the time of this run is stored.

    Returns:
        ``{"result": [...]}`` with one dict per scraped row (an empty list
        when the table is not found).
    """
    prov = __name__.split(".")[-1]
    propinsi = Province.select().where(Province.nama_prov == "Sulawesi Barat")
    if propinsi.count() < 1:
        propinsi = Province.create(nama_prov="Sulawesi Barat", alias=prov)
    else:
        propinsi = propinsi.get()
    sekarang = datetime.datetime.now().date()
    # NOTE(review): list() is called with two positional arguments, which
    # raises TypeError, so `result` always ends up empty and the early return
    # below never fires.  The alias filter presumably belongs inside
    # .where(...).  Left unchanged on purpose: a working check would return
    # model rows instead of the dict callers receive today - confirm intent.
    try:
        result = list(
            Data.select().join(Province).where(
                fn.date_trunc("day", Data.last_update) == sekarang),
            Province.alias == prov,
        )
    except Exception:
        result = []
    if len(result) > 0:
        return result
    link = "https://covid19.sulbarprov.go.id/utama/data"
    output = {"result": []}
    with requests.session() as s:
        r = s.get(link, verify=True)
        url = soup(r.text, "lxml")

        table = url.find("table", attrs={"class": "table-responsive"})

        if table is not None:
            table_rows = table.find_all("tr")
            num_rows = len(table_rows)
            # One timestamp for the whole run.  Previously datetime.now()
            # was called separately for the existence check and for the
            # insert, so the check could never match and every row of a
            # single scrape got a slightly different last_update.
            scraped_at = datetime.datetime.now()

            for i, tr in enumerate(table_rows):
                # The first and the last <tr> are not data rows.
                if i < 1 or i >= num_rows - 1:
                    continue
                # Empty cells are dropped, so the indexes used below refer to
                # the non-empty cells only.
                row = [cell.text.strip() for cell in tr.find_all("td") if cell.text.strip()]
                if not row:
                    continue

                n_otg = int(str(row[6]).rstrip())
                n_odp = int(str(row[2]).rstrip())
                n_pdp = int(str(row[10]).rstrip())
                n_confirm = int(str(row[14]).rstrip())
                # Deaths are the sum of four separate columns.
                n_meninggal = (
                    int(str(row[5]).rstrip()) +
                    int(str(row[9]).rstrip()) +
                    int(str(row[12]).rstrip()) +
                    int(str(row[18]).rstrip()))
                n_sembuh = int(str(row[17]).rstrip())

                list_item = {
                    "provinsi": "Sulawesi Barat",
                    "kode_kab_kota": "N/A",
                    "kab_kota": str(row[1]).rstrip(),
                    "kecamatan": "N/A",
                    "populasi": "N/A",
                    "lat_kab_kota": "N/A",
                    "long_kab_kota": "N/A",
                    "n_odr": "N/A",
                    "n_otg": n_otg,
                    "n_odp": n_odp,
                    "n_pdp": n_pdp,
                    "n_confirm": n_confirm,
                    "n_meninggal": n_meninggal,
                    "n_sembuh": n_sembuh,
                    "last_update": "N/A",
                }

                kabkota = KabupatenKota.select().where(
                    KabupatenKota.prov_id == propinsi,
                    KabupatenKota.nama == row[1],
                )
                if kabkota.count() < 1:
                    kabkota = KabupatenKota.create(prov_id=propinsi,
                                                   nama=row[1])
                else:
                    kabkota = kabkota.get()

                # Only matches a record written earlier in this same run.
                datum = Data.select().where(
                    Data.kabupaten == kabkota,
                    Data.last_update == scraped_at,
                )
                if datum.count() < 1:
                    Data.create(
                        kabupaten=kabkota,
                        n_otg=n_otg,
                        n_odp=n_odp,
                        n_pdp=n_pdp,
                        n_confirm=n_confirm,
                        n_meninggal=n_meninggal,
                        n_sembuh=n_sembuh,
                        last_update=scraped_at,
                    )
                output["result"].append(list_item)
    return output