def exists_dir_object():
    po = PathOperator()
    tmp_folder_name = 'huga'
    os.makedirs(tmp_folder_name)
    yield po, tmp_folder_name
    if (os.path.exists(tmp_folder_name)):
        os.removedirs(tmp_folder_name)
Пример #2
0
    def get_patient_dict(self, source_url, target_url, scr):
        # 発生状況等の取得
        url = scr.getTargetUrl(source_url, target_url)
        soup = scr.getContent(url)
        # テーブル情報を取得する
        try:
            dataset = scr.parseSingleTable(soup)
        except ValueError as e:
            raise e
        # pdf格納folderの作成
        path_op = PathOperator()
        path_op.create_path('pdf')

        # 県外事例は除外
        inside_checker = StringUtil()
        patient_data_tmp = []

        for data in dataset:
            if inside_checker.exclude_outside(data[0]):

                # 格納先のファイル名を作成
                file_name = path_op.set_downlaod_file_name(
                    'pdf', path_op.get_file_name(data[3]))
                scr.downloadPdf(data[3], file_name)
                output_file = os.path.splitext(os.path.basename(file_name))[0]
                output_path = os.path.join('./text', output_file + '.txt')
                is_duplicated, number_char = StringUtil(
                ).is_duplicate_data(data[0])

                # 重複している場合
                if is_duplicated:
                    for n in number_char:
                        patient_data_tmp.append(
                            {"No": n, "発生判明日": data[2].strip(), "link": output_path})

                # 単一の場合
                else:
                    patient_data_tmp.append(
                        {"No": number_char,  "発生判明日": data[2].strip(), "link": output_path})

        # convertの実行
        path_op.create_path('text')
        PdfParser.execute_pdf2text()

        # patientの作成
        for tmp in patient_data_tmp:
            # テキストからjsonの作成
            result = TextParser.text2dict(tmp['No'], tmp['link'])
            result.update(tmp)
            result.update({'退院': None})
            del result['link']
            self.patient_list.append(result)

        # Noに数字が入らない場合の処理(例:"再陽性")
        nan_patient_list = list(filter(lambda x: re.search(
            r'\d', x['No']) is None, self.patient_list))
        number_patient_list = list(filter(lambda x: re.search(
            r'\d', x['No']) is not None, self.patient_list))
        insert_patient_list = sorted(number_patient_list, key=lambda x: int(
            re.sub(r'県|内|例|目|第\d報', '', x['No'])))
        insert_patient_list.extend(nan_patient_list)
        self.patients['data'] = insert_patient_list

        # patients_summaryの作成
        self.create_patients_summary_dict(insert_patient_list)
        return self.patients, self.patients_summary_data
def create_dir_object():
    po = PathOperator()
    tmp_folder_name = 'hoge'
    yield po, tmp_folder_name
    if (os.path.exists(tmp_folder_name)):
        os.removedirs(tmp_folder_name)
def file_name_object():
    po = PathOperator()
    yield po
Пример #5
0
def getPatientDict(index_html, scr, update_datetime):
    # 発生状況等の取得
    url = scr.getTargetUrl(index_html, 'info_coronavirus_prevention.html')
    soup = scr.getContent(url)
    # テーブル情報を取得する
    try:
        dataset = scr.parseSingleTable(soup)
    except ValueError as e:
        raise e
    # pdf格納folderの作成
    path_op = PathOperator()
    path_op.create_path('pdf')

    # 県外事例は除外
    inside_checker = StringUtil()
    patient_data_tmp = []
    patients_summary_tmp = []
    for data in dataset:
        if inside_checker.exclude_outside(data[0]):

            # 格納先のファイル名を作成
            file_name = path_op.set_downlaod_file_name(
                'pdf', path_op.get_file_name(data[3]))
            scr.downloadPdf(data[3], file_name)
            output_file = os.path.splitext(os.path.basename(file_name))[0]
            output_path = os.path.join('./text', output_file + '.txt')
            is_duplicated, number_char = StringUtil().is_duplicate_data(
                data[0])

            # 重複している場合
            if is_duplicated:
                for n in number_char:
                    # print('No:{} リリース日:{} 判明日:{} Link:{}'.format(n, data[1].strip(), data[2].strip(), output_path))
                    patient_data_tmp.append({
                        "No": n,
                        "リリース日": data[1].strip(),
                        "link": output_path
                    })
                    patients_summary_tmp.append(data[2].strip())

            # 単一の場合
            else:
                # print('No:{} リリース日:{} 判明日:{} Link:{}'.format(number_char, data[1].strip(), data[2].strip(), output_path))
                patient_data_tmp.append({
                    "No": number_char,
                    "リリース日": data[1].strip(),
                    "link": output_path
                })
                patients_summary_tmp.append(data[2].strip())

    # convertの実行
    path_op.create_path('text')
    convert_txt = PdfParser()
    convert_txt.executeConvert()

    # テキストからjsonの作成
    parser = TextParser()
    patient_list = []

    # patientの作成
    for tmp in patient_data_tmp:
        result = parser.text2dict(tmp['No'], tmp['link'])
        result.update(tmp)
        result.update({'退院': None})
        del result['link']
        patient_list.append(result)
    patients = {}
    patients['__comments'] = "陽性患者の属性"
    patients['date'] = update_datetime
    # Noに数字が入らない場合の処理(例:"再陽性")
    nan_patient_list = list(
        filter(lambda x: re.search(r'\d', x['No']) is None, patient_list))
    number_patient_list = list(
        filter(lambda x: re.search(r'\d', x['No']) is not None, patient_list))
    insert_patient_list = sorted(
        number_patient_list,
        key=lambda x: int(re.sub(r'県|内|例|目', '', x['No'])))
    insert_patient_list.extend(nan_patient_list)
    patients['data'] = insert_patient_list
    # patients_summaryの作成
    patients_summary_data = {}
    patients_summary_data['__comments'] = "陽性患者数"
    patients_summary_data['date'] = update_datetime
    patients_summary = TimeUtil().create_dt_dict(datetime.datetime.now())
    for k, g in groupby(patients_summary_tmp):
        patients_summary = list(
            map(
                lambda x: {
                    "日付": x["日付"],
                    "小計": len(list(g)) if x['日付'] == k else x['小計']
                }, patients_summary))

    # 小計が0とならない最新の日付までのリストにする
    patients_summary = sorted(patients_summary, key=lambda x: x['日付'])
    jc = JsonChecker()
    patients_summary = jc.exclude_zero_max_date(patients_summary)
    patients_summary_data['data'] = patients_summary

    return patients, patients_summary_data