def run_test(self):
    """Extract the financial statements and verify every case in ``self.test_set``.

    Each case is a mapping with the keys ``'fs_tp'``, ``'date'``,
    ``'column'``, ``'item'`` and ``'expected'``.  For every case the
    statement ``fs[fs_tp]`` is scanned row by row; the value found in the
    date column of the *last* row whose label matches ``item`` (spaces
    removed, compared with ``str_compare``) is taken as the actual value.
    ``pytest.fail`` is called as soon as an actual value differs from the
    expected one.
    """
    statements = self.corp.extract_fs(
        bgn_de=self.bgn_de,
        separate=self.separate,
        report_tp=self.report_tp,
    )

    for case in self.test_set:
        tp, date, column = case['fs_tp'], case['date'], case['column']
        item, expected = case['item'], case['expected']

        frame = statements[tp]
        date_column = find_all_columns(df=frame, query=date)[0]
        label_column = find_all_columns(df=frame, query=column)[0]

        labels = frame[label_column]
        values = frame[date_column]

        # No early exit on purpose: a later matching row overrides an earlier one.
        actual = None
        for position in range(len(frame)):
            candidate = labels.iloc[position].replace(' ', '')
            if str_compare(candidate, item):
                actual = values.iloc[position]

        if actual == expected:
            continue

        message_parts = [
            "Test failed: corp_code='{}', ".format(self.corp.corp_code),
            "corp_name='{}', fs_tp='{}', ".format(self.corp.corp_name, tp),
            "start_dt='{}', report_tp='{}', ".format(
                self.bgn_de, statements.info['report_tp']),
            "date='{}', column='{}',".format(date, column),
            "item='{}', actual='{}', expected='{}'".format(
                item, actual, expected),
        ]
        pytest.fail(''.join(message_parts))
def _extract_dataset(self, reports: List[Report]):
    """Extract (concept_id, label) pairs from the XBRL files of the given reports.

    Every report is analysed with ``analyze_xbrl``; for each resulting
    statement the ``concept_id`` and ``label_ko`` columns are read row by
    row.  Rows where both values are truthy are passed through
    ``self.extract_nouns`` and collected as ``(concept_id, label)`` tuples.
    The collected list is stored in ``self._dataset``.

    Parameters
    ----------
    reports: list of Report
        Reports to extract the data from.
    """
    # Pick the progress bar flavour matching the running environment.
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    dataset = []
    for report in tqdm(reports, desc='Extracting concept_id and label_ko', unit='report'):
        df_fs = analyze_xbrl(report)
        if df_fs is None:
            continue

        for tp in df_fs:
            df = df_fs[tp]
            if df is None:
                continue

            concept_column = find_all_columns(df, 'concept_id')[0]
            label_ko_column = find_all_columns(df, 'label_ko')[0]
            concept_ids = df[concept_column]
            labels_ko = df[label_ko_column]

            for idx in range(len(df)):
                concept_id = concept_ids.iloc[idx]
                label_ko = labels_ko.iloc[idx]
                if not (concept_id and label_ko):
                    continue
                try:
                    label = self.extract_nouns(label_ko)
                except Exception:
                    # Best effort: skip labels that cannot be processed.
                    # Only ``Exception`` is caught so that KeyboardInterrupt
                    # and SystemExit still stop a long-running extraction.
                    continue
                dataset.append((concept_id, label))

    self._dataset = dataset
def compare_df_and_ndf_cnn(
        column: Tuple[Union[str, Tuple[str]]],
        df: DataFrame, ndf: DataFrame, ldf: DataFrame,
        ndata: List[Union[float, str, None]],
        nlabels: List[str]) -> Tuple[List[Union[float, str]], List[str]]:
    """Fill still-missing entries of a new column by matching rows on a guessed concept id.

    Rows of ``df`` whose entry in ``ndata`` is a string, ``None`` or NaN are
    indexed by concept id (taken from the ``concept_id`` column of ``df``
    when one exists, otherwise guessed from the label with
    ``guess_concept_id``).  Each row of ``ndf`` is then mapped to a concept
    id guessed from its account title, and its value in ``column`` is
    copied into the matching missing slot unless that value is a string.

    NOTE(review): the original documentation describes this lookup as using
    a convolutional neural network; the model itself is not visible in this
    function (presumably inside ``guess_concept_id``) -- confirm.

    Parameters
    ----------
    column: tuple
        Name of the column to add; used to read the values from ``ndf``.
    df: DataFrame
        DataFrame the data is added to (accumulated extraction result).
    ndf: DataFrame
        DataFrame to search (new DataFrame extracted from a report).
    ldf: DataFrame
        Not used by this function; kept for interface compatibility.
    ndata: list of float
        Data of the column to add.  Modified in place.
    nlabels: list of str
        Labels of the column to add.  Modified in place.

    Returns
    -------
    tuple of list
        The (mutated) ``ndata`` and ``nlabels`` lists.
    """
    # concept_id -> row position in df/ndata that still lacks a value.
    # A later row with the same concept_id replaces an earlier one.
    concept_none_data = {}

    df_label_column = find_all_columns(df, 'label_ko')[0]
    df_concept_columns = find_all_columns(df, 'concept_id')
    is_concept = len(df_concept_columns) != 0
    df_concept_column = df_concept_columns[0] if is_concept else None
    ndf_label_column = find_all_columns(ndf, 'label_ko')[0]

    for idx, value in enumerate(ndata):
        # Only slots holding a string, None or NaN are considered missing.
        if not (isinstance(value, str) or value is None or math.isnan(value)):
            continue

        label = df[df_label_column].iloc[idx]
        label = re.sub(r'\s+', '', label)
        label = extract_account_title(label)

        if is_concept:
            concept_id = df[df_concept_column].iloc[idx]
        else:
            concept_id = guess_concept_id(label)

        if concept_id is not None:
            concept_none_data[concept_id] = idx

    # Positions of ndata that have already been filled; each is filled once.
    used = set()
    for idx in range(len(ndf)):
        label = extract_account_title(ndf[ndf_label_column].iloc[idx])
        concept_id = guess_concept_id(label)
        index = concept_none_data.get(concept_id)
        if index is None or index in used:
            continue

        value = ndf[column].iloc[idx]
        if isinstance(value, str):
            # String values are not accepted; the slot stays open for a
            # later row with the same concept id.
            continue

        used.add(index)
        ndata[index] = value
        nlabels[index] = label

    return ndata, nlabels
def test_fs_show_depth(fs_report):
    """``show('bs', show_depth=1)`` must expose exactly two 'class' columns."""
    shown = fs_report.show('bs', show_depth=1)
    class_columns = find_all_columns(shown, 'class')
    assert len(class_columns) == 2
def test_fs_concept_false(fs_report):
    """``show('bs', show_concept=False)`` must expose no 'concept' columns."""
    shown = fs_report.show('bs', show_concept=False)
    concept_columns = find_all_columns(shown, 'concept')
    assert len(concept_columns) == 0
def show(self, tp, show_class: bool = True,
         show_depth: int = 10, show_concept: bool = True) -> DataFrame:
    """Return a view of one stored financial statement.

    Parameters
    ----------
    tp: str
        Statement type, used as key into ``self._statements``.  The
        original documentation lists 'fs' (statement of financial
        position), 'is' (income statement), 'ci' (comprehensive income
        statement) and 'cf' (cash flow statement).
        NOTE(review): callers appear to pass 'bs' for the balance sheet --
        confirm the valid keys.
    show_class: bool
        Whether to show the class columns.  When ``False`` all class
        columns are removed and ``show_depth`` is ignored.
    show_depth: int
        Depth of the class columns to show.  Class columns with position
        greater than ``show_depth`` are removed, together with every row
        that has a non-``None`` value in one of those columns.
    show_concept: bool
        Whether to show the concept_id column.  The column is only removed
        when exactly one concept_id column is found.

    Returns
    -------
    DataFrame
        The financial statement, or ``None`` when no statement is stored
        for ``tp``.
    """
    from dart_fss.fs.extract import find_all_columns

    df = self._statements[tp]
    if df is None:
        return df

    class_columns = find_all_columns(df, 'class')

    if show_class is False:
        visible = [column for column in df.columns.tolist()
                   if column not in class_columns]
        df = df[visible]
    else:
        cdf = df[class_columns]
        # Rows are selected by position.  The row numbers come from
        # range(len(cdf)), so a label-based drop() would only be correct
        # for a default RangeIndex.
        keep_rows = [
            idx for idx in range(len(cdf))
            if not any(class_idx > show_depth and item is not None
                       for class_idx, item in enumerate(cdf.iloc[idx]))
        ]
        hidden_columns = class_columns[show_depth + 1:]
        visible = [column for column in df.columns.tolist()
                   if column not in hidden_columns]
        df = df[visible].iloc[keep_rows]

    if show_concept is False:
        concept_columns = find_all_columns(df, 'concept_id')
        if len(concept_columns) == 1:
            visible = [column for column in df.columns.tolist()
                       if column not in concept_columns]
            df = df[visible]

    return df
def test_xbrl_get_author_information(samsung_xbrl):
    """The '공시담당자' column of the author information holds the expected value at key 3."""
    author_info = samsung_xbrl.get_author_information()
    contact_column = find_all_columns(author_info, '공시담당자')[0]
    assert author_info[contact_column][3] == '031-277-7227'