예제 #1
0
 def test_get_xbrl(self):
     """Fetch document S100FGR9 next to this test module, check that the
     returned path exists on disk, then delete it again."""
     here = os.path.dirname(__file__)
     # Open question carried over from the original author:
     # lang="en" does not exist?
     saved_path = DocumentClient().get_xbrl("S100FGR9", save_dir=here)
     self.assertTrue(os.path.exists(saved_path))
     os.remove(saved_path)
예제 #2
0
 def setUpClass(cls):
     """Fetch document S100G2KL with expand_level="dir" into ``../../data``
     (relative to this file) and store a ``Reader`` over it on the class.

     NOTE(review): the ``@classmethod`` decorator is not visible in this
     excerpt -- confirm it precedes this definition.
     """
     data_dir = os.path.join(os.path.dirname(__file__), "../../data")
     xbrl_path = DocumentClient().get_xbrl(
         "S100G2KL", save_dir=data_dir, expand_level="dir")
     cls.reader = Reader(xbrl_path)
예제 #3
0
 def test_get_xbrl_zip_dir(self):
     """With ``expand_level=None`` the client should return a ``.zip`` path.

     The fetched archive is removed even when the suffix assertion fails,
     so a failing run does not leave the file beside the test module.
     """
     _dir = os.path.dirname(__file__)
     client = DocumentClient()
     # lang="en" does not exist?
     file_path = client.get_xbrl("S100FGSC",
                                 save_dir=_dir,
                                 expand_level=None)
     # Checked outside the try: if nothing was written there is nothing to
     # clean up, and os.remove would only mask the real failure.
     self.assertTrue(os.path.exists(file_path))
     try:
         self.assertTrue(str(file_path).endswith(".zip"))
     finally:
         os.remove(file_path)
예제 #4
0
 def test_get_xbrl_zip(self):
     """With ``expand_level="dir"`` the client should return a directory.

     Whatever was written is removed even when the directory assertion
     fails, so a failing run does not leave artifacts beside the test.
     """
     _dir = os.path.dirname(__file__)
     client = DocumentClient()
     # lang="en" does not exist?
     file_path = client.get_xbrl("S100FGR9",
                                 save_dir=_dir,
                                 expand_level="dir")
     # Checked outside the try: if nothing was written there is nothing to
     # clean up.
     self.assertTrue(os.path.exists(file_path))
     try:
         self.assertTrue(os.path.isdir(file_path))
     finally:
         # If the assertion failed because a plain file came back,
         # rmtree would raise and hide the assertion error.
         if os.path.isdir(file_path):
             shutil.rmtree(file_path)
         else:
             os.remove(file_path)
예제 #5
0
def check():
    """Fetch document S100G2KL, extract its balance sheet and write ``bs.csv``.

    A scratch ``./data`` directory next to this file is recreated empty
    before the run and removed afterwards, including when a step fails.
    The CSV is written to the current working directory, Shift_JIS encoded.
    """
    _dir = os.path.join(os.path.dirname(__file__), "./data")
    # Always start from an empty, existing directory. Previously a leftover
    # directory was deleted but not recreated, so the save target was missing.
    if os.path.exists(_dir):
        shutil.rmtree(_dir)
    os.mkdir(_dir)
    try:
        client = DocumentClient()
        file_path = client.get_xbrl("S100G2KL", save_dir=_dir,
                                    expand_level="dir")
        reader = Reader(file_path)
        print("Start Calculation")
        bs = reader.extract(Finance).bs()
        bs.to_csv("bs.csv", index=False, encoding="shift_jis")
    finally:
        # Remove the scratch directory even if fetching or parsing failed.
        shutil.rmtree(_dir)
예제 #6
0
    def test_xbrl_dir(self):
        """Check that ``Directory`` exposes each parsed part of an expanded
        XBRL package and that ``delete()`` removes it from disk.

        ``delete()`` runs in a ``finally`` so a failed content assertion does
        not leave the fetched fixture behind in ``../data``.
        """
        _dir = os.path.join(os.path.dirname(__file__), "../data")
        client = DocumentClient()
        file_path = client.get_xbrl("S100FGR9", save_dir=_dir,
                                    expand_level="dir")
        xbrl_dir = Directory(file_path)

        try:
            # Each attribute must contain at least one of its expected tags.
            self.assertGreater(len(xbrl_dir.xsd.find_all("element")), 0)
            self.assertGreater(
                len(xbrl_dir.cal.find_all("calculationLink")), 0)
            self.assertGreater(
                len(xbrl_dir.def_.find_all("definitionArc")), 0)
            self.assertGreater(len(xbrl_dir.lab.find_all("labelLink")), 0)
            self.assertGreater(len(xbrl_dir.lab_en.find_all("labelLink")), 0)
            self.assertGreater(
                len(xbrl_dir.pre.find_all("presentationLink")), 0)
            self.assertTrue(xbrl_dir.man.find("manifest"))
        finally:
            xbrl_dir.delete()

        self.assertFalse(os.path.exists(file_path))
예제 #7
0
 def get_xbrl(self, save_dir: str = "", file_name: str = "",
              lang: str = "ja",):
     """Fetch the XBRL for ``self.document_id`` through ``DocumentClient``
     and return whatever the client returns.

     NOTE(review): the arguments are forwarded positionally -- confirm they
     still line up with the parameter order of ``DocumentClient.get_xbrl``.
     """
     # Imported at call time rather than at module level; presumably to
     # avoid an import cycle -- TODO confirm.
     from xbrr.edinet.client.document_client import DocumentClient
     return DocumentClient().get_xbrl(
         self.document_id, save_dir, file_name, lang)
예제 #8
0
    def collect(self,
                directory="",
                source_directory="",
                year="",
                edinet_code="",
                sec_code="",
                jcn="",
                file_type="xbrl"):
        """
        Collect the documents based on ledger file.

        The rows of ``self.data`` are narrowed by every non-empty filter.
        For each remaining row,
        ``<source_directory>/<fiscal_year>/docs/<doc_id>.xbrl`` is copied
        into the output folder when it exists; otherwise the document is
        fetched according to ``file_type``. A failed fetch is reported on
        stdout and the loop moves on to the next row.

        Args:
            directory: Parent of the output folder. A relative path is taken
                from the current working directory; empty means
                ``self.storage._default_raw_data`` under the current
                working directory. The output folder itself is named after
                the active filter values joined with "_".
            source_directory: Root searched for already stored ``.xbrl``
                files; resolved the same way as ``directory``.
            year: Value matched against the ``fiscal_year`` column.
            edinet_code: Value matched against the ``edinet_code`` column.
            sec_code: Value matched against the ``sec_code`` column.
            jcn: Value matched against the ``jcn`` column.
            file_type: One of "pdf", "xbrl", "zip" or "csr".

        Returns:
            The filtered ledger rows.

        Raises:
            ValueError: If no filter is given, or ``file_type`` is not one
                of the supported values. Both are checked before anything
                is written to disk.
        """
        supported_types = ("pdf", "xbrl", "zip", "csr")

        def resolve(location):
            # Empty -> default raw-data folder under the cwd;
            # relative -> under the cwd; absolute -> used as given.
            if not location:
                return Path.cwd().joinpath(self.storage._default_raw_data)
            path = Path(location)
            if path.is_absolute():
                return path
            return Path.cwd().joinpath(location)

        s_dir = resolve(source_directory)

        filters = {
            "fiscal_year": str(year),
            "edinet_code": edinet_code,
            "sec_code": str(sec_code),
            "jcn": str(jcn)
        }

        target = self.data
        conditions = []
        for column, wanted in filters.items():
            if wanted:
                target = target[target[column] == wanted]
                conditions.append(wanted)

        if not conditions:
            raise ValueError("You have to specify at least one condition.")

        # Validate once, up front, so a typo fails before any file is
        # copied instead of part-way through the loop (or never, when
        # every document happens to be available locally).
        if file_type not in supported_types:
            raise ValueError(f"File type {file_type} is not supported")

        t_dir = resolve(directory).joinpath("_".join(conditions))
        t_dir.mkdir(parents=True, exist_ok=True)

        for _, r in tqdm(target.iterrows(), total=target.shape[0]):
            doc_id = r["doc_id"]
            # str() keeps the path valid if the column is not text.
            y_s_path = s_dir.joinpath(str(r["fiscal_year"]), "docs",
                                      f"{doc_id}.xbrl")

            if y_s_path.exists():
                # NOTE(review): the local lookup is always for ".xbrl",
                # whatever file_type was requested -- confirm intended.
                shutil.copy(str(y_s_path),
                            str(t_dir.joinpath(f"{doc_id}.xbrl")))
                continue

            client = DocumentClient()
            try:
                if file_type == "pdf":
                    client.get_pdf(doc_id, save_dir=t_dir)
                elif file_type == "xbrl":
                    client.get_xbrl(doc_id,
                                    save_dir=t_dir,
                                    expand_level="file")
                elif file_type == "zip":
                    client.get_xbrl(doc_id,
                                    save_dir=t_dir,
                                    expand_level="dir")
                else:  # "csr" -- the only remaining supported value
                    csr_path = r['csr_path']
                    # Rows without a usable csr_path are skipped quietly.
                    if isinstance(csr_path, str) and csr_path:
                        print(csr_path)
                        file_name = os.path.basename(csr_path)
                        url = f"https://s3-ap-northeast-1.amazonaws.com/chakki.esg.csr.jp/{csr_path}"
                        self.storage._download(url,
                                               t_dir.joinpath(file_name))
            except Exception as e:
                # Best effort: one failed document must not abort the
                # batch, but the cause is reported rather than discarded.
                print("Can not download {}. ({!r})".format(doc_id, e))

            time.sleep(0.1)  # to save api host

        return target