def get_financial_data(symbol, timeframe, report_type):
    """
    Download one financial report and hand it back as csv rows.

    The return value is whatever ``transpose_items`` produces: an iterator
    of csv rows (tuples) whose first row holds the headers.

    symbol: e.g. "C6L.SI"
    timeframe: "annual" or "quarter"
    report_type: any string in FINANCIAL_REPORT_TYPES

    Raises ScrapingError if the downloaded html cannot be scraped, and
    PreprocessingError if the scraped data cannot be preprocessed or
    transposed. In both cases the underlying exception is chained as the
    cause.
    """
    # The same triple is used to build the url and both error messages.
    request_args = (symbol, timeframe, report_type)

    logger.info("getting {0} {1} data of {2}".format(timeframe, report_type, symbol))

    # NOTE(review): _test_for_not_found is presumably how strict_get detects
    # a "not found" page -- confirm against strict_get.
    page = strict_get(_get_report_url(*request_args), _test_for_not_found)

    try:
        scraped = _scrape_html(page)
    except (IndexError, AttributeError, KeyError) as scrape_error:
        failure = "failed to scrape raw data out from the html content of {0} {1} {2}".format(*request_args)
        logger.exception(scrape_error)
        logger.error(failure)
        raise ScrapingError(failure) from scrape_error

    try:
        # exception can only be triggered in transpose_items because
        # preprocess returns a generator
        return transpose_items(preprocess(scraped, *request_args))
    except (AssertionError, ValueError) as preprocess_error:
        failure = "failed to preprocess the raw data of {0} {1} {2}".format(*request_args)
        logger.exception(preprocess_error)
        logger.error(failure)
        raise PreprocessingError(failure) from preprocess_error
def test_preprocess(self):
    """
    Check preprocess() on a small balance-sheet fixture.

    The expected output pins three behaviours: the "periods" row becomes a
    "Date" row of datetimes, a repeated column name gets a "_2" suffix, and
    only the first "Cash & Short Term Investments" column is left unscaled
    while the other value columns are multiplied by 1000.
    """
    scraped = {
        "data": [
            ["periods", ["2009", "2010", "2011"]],
            ["Cash & Short Term Investments", ["4,504", "5,400", "5,488"]],
            ["Cash & Short Term Investments", ["3,504", "4,400", "4,488"]],
            ["Liabilities & Shareholders' Equity", ["25,169", "22,589", "22,501"]],
        ],
        "symbol_prefix": "C6L",
        "timeframe": "annual",
        "top_remark": "Fiscal year is April-March. All values SGD Thousands.",
        "report_type": "balance-sheet",
    }
    expected_rows = [
        ("Date", [datetime(year, 4, 1) for year in (2009, 2010, 2011)]),
        ("Cash & Short Term Investments", [4504, 5400, 5488]),
        ("Cash & Short Term Investments_2", [3504000, 4400000, 4488000]),
        ("Liabilities & Shareholders' Equity", [25169000, 22589000, 22501000]),
    ]

    uno_columns = {"Cash & Short Term Investments"}
    with patch("fa.miner.preprocess.UNO_VALUE_UNIT_COLUMNS", uno_columns):
        # Materialise the generator while the patch is still active.
        actual_rows = list(
            preprocess.preprocess(scraped, "C6L.SI", "annual", "balance-sheet")
        )
        # first "Cash & Short Term Investments" is in UNO_VALUE_UNIT_COLUMNS
        self.assertEqual(actual_rows, expected_rows)