예제 #1
0
파일: collector.py 프로젝트: microsoft/qlib
    def get_data_from_remote(symbol,
                             interval,
                             start,
                             end,
                             show_1min_logging: bool = False):
        """Fetch the price history of one symbol through ``Ticker.history``.

        Args:
            symbol: Symbol handed to ``Ticker``.
            interval: Bar interval. ``"1min"`` is sent to the API as ``"1m"``;
                every other value is forwarded unchanged.
            start: Forwarded to ``history`` as ``start``.
            end: Forwarded to ``history`` as ``end``.
            show_1min_logging: When true, unusable responses to a 1-minute
                request are logged as warnings.

        Returns:
            The response with its index reset when ``history`` returns a
            ``pd.DataFrame``; ``None`` in every other case, including when
            the request raises.
        """
        error_msg = f"{symbol}-{interval}-{start}-{end}"

        # Remember what the caller asked for. The interval is normalised to
        # "1m" below, and testing the normalised value against
        # YahooCollector.INTERVAL_1min could silently disable the logging for
        # "1min" requests.
        requested_interval = interval

        def _show_logging_func(resp):
            is_1min_request = requested_interval in (
                "1m", "1min", YahooCollector.INTERVAL_1min)
            if is_1min_request and show_1min_logging:
                logger.warning(f"{error_msg}:{resp}")

        interval = "1m" if interval in ["1m", "1min"] else interval
        try:
            _resp = Ticker(symbol,
                           asynchronous=False).history(interval=interval,
                                                       start=start,
                                                       end=end)
            if isinstance(_resp, pd.DataFrame):
                return _resp.reset_index()
            elif isinstance(_resp, dict):
                _temp_data = _resp.get(symbol, {})
                # A string entry, or an entry with no indicators/quote
                # section, carries no usable price data.
                if isinstance(_temp_data, str) or _temp_data.get(
                        "indicators", {}).get("quote", None) is None:
                    _show_logging_func(_resp)
            else:
                _show_logging_func(_resp)
        except Exception as e:
            # Best effort: a failed request is reported, not propagated.
            logger.warning(f"{error_msg}:{e}")
        return None
예제 #2
0
    def get_data_from_remote(symbol,
                             interval,
                             start,
                             end,
                             show_1min_logging: bool = False):
        """Fetch the price history of one symbol through ``Ticker.history``.

        Args:
            symbol: Symbol handed to ``Ticker``.
            interval: Bar interval. ``"1min"`` is sent to the API as ``"1m"``;
                every other value is forwarded unchanged.
            start: Forwarded to ``history`` as ``start``.
            end: Forwarded to ``history`` as ``end``.
            show_1min_logging: When true, unusable responses to a 1-minute
                request are logged as warnings.

        Returns:
            The response with its index reset when ``history`` returns a
            ``pd.DataFrame``; ``None`` in every other case, including when
            the request raises.
        """
        error_msg = f"{symbol}-{interval}-{start}-{end}"

        # Remember what the caller asked for. The interval is normalised to
        # "1m" below, and testing the normalised value against
        # YahooCollector.INTERVAL_1min could silently disable the logging for
        # "1min" requests.
        requested_interval = interval

        def _show_logging_func(resp):
            is_1min_request = requested_interval in (
                "1m", "1min", YahooCollector.INTERVAL_1min)
            if is_1min_request and show_1min_logging:
                logger.warning(f"{error_msg}:{resp}")

        interval = "1m" if interval in ["1m", "1min"] else interval
        try:
            _resp = Ticker(symbol,
                           asynchronous=False).history(interval=interval,
                                                       start=start,
                                                       end=end)
            if isinstance(_resp, pd.DataFrame):
                return _resp.reset_index()
            elif isinstance(_resp, dict):
                _temp_data = _resp.get(symbol, {})
                # A string entry, or an entry with no indicators/quote
                # section, carries no usable price data.
                if isinstance(_temp_data, str) or _temp_data.get(
                        "indicators", {}).get("quote", None) is None:
                    _show_logging_func(_resp)
            else:
                _show_logging_func(_resp)
        except Exception as e:
            # The handler previously formatted the undefined names `start_`
            # and `end_`, so it raised NameError instead of logging. Use this
            # function's own parameters and keep the underlying error.
            logger.warning(
                f"get data error: {symbol}--{start}--{end}: {e}. " +
                "Your data request fails. This may be caused by your firewall (e.g. GFW). Please switch your network if you want to access Yahoo! data"
            )
        return None
예제 #3
0
    def collector_data(self):
        """Download the full history of every symbol and save it.

        Symbols from ``self.stock_list`` are requested in batches of
        ``self.max_workers`` with ``period="max"``. Every frame that comes
        back is handed to ``self.save_stock`` on a thread pool. Symbols whose
        response is not a DataFrame, or whose save raises, are collected and
        logged. Finally the comma-separated records found under
        ``data.klines`` in the JSON served at ``CSI300_BENCH_URL`` are
        written to ``sh000300.csv`` inside ``self.save_dir``.
        """
        logger.info("start collector yahoo data......")
        error_symbol = []
        batch_size = self.max_workers
        with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
            futures = {}
            # The context manager closes the bar even if a batch raises.
            with tqdm(total=len(self.stock_list)) as p_bar:
                for offset in range(0, len(self.stock_list), batch_size):
                    symbols = self.stock_list[offset:offset + batch_size]
                    resp = Ticker(
                        symbols, asynchronous=True,
                        max_workers=self.max_workers).history(period="max")
                    if isinstance(resp, dict):
                        # Dict response: one entry per symbol; entries that
                        # are not frames are recorded as failures.
                        for symbol, df in resp.items():
                            if isinstance(df, pd.DataFrame):
                                futures[worker.submit(
                                    self.save_stock, symbol,
                                    df.reset_index().rename(
                                        columns={"index": "date"}))] = symbol
                            else:
                                error_symbol.append(symbol)
                    else:
                        # Single frame covering the batch: split per symbol.
                        for symbol, df in resp.reset_index().groupby("symbol"):
                            futures[worker.submit(self.save_stock, symbol,
                                                  df)] = symbol
                    # Advance by the real batch size; the last batch can be
                    # shorter than max_workers and would overshoot the total.
                    p_bar.update(len(symbols))

            with tqdm(total=len(futures)) as p_bar:
                for future in as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        logger.error(e)
                        error_symbol.append(futures[future])
                    p_bar.update()

        logger.info(error_symbol)
        logger.info(len(error_symbol))
        logger.info(len(self.stock_list))

        # TODO: from MSN
        # NOTE(review): this request has no timeout or status check; confirm
        # whether a hung or failed download should abort the run.
        klines = requests.get(CSI300_BENCH_URL).json()["data"]["klines"]
        df = pd.DataFrame([line.split(",") for line in klines])
        df.columns = [
            "date", "open", "close", "high", "low", "volume", "money", "change"
        ]
        df["date"] = pd.to_datetime(df["date"])
        df = df.astype(float, errors="ignore")
        df["adjclose"] = df["close"]
        df.to_csv(self.save_dir.joinpath("sh000300.csv"), index=False)
예제 #4
0
    def _gen_real_prices(self):
        """Fetch closing prices for ``<self.code>.AX``, indexed by date.

        The history is requested from ``self.dates[0]`` up to ``TOMORROW``.

        Returns:
            A frame indexed by ``date`` with a single ``price`` column taken
            from ``close``, or ``None`` when the history call returns a dict
            instead of a frame.
        """
        ticker = Ticker(f'{self.code}.AX')
        history = ticker.history(start=self.dates[0], end=TOMORROW)
        if isinstance(history, dict):
            return None

        closes = history.reset_index()[['date', 'close']]
        prices = closes.set_index('date')
        prices.columns = ['price']
        return prices
예제 #5
0
    def _collector(self, stock_list):
        """Download and save the full history of the given symbols.

        Symbols are requested in batches of ``self.max_workers`` with
        ``period="max"``, calling ``self._sleep()`` before each batch. Every
        frame that comes back is passed to ``self._temp_save_small_data`` and
        then handed to ``self.save_stock`` on a thread pool.

        Args:
            stock_list: Sequence of symbols to download.

        Returns:
            A list of the symbols whose response was not a DataFrame or whose
            save raised, followed by the keys of ``self._mini_symbol_map``.
        """
        error_symbol = []
        batch_size = self.max_workers
        with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
            futures = {}
            # The context manager closes the bar even if a batch raises.
            with tqdm(total=len(stock_list)) as p_bar:
                for offset in range(0, len(stock_list), batch_size):
                    symbols = stock_list[offset:offset + batch_size]
                    self._sleep()
                    resp = Ticker(
                        symbols,
                        asynchronous=self._asynchronous,
                        max_workers=self.max_workers).history(period="max")
                    if isinstance(resp, dict):
                        # Dict response: one entry per symbol; entries that
                        # are not frames are recorded as failures.
                        for symbol, df in resp.items():
                            if isinstance(df, pd.DataFrame):
                                # NOTE(review): `self` is passed explicitly to
                                # a call made through `self`; confirm this
                                # matches the helper's signature.
                                self._temp_save_small_data(self, df)
                                futures[worker.submit(
                                    self.save_stock, symbol,
                                    df.reset_index().rename(
                                        columns={"index": "date"}))] = symbol
                            else:
                                error_symbol.append(symbol)
                    else:
                        # Single frame covering the batch: split per symbol.
                        for symbol, df in resp.reset_index().groupby("symbol"):
                            self._temp_save_small_data(self, df)
                            futures[worker.submit(self.save_stock, symbol,
                                                  df)] = symbol
                    # Advance by the real batch size; the last batch can be
                    # shorter than max_workers and would overshoot the total.
                    p_bar.update(len(symbols))

            with tqdm(total=len(futures)) as p_bar:
                for future in as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        logger.error(e)
                        error_symbol.append(futures[future])
                    p_bar.update()
        print(error_symbol)
        logger.info(f"error symbol nums: {len(error_symbol)}")
        logger.info(f"current get symbol nums: {len(stock_list)}")
        error_symbol.extend(self._mini_symbol_map.keys())
        return error_symbol
예제 #6
0
        def _get_simple(start_, end_):
            """Request one date range of history for the enclosing ``symbol``.

            Calls ``self._sleep()`` first, then queries ``Ticker.history``
            with ``self._interval``. Returns the response with its index
            reset when it is a ``pd.DataFrame``; otherwise returns ``None``.
            Unusable responses are logged only for the ``"1m"`` interval when
            ``self._show_1m_logging`` is set, and any exception is logged as
            a warning instead of being raised.
            """
            self._sleep()
            error_msg = f"{symbol}-{self._interval}-{start_}-{end_}"

            def _warn_unusable(response):
                # Logs only for 1-minute requests that opted in.
                if self._interval == "1m" and self._show_1m_logging:
                    logger.warning(f"{error_msg}:{response}")

            try:
                history = Ticker(symbol, asynchronous=False).history(
                    interval=self._interval, start=start_, end=end_
                )
                if isinstance(history, pd.DataFrame):
                    return history.reset_index()
                if not isinstance(history, dict):
                    _warn_unusable(history)
                    return None

                payload = history.get(symbol, {})
                # A string entry, or one with no indicators/quote section,
                # carries no usable price data.
                if isinstance(payload, str) or payload.get("indicators", {}).get("quote") is None:
                    _warn_unusable(history)
            except Exception as e:
                logger.warning(f"{error_msg}:{e}")
            return None
예제 #7
0
import numpy as np
from yahooquery import Ticker
import armagarch as ag
import multiprocessing as mp

# Daily AAPL history for the last ten years, re-indexed by date only.
price = (
    Ticker('AAPL')
    .history(period='10y', interval='1d')
    .reset_index()
    .drop(columns='symbol')
    .set_index('date')
)

# Log return of each row against the previous row's adjusted close; the first
# row has no predecessor and is dropped.
log_returns = np.log(
    price['adjclose'].div(price['adjclose'].shift(1))).dropna()
T = log_returns.shape[0]

# Rolling 500-row windows expressed as (start, stop) index pairs.
intervals = [(stop - 500, stop) for stop in range(500, T - 1)]


def one_step_var(
        interval,
        output_dir='/home/howardwong/Desktop/Research/ARMA-GARCH-Models/data/var-forecasts'
):
    """Fit a VaR model on one window of log returns and write the forecast.

    Args:
        interval: ``(t1, t2)`` pair; rows ``t1`` up to but excluding ``t2``
            of ``log_returns`` form the fitting window.
        output_dir: Directory that receives ``<prediction date>.txt``.
            Defaults to the location the script originally hard-coded.

    The output file holds one comma-separated line: the prediction date, the
    value returned by ``model.predict`` with ``threshold=0.95``, and the log
    return at index ``t2 + 1``.
    """
    import os

    t1, t2 = interval
    X = log_returns.values[t1:t2]
    # NOTE(review): the window ends at row t2 - 1, yet row t2 + 1 is used as
    # the prediction target, skipping row t2. Confirm the offset is intended.
    prediction_date = log_returns.index[t2 + 1].strftime('%Y-%m-%d')

    model = ag.VaRModel()
    model.fit(X, 2, 2, verbose=False, summary_stats=False)
    value_at_risk95 = model.predict(X, threshold=0.95)

    out_path = os.path.join(output_dir, '{}.txt'.format(prediction_date))
    # The with-block closes the file; no explicit close() is needed.
    with open(out_path, 'w') as f:
        f.write(prediction_date + ',' + str(value_at_risk95) + ',' +
                str(log_returns.values[t2 + 1]))
예제 #8
0
import streamlit as st

from yahooquery import Ticker

# Fetch the annual balance sheet for the ORSTED.CO ticker.
data = Ticker("ORSTED.CO").balance_sheet(frequency="annual")

# Show the data exactly as returned, with no formatting applied.
st.header("Raw Data")
st.dataframe(data)

def get_default_format(data):
    """Build a per-column format-string mapping based on column dtype.

    ``int64`` columns get a thousands-separated format with no decimals,
    ``float64`` columns get a thousands-separated format with two decimals,
    and every other dtype gets the plain ``"{0:}"`` format.

    Args:
        data: Frame whose columns are inspected.

    Returns:
        A dict mapping each column label to its format string.
    """
    templates = {"int64": "{0:,.0f}", "float64": "{0:,.2f}"}
    fallback = "{0:}"

    def _template_for(dtype):
        # Checked in insertion order: int64 first, then float64.
        for dtype_name, template in templates.items():
            if dtype == dtype_name:
                return template
        return fallback

    return {column: _template_for(data[column].dtype) for column in data.columns}

# Render the same balance sheet twice with dtype-based number formatting:
# first with the index moved into ordinary columns, then with it left alone.
data_reset_index = data.reset_index()
for title, frame in (
    ("Dataframe Index dropped and decimal symbol format applied",
     data_reset_index),
    ("Dataframe Index NOT dropped and decimal symbol format applied", data),
):
    st.header(title)
    format_dict = get_default_format(frame)
    st.dataframe(frame.style.format(format_dict))