Example No. 1
class ArcticBinary:
    def __init__(self,
                 lib_name: str = _ARCTIC_BINARY_LIBRARY,
                 mongo_db: str = "auto"):
        """假定一个 instance 只操作一个 library
            mongo_db :
                "auto" 根据环境是否为 colab 自动选择  google 还是 local
                "google" 选择 google 的 mongo
                "intranet" 选择机房中的Mongo
        """
        # 这里 暂时先 hardcode arctic 所使用的 mongo 地址
        mongo_db_conn_str = get_mongo_admin_conn_str()
        if mongo_db == "google":
            mongo_db_conn_str = get_google_mongo_conn_str()
        elif mongo_db == "intranet":
            mongo_db_conn_str = get_intranet_mongo_conn_str()
        self._store = Arctic(mongo_db_conn_str)
        if not self._store.library_exists(lib_name):
            self._store.initialize_library(lib_name, VERSION_STORE)
        self._lib = self._store[lib_name]

    def write_bin_object(self, bin_data: bytes, symbol: str):
        self._lib.write(symbol, bin_data)

    def read_bin_object(self, symbol: str) -> bytes:
        return self._lib.read(symbol).data

    def has_symbol(self, symbol: str) -> bool:
        return self._lib.has_symbol(symbol)
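A minimal usage sketch for the class above; it assumes a reachable MongoDB behind the connection helpers and uses purely illustrative symbol and payload names:

import pickle

# Hypothetical usage: persist and restore an arbitrary python object as bytes.
bin_store = ArcticBinary(mongo_db="auto")        # picks the google or local mongo automatically
payload = pickle.dumps({"weights": [0.1, 0.2]})  # any bytes payload works
bin_store.write_bin_object(payload, symbol="model_v1")
if bin_store.has_symbol("model_v1"):
    restored = pickle.loads(bin_store.read_bin_object("model_v1"))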
Example No. 2
def _get_lib(lib_name: str = "default",
             lib_type: str = VERSION_STORE):
    client = MongoClient(host=config.MONGO_HOST,
                         port=27017,
                         username=config.MONGO_USER,
                         password=config.MONGO_PWD,
                         authSource=config.MONGO_AUTHDB)

    a = Arctic(client)
    if not a.library_exists(lib_name):
        a.initialize_library(lib_name, lib_type=lib_type)
    return a[lib_name]
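A hedged usage sketch for _get_lib; the library name and DataFrame contents are illustrative, and the config module with the MONGO_* settings is assumed from the surrounding code:

import pandas as pd

# Illustrative round trip through the returned VERSION_STORE library.
lib = _get_lib("demo_prices")
df = pd.DataFrame({"close": [101.2, 101.9]},
                  index=pd.date_range("2021-01-04", periods=2, name="date"))
lib.write("AAPL", df, metadata={"source": "example"})
print(lib.read("AAPL").data)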
Example No. 3
def update_pool_cmd(ctx, pool_id, start, end, libname, mongo_uri):
    config = Config()
    ctx.obj["start"] = start
    ctx.obj["end"] = end

    start, end = datetime.datetime.strptime(start, "%Y%m%d"), \
        datetime.datetime.strptime(end, "%Y%m%d")

    # for dumping usage
    pool = ctx.obj["pools"][pool_id]()
    logger.info(f"Processing Pool: {pool.__class__.__name__.lower()}")
    symbols = pool.variables.keys()

    # load from the arctic mongodb library first, then fill in the missing dates
    data_ = {}  # defined before the try block so the merge step below never sees an undefined name
    try:
        store = Arctic(mongo_uri)
        logger.info(f"Connecting to mongodb: URI={mongo_uri}.")

        # TODO: read the symbols asynchronously, since every symbol may have a different length

        if store.library_exists(libname):
            lib = store.get_library(libname)
            start_ = start

            for sym in symbols:
                try:
                    d = lib.read(sym).data
                    data_[sym] = d
                    if d.index[-1] > start_:  # the last date stored in db is later than the requested start
                        start_ = d.index[-1]
                except arctic.exceptions.NoDataFoundException as e:
                    logger.info(e)
            start = start_

    # update from start, in order to override the start-date data point
    except pymongo.errors.ServerSelectionTimeoutError as e:
        click.echo(str(e))

    data = pool.get_batch(symbols, start, end)
    # merge with the previously stored data
    if data_:
        for sym, d in data.items():
            if sym in data_:
                d = pd.merge(data_[sym], d)  # intended to replace overlapping rows with the new data
            data[sym] = d
    # dataset
    for symb, var in pool.variables.items():
        logger.info(f"symbol: {symb}")
        yield var, data[symb]
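For completeness, a sketch of how the pairs yielded by update_pool_cmd might be persisted; the write-back step and the assumption that var can serve as the storage symbol are not part of the original command:

# Hypothetical consumer of the generator above (ignoring the click wiring).
store = Arctic(mongo_uri)
lib = store.get_library(libname)
for var, df in update_pool_cmd(ctx, pool_id, start, end, libname, mongo_uri):
    lib.write(var, df)  # assumption: var is usable as the storage symbol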
Example No. 4
    def poke(self, context):
        hook = MongoHook(self.mongo_conn_id, libname=self.libname)
        client = hook.get_conn()
        store = Arctic(client)

        self.log.info(
            f'Poking for {self.mongo_conn_id}, {self.libname}: {self.symbol}')

        try:
            if store.library_exists(self.libname):
                lib = store.get_library(self.libname)
                if lib.has_symbol(self.symbol):
                    return self.python_call_back(
                        self.meta,
                        lib.read_meta(self.symbol).metadata)
        except OSError:
            return False
        return False
Example No. 5
class DB():
    def __init__(
            self,
            mongod=r'E:\mongodb-win32-x86_64-2008plus-ssl-4.0.6\bin\mongod.exe',
            dbPath=r'E:\mongodb-win32-x86_64-2008plus-ssl-4.0.6\bin\data',
            address='127.0.0.1'):

        self.mongod = mongod
        self.dbPath = dbPath
        self.address = address

    def startDB(self, storename='fx'):
        self.con = subprocess.Popen("%s %s %s" %
                                    (self.mongod, "--dbpath", self.dbPath),
                                    shell=True)

        self.store = Arctic(self.address)

        if not self.store.library_exists(storename):
            self.store.initialize_library(storename)

        self.library = self.store[storename]
        self.rc = self.con.returncode

    def readFxData(self,
                   name='EURUSD',
                   version=1,
                   start='2016-07-01',
                   end='2016-07-02'):
        return self.library.read(name,
                                 as_of=version,
                                 date_range=DateRange(start, end))

    def writeData2DB(self, df, name):
        self.library.write(name, df)

    def __del__(self):
        print("terminating db connection.")
        self.con.terminate()
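A short usage sketch for the DB wrapper above, assuming the mongod binary and data path in the defaults exist and that eurusd_df is a DataFrame indexed by timestamp:

# Hypothetical session: start the local mongod, write a frame, then read a date range back.
db = DB()
db.startDB(storename='fx')
db.writeData2DB(eurusd_df, 'EURUSD')
df = db.readFxData(name='EURUSD', version=1,
                   start='2016-07-01', end='2016-07-02')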
Example No. 6
class ArcticChunkStorage:
    """
    Storage conventions for Arctic:
        Data stored TS (time-series) style:
            - lib_name corresponds to one data access API
            - symbol corresponds to one stock
            - chunk_size is usually "M" (for daily data)
        Data stored CS (cross-sectional) style:
            - lib_name is one per data vendor, or a few APIs may share the same lib_name
            - symbol is the name of the data API
            - chunk_size is "D", one chunk per period (normally only a single period is read at a time,
              and there is also a local pkl cache on disk)

    """
    META_KEY_MTIME = "mtime"
    META_KEY_MAX_T_INSTORE = "max_t"

    def __init__(self):
        self.arctic_store = Arctic(get_mongo_admin_conn_str(),
                                   connectTimeoutMS=600 * 1000,
                                   serverSelectionTimeoutMS=600 * 1000)
        # NOTE: arctic_store already has its own library caching mechanism

    def check_library(self, lib_name: str):
        if not self.arctic_store.library_exists(lib_name):
            self.arctic_store.initialize_library(lib_name,
                                                 lib_type=CHUNK_STORE)

    def is_symbol_exist(self, lib_name: str, symbol: str) -> bool:
        if not self.arctic_store.library_exists(lib_name):
            return False
        lib_chunk_store = self.arctic_store[lib_name]
        return lib_chunk_store.has_symbol(symbol)

    def is_cs_date_exist(self, lib_name: str, api_name: str,
                         date_v: date) -> bool:
        if not self.is_symbol_exist(lib_name, api_name):
            return False
        df = self.arctic_store[lib_name].read(api_name,
                                              chunk_range=pd.date_range(
                                                  date_v, date_v))
        return _get_df_rows_count(df) > 0

    def init_write_chunk_lib(self, lib_name: str, chunk_size: str, symbol: str,
                             df: pd.DataFrame):
        if not self.arctic_store.library_exists(lib_name):
            self.arctic_store.initialize_library(lib_name,
                                                 lib_type=CHUNK_STORE)
        lib_chunk_store = self.arctic_store[lib_name]
        if lib_chunk_store.has_symbol(symbol):
            lib_chunk_store.delete(symbol)

        run_start = time.time()
        # Only write dataframes that contain data; if the dataframe has no rows, only the meta content is written
        lib_chunk_store.write(symbol, df, chunk_size=chunk_size, upsert=True)

        logger.debug(
            f"Init write {lib_name}-{symbol} arctic , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
        )
        max_date_in_db = df.index.max()

        lib_chunk_store.write_metadata(
            symbol, {
                self.META_KEY_MTIME: datetime.now(),
                self.META_KEY_MAX_T_INSTORE: max_date_in_db
            })

    def _write_cs_chunk_lib(self, lib_name: str, api_name: str,
                            df: pd.DataFrame):
        if not self.arctic_store.library_exists(lib_name):
            self.arctic_store.initialize_library(lib_name,
                                                 lib_type=CHUNK_STORE)
        lib_chunk_store = self.arctic_store[lib_name]

        run_start = time.time()
        if not lib_chunk_store.has_symbol(api_name):
            lib_chunk_store.write(api_name, df, chunk_size="D", upsert=True)
        else:
            lib_chunk_store.update(api_name, df, upsert=True)
        logger.debug(
            f"Init write {lib_name}-{api_name} arctic , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
        )

        lib_chunk_store.write_metadata(api_name,
                                       {self.META_KEY_MTIME: datetime.now()})

    def _append_write_chunk_lib(self, lib_name: str, symbol: str,
                                df: pd.DataFrame):
        lib_chunk_store = self.arctic_store[lib_name]
        min_date = df.index.min()

        df_last_chunk = next(lib_chunk_store.reverse_iterator(
            symbol))  # read the last stored chunk; it must be rewritten together with the new data
        max_date_in_db = df_last_chunk.index.max()
        if max_date_in_db >= min_date:
            err_msg = f"Can't append data {lib_name}-{symbol} already existed in db. max_db_t:{max_date_in_db} , min_data_t:{min_date}." \
                      f"Maybe another one has updated ts data!"
            logger.error(err_msg)
            # NOTE: do not raise here; several processes may be updating the same arctic data concurrently,
            # so the data may legitimately have been updated already
            # raise RuntimeError(err_msg)
            # still update the meta, so this symbol is not picked up for update again next time
            lib_chunk_store.write_metadata(
                symbol, {
                    self.META_KEY_MTIME: datetime.now(),
                    self.META_KEY_MAX_T_INSTORE: max_date_in_db
                })
            return

        # merge the last stored chunk into the new data (DataFrame.append is deprecated, use concat)
        df = pd.concat([df, df_last_chunk])
        df.sort_index(axis=0, ascending=True, inplace=True)
        run_start = time.time()
        lib_chunk_store.update(symbol, df, upsert=True)
        logger.debug(
            f"Upsert {lib_name}-{symbol} , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
        )

        max_date_in_db = df.index.max()
        lib_chunk_store.write_metadata(
            symbol, {
                self.META_KEY_MTIME: datetime.now(),
                self.META_KEY_MAX_T_INSTORE: max_date_in_db
            })

    def _read_all(self, lib_name: str, symbol: str) -> pd.DataFrame:
        lib_chunk_store = self.arctic_store[lib_name]
        if not lib_chunk_store.has_symbol(symbol):
            return None
        run_start = time.time()
        df = lib_chunk_store.read(symbol)
        logger.debug(
            f"Read {lib_name}-{symbol} arctic , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
        )
        return df

    def _read_period(
        self,
        lib_name: str,
        symbol: str,
        start_t: date = date(1990, 1, 1),
        end_t: date = date(2050, 12, 31)
    ) -> pd.DataFrame:
        lib_chunk_store = self.arctic_store[lib_name]
        run_start = time.time()
        df = lib_chunk_store.read(symbol,
                                  chunk_range=pd.date_range(start_t, end_t),
                                  filter_data=True)
        logger.debug(
            f"Read {lib_name}-{symbol} period [{start_t}-{end_t}] , used {time.time() - run_start} secs , {_get_df_rows_count(df)} rows "
        )
        return df

    def _read_cs(self, lib_name: str, api_name: str, t: date) -> pd.DataFrame:
        if not self.is_symbol_exist(lib_name, api_name):
            return None
        lib_chunk_store = self.arctic_store[lib_name]
        run_start = time.time()
        df = lib_chunk_store.read(api_name,
                                  chunk_range=pd.date_range(t, t),
                                  filter_data=True)
        logger.debug(
            f"Read {lib_name}-{api_name} date {t} , used {time.time() - run_start} secs , {_get_df_rows_count(df)} rows "
        )
        return df

    def _read_meta(self, lib_name: str, symbol: str) -> Dict[str, Any]:
        lib_chunk_store = self.arctic_store[lib_name]
        try:
            meta_data = lib_chunk_store.read_metadata(symbol)
            return meta_data
        except NoDataFoundException:
            return None

    def _write_meta(self, lib_name: str, symbol: str, meta: Dict[str, Any]):
        lib_chunk_store = self.arctic_store[lib_name]
        lib_chunk_store.write_metadata(symbol, meta)

    def _remove_symbol(self, lib_name: str, symbol: str):
        if not self.arctic_store.library_exists(lib_name):
            return
        lib_chunk_store = self.arctic_store[lib_name]
        lib_chunk_store.delete(symbol)

    def ts_upsert_arctic_storage(self,
                                 lib_name: str,
                                 symbol: str,
                                 df: pd.DataFrame,
                                 chunk_size: str = "M",
                                 force_reinit: bool = False):
        """一些 derived ts 数据, arctic 只负责存储这些衍生的数据内容"""
        assert df is not None and df.shape[0] > 0  # 不允许写入一个空的 df 对象
        if force_reinit:
            self._remove_symbol(lib_name, symbol)

        if not self.arctic_store.library_exists(lib_name):
            self.arctic_store.initialize_library(lib_name,
                                                 lib_type=CHUNK_STORE)
        lib_chunk_store = self.arctic_store[lib_name]
        run_start = time.time()
        if not lib_chunk_store.has_symbol(symbol):  # first write
            lib_chunk_store.write(symbol,
                                  df,
                                  chunk_size=chunk_size,
                                  upsert=True)
            logger.debug(
                f"Init write {lib_name}-{symbol} arctic , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
            )
        else:  # upsert
            min_date = df.index.min()
            df_last_chunk = next(lib_chunk_store.reverse_iterator(
                symbol))  # read the last stored chunk; it must be rewritten together with the new data
            max_date_in_db = df_last_chunk.index.max()
            if max_date_in_db >= min_date:
                err_msg = f"Can't append data {lib_name}-{symbol} already existed in db. max_db_t:{max_date_in_db} , min_data_t:{min_date}." \
                          f"Maybe another one has updated ts data!"
                logger.error(err_msg)
                return

            # merge the last stored chunk into the new data (DataFrame.append is deprecated, use concat)
            df = pd.concat([df, df_last_chunk])
            df.sort_index(axis=0, ascending=True, inplace=True)
            run_start = time.time()
            lib_chunk_store.update(symbol, df, upsert=True)
            logger.debug(
                f"Upsert {lib_name}-{symbol} , used {time.time() - run_start} secs, {_get_df_rows_count(df)} rows "
            )

        max_date_in_db = df.index.max()
        lib_chunk_store.write_metadata(
            symbol, {
                self.META_KEY_MTIME: datetime.now(),
                self.META_KEY_MAX_T_INSTORE: max_date_in_db
            })
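A hedged sketch of the two storage conventions described in the class docstring; the library names, symbols, and the daily_df / cross_section_df frames are illustrative only:

storage = ArcticChunkStorage()

# TS style: one symbol per stock, monthly chunks for daily data.
storage.ts_upsert_arctic_storage("daily_prices", "000001.SZ", daily_df, chunk_size="M")

# CS style: one symbol per data API, one daily chunk per period.
storage.check_library("vendor_x")
storage._write_cs_chunk_lib("vendor_x", "income_statement", cross_section_df)
exists = storage.is_cs_date_exist("vendor_x", "income_statement", date(2021, 1, 4))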
Example No. 7
class TrailMetricsArcticReporter:
    NNI_EXPERIMENT_LIB = "NNI_EXPERIMENT_METRICS"
    COL_FINAL_RESULT = "final_result"
    INTERMEDIATE_START_DATE = datetime(2000, 1, 1)
    FINAL_METRICS_DATE = datetime(2010, 1, 1)

    def __init__(self, experiment_name: str, experiment_uuid: str, trial_uuid: str):
        set_http_proxy()
        self.experiment_name = experiment_name
        self.experiment_uuid = experiment_uuid
        self.trial_uuid = trial_uuid
        # regardless of where the program runs, always use the arctic instance on google for communication
        self.arctic_store = Arctic(get_google_mongo_conn_str(), connectTimeoutMS=600 * 1000,
                                   serverSelectionTimeoutMS=600 * 1000)
        if not self.arctic_store.library_exists(TrailMetricsArcticReporter.NNI_EXPERIMENT_LIB):
            self.arctic_store.initialize_library(TrailMetricsArcticReporter.NNI_EXPERIMENT_LIB, lib_type=CHUNK_STORE)
        self.arctic_lib = self.arctic_store[TrailMetricsArcticReporter.NNI_EXPERIMENT_LIB]

        self._curr_write_epoch_id: int = 0

    def _write_arctic(self, df: pd.DataFrame):
        if not self.arctic_lib.has_symbol(self.trial_uuid):
            self.arctic_lib.write(self.trial_uuid, df, chunk_size="D", upsert=True)
            self.arctic_lib.write_metadata(self.trial_uuid, {"experiment_name": self.experiment_name,
                                                             "experiment_uuid": self.experiment_uuid})
        else:
            self.arctic_lib.update(self.trial_uuid, df, upsert=True)

    def report_intermediate_result(self, epoch: int, metrics: Mapping[str, Any]):
        df = pd.DataFrame(data=metrics, index=pd.DatetimeIndex(
            [TrailMetricsArcticReporter.INTERMEDIATE_START_DATE + timedelta(days=epoch)], name="date"))
        self._write_arctic(df)

    def report_final_result(self, val: float):
        df = pd.DataFrame(data={TrailMetricsArcticReporter.COL_FINAL_RESULT: val},
                          index=pd.DatetimeIndex([TrailMetricsArcticReporter.FINAL_METRICS_DATE], name="date"))
        self._write_arctic(df)

    def query_metrics(self, latest_epoch: Optional[int]) -> Tuple[
        Optional[int], Optional[List[Dict[str, float]]], Optional[float]]:
        """
        查询 metrics 内容

        Parameters
        ----------
        latest_epoch : int
            从 epoch(不包含) 开始增量查询,不填表示从第一期开始查询

        Returns
        -------
            latest epoch : optional[int]
                最新一个 epoch

            intermediate metrics : Optional[List[Dict[str,float]]]
                从 input latest_epoch 之后的 intermediate metrics 内容

            final result : Optional[float]
                如果已经得到 final result 则提供该数值
        """
        if not self.arctic_lib.has_symbol(self.trial_uuid):
            return None, None, None
        start_t = TrailMetricsArcticReporter.INTERMEDIATE_START_DATE
        if latest_epoch is not None:
            start_t = TrailMetricsArcticReporter.INTERMEDIATE_START_DATE + timedelta(days=latest_epoch + 1)
        end_t = TrailMetricsArcticReporter.FINAL_METRICS_DATE
        df = self.arctic_lib.read(self.trial_uuid, chunk_range=pd.date_range(start_t, end_t), filter_data=True)
        if df is None or df.shape[0] == 0:
            return latest_epoch, None, None
        metrics_cols = df.columns.to_list()
        if TrailMetricsArcticReporter.COL_FINAL_RESULT in metrics_cols:
            metrics_cols.remove(TrailMetricsArcticReporter.COL_FINAL_RESULT)
        final_result = None
        ls_intermediate_metrics = []
        rlt_latest_epoch = latest_epoch
        for row_index, row in df.iterrows():
            if row_index < TrailMetricsArcticReporter.FINAL_METRICS_DATE:
                curr_epoch_id = (row_index - TrailMetricsArcticReporter.INTERMEDIATE_START_DATE).days
                if rlt_latest_epoch is None or curr_epoch_id > rlt_latest_epoch:
                    rlt_latest_epoch = curr_epoch_id
                ls_intermediate_metrics.append({k: row[k] for k in metrics_cols})
            elif row_index == TrailMetricsArcticReporter.FINAL_METRICS_DATE:
                final_result = row[TrailMetricsArcticReporter.COL_FINAL_RESULT]
            else:
                raise RuntimeError(f"invalid date {row_index} in trail {self.trial_uuid} metrics")
        return rlt_latest_epoch, ls_intermediate_metrics if len(ls_intermediate_metrics) > 0 else None, final_result
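A short usage sketch for the reporter above; the experiment and trial identifiers are placeholders, and a reachable mongo behind get_google_mongo_conn_str is assumed:

# Hypothetical trial: report three intermediate epochs and one final result, then read them back.
reporter = TrailMetricsArcticReporter(experiment_name="demo-exp",
                                      experiment_uuid="exp-0001",
                                      trial_uuid="trial-0001")
for epoch in range(3):
    reporter.report_intermediate_result(epoch, {"val_loss": 1.0 / (epoch + 1)})
reporter.report_final_result(0.42)

latest_epoch, intermediate, final = reporter.query_metrics(latest_epoch=None)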
Example No. 8
from arctic import Arctic, CHUNK_STORE  # pyright: reportMissingImports=false
import os
import pandas as pd
from pymongo import MongoClient
import keyring
import ssl

if __name__ == "__main__":

    # client = MongoClient("localhost")
    client = MongoClient(keyring.get_password('atlas', 'connection_string'),
                         ssl_cert_reqs=ssl.CERT_NONE)

    a = Arctic(client)

    if a.library_exists('fund'):
        a.delete_library('fund')
    if a.library_exists('fund_adj'):
        a.delete_library('fund_adj')

    a.initialize_library('fund', CHUNK_STORE)
    a.initialize_library('fund_adj', CHUNK_STORE)

    fund = a['fund']
    fund_adj = a['fund_adj']

    local = Arctic('localhost')

    fund_local = local['fund']
    fund_adj_local = local['fund_adj']
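The snippet above only opens the Atlas and local libraries; a hedged guess at the intended copy step (not shown in the original) could look like this, continuing inside the same __main__ block:

    # Assumption: mirror every symbol from the local chunk stores into the Atlas ones.
    for sym in fund_local.list_symbols():
        fund.write(sym, fund_local.read(sym), chunk_size='M')
    for sym in fund_adj_local.list_symbols():
        fund_adj.write(sym, fund_adj_local.read(sym), chunk_size='M')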
Example No. 9
import os
from collections import defaultdict

import faust
from faust.livecheck import Case, Signal

from arctic import Arctic, TICK_STORE
from arctic.date import mktz
from akira.position_manager.models import Order

app = faust.App(
    "akira-env-position-manager",
    broker=f"kafka://{os.environ.get('KAFKA_BOOSTRAPHOST', 'localhost:9092')}",
    origin='position-manager.livecheck')

store = Arctic(os.environ.get("MONGODB_URI", "localhost:27017"))
libname = os.environ.get("ORDER_LIBNAME", "akira-env.order")

if not store.library_exists(libname):
    store.initialize_library(libname, lib_type=TICK_STORE)
lib = store[libname]

execution_topic = app.topic('order-execution', value_type=Order)
orders_executed_topic = app.topic('order-executed', value_type=Order)
orders_topic = app.topic('orders', value_type=Order)


class Position(faust.Record):
    amount: float = 0
    price: float = 0

    def __add__(self, order):
        old = self.amount