def read_time_series_cache_from_disk(self, fname, engine='hdf5',
                                     start_date=None, finish_date=None,
                                     db_server=DataConstants().db_server,
                                     db_port=DataConstants().db_port,
                                     username=None, password=None):
    """Reads time series cache from disk in either HDF5 or bcolz

    Parameters
    ----------
    fname : str (or list)
        file to be read from
    engine : str (optional)
        'hd5' - reads HDF5 files (default)
        'arctic' - reads from Arctic/MongoDB database
        'redis' - reads from a Redis key/value store
        'bcolz' - reads from bcolz file (not fully implemented)
    start_date : str/datetime (optional)
        Start date
    finish_date : str/datetime (optional)
        Finish data
    db_server : str
        IP address of MongoDB (default '127.0.0.1')
    db_port : int/str
        Port of the database server
    username, password : str (optional)
        Credentials for MongoDB (ignored by the other engines)

    Returns
    -------
    DataFrame (or list of DataFrame when several filenames were given)
    """
    logger = LoggerManager.getLogger(__name__)

    data_frame_list = []

    # Accept a single filename, a glob pattern or an explicit list
    if not (isinstance(fname, list)):
        if '*' in fname:
            fname = glob.glob(fname)
        else:
            fname = [fname]

    for fname_single in fname:
        logger.debug("Reading " + fname_single + "..")

        if (engine == 'bcolz'):
            try:
                name = self.get_bcolz_filename(fname_single)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                # Timestamps were stashed in a 'DTS_' column on write;
                # restore them as the index
                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # Convert invalid characters (which bcolz can't deal with)
                # back to more readable characters for pandas
                data_frame.columns = self.find_replace_chars(
                    data_frame.columns, _replace_chars, _invalid_chars)
                # Strip the 'A_' prefix added on write
                data_frame.columns = [x[2:] for x in data_frame.columns]
            except Exception:
                # was a bare except: narrowed so KeyboardInterrupt/SystemExit
                # still propagate; any read failure yields None
                data_frame = None

        elif (engine == 'redis'):
            import redis

            # Redis keys cannot contain '.', so normalise the basename
            fname_single = os.path.basename(fname_single).replace('.', '_')

            msg = None

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                msg = r.get(fname_single)
            except Exception:
                # best-effort cache: a missing/unreachable Redis is not fatal
                logger.info("Cache not existent for " + fname_single + " in Redis")

            if msg is None:
                data_frame = None
            else:
                logger.info('Load Redis cache: ' + fname_single)

                # NOTE(review): pandas.read_msgpack was removed in pandas 1.0
                # — confirm the pinned pandas version, or migrate to pyarrow
                # serialization as the newer code path does
                data_frame = pandas.read_msgpack(msg)

        elif (engine == 'arctic'):
            socketTimeoutMS = 2 * 1000

            import pymongo
            from arctic import Arctic

            # Arctic library names cannot contain '.'
            fname_single = os.path.basename(fname_single).replace('.', '_')

            logger.info('Load Arctic/MongoDB library: ' + fname_single)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password
                         + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(
                    host="mongodb://" + str(db_server) + ":" + str(db_port),
                    connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS)

            # Access the library
            try:
                library = store[fname_single]

                if start_date is None and finish_date is None:
                    item = library.read(fname_single)
                else:
                    from arctic.date import DateRange
                    item = library.read(
                        fname_single,
                        date_range=DateRange(start_date, finish_date))

                c.close()

                logger.info('Read ' + fname_single)

                data_frame = item.data
            except Exception as e:
                logger.warning('Library does not exist: ' + fname_single
                               + ' & message is ' + str(e))
                data_frame = None

        elif os.path.isfile(self.get_h5_filename(fname_single)):
            store = pandas.HDFStore(self.get_h5_filename(fname_single))
            data_frame = store.select("data")

            # Intraday data is stored as float32 to halve the footprint
            if ('intraday' in fname_single):
                data_frame = data_frame.astype('float32')

            store.close()

        elif os.path.isfile(fname_single):
            data_frame = pandas.read_parquet(fname_single)

        data_frame_list.append(data_frame)

    # A single input unwraps to a single DataFrame for convenience
    if len(data_frame_list) == 1:
        return data_frame_list[0]

    return data_frame_list
def write_time_series_cache_to_disk(self, fname, data_frame,
                                    engine='hdf5_fixed', append_data=False,
                                    db_server=DataConstants().db_server,
                                    db_port=DataConstants().db_port,
                                    username=None, password=None,
                                    filter_out_matching=None, timeout=10):
    """Writes Pandas data frame to disk as HDF5 format or bcolz format
    or in Arctic

    Parameters
    ----------
    fname : str
        path of file
    data_frame : DataFrame
        data frame to be written to disk
    engine : str
        'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot
        append to this
        'hdf5_table' - use HDF5 table format, slower but can append to
        'parquet' - use Parquet
        'arctic' - use Arctic/MongoDB database
        'redis' - use Redis
    append_data : bool
        False - write a fresh copy of data on disk each time
        True - append data to disk
    db_server : str
        Database server for arctic (default: '127.0.0.1')
    db_port : int/str
        Database port
    username, password : str (optional)
        Credentials for MongoDB (ignored by the other engines)
    filter_out_matching : str (optional)
        Drop columns whose name contains this substring before writing
    timeout : int
        Number of seconds to do timeout
    """
    # Default HDF5 format; 'hdf5_fixed'/'hdf5_table' select the variant
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if (engine == 'bcolz'):
        # Convert invalid characters to substitutes
        # (which bcolz can't deal with)
        data_frame.columns = self.find_replace_chars(
            data_frame.columns, _invalid_chars, _replace_chars)
        # 'A_' prefix guarantees bcolz-legal column names; stripped on read
        data_frame.columns = ['A_' + x for x in data_frame.columns]

        # Keep the index as a plain column, restored on read
        data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

        bcolzpath = self.get_bcolz_filename(fname)

        shutil.rmtree(bcolzpath, ignore_errors=True)

        zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)

    elif (engine == 'redis'):
        import redis

        # Redis keys cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout,
                                  socket_connect_timeout=timeout)

            if isinstance(data_frame, pandas.DataFrame):
                # NOTE(review): DataFrame.to_msgpack was removed in
                # pandas 1.0 — confirm pinned pandas version
                r.set(fname, data_frame.to_msgpack(compress='blosc'))

            self.logger.info("Pushed " + fname + " to Redis")
        except Exception as e:
            # Cache writes are best-effort; never fail the caller
            self.logger.warning("Couldn't push " + fname
                                + " to Redis: " + str(e))

    elif (engine == 'arctic'):
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000

        # Arctic library names cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        self.logger.info('Load Arctic/MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password
                     + "@" + str(db_server) + ":" + str(db_port),
                connect=False)  # , username=username, password=password)
        else:
            c = pymongo.MongoClient(
                host="mongodb://" + str(db_server) + ":" + str(db_port),
                connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)

        # Probe for the library, creating it on first use
        database = None

        try:
            database = store[fname]
        except Exception:
            # was a bare except: Arctic raises if the library is missing
            pass

        if database is None:
            store.initialize_library(fname, audit=False)
            self.logger.info("Created MongoDB library: " + fname)
        else:
            self.logger.info("Got MongoDB library: " + fname)

        # Access the library
        library = store[fname]

        # Intraday data is stored as float32 to halve the footprint
        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        if filter_out_matching is not None:
            cols = data_frame.columns

            new_cols = []

            for col in cols:
                if filter_out_matching not in col:
                    new_cols.append(col)

            data_frame = data_frame[new_cols]

        # Can duplicate values if we have existing dates
        if append_data:
            library.append(fname, data_frame)
        else:
            library.write(fname, data_frame)

        c.close()

        self.logger.info("Written MongoDB library: " + fname)

    elif (engine == 'hdf5'):
        h5_filename = self.get_h5_filename(fname)

        # Append data only works for HDF5 stored as tables (but this is
        # much slower than fixed format); removes duplicated entries at
        # the end
        if append_data:
            # NOTE(review): HDFStore() does not document a 'format'
            # kwarg — confirm it is not silently rejected by PyTables
            store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                    complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            # Get last row which matches and remove everything after
            # that (because append function doesn't check for
            # duplicated rows)
            nrows = len(store['data'].index)
            last_point = data_frame.index[-1]

            i = nrows - 1

            while (i > 0):
                read_index = store.select('data', start=i,
                                          stop=nrows).index[0]

                if (read_index <= last_point):
                    break

                i = i - 1

            # Remove rows at the end, which are duplicates of the
            # incoming time series
            store.remove(key='data', start=i, stop=nrows)
            store.put(key='data', value=data_frame, format=hdf5_format,
                      append=True)
            store.close()
        else:
            # Write to a temp file first so a crash mid-write never
            # leaves a corrupt cache behind
            h5_filename_temp = self.get_h5_filename(fname + ".temp")

            # Delete the old copy
            try:
                os.remove(h5_filename_temp)
            except OSError:
                # file may simply not exist yet
                pass

            store = pandas.HDFStore(h5_filename_temp, format=hdf5_format,
                                    complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.put(key='data', value=data_frame, format=hdf5_format)
            store.close()

            # Delete the old copy
            try:
                os.remove(h5_filename)
            except OSError:
                pass

            # Once written to disk rename
            os.rename(h5_filename_temp, h5_filename)

        self.logger.info("Written HDF5: " + fname)

    elif (engine == 'parquet'):
        if fname[-5:] != '.gzip':
            fname = fname + '.gzip'

        data_frame.to_parquet(fname, compression='gzip')

        self.logger.info("Written Parquet: " + fname)
def read_time_series_cache_from_disk(self, fname, engine='hdf5',
                                     start_date=None, finish_date=None,
                                     db_server=constants.db_server,
                                     db_port=constants.db_port,
                                     username=constants.db_username,
                                     password=constants.db_password):
    """Reads time series cache from disk in either HDF5 or bcolz

    Parameters
    ----------
    fname : str (or list)
        file to be read from
    engine : str (optional)
        'hd5' - reads HDF5 files (default)
        'arctic' - reads from Arctic/MongoDB database
        'bcolz' - reads from bcolz file (not fully implemented)
        'parquet' - reads from Parquet
    start_date : str/datetime (optional)
        Start date
    finish_date : str/datetime (optional)
        Finish data
    db_server : str
        IP address of MongoDB (default '127.0.0.1')
    db_port : int/str
        Port of the database server
    username, password : str (optional)
        Credentials for MongoDB (ignored by the other engines)

    Returns
    -------
    DataFrame (or list of DataFrame when several filenames were given)
    """
    logger = LoggerManager.getLogger(__name__)

    data_frame_list = []

    # Accept a single filename, a glob pattern or an explicit list
    if not (isinstance(fname, list)):
        if '*' in fname:
            fname = glob.glob(fname)
        else:
            fname = [fname]

    for fname_single in fname:
        logger.debug("Reading " + fname_single + "..")

        # Parquet files get a default extension if none was supplied
        if engine == 'parquet' and '.gzip' not in fname_single \
                and '.parquet' not in fname_single:
            fname_single = fname_single + '.parquet'

        if (engine == 'bcolz'):
            try:
                name = self.get_bcolz_filename(fname_single)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                # Timestamps were stashed in a 'DTS_' column on write;
                # restore them as the index
                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # Convert invalid characters (which bcolz can't deal with)
                # back to more readable characters for pandas
                data_frame.columns = self.find_replace_chars(
                    data_frame.columns, _replace_chars, _invalid_chars)
                # Strip the 'A_' prefix added on write
                data_frame.columns = [x[2:] for x in data_frame.columns]
            except Exception:
                # was a bare except: narrowed so KeyboardInterrupt/
                # SystemExit still propagate
                data_frame = None

        elif (engine == 'redis'):
            # Redis keys cannot contain '.'
            fname_single = os.path.basename(fname_single).replace('.', '_')

            msg = None

            try:
                # For pyarrow (de)serialization
                context = pa.default_serialization_context()

                r = redis.StrictRedis(host=db_server, port=db_port, db=0)

                # Is there a compressed key stored? Compressed entries are
                # written as 'comp_<uncompressed-size>_<name>'
                k = r.keys('comp_*_' + fname_single)

                # If so, then it means that we have stored it as a
                # compressed object; if have more than 1 element, take the
                # last (which will be the latest to be added)
                if (len(k) >= 1):
                    k = k[-1].decode('utf-8')

                    comp = r.get(k)

                    # Decompressed size is encoded in the key itself
                    siz = int(k.split('_')[1])
                    dec = pa.decompress(comp, codec='lz4',
                                        decompressed_size=siz)

                    msg = context.deserialize(dec)
                else:
                    msg = r.get(fname_single)

                    # print(fname_single)
                    if msg is not None:
                        msg = context.deserialize(msg)
                    # logger.warning("Key " + fname_single + " not in Redis cache?")

            except Exception as e:
                logger.info("Cache not existent for " + fname_single
                            + " in Redis: " + str(e))

            if msg is None:
                data_frame = None
            else:
                logger.info('Load Redis cache: ' + fname_single)

                data_frame = msg  # pandas.read_msgpack(msg)

        elif (engine == 'arctic'):
            socketTimeoutMS = 2 * 1000

            import pymongo
            from arctic import Arctic

            # Arctic library names cannot contain '.'
            fname_single = os.path.basename(fname_single).replace('.', '_')

            logger.info('Load Arctic/MongoDB library: ' + fname_single)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password
                         + "@" + str(db_server) + ":" + str(db_port),
                    connect=False
                )  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(
                    host="mongodb://" + str(db_server) + ":" + str(db_port),
                    connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                           serverSelectionTimeoutMS=socketTimeoutMS)

            # Access the library
            try:
                library = store[fname_single]

                if start_date is None and finish_date is None:
                    item = library.read(fname_single)
                else:
                    from arctic.date import DateRange

                    # Arctic has issues with tz-aware boundaries, so strip
                    item = library.read(
                        fname_single,
                        date_range=DateRange(
                            start_date.replace(tzinfo=None),
                            finish_date.replace(tzinfo=None)))

                c.close()

                logger.info('Read ' + fname_single)

                data_frame = item.data
            except Exception as e:
                logger.warning('Library may not exist or another error: '
                               + fname_single + ' & message is ' + str(e))
                data_frame = None

        elif self.path_exists(self.get_h5_filename(fname_single)):
            store = pandas.HDFStore(self.get_h5_filename(fname_single))
            data_frame = store.select("data")

            # Intraday data is stored as float32 to halve the footprint
            if ('intraday' in fname_single):
                data_frame = data_frame.astype('float32')

            store.close()

        elif self.path_exists(fname_single) and '.csv' in fname_single:
            data_frame = pandas.read_csv(fname_single, index_col=0)

            # FIX: was pd.to_datetime — 'pd' is not a name used anywhere
            # else in this module (it consistently imports 'pandas'), so
            # this branch would raise NameError
            data_frame.index = pandas.to_datetime(data_frame.index)
        elif self.path_exists(fname_single):
            data_frame = self.read_parquet(fname_single)
            # data_frame = pandas.read_parquet(fname_single)

        data_frame_list.append(data_frame)

    # A single input unwraps to a single DataFrame for convenience
    if len(data_frame_list) == 1:
        return data_frame_list[0]

    return data_frame_list
def write_time_series_cache_to_disk(
        self, fname, data_frame,
        engine='hdf5_fixed', append_data=False,
        db_server=constants.db_server,
        db_port=constants.db_port,
        username=constants.db_username,
        password=constants.db_password,
        filter_out_matching=None, timeout=10,
        use_cache_compression=constants.use_cache_compression,
        parquet_compression=constants.parquet_compression,
        md_request=None, ticker=None):
    """Writes Pandas data frame to disk as HDF5 format or bcolz format
    or in Arctic

    Parameters
    ----------
    fname : str
        path of file
    data_frame : DataFrame
        data frame to be written to disk
    engine : str
        'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot
        append to this
        'hdf5_table' - use HDF5 table format, slower but can append to
        'parquet' - use Parquet
        'arctic' - use Arctic/MongoDB database
        'redis' - use Redis
    append_data : bool
        False - write a fresh copy of data on disk each time
        True - append data to disk
    db_server : str
        Database server for arctic (default: '127.0.0.1')
    timeout : int
        Number of seconds to do timeout
    use_cache_compression : bool
        Compress the serialized frame with LZ4 before pushing to Redis
    parquet_compression : str
        Compression codec for Parquet output
    md_request : MarketDataRequest (optional)
        When given, its category key is appended to the path
    ticker : str (optional)
        Ticker forwarded to md_request.create_category_key
    """
    logger = LoggerManager().getLogger(__name__)

    if md_request is not None:
        fname = self.path_join(
            fname, md_request.create_category_key(ticker=ticker))

    # Default HDF5 format; 'hdf5_fixed'/'hdf5_table' select the variant
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if (engine == 'bcolz'):
        # Convert invalid characters to substitutes
        # (which bcolz can't deal with)
        data_frame.columns = self.find_replace_chars(
            data_frame.columns, _invalid_chars, _replace_chars)
        # 'A_' prefix guarantees bcolz-legal column names; stripped on read
        data_frame.columns = ['A_' + x for x in data_frame.columns]

        # Keep the index as a plain column, restored on read
        data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

        bcolzpath = self.get_bcolz_filename(fname)

        shutil.rmtree(bcolzpath, ignore_errors=True)

        zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)

    elif (engine == 'redis'):
        # Redis keys cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        # Will fail if Redis is not installed
        try:
            r = redis.StrictRedis(host=db_server, port=db_port, db=0,
                                  socket_timeout=timeout,
                                  socket_connect_timeout=timeout)

            ping = r.ping()

            # If Redis is alive, try pushing to it
            if ping:
                if data_frame is not None:
                    if isinstance(data_frame, pandas.DataFrame):
                        # FIX: deep expects a bool; the old deep='deep'
                        # only worked because a non-empty str is truthy
                        mem = data_frame.memory_usage(deep=True).sum()

                        mem_float = round(
                            float(mem) / (1024.0 * 1024.0), 3)

                        # Refuse to cache very large frames (>= 500 MB)
                        if mem_float < 500:
                            # msgpack/blosc is deprecated
                            # r.set(fname, data_frame.to_msgpack(compress='blosc'))

                            # now uses pyarrow
                            context = pa.default_serialization_context()

                            ser = context.serialize(data_frame).to_buffer()

                            if use_cache_compression:
                                comp = pa.compress(ser, codec='lz4',
                                                   asbytes=True)
                                # Decompressed size must be recorded in the
                                # key so the reader can decompress
                                siz = len(ser)  # siz = 3912

                                r.set('comp_' + str(siz) + '_' + fname,
                                      comp)
                            else:
                                r.set(fname, ser.to_pybytes())

                            logger.info("Pushed " + fname + " to Redis")
                        else:
                            # FIX: logger.warn is a deprecated alias
                            logger.warning("Did not push " + fname
                                           + " to Redis, given size")
                    else:
                        logger.info("Object " + fname
                                    + " is empty, not pushed to Redis.")
                else:
                    logger.warning("Didn't push " + fname
                                   + " to Redis given not running")
        except Exception as e:
            # Cache writes are best-effort; never fail the caller
            logger.warning("Couldn't push " + fname
                           + " to Redis: " + str(e))

    elif (engine == 'arctic'):
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000

        # Arctic library names cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        logger.info('Load Arctic/MongoDB library: ' + fname)

        if username is not None and password is not None:
            c = pymongo.MongoClient(
                host="mongodb://" + username + ":" + password
                     + "@" + str(db_server) + ":" + str(db_port),
                connect=False)  # , username=username, password=password)
        else:
            c = pymongo.MongoClient(
                host="mongodb://" + str(db_server) + ":" + str(db_port),
                connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS,
                       connectTimeoutMS=socketTimeoutMS)

        # Probe for the library, creating it on first use
        database = None

        try:
            database = store[fname]
        except Exception:
            # was a bare except: Arctic raises if the library is missing
            pass

        if database is None:
            store.initialize_library(fname, audit=False)
            logger.info("Created MongoDB library: " + fname)
        else:
            logger.info("Got MongoDB library: " + fname)

        # Access the library
        library = store[fname]

        # Intraday data is stored as float32 to halve the footprint
        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        if filter_out_matching is not None:
            cols = data_frame.columns

            new_cols = []

            for col in cols:
                if filter_out_matching not in col:
                    new_cols.append(col)

            data_frame = data_frame[new_cols]

        # Problems with Arctic when writing timezone to disk sometimes,
        # so strip
        data_frame = data_frame.copy().tz_localize(None)

        try:
            # Can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            logger.info("Written MongoDB library: " + fname)
        except Exception as e:
            logger.warning("Couldn't write MongoDB library: " + fname
                           + " " + str(e))

    elif (engine == 'hdf5'):
        h5_filename = self.get_h5_filename(fname)

        # Append data only works for HDF5 stored as tables (but this is
        # much slower than fixed format); removes duplicated entries at
        # the end
        if append_data:
            # NOTE(review): HDFStore() does not document a 'format'
            # kwarg — confirm it is not silently rejected by PyTables
            store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                    complib="zlib", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            # Get last row which matches and remove everything after
            # that (because append function doesn't check for
            # duplicated rows)
            nrows = len(store['data'].index)
            last_point = data_frame.index[-1]

            i = nrows - 1

            while (i > 0):
                read_index = store.select('data', start=i,
                                          stop=nrows).index[0]

                if (read_index <= last_point):
                    break

                i = i - 1

            # Remove rows at the end, which are duplicates of the
            # incoming time series
            store.remove(key='data', start=i, stop=nrows)
            store.put(key='data', value=data_frame, format=hdf5_format,
                      append=True)
            store.close()
        else:
            # Write to a temp file first so a crash mid-write never
            # leaves a corrupt cache behind
            h5_filename_temp = self.get_h5_filename(fname + ".temp")

            # Delete the old copy
            try:
                os.remove(h5_filename_temp)
            except OSError:
                # file may simply not exist yet
                pass

            store = pandas.HDFStore(h5_filename_temp, complib="zlib",
                                    complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.put(key='data', value=data_frame, format=hdf5_format)
            store.close()

            # Delete the old copy
            try:
                os.remove(h5_filename)
            except OSError:
                pass

            # Once written to disk rename
            os.rename(h5_filename_temp, h5_filename)

        logger.info("Written HDF5: " + fname)

    elif (engine == 'parquet'):
        if '.parquet' not in fname:
            if fname[-5:] != '.gzip':
                fname = fname + '.parquet'

        self.to_parquet(data_frame, fname,
                        aws_region=constants.aws_region,
                        parquet_compression=parquet_compression)
        # data_frame.to_parquet(fname, compression=parquet_compression)

        logger.info("Written Parquet: " + fname)
    elif engine == 'csv':
        if '.csv' not in fname:
            fname = fname + '.csv'

        data_frame.to_csv(fname)

        logger.info("Written CSV: " + fname)
def read_time_series_cache_from_disk(self, fname, engine='hdf5',
                                     start_date=None, finish_date=None,
                                     db_server='127.0.0.1'):
    """Reads time series cache from disk in either HDF5 or bcolz

    Parameters
    ----------
    fname : str
        file to be read from
    engine : str (optional)
        'hdf5' - reads HDF5 files (default)
        'arctic' - reads from Arctic/MongoDB database
        'bcolz' - reads from bcolz file (not fully implemented)
    start_date : str/datetime (optional)
        Start date (Arctic only)
    finish_date : str/datetime (optional)
        Finish date (Arctic only)
    db_server : str
        IP address of MongoDB (default '127.0.0.1')

    Returns
    -------
    DataFrame, or None if nothing could be read
    """
    if (engine == 'bcolz'):
        try:
            name = self.get_bcolz_filename(fname)
            zlens = bcolz.open(rootdir=name)
            data_frame = zlens.todataframe()

            # Timestamps were stashed in a 'DTS_' column on write;
            # restore them as the index
            data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
            data_frame.index.name = 'Date'
            del data_frame['DTS_']

            # Convert invalid characters (which bcolz can't deal with)
            # back to more readable characters for pandas
            data_frame.columns = self.find_replace_chars(
                data_frame.columns, _replace_chars, _invalid_chars)
            # Strip the 'A_' prefix added on write
            data_frame.columns = [x[2:] for x in data_frame.columns]

            return data_frame
        except Exception:
            # was a bare except: narrowed so KeyboardInterrupt/SystemExit
            # still propagate; any read failure yields None
            return None
    elif (engine == 'arctic'):
        socketTimeoutMS = 2 * 1000

        import pymongo
        from arctic import Arctic

        # Arctic library names cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        self.logger.info('Load MongoDB library: ' + fname)

        c = pymongo.MongoClient(db_server, connect=False)
        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS)

        # Access the library
        library = store[fname]

        if start_date is None and finish_date is None:
            item = library.read(fname)
        else:
            from arctic.date import DateRange
            item = library.read(fname, date_range=DateRange(
                start_date, finish_date))

        c.close()

        self.logger.info('Read ' + fname)

        return item.data
    elif os.path.isfile(self.get_h5_filename(fname)):
        store = pandas.HDFStore(self.get_h5_filename(fname))
        data_frame = store.select("data")

        # Intraday data is stored as float32 to halve the footprint
        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        store.close()

        return data_frame

    return None
def write_time_series_cache_to_disk(self, fname, data_frame,
                                    engine='hdf5_fixed', append_data=False,
                                    db_server='127.0.0.1',
                                    filter_out_matching=None):
    """Writes Pandas data frame to disk as HDF5 format or bcolz format
    or in Arctic

    Parameters
    ----------
    fname : str
        path of file
    data_frame : DataFrame
        data frame to be written to disk
    engine : str
        'hdf5_fixed' - HDF5 fixed format (quick, cannot append)
        'hdf5_table' - HDF5 table format (slower, appendable)
        'arctic' - Arctic/MongoDB database
        'bcolz' - bcolz columnar store
    append_data : bool
        True - append data to the existing store (table/Arctic only)
    db_server : str
        Database server for arctic (default: '127.0.0.1')
    filter_out_matching : str (optional)
        Drop columns whose name contains this substring before writing
    """
    # Default HDF5 format; 'hdf5_fixed'/'hdf5_table' select the variant
    hdf5_format = 'fixed'

    if 'hdf5' in engine:
        hdf5_format = engine.split('_')[1]
        engine = 'hdf5'

    if (engine == 'bcolz'):
        # Convert invalid characters to substitutes
        # (which bcolz can't deal with)
        data_frame.columns = self.find_replace_chars(
            data_frame.columns, _invalid_chars, _replace_chars)
        # 'A_' prefix guarantees bcolz-legal column names; stripped on read
        data_frame.columns = ['A_' + x for x in data_frame.columns]

        # Keep the index as a plain column, restored on read
        data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

        bcolzpath = self.get_bcolz_filename(fname)

        shutil.rmtree(bcolzpath, ignore_errors=True)

        zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
    elif (engine == 'arctic'):
        from arctic import Arctic
        import pymongo

        socketTimeoutMS = 30 * 1000

        # Arctic library names cannot contain '.'
        fname = os.path.basename(fname).replace('.', '_')

        self.logger.info('Load MongoDB library: ' + fname)

        c = pymongo.MongoClient(db_server, connect=False)

        store = Arctic(c, socketTimeoutMS=socketTimeoutMS,
                       serverSelectionTimeoutMS=socketTimeoutMS)

        # Probe for the library, creating it on first use
        database = None

        try:
            database = store[fname]
        except Exception:
            # was a bare except: Arctic raises if the library is missing
            pass

        if database is None:
            store.initialize_library(fname, audit=False)
            self.logger.info("Created MongoDB library: " + fname)
        else:
            self.logger.info("Got MongoDB library: " + fname)

        # Access the library
        library = store[fname]

        # Intraday data is stored as float32 to halve the footprint
        if ('intraday' in fname):
            data_frame = data_frame.astype('float32')

        if filter_out_matching is not None:
            cols = data_frame.columns

            new_cols = []

            for col in cols:
                if filter_out_matching not in col:
                    new_cols.append(col)

            data_frame = data_frame[new_cols]

        # Can duplicate values if we have existing dates
        if append_data:
            library.append(fname, data_frame)
        else:
            library.write(fname, data_frame)

        c.close()

        self.logger.info("Written MongoDB library: " + fname)
    elif (engine == 'hdf5'):
        h5_filename = self.get_h5_filename(fname)

        # Append data only works for HDF5 stored as tables (but this is
        # much slower than fixed format); removes duplicated entries at
        # the end
        if append_data:
            # NOTE(review): HDFStore() does not document a 'format'
            # kwarg — confirm it is not silently rejected by PyTables
            store = pandas.HDFStore(h5_filename, format=hdf5_format,
                                    complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            # Get last row which matches and remove everything after
            # that (because append function doesn't check for
            # duplicated rows)
            nrows = len(store['data'].index)
            last_point = data_frame.index[-1]

            i = nrows - 1

            while (i > 0):
                read_index = store.select('data', start=i,
                                          stop=nrows).index[0]

                if (read_index <= last_point):
                    break

                i = i - 1

            # Remove rows at the end, which are duplicates of the
            # incoming time series
            store.remove(key='data', start=i, stop=nrows)
            store.put(key='data', value=data_frame, format=hdf5_format,
                      append=True)
            store.close()
        else:
            # Write to a temp file first so a crash mid-write never
            # leaves a corrupt cache behind
            h5_filename_temp = self.get_h5_filename(fname + ".temp")

            # Delete the old copy
            try:
                os.remove(h5_filename_temp)
            except OSError:
                # file may simply not exist yet
                pass

            store = pandas.HDFStore(h5_filename_temp, format=hdf5_format,
                                    complib="blosc", complevel=9)

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            store.put(key='data', value=data_frame, format=hdf5_format)
            store.close()

            # Delete the old copy
            try:
                os.remove(h5_filename)
            except OSError:
                pass

            # Once written to disk rename
            os.rename(h5_filename_temp, h5_filename)