Example #1
    def __init__(self, db_cache_server=None, db_cache_port=None, engine='redis'):
        from findatapy.util import DataConstants
        # Fall back to the defaults in DataConstants when no cache server/port
        # has been given explicitly
        if db_cache_server is None:
            self.db_cache_server = DataConstants().db_cache_server
        else:
            self.db_cache_server = db_cache_server

        if db_cache_port is None:
            self.db_cache_port = DataConstants().db_cache_port
        else:
            self.db_cache_port = db_cache_port

        self.engine = engine
        self.io_engine = IOEngine()
Example #2
    def force_type_conversion(self, data_frame):
        constants = DataConstants()

        logger = LoggerManager().getLogger(__name__)

        if data_frame is not None:
            if not (data_frame.empty):
                # Need to convert numerical and datetime columns separately
                # post pandas 0.23
                for c in data_frame.columns:
                    is_date = False

                    # Special case for ECO_RELEASE_DT / FIRST_REVISION_DATE
                    if 'ECO_RELEASE_DT' in c or 'FIRST_REVISION_DATE' in c:
                        try:
                            temp_col = []  # data_frame[c].values

                            for i in range(0, len(data_frame[c].values)):
                                try:
                                    temp_col.append(
                                        pd.to_datetime(str(
                                            int(data_frame[c].values[i])),
                                                       format='%Y%m%d'))
                                except:
                                    temp_col.append(np.datetime64('NaT'))

                            data_frame[c] = temp_col
                        except Exception as e:
                            logger.warning(
                                "Couldn't convert " + str(c) +
                                " to date - was this column empty? " + str(e))

                    else:
                        # Only convert those Bloomberg reference fields to
                        # dates which have been listed explicitly
                        for d in constants.always_date_columns:
                            if d in c:
                                try:
                                    data_frame[c] = pd.to_datetime(
                                        data_frame[c], errors='coerce')

                                    is_date = True
                                    break
                                except:
                                    pass

                        # Otherwise this is not a date field so attempt to
                        # convert into numbers
                        if not (is_date):
                            try:
                                data_frame[c] = pd.to_numeric(data_frame[c],
                                                              errors='ignore')
                            except:
                                pass

        logger.debug("Returning converted dataframe...")

        return data_frame
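A standalone sketch of the two conversion paths used above, with made-up column names and values; in the library these steps are wrapped up inside force_type_conversion:

import numpy as np
import pandas as pd

# Hypothetical raw Bloomberg-style columns: release dates stored as YYYYMMDD
# numbers, values stored as strings (roughly what the method above receives)
raw = pd.DataFrame({
    "EURUSD.ECO_RELEASE_DT": [20230105.0, 20230207.0, np.nan],
    "EURUSD.actual-release": ["1.25", "1.30", "1.28"],
})

# The ECO_RELEASE_DT path: number -> str -> datetime, falling back to NaT
def to_release_date(x):
    try:
        return pd.to_datetime(str(int(x)), format="%Y%m%d")
    except:
        return np.datetime64("NaT")

raw["EURUSD.ECO_RELEASE_DT"] = [
    to_release_date(x) for x in raw["EURUSD.ECO_RELEASE_DT"].values]

# The non-date path: attempt a numeric conversion (assuming 'actual-release'
# is not listed in constants.always_date_columns)
raw["EURUSD.actual-release"] = pd.to_numeric(raw["EURUSD.actual-release"],
                                             errors="ignore")

print(raw.dtypes)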
Example #3
    def get_reference_data(self, md_request_vendor, md_request):
        logger = LoggerManager().getLogger(__name__)

        constants = DataConstants()

        end = datetime.utcnow()

        from datetime import timedelta

        # Push the finish date a year forward, because very often we may wish
        # to download data about future calendar events
        end = end + timedelta(days=365)

        md_request_vendor.finish_date = end

        logger.debug("Requesting ref for " + md_request_vendor.tickers[0] +
                     " etc.")

        data_frame = self.download_ref(md_request_vendor)

        logger.debug("Waiting for ref...")

        # Convert from vendor to findatapy tickers/fields
        if data_frame is not None:
            if data_frame.empty:
                return None

            returned_fields = data_frame.columns.get_level_values(0)
            returned_tickers = data_frame.columns.get_level_values(1)

        if data_frame is not None:
            # TODO if empty try downloading again a year later
            fields = self.translate_from_vendor_field(returned_fields,
                                                      md_request)
            tickers = self.translate_from_vendor_ticker(
                returned_tickers, md_request)

            ticker_combined = []

            for i in range(0, len(fields)):
                ticker_combined.append(tickers[i] + "." + fields[i])

            data_frame.columns = ticker_combined

            # Need to convert numerical and datetime columns separately post
            # pandas 0.23
            data_frame = self.force_type_conversion(data_frame)

            # data_frame = data_frame.apply(pd.to_datetime, errors='ignore')
            # data_frame = data_frame.apply(pd.to_numeric, errors='ignore')

            # TODO coerce will be deprecated from pandas 0.23.0 onwards) so
            #  remove!
            # data_frame = data_frame.convert_objects(convert_dates = 'coerce',
            # convert_numeric= 'coerce')

        return data_frame
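The column flattening above can be illustrated with a small standalone sketch; the vendor tickers and field names below are stand-ins, and the translate_from_vendor_* helpers are replaced by hand-written lists:

import pandas as pd

# Toy frame with vendor-style two-level columns: (field, ticker)
cols = pd.MultiIndex.from_tuples(
    [("PX_LAST", "EURUSD Curncy"), ("PX_LAST", "GBPUSD Curncy")])
df = pd.DataFrame([[1.10, 1.25], [1.11, 1.26]], columns=cols)

returned_fields = df.columns.get_level_values(0)
returned_tickers = df.columns.get_level_values(1)

# After translating vendor names back to findatapy names (done by hand here),
# the columns are flattened to "ticker.field"
tickers = ["EURUSD", "GBPUSD"]  # stand-in for translate_from_vendor_ticker
fields = ["close", "close"]     # stand-in for translate_from_vendor_field

df.columns = [tickers[i] + "." + fields[i] for i in range(len(fields))]

print(df.columns.tolist())  # ['EURUSD.close', 'GBPUSD.close']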
Example #4
    def to_parquet(self,
                   df,
                   path,
                   aws_region=constants.aws_region,
                   parquet_compression=constants.parquet_compression):

        constants = DataConstants()

        # is_date = False
        #
        # # Force any date columns to default time units (Parquet with pyarrow has problems with ns dates)
        # for c in df.columns:
        #
        #     # If it's a date column don't append to convert to a float
        #     for d in constants.always_date_columns:
        #         if d in c or 'release-dt' in c:
        #             is_date = True
        #             break
        #
        #     if is_date:
        #         try:
        #             df[c] = pd.to_datetime(df[c], errors='coerce', unit=constants.default_time_units)
        #         except:
        #             pass

        try:
            df.index = pd.to_datetime(df.index,
                                      unit=constants.default_time_units)
        except:
            pass

        if 's3://' in path:
            s3 = pyarrow.fs.S3FileSystem(region=aws_region)
            table = pa.Table.from_pandas(df)

            path_in_s3 = path.replace("s3://", "")

            with s3.open_output_stream(path_in_s3) as f:
                pq.write_table(
                    table,
                    f,
                    compression=parquet_compression,
                    coerce_timestamps=constants.default_time_units,
                    allow_truncated_timestamps=True,
                )

        else:
            # pandas.to_parquet doesn't let us pass parameters to allow
            # coercion of timestamps (eg. ns -> us), so use pyarrow directly
            table = pa.Table.from_pandas(df)

            pq.write_table(table,
                           path,
                           compression=parquet_compression,
                           coerce_timestamps=constants.default_time_units,
                           allow_truncated_timestamps=True)
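A hedged usage sketch for the writer above. It assumes to_parquet lives on IOEngine (as the imports elsewhere in this listing suggest) and, for the S3 case, that AWS credentials are already configured; the paths are illustrative:

import numpy as np
import pandas as pd

from findatapy.market.ioengine import IOEngine

io = IOEngine()

df = pd.DataFrame(
    np.random.randn(3, 2),
    index=pd.date_range("2023-01-02", periods=3),
    columns=["EURUSD.close", "GBPUSD.close"])

# Local Parquet file
io.to_parquet(df, "eurusd_daily.parquet")

# S3 destination uses the same call and switches on the 's3://' prefix
# io.to_parquet(df, "s3://my-bucket/eurusd_daily.parquet", aws_region="eu-west-1")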
Example #5
    def getLogger(name=None):
        if not name:
            try:
                logging.config.fileConfig(DataConstants().logging_conf)
            except:
                pass

            log = logging.getLogger()
        else:
            if name not in LoggerManager._loggers.keys():
                try:
                    logging.config.fileConfig(DataConstants().logging_conf)
                except:
                    pass

                LoggerManager._loggers[name] = logging.getLogger(str(name))

            log = LoggerManager._loggers[name]

        # Recalling fileConfig appears to disable existing loggers,
        # hence re-enable them all here
        for logger_name in LoggerManager._loggers.keys():
            LoggerManager._loggers[logger_name].disabled = False

        return log
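Typical usage, as seen in the other snippets in this listing:

from findatapy.util.loggermanager import LoggerManager

logger = LoggerManager().getLogger(__name__)

logger.info("Starting download...")
logger.warning("Ticker list was empty")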
Example #6
    def start_bloomberg_session(self):
        tries = 0

        session = None

        logger = LoggerManager().getLogger(__name__)

        # Try up to 5 times to start a session
        while (tries < 5):
            try:
                # fill SessionOptions
                sessionOptions = blpapi.SessionOptions()
                sessionOptions.setServerHost(DataConstants().bbg_server)
                sessionOptions.setServerPort(DataConstants().bbg_server_port)

                logger.info("Starting Bloomberg session...")

                # create a Session
                session = blpapi.Session(sessionOptions)

                # start a Session
                if not session.start():
                    logger.error("Failed to start session.")
                    return

                logger.info("Returning session...")

                # Session started successfully, so stop retrying
                break
            except:
                tries = tries + 1

        # BBGLowLevelTemplate._session = session

        if session is None:
            logger.error("Failed to start session.")
            return

        return session
Example #7
    def get_instance(cls, data_constants=None):
        if not ConfigManager.__instance:
            with ConfigManager.__lock:
                if not ConfigManager.__instance:
                    ConfigManager.__instance = super(
                        ConfigManager, cls).__new__(ConfigManager)

                    if data_constants is None:
                        data_constants = DataConstants()

                    ConfigManager.__instance.populate_time_series_dictionaries(
                        data_constants=data_constants)

        return ConfigManager.__instance
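get_instance uses double-checked locking: the lock is only taken when the singleton has not been created yet, and the check is repeated inside the lock to avoid a race between threads. A minimal, self-contained sketch of the same pattern (the class name and attributes are illustrative, not from findatapy):

import threading

class Singleton(object):
    __instance = None
    __lock = threading.Lock()

    @classmethod
    def get_instance(cls):
        # First check without the lock (cheap, catches the common case)
        if not cls.__instance:
            with cls.__lock:
                # Second check inside the lock, in case another thread
                # created the instance while we were waiting
                if not cls.__instance:
                    cls.__instance = super(Singleton, cls).__new__(cls)

        return cls.__instance

assert Singleton.get_instance() is Singleton.get_instance()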
Example #8
    def process_message(self, msg):

        constants = DataConstants()
        # Process received events

        # SLOW loop (careful, not all the fields will be returned every time
        # hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        logger = LoggerManager().getLogger(__name__)

        ticker = msg.getElement('securityData').getElement(
            'security').getValue()
        fieldData = msg.getElement('securityData').getElement('fieldData')

        data = defaultdict(dict)

        # FASTER avoid calling getValue/getElement methods in blpapi,
        # very slow, better to cache variables
        for i in range(fieldData.numValues()):
            mini_field_data = fieldData.getValue(i)
            date = mini_field_data.getElement(0).getValue()

            for j in range(1, mini_field_data.numElements()):
                field_value = mini_field_data.getElement(j)

                data[(str(field_value.name()),
                      ticker)][date] = field_value.getValue()

        # ORIGINAL repeated calling getValue/getElement much slower
        # for i in range(fieldData.numValues()):
        #     for j in range(1, fieldData.getValue(i).numElements()):
        #         data[(str(fieldData.getValue(i).getElement(j).name()),
        #         ticker)][fieldData.getValue(i).getElement(0).getValue()] \
        #             = fieldData.getValue(i).getElement(j).getValue()

        data_frame = pd.DataFrame(data)

        # If obsolete ticker could return no values
        if data_frame.empty:
            return None
        else:
            # data_frame.columns = pd.MultiIndex.from_tuples(data,
            # names=['field', 'ticker'])
            data_frame.index = pd.to_datetime(data_frame.index)
            logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))

        return data_frame
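The defaultdict keyed by (field, ticker) tuples is what gives the resulting frame two-level columns. A toy sketch of that step, with made-up field names, dates and values:

from collections import defaultdict

import pandas as pd

data = defaultdict(dict)

# Outer keys are (field, ticker) tuples, inner keys are dates
data[("PX_LAST", "EURUSD Curncy")]["2023-01-02"] = 1.10
data[("PX_LAST", "EURUSD Curncy")]["2023-01-03"] = 1.11
data[("PX_OPEN", "EURUSD Curncy")]["2023-01-02"] = 1.09

df = pd.DataFrame(data)
df.index = pd.to_datetime(df.index)

# Columns come out as a MultiIndex of (field, ticker); missing values are NaN
print(df)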
Example #9
    def fill_options(self, md_request):
        constants = DataConstants()

        options = OptionsBBG()

        options.security = None  # md_request.tickers
        options.startDateTime = md_request.start_date
        options.endDateTime = md_request.finish_date
        options.fields = md_request.fields

        options.overrides = md_request.overrides

        options_list = []

        override_dict = {}

        if md_request.old_tickers is not None:

            ticker_list = []
            # curr_options = OptionsBBG(options_bbg=options)

            ## Special case for GDP, where the advance, preliminary and final
            # releases have the same ticker but different overrides (more
            # cases can be defined in DataConstants)
            bbg_keyword_dict_override = constants.bbg_keyword_dict_override

            for tick, old_tick in zip(md_request.tickers,
                                      md_request.old_tickers):
                if old_tick is not None:

                    t = old_tick.lower()

                    # eg. RELEASE_STAGE_OVERRIDE
                    for bbg_override in bbg_keyword_dict_override.keys():

                        keyword_dict = bbg_keyword_dict_override[bbg_override]

                        for bbg_keyword in keyword_dict.keys():

                            # eg. ['gdp', 'advance']
                            keyword = keyword_dict[bbg_keyword]

                            # if this matches a case, we have override
                            if all(k.lower() in t for k in keyword):

                                # In case we have multiple overrides for
                                # this ticker
                                if tick in override_dict:
                                    override_dict[tick][
                                        bbg_override] = bbg_keyword
                                else:
                                    override_dict[tick] = {
                                        bbg_override: bbg_keyword
                                    }

                    ## Add other special cases
                    if tick not in override_dict:
                        override_dict[tick] = {}

            # if ticker_list != []:
            #    curr_options.security = ticker_list

            last_override = {}

            def add_new_options(tick_):
                curr_options = OptionsBBG(options_bbg=options)
                curr_options.security = [tick_]

                if override != {}:
                    curr_options.overrides = override

                options_list.append(curr_options)

            # Combine the securities into a list of options (each with common
            # overrides)
            for tick, override in override_dict.items():
                if override == last_override:

                    if len(options_list) > 0:
                        options_list[-1].security.append(tick)
                    else:
                        add_new_options(tick)
                else:
                    add_new_options(tick)

                last_override = override

            # print('stop')
            # options_list.append(curr_options)
        else:
            options.security = md_request.tickers

            return options

        if len(options_list) == 1:
            return options_list[0]

        return options_list
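The override matching above boils down to the all(...) test: an old ticker matches a keyword list only if every keyword appears in it. A small sketch of that test in isolation, with an invented keyword dictionary (the real one lives in DataConstants.bbg_keyword_dict_override):

# Hypothetical mapping: override field -> {override value: keyword list}
bbg_keyword_dict_override = {
    "RELEASE_STAGE_OVERRIDE": {
        "A": ["gdp", "advance"],
        "P": ["gdp", "preliminary"],
        "F": ["gdp", "final"],
    }
}

old_tick = "US GDP Advance QoQ".lower()

matches = {}

for bbg_override, keyword_dict in bbg_keyword_dict_override.items():
    for bbg_keyword, keyword in keyword_dict.items():
        # Matches only if every keyword is contained in the old ticker
        if all(k.lower() in old_tick for k in keyword):
            matches[bbg_override] = bbg_keyword

print(matches)  # {'RELEASE_STAGE_OVERRIDE': 'A'}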
Example #10
    def load_ticker(self, md_request):
        """Retrieves market data from external data source (in this case 
        Bloomberg)

        Parameters
        ----------
        md_request : MarketDataRequest
            contains all the various parameters detailing time series start 
            and finish, tickers etc

        Returns
        -------
        DataFrame
        """
        constants = DataConstants()

        md_request = MarketDataRequest(md_request=md_request)
        md_request_vendor = self.construct_vendor_md_request(md_request)

        data_frame = None

        logger = LoggerManager().getLogger(__name__)
        logger.info("Request Bloomberg data")

        # Do we need daily or intraday data?
        if (md_request.freq
                in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']):

            # Work out the fields which need to be downloaded via Bloomberg ref request (BDP) and
            # those that can be downloaded via Historical request (BDH)
            ref_fields = []
            ref_vendor_fields = []

            # Get user defined list of BBG fields/vendor fields which need to
            # be downloaded by BDP
            bbg_ref_fields = list(constants.bbg_ref_fields.keys())
            bbg_ref_vendor_fields = list(constants.bbg_ref_fields.values())

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] in bbg_ref_fields \
                        or md_request_vendor.fields[i] in bbg_ref_vendor_fields:
                    ref_fields.append(md_request.fields[i])
                    ref_vendor_fields.append(md_request_vendor.fields[i])

            non_ref_fields = []
            non_ref_vendor_fields = []

            for i in range(0, len(md_request.fields)):
                if md_request.fields[i] not in bbg_ref_fields \
                        and md_request_vendor.fields[i] not in bbg_ref_vendor_fields:
                    non_ref_fields.append(md_request.fields[i])
                    non_ref_vendor_fields.append(md_request_vendor.fields[i])

            # For certain cases, need to use ReferenceDataRequest
            # eg. for events times/dates, last tradeable date fields (when specified)
            if len(ref_fields) > 0:

                # Careful: make sure you copy the market data request object
                # (when threading, altering that can
                # cause concurrency issues!)
                old_fields = copy.deepcopy(md_request.fields)
                old_vendor_fields = copy.deepcopy(md_request_vendor.fields)

                # md_request = MarketDataRequest(md_request=md_request_copy)

                md_request.fields = ref_fields
                md_request.vendor_fields = ref_vendor_fields
                md_request_vendor = self.construct_vendor_md_request(
                    md_request)

                # Just select those reference fields to download via reference
                datetime_data_frame = self.get_reference_data(
                    md_request_vendor, md_request)

                # Download all the other event or non-ref fields
                # (uses HistoricalDataRequest to Bloomberg)
                # concatenate with date time fields
                if len(non_ref_fields) > 0:

                    md_request.fields = non_ref_fields
                    md_request.vendor_fields = non_ref_vendor_fields
                    md_request_vendor = self.construct_vendor_md_request(
                        md_request)

                    events_data_frame = self.get_daily_data(
                        md_request, md_request_vendor)

                    col = events_data_frame.index.name
                    events_data_frame = events_data_frame.reset_index(
                        drop=False)

                    data_frame = pd.concat(
                        [events_data_frame, datetime_data_frame], axis=1)
                    temp = data_frame[col]
                    del data_frame[col]
                    data_frame.index = temp
                else:
                    data_frame = datetime_data_frame

                md_request.fields = copy.deepcopy(old_fields)
                md_request_vendor.fields = copy.deepcopy(old_vendor_fields)

            # For all other daily/monthly/quarter data, we can use
            # HistoricalDataRequest to Bloomberg
            else:
                data_frame = self.get_daily_data(md_request, md_request_vendor)

                # if data_frame is not None:
                #     # Convert fields with release-dt to dates (special case!) and assume everything else numerical
                #     for c in data_frame.columns:
                #         try:
                #             if 'release-dt' in c:
                #                 data_frame[c] = (data_frame[c]).astype('int').astype(str).apply(
                #                         lambda x: pd.to_datetime(x, format='%Y%m%d'))
                #             else:
                #                 data_frame[c] = pd.to_numeric(data_frame[c])
                #         except:
                #             pass

        # Assume one ticker only for intraday data and use IntradayDataRequest
        # to Bloomberg
        if (md_request.freq
                in ['tick', 'intraday', 'second', 'minute', 'hourly']):
            md_request_vendor.tickers = md_request_vendor.tickers[0]

            if md_request.freq in ['tick', 'second']:
                data_frame = self.download_tick(md_request_vendor)
            else:
                data_frame = self.download_intraday(md_request_vendor)

            if data_frame is not None:
                if data_frame.empty:
                    try:
                        logger.info("No tickers returned for: " +
                                    md_request_vendor.tickers)
                    except:
                        pass

                    return None

                cols = data_frame.columns.values

                import pytz

                try:
                    data_frame = data_frame.tz_localize(pytz.utc)
                except:
                    data_frame = data_frame.tz_convert(pytz.utc)

                cols = md_request.tickers[0] + "." + cols
                data_frame.columns = cols

        logger.info("Completed request from Bloomberg.")

        return data_frame
Example #11
    def read_time_series_cache_from_disk(self,
                                         fname,
                                         engine='hdf5',
                                         start_date=None,
                                         finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port,
                                         username=None,
                                         password=None):
        """Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
            'redis' - reads from Redis
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish date
        db_server : str
            IP address of MongoDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager.getLogger(__name__)

        data_frame_list = []

        if not (isinstance(fname, list)):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(
                        data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif (engine == 'redis'):
                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    # for pyarrow
                    context = pa.default_serialization_context()

                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)

                    # Is there a compressed key stored?
                    k = r.keys('comp_*_' + fname_single)

                    # If so, it means we have stored it as a compressed object;
                    # if there is more than one match, take the last (the
                    # latest to be added)
                    if (len(k) >= 1):
                        k = k[-1].decode('utf-8')

                        comp = r.get(k)

                        siz = int(k.split('_')[1])
                        dec = pa.decompress(comp,
                                            codec='lz4',
                                            decompressed_size=siz)

                        msg = context.deserialize(dec)
                    else:
                        msg = r.get(fname_single)

                        # print(fname_single)
                        if msg is not None:
                            msg = context.deserialize(msg)
                            # logger.warning("Key " + fname_single + " not in Redis cache?")

                except Exception as e:
                    logger.info("Cache not existent for " + fname_single +
                                " in Redis: " + str(e))

                if msg is None:
                    data_frame = None
                else:
                    logger.info('Load Redis cache: ' + fname_single)

                    data_frame = msg  # pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" +
                        str(db_server) + ":" + str(db_port),
                        connect=False
                    )  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" +
                                            str(db_server) + ":" +
                                            str(db_port),
                                            connect=False)

                store = Arctic(c,
                               socketTimeoutMS=socketTimeoutMS,
                               serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)

                    else:
                        from arctic.date import DateRange
                        item = library.read(
                            fname_single,
                            date_range=DateRange(
                                start_date.replace(tzinfo=None),
                                finish_date.replace(tzinfo=None)))

                    c.close()

                    logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    logger.warning('Library may not exist or another error: ' +
                                   fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list
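A usage sketch for the reader above, assuming it lives on IOEngine (as the imports elsewhere in this listing suggest), that the Parquet files already exist on disk and that a Redis server is running locally; the file names are illustrative:

from findatapy.market.ioengine import IOEngine

io = IOEngine()

# A plain Parquet file falls through to the pandas.read_parquet branch
df = io.read_time_series_cache_from_disk("fx_daily.parquet")

# A wildcard is expanded via glob and returns a list of DataFrames
df_list = io.read_time_series_cache_from_disk("cache/fx_*.parquet")

# Redis-backed cache, keyed on the basename with '.' replaced by '_'
df_cached = io.read_time_series_cache_from_disk("fx_daily", engine="redis",
                                                db_server="127.0.0.1",
                                                db_port=6379)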
Example #12
    def process_message(self, msg):

        constants = DataConstants()
        # Process received events

        # SLOW loop (careful, not all the fields will be returned every time hence need to include the field name in the tuple)
        # perhaps try to run in parallel?
        logger = LoggerManager().getLogger(__name__)

        implementation = 'simple'

        if implementation == 'simple':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            data = defaultdict(dict)

            # FASTER avoid calling getValue/getElement methods in blpapi,
            # very slow, better to cache variables
            for i in range(fieldData.numValues()):
                mini_field_data = fieldData.getValue(i)
                date = mini_field_data.getElement(0).getValue()

                for j in range(1, mini_field_data.numElements()):
                    field_value = mini_field_data.getElement(j)

                    data[(str(field_value.name()),
                          ticker)][date] = field_value.getValue()

            # ORIGINAL repeated calling getValue/getElement much slower
            # for i in range(fieldData.numValues()):
            #     for j in range(1, fieldData.getValue(i).numElements()):
            #         data[(str(fieldData.getValue(i).getElement(j).name()), ticker)][fieldData.getValue(i).getElement(0).getValue()] \
            #             = fieldData.getValue(i).getElement(j).getValue()
        elif implementation == 'py4j':
            pass

            # TODO Py4J
            # from findatapy.market.bbgloop import bbgloop
            # from py4j.java_gateway import JavaGateway

            # gateway = JavaGateway()
            # data = gateway.entry_point.parseFieldDataArray(msg)
        elif implementation == 'cython':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop import bbgloop

            data = bbgloop(fieldData, ticker)
        elif implementation == 'numba':
            ticker = msg.getElement('securityData').getElement(
                'security').getValue()
            fieldData = msg.getElement('securityData').getElement('fieldData')

            from findatapy.market.bbgloop_numba import bbgloop_numba

            data = bbgloop_numba(fieldData, ticker)
            # TODO cython

        data_frame = pd.DataFrame(data)

        # if obsolete ticker could return no values
        if (not (data_frame.empty)):
            # data_frame.columns = pd.MultiIndex.from_tuples(data, names=['field', 'ticker'])
            data_frame.index = pd.to_datetime(data_frame.index)
            logger.info("Read: " + ticker + ' ' + str(data_frame.index[0]) +
                        ' - ' + str(data_frame.index[-1]))
        else:
            return None

        return data_frame
Example #13
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.
#

from findatapy.util.dataconstants import DataConstants
from findatapy.util.loggermanager import LoggerManager

from datetime import timedelta
import datetime

import copy

data_constants = DataConstants()


class MarketDataRequest(object):
    """Provides parameters for requesting market data.

    Includes parameters defining the ticker we'd like to fetch, the start and
    finish dates for our request, the fields we would like and the frequency
    of the data.

    """

    # properties
    #
    # data_source eg. bbg, yahoo, quandl
    # start_date
    # finish_date
Example #14
#

if __name__ == '__main__':
    # The below line is CRUCIAL when running on Windows, otherwise
    # multiprocessing doesn't work (it is not necessary on Linux)
    from findatapy.util import SwimPool

    SwimPool()

    from findatapy.market import Market, MarketDataRequest, MarketDataGenerator

    market = Market(market_data_generator=MarketDataGenerator())

    from findatapy.util.dataconstants import DataConstants

    eikon_api_key = DataConstants().eikon_api_key

    df = None

    if eikon_api_key is None:
        eikon_api_key = 'TYPE_YOUR_API'

    import datetime
    from datetime import timedelta

    # You need to have Eikon installed and to have a valid licence for this to
    # work

    # For intraday pricing, you can usually access a history back a few months
    # from the current date
    # (if you need older history there are other Refinitiv products like
Example #15
    def populate_time_series_dictionaries(data_constants=None):

        logger = LoggerManager().getLogger(__name__)

        if data_constants is None:
            data_constants = DataConstants()

        # There are several CSV files which contain data on the tickers

        # time_series_tickers_list - contains every ticker (findatapy tickers => vendor tickers)
        # category, data_source, freq, tickers, cut, fields, vendor_tickers (from your data provider)
        # eg. fx / bloomberg / daily / EURUSD / TOK / close,open,high,low / EURUSD CMPT Curncy

        # time_series_fields_list - translates findatapy field names to vendor field names
        # findatapy fields => vendor fields
        # data_source, fields, vendor_fields
        # eg. bloomberg / close / PX_LAST

        # time_series_categories_fields - generic properties for each category
        # category, freq, data_source, fields, startdate
        # eg. fx / daily / bloomberg / close,high,low,open / 01-Jan-70

        ## Populate tickers list (allow for multiple files)

        if isinstance(data_constants.time_series_tickers_list, str):
            time_series_tickers_list_file = \
                data_constants.time_series_tickers_list.split(";")
        else:
            time_series_tickers_list_file = \
                data_constants.time_series_tickers_list

        df_tickers = []

        for tickers_list_file in time_series_tickers_list_file:

            if os.path.isfile(tickers_list_file):
                # reader = csv.DictReader(open(tickers_list_file))
                df = pd.read_csv(tickers_list_file)
                df = df.dropna(how="all")
                df_tickers.append(df)

                for index, line in df.iterrows():
                    category = line["category"]
                    data_source = line["data_source"]
                    freq_list = line["freq"].split(",")

                    if isinstance(freq_list, str):
                        freq_list = [freq_list]

                    for freq in freq_list:
                        tickers = line["tickers"]
                        cut = line["cut"]
                        vendor_tickers = line["vendor_tickers"]
                        expiry = None

                        try:
                            expiry = line["expiry"]
                        except:
                            pass

                        if category != "":
                            # Conversion from library tickers to vendor vendor_tickers
                            ConfigManager._dict_time_series_tickers_list_library_to_vendor[
                                category + "." +
                                data_source + "." +
                                freq + "." +
                                cut + "." +
                                tickers] = vendor_tickers

                            try:
                                if expiry != "":
                                    expiry = parse(expiry)
                                else:
                                    expiry = None
                            except:
                                pass

                            # Library of tickers by category
                            key = category + "." + data_source + "." + freq \
                                  + "." + cut

                            # Conversion from library tickers to library expiry date
                            ConfigManager._dict_time_series_ticker_expiry_date_library_to_library[
                                data_source + "." +
                                tickers] = expiry

                            # Conversion from vendor vendor_tickers to library tickers
                            try:
                                ConfigManager._dict_time_series_tickers_list_vendor_to_library[
                                    key + "." + vendor_tickers] = tickers
                            except:
                                logger.warning(
                                    "Ticker not specified correctly (is some "
                                    "of this missing?) " + str(
                                        key) + "." + str(vendor_tickers))

                            if key in ConfigManager._dict_time_series_category_tickers_library_to_library:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key].append(tickers)
                            else:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key] = [tickers]

        try:
            df_tickers = pd.concat(df_tickers).sort_values(
                by=["category", "data_source", "freq", "cut"])
        except:
            pass

        try:
            df_tickers = df_tickers.reset_index()
        except:
            pass

        try:
            df_tickers = df_tickers.drop("level_0", axis=1).reset_index()
        except:
            pass

        ConfigManager._data_frame_time_series_tickers = df_tickers

        ## Populate fields conversions
        # reader = csv.DictReader(open(data_constants.time_series_fields_list))
        df = pd.read_csv(data_constants.time_series_fields_list)
        df = df.dropna(how="all")

        for index, line in df.iterrows():
            data_source = line["data_source"]
            fields = line["fields"]
            vendor_fields = line["vendor_fields"]

            # Conversion from vendor vendor_fields to library fields
            ConfigManager._dict_time_series_fields_list_vendor_to_library[
                data_source + "." + vendor_fields] = fields

            # Conversion from library tickers to vendor vendor_fields
            ConfigManager._dict_time_series_fields_list_library_to_vendor[
                data_source + "." + fields] = vendor_fields

        ## Populate categories fields list
        # reader = csv.DictReader(open(data_constants.time_series_categories_fields))
        df = pd.read_csv(data_constants.time_series_categories_fields)
        df = df.dropna(how="all")

        for index, line in df.iterrows():
            category = line["category"]
            data_source = line["data_source"]
            freq = line["freq"]
            cut = line["cut"]
            fields = line["fields"].split(",")  # Can have multiple fields
            startdate = line["startdate"]
            revision_periods = line["revision_periods"]

            if category != "":
                # Conversion from library category to library fields list
                ConfigManager._dict_time_series_category_fields_library_to_library[
                    category + "." + data_source + "." + freq + "." + cut] = fields

                # Conversion from library category to library startdate
                ConfigManager._dict_time_series_category_startdate_library_to_library[
                    category + "." + data_source + "." + freq + "." + cut] = parse(
                    startdate).date()

                # Conversion from library category to library revision periods
                ConfigManager._dict_time_series_category_revision_periods_library_to_library[
                    category + "." + data_source + "." + freq + "." + cut] = revision_periods
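For reference, a tickers CSV consistent with the columns the loop above expects might look like the sketch below; the rows are illustrative only, modelled on the example row quoted in the comments:

import pandas as pd

csv_text = (
    "category,data_source,freq,tickers,cut,fields,vendor_tickers,expiry\n"
    "fx,bloomberg,daily,EURUSD,TOK,close,EURUSD CMPT Curncy,\n"
    "fx,bloomberg,daily,GBPUSD,TOK,close,GBPUSD CMPT Curncy,\n"
)

with open("time_series_tickers_list_example.csv", "w") as f:
    f.write(csv_text)

print(pd.read_csv("time_series_tickers_list_example.csv").dropna(how="all"))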
Example #16
    def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port, username=None, password=None):
        """Reads time series cache from disk in either HDF5 or bcolz

        Parameters
        ----------
        fname : str (or list)
            file to be read from
        engine : str (optional)
            'hdf5' - reads HDF5 files (default)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
            'redis' - reads from Redis
        start_date : str/datetime (optional)
            Start date
        finish_date : str/datetime (optional)
            Finish date
        db_server : str
            IP address of MongoDB (default '127.0.0.1')

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager.getLogger(__name__)

        data_frame_list = []

        if not(isinstance(fname, list)):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except:
                    data_frame = None

            elif (engine == 'redis'):
                import redis

                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                    msg = r.get(fname_single)

                except:
                    self.logger.info("Cache not existent for " + fname_single + " in Redis")

                if msg is None:
                    data_frame = None
                else:

                    self.logger.info('Load Redis cache: ' + fname_single)

                    data_frame = pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                fname_single = os.path.basename(fname_single).replace('.', '_')

                self.logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)
                    else:
                        from arctic.date import DateRange
                        item = library.read(fname_single, date_range=DateRange(start_date, finish_date))

                    c.close()

                    self.logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    self.logger.warning('Library does not exist: ' + fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list
Example #17
    @staticmethod
    def convert_vendor_to_library_field(source, sourcefield):
        return ConfigManager._dict_time_series_fields_list_vendor_to_library[
            source + '.' + sourcefield]

    @staticmethod
    def convert_library_to_vendor_field(source, field):
        return ConfigManager._dict_time_series_fields_list_library_to_vendor[
            source + '.' + field]


## test function
if __name__ == '__main__':
    logger = LoggerManager().getLogger(__name__)

    data_constants = DataConstants(
        override_fields={'use_cache_compression': False})

    print(data_constants.use_cache_compression)

    cm = ConfigManager().get_instance()

    categories = cm.get_categories_from_fields()

    logger.info("Categories from fields list")
    print(categories)

    categories = cm.get_categories_from_tickers()

    logger.info("Categories from tickers list")
    print(categories)
Example #18
        # need to specify cache_algo_return
        md_request.cache_algo = "cache_algo_return"

        df = market.fetch_market(md_request)

        print(df)

    if run_example == 3:
        # In this case we are saving predefined daily tickers to disk, and then
        # reading back
        from findatapy.util.dataconstants import DataConstants
        from findatapy.market.ioengine import IOEngine
        import os

        # Change this to your own Quandl API key!
        quandl_api_key = DataConstants().quandl_api_key

        md_request = MarketDataRequest(
            category="fx",
            data_source="quandl",
            freq="daily",
            quandl_api_key=quandl_api_key
        )

        market = Market(market_data_generator=MarketDataGenerator())

        df = market.fetch_market(md_request=md_request)

        print(df)
Example #19
    def write_time_series_cache_to_disk(self, fname, data_frame,
                                        engine='hdf5_fixed', append_data=False, db_server=DataConstants().db_server,
                                        db_port=DataConstants().db_port, username=None, password=None,
                                        filter_out_matching=None, timeout=10):
        """Writes Pandas data frame to disk as HDF5 format or bcolz format or in Arctic

        Parameters
        ----------
        fname : str
            path of file
        data_frame : DataFrame
            data frame to be written to disk
        engine : str
            'hdf5_fixed' - use HDF5 fixed format, very quick, but cannot append to this
            'hdf5_table' - use HDF5 table format, slower but can append to
            'parquet' - use Parquet
            'arctic' - use Arctic/MongoDB database
            'redis' - use Redis
        append_data : bool
            False - write a fresh copy of data on disk each time
            True - append data to disk
        db_server : str
            Database server for arctic (default: '127.0.0.1')
        timeout : int
            Number of seconds to do timeout
        """

        # default HDF5 format
        hdf5_format = 'fixed'

        if 'hdf5' in engine:
            hdf5_format = engine.split('_')[1]
            engine = 'hdf5'

        if (engine == 'bcolz'):
            # convert invalid characters (which bcolz can't deal with) to substitutes
            data_frame.columns = self.find_replace_chars(data_frame.columns, _invalid_chars, _replace_chars)
            data_frame.columns = ['A_' + x for x in data_frame.columns]

            data_frame['DTS_'] = pandas.to_datetime(data_frame.index, unit='ns')

            bcolzpath = self.get_bcolz_filename(fname)
            shutil.rmtree(bcolzpath, ignore_errors=True)
            zlens = bcolz.ctable.fromdataframe(data_frame, rootdir=bcolzpath)
        elif (engine == 'redis'):
            import redis

            fname = os.path.basename(fname).replace('.', '_')

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0, socket_timeout=timeout,
                                      socket_connect_timeout=timeout)

                if isinstance(data_frame, pandas.DataFrame):
                    r.set(fname, data_frame.to_msgpack(compress='blosc'))

                self.logger.info("Pushed " + fname + " to Redis")
            except Exception as e:
                self.logger.warning("Couldn't push " + fname + " to Redis: " + str(e))

        elif (engine == 'arctic'):
            from arctic import Arctic
            import pymongo

            socketTimeoutMS = 30 * 1000
            fname = os.path.basename(fname).replace('.', '_')

            self.logger.info('Load Arctic/MongoDB library: ' + fname)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS,
                           connectTimeoutMS=socketTimeoutMS)

            database = None

            try:
                database = store[fname]
            except:
                pass

            if database is None:
                store.initialize_library(fname, audit=False)
                self.logger.info("Created MongoDB library: " + fname)
            else:
                self.logger.info("Got MongoDB library: " + fname)

            # Access the library
            library = store[fname]

            if ('intraday' in fname):
                data_frame = data_frame.astype('float32')

            if filter_out_matching is not None:
                cols = data_frame.columns

                new_cols = []

                for col in cols:
                    if filter_out_matching not in col:
                        new_cols.append(col)

                data_frame = data_frame[new_cols]

            # can duplicate values if we have existing dates
            if append_data:
                library.append(fname, data_frame)
            else:
                library.write(fname, data_frame)

            c.close()

            self.logger.info("Written MongoDB library: " + fname)

        elif (engine == 'hdf5'):
            h5_filename = self.get_h5_filename(fname)

            # append data only works for HDF5 stored as tables (but this is much slower than fixed format)
            # removes duplicated entries at the end
            if append_data:
                store = pandas.HDFStore(h5_filename, format=hdf5_format, complib="blosc", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                # get the last row which matches and remove everything after
                # that (because the append function doesn't check for
                # duplicated rows)
                nrows = len(store['data'].index)
                last_point = data_frame.index[-1]

                i = nrows - 1

                while (i > 0):
                    read_index = store.select('data', start=i, stop=nrows).index[0]

                    if (read_index <= last_point): break

                    i = i - 1

                # remove rows at the end, which are duplicates of the incoming time series
                store.remove(key='data', start=i, stop=nrows)
                store.put(key='data', value=data_frame, format=hdf5_format, append=True)
                store.close()
            else:
                h5_filename_temp = self.get_h5_filename(fname + ".temp")

                # delete the old copy
                try:
                    os.remove(h5_filename_temp)
                except:
                    pass

                store = pandas.HDFStore(h5_filename_temp, format=hdf5_format, complib="blosc", complevel=9)

                if ('intraday' in fname):
                    data_frame = data_frame.astype('float32')

                store.put(key='data', value=data_frame, format=hdf5_format)
                store.close()

                # delete the old copy
                try:
                    os.remove(h5_filename)
                except:
                    pass

                # once written to disk rename
                os.rename(h5_filename_temp, h5_filename)

            self.logger.info("Written HDF5: " + fname)

        elif (engine == 'parquet'):
            if fname[-5:] != '.gzip':
                fname = fname + '.gzip'

            data_frame.to_parquet(fname, compression='gzip')

            self.logger.info("Written Parquet: " + fname)
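A usage sketch pairing the writer above with the reader shown earlier, assuming both live on IOEngine (as the imports elsewhere in this listing suggest) and that a Redis server is running locally; the cache name is illustrative:

import numpy as np
import pandas as pd

from findatapy.market.ioengine import IOEngine

io = IOEngine()

df = pd.DataFrame(
    np.random.randn(5, 1),
    index=pd.date_range("2023-01-02", periods=5),
    columns=["EURUSD.close"])

# Write to the local Redis cache...
io.write_time_series_cache_to_disk("fx_daily_cache", df, engine="redis",
                                   db_server="127.0.0.1", db_port=6379)

# ...and read it back
df_cached = io.read_time_series_cache_from_disk("fx_daily_cache",
                                                engine="redis",
                                                db_server="127.0.0.1",
                                                db_port=6379)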
Example #20
    def populate_time_series_dictionaries(data_constants=None):

        if data_constants is None:
            data_constants = DataConstants()

        # There are several CSV files which contain data on the tickers

        # time_series_tickers_list - contains every ticker (findatapy tickers => vendor tickers)
        # category,	source,	freq, ticker, cut, fields, sourceticker (from your data provider)
        # eg. fx / bloomberg / daily / EURUSD / TOK / close,open,high,low / EURUSD CMPT Curncy

        # time_series_fields_list - translate findatapy field name to vendor field names
        # findatapy fields => vendor fields
        # source, field, sourcefield

        # time_series_categories_fields - for each category specific generic properties
        # category,	freq, source, fields, startdate
        # eg. fx / daily / bloomberg / close,high,low,open / 01-Jan-70

        # eg. bloomberg / close / PX_LAST

        ## Populate tickers list (allow for multiple files)
        time_series_tickers_list_file = data_constants.time_series_tickers_list.split(
            ';')

        import os

        for tickers_list_file in time_series_tickers_list_file:

            if os.path.isfile(tickers_list_file):
                reader = csv.DictReader(open(tickers_list_file))

                for line in reader:
                    category = line["category"]
                    source = line["source"]
                    freq_list = line["freq"].split(',')

                    if isinstance(freq_list, str):
                        freq_list = [freq_list]

                    for freq in freq_list:
                        ticker = line["ticker"]
                        cut = line["cut"]
                        sourceticker = line["sourceticker"]
                        expiry = None

                        try:
                            expiry = line['expiry']
                        except:
                            pass

                        if category != "":
                            # print("stop" + category + '.' +
                            #                                                  source + '.' +
                            #                                                  freq + '.' +
                            #                                                  cut + '.' +
                            #                                                  ticker)

                            # conversion from library ticker to vendor sourceticker
                            ConfigManager._dict_time_series_tickers_list_library_to_vendor[
                                category + '.' + source + '.' + freq + '.' +
                                cut + '.' + ticker] = sourceticker

                            try:
                                if expiry != '':
                                    expiry = parse(expiry)
                                else:
                                    expiry = None
                            except:
                                pass

                            # conversion from library ticker to library expiry date
                            ConfigManager._dict_time_series_ticker_expiry_date_library_to_library[
                                source + '.' + ticker] = expiry

                            # conversion from vendor sourceticker to library ticker
                            ConfigManager._dict_time_series_tickers_list_vendor_to_library[
                                source + '.' + sourceticker] = ticker

                            # library of tickers by category
                            key = category + '.' + source + '.' + freq + '.' + cut

                            if key in ConfigManager._dict_time_series_category_tickers_library_to_library:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key].append(ticker)
                            else:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key] = [ticker]

        ## Populate fields conversions
        reader = csv.DictReader(open(data_constants.time_series_fields_list))

        for line in reader:
            source = line["source"]
            field = line["field"]
            sourcefield = line["sourcefield"]

            # Conversion from vendor sourcefield to library field
            ConfigManager._dict_time_series_fields_list_vendor_to_library[
                source + '.' + sourcefield] = field

            # Conversion from library ticker to vendor sourcefield
            ConfigManager._dict_time_series_fields_list_library_to_vendor[
                source + '.' + field] = sourcefield

        ## Populate categories field list
        reader = csv.DictReader(
            open(data_constants.time_series_categories_fields))

        for line in reader:
            category = line["category"]
            source = line["source"]
            freq = line["freq"]
            cut = line["cut"]
            fields = line["fields"].split(',')  # can have multiple fields
            startdate = line["startdate"]

            if category != "":
                # conversion from library category to library fields list
                ConfigManager._dict_time_series_category_fields_library_to_library[
                    category + '.' + source + '.' + freq + '.' + cut] = fields

                # conversion from library category to library startdate
                ConfigManager._dict_time_series_category_startdate_library_to_library[
                    category + '.' + source + '.' + freq + '.' +
                    cut] = parse(startdate).date()
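
To make the ticker dictionaries above concrete, here is a small self-contained illustration of the key formats they use, based on the example row from the comments (fx / bloomberg / daily / EURUSD / TOK / ... / EURUSD CMPT Curncy); it only rebuilds the keys, not the ConfigManager state:

# Values taken from the example row in the comments above
category, source, freq, cut = "fx", "bloomberg", "daily", "TOK"
ticker, sourceticker = "EURUSD", "EURUSD CMPT Curncy"

# Library ticker -> vendor sourceticker
library_to_vendor_key = category + '.' + source + '.' + freq + '.' + cut + '.' + ticker
# "fx.bloomberg.daily.TOK.EURUSD" maps to "EURUSD CMPT Curncy"

# Vendor sourceticker -> library ticker
vendor_to_library_key = source + '.' + sourceticker
# "bloomberg.EURUSD CMPT Curncy" maps to "EURUSD"

# Tickers grouped by category
category_key = category + '.' + source + '.' + freq + '.' + cut
# "fx.bloomberg.daily.TOK" maps to ["EURUSD"]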
Example #21
0
    def create_cache_file_name(self, filename):
        return DataConstants().folder_time_series_data + "/" + filename
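
A brief usage note for the helper above; the cache folder shown is purely illustrative:

# Assuming DataConstants().folder_time_series_data is "/data/cache" (illustrative),
# the helper simply prefixes the configured cache folder:
#   create_cache_file_name("fx_daily.parquet")  ->  "/data/cache/fx_daily.parquet"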
Example #22
0
    def populate_time_series_dictionaries():

        # there are several CSV files which contain data on the tickers

        # time_series_tickers_list - contains every ticker (findatapy tickers => vendor tickers)
        # category,	source,	freq, ticker, cut, fields, sourceticker (from your data provider)
        # eg. fx / bloomberg / daily / EURUSD / TOK / close,open,high,low / EURUSD CMPT Curncy

        # time_series_fields_list - translates findatapy field names to vendor field names
        # findatapy fields => vendor fields
        # source, field, sourcefield
        # eg. bloomberg / close / PX_LAST

        # time_series_categories_fields - generic properties for each category
        # category,	freq, source, fields, startdate
        # eg. fx / daily / bloomberg / close,high,low,open / 01-Jan-70

        ## populate tickers list (allow for multiple files)
        time_series_tickers_list_file = DataConstants(
        ).time_series_tickers_list.split(';')

        for tickers_list_file in time_series_tickers_list_file:
            reader = csv.DictReader(open(tickers_list_file))

            for line in reader:
                category = line["category"]
                source = line["source"]
                freq = line["freq"]
                ticker = line["ticker"]
                cut = line["cut"]
                sourceticker = line["sourceticker"]

                if category == "":
                    # print("stop")
                    pass

                # conversion from library ticker to vendor sourceticker
                ConfigManager._dict_time_series_tickers_list_library_to_vendor[
                    category + '.' + source + '.' + freq + '.' + cut + '.' +
                    ticker] = sourceticker

                # conversion from vendor sourceticker to library ticker
                ConfigManager._dict_time_series_tickers_list_vendor_to_library[
                    source + '.' + sourceticker] = ticker

                # library of tickers by category
                key = category + '.' + source + '.' + freq + '.' + cut

                if key in ConfigManager._dict_time_series_category_tickers_library_to_library:
                    ConfigManager._dict_time_series_category_tickers_library_to_library[
                        key].append(ticker)
                else:
                    ConfigManager._dict_time_series_category_tickers_library_to_library[
                        key] = [ticker]

        ## populate fields conversions
        reader = csv.DictReader(open(DataConstants().time_series_fields_list))

        for line in reader:
            source = line["source"]
            field = line["field"]
            sourcefield = line["sourcefield"]

            # conversion from vendor sourcefield to library field
            ConfigManager._dict_time_series_fields_list_vendor_to_library[
                source + '.' + sourcefield] = field

            # conversion from library ticker to vendor sourcefield
            ConfigManager._dict_time_series_fields_list_library_to_vendor[
                source + '.' + field] = sourcefield

        ## populate categories field list
        reader = csv.DictReader(
            open(DataConstants().time_series_categories_fields))

        for line in reader:
            category = line["category"]
            source = line["source"]
            freq = line["freq"]
            cut = line["cut"]
            fields = line["fields"].split(',')  # can have multiple fields
            startdate = line["startdate"]

            # conversion from library category to library fields list
            ConfigManager._dict_time_series_category_fields_library_to_library[
                category + '.' + source + '.' + freq + '.' + cut] = fields

            # conversion from library category to library startdate
            ConfigManager._dict_time_series_category_startdate_library_to_library[
                category + '.' + source + '.' + freq + '.' +
                cut] = parse(startdate).date()
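
To see what the field dictionaries built above contain, this is the mapping produced by the example from the comments (bloomberg / close / PX_LAST):

# One row of the fields list CSV: source=bloomberg, field=close, sourcefield=PX_LAST
#
# Vendor field -> library field:
#   _dict_time_series_fields_list_vendor_to_library["bloomberg.PX_LAST"] == "close"
#
# Library field -> vendor field:
#   _dict_time_series_fields_list_library_to_vendor["bloomberg.close"] == "PX_LAST"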
Example #23
0
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.
#

from chartpy import Chart, Style, ChartConstants
from findatapy.util.dataconstants import DataConstants
from findatapy.market import Market, MarketDataRequest, MarketDataGenerator
from findatapy.timeseries import Calculations

import datetime

from datetime import timedelta

dataconstants = DataConstants()


class QuickChart(object):
    """Displays charts from downloaded data in a single line of code. Ideal for quickly generating charts from sources
    including Bloomberg, Quandl, ALFRED/FRED etc.

    """
    def __init__(self,
                 engine='plotly',
                 data_source='bloomberg',
                 market_data_generator=MarketDataGenerator()):
        self._chart = Chart(engine=engine)
        self._market = Market(market_data_generator=market_data_generator)
        self._data_source = data_source
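
A minimal usage sketch of the constructor above; only the parameters shown in this snippet are used, and the plotting methods of QuickChart are outside this excerpt:

# Render with plotly and pull data from Bloomberg (the defaults shown above)
qc = QuickChart(engine='plotly', data_source='bloomberg')

# The same class can be pointed at another findatapy-supported source, e.g. Quandl
qc_quandl = QuickChart(engine='plotly', data_source='quandl')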
Example #24
0
    def populate_time_series_dictionaries(data_constants=None):

        if data_constants is None:
            data_constants = DataConstants()

        # There are several CSV files which contain data on the tickers

        # time_series_tickers_list - contains every ticker (findatapy tickers => vendor tickers)
        # category,	data_source,	freq, tickers, cut, fields, vendor_tickers (from your data provider)
        # eg. fx / bloomberg / daily / EURUSD / TOK / close,open,high,low / EURUSD CMPT Curncy

        # time_series_fields_list - translates findatapy field names to vendor field names
        # findatapy fields => vendor fields
        # data_source, fields, vendor_fields
        # eg. bloomberg / close / PX_LAST

        # time_series_categories_fields - generic properties for each category
        # category,	freq, data_source, fields, startdate, revision_periods
        # eg. fx / daily / bloomberg / close,high,low,open / 01-Jan-70

        ## Populate tickers list (allow for multiple files)

        if isinstance(data_constants.time_series_tickers_list, str):
            time_series_tickers_list_file = data_constants.time_series_tickers_list.split(
                ';')
        else:
            time_series_tickers_list_file = data_constants.time_series_tickers_list

        df_tickers = []

        for tickers_list_file in time_series_tickers_list_file:

            if os.path.isfile(tickers_list_file):
                reader = csv.DictReader(open(tickers_list_file))
                df_tickers.append(pd.read_csv(tickers_list_file))

                for line in reader:
                    category = line["category"]
                    data_source = line["data_source"]
                    freq_list = line["freq"].split(',')

                    if isinstance(freq_list, str):
                        freq_list = [freq_list]

                    for freq in freq_list:
                        tickers = line["tickers"]
                        cut = line["cut"]
                        vendor_tickers = line["vendor_tickers"]
                        expiry = None

                        try:
                            expiry = line['expiry']
                        except:
                            pass

                        if category != "":
                            # print("stop" + category + '.' +
                            #                                                  data_source + '.' +
                            #                                                  freq + '.' +
                            #                                                  cut + '.' +
                            #                                                  tickers)

                            # Conversion from library tickers to vendor vendor_tickers
                            ConfigManager._dict_time_series_tickers_list_library_to_vendor[
                                category + '.' + data_source + '.' + freq +
                                '.' + cut + '.' + tickers] = vendor_tickers

                            try:
                                if expiry != '':
                                    expiry = parse(expiry)
                                else:
                                    expiry = None
                            except:
                                pass

                            # Library of tickers by category
                            key = category + '.' + data_source + '.' + freq + '.' + cut

                            # Conversion from library tickers to library expiry date
                            ConfigManager._dict_time_series_ticker_expiry_date_library_to_library[
                                data_source + '.' + tickers] = expiry

                            # Conversion from vendor vendor_tickers to library tickers
                            ConfigManager._dict_time_series_tickers_list_vendor_to_library[
                                key + '.' + vendor_tickers] = tickers

                            if key in ConfigManager._dict_time_series_category_tickers_library_to_library:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key].append(tickers)
                            else:
                                ConfigManager._dict_time_series_category_tickers_library_to_library[
                                    key] = [tickers]

        try:
            df_tickers = pd.concat(df_tickers).sort_values(
                by=['category', 'data_source', 'freq', 'cut'])
        except:
            pass

        try:
            df_tickers = df_tickers.reset_index()
        except:
            pass

        try:
            df_tickers = df_tickers.drop('level_0', axis=1).reset_index()
        except:
            pass

        ConfigManager._data_frame_time_series_tickers = df_tickers

        ## Populate fields conversions
        reader = csv.DictReader(open(data_constants.time_series_fields_list))

        for line in reader:
            data_source = line["data_source"]
            fields = line["fields"]
            vendor_fields = line["vendor_fields"]

            # Conversion from vendor vendor_fields to library fields
            ConfigManager._dict_time_series_fields_list_vendor_to_library[
                data_source + '.' + vendor_fields] = fields

            # Conversion from library tickers to vendor vendor_fields
            ConfigManager._dict_time_series_fields_list_library_to_vendor[
                data_source + '.' + fields] = vendor_fields

        ## Populate categories fields list
        reader = csv.DictReader(
            open(data_constants.time_series_categories_fields))

        for line in reader:
            category = line["category"]
            data_source = line["data_source"]
            freq = line["freq"]
            cut = line["cut"]
            fields = line["fields"].split(',')  # Can have multiple fields
            startdate = line["startdate"]
            revision_periods = line["revision_periods"]

            if category != "":
                # Conversion from library category to library fields list
                ConfigManager._dict_time_series_category_fields_library_to_library[
                    category + '.' + data_source + '.' + freq + '.' +
                    cut] = fields

                # Conversion from library category to library startdate
                ConfigManager._dict_time_series_category_startdate_library_to_library[
                    category + '.' + data_source + '.' + freq + '.' +
                    cut] = parse(startdate).date()

                # Conversion from library category to library revision periods
                ConfigManager._dict_time_series_category_revision_periods_library_to_library[
                    category + '.' + data_source + '.' + freq + '.' +
                    cut] = revision_periods
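
As with the earlier examples, the category-level dictionaries above are keyed on category.data_source.freq.cut; a small illustration based on the example row from the comments (fx / daily / bloomberg / close,high,low,open), with a purely hypothetical cut of "NYC":

# Rebuild the key exactly as the loop above does (the cut value is hypothetical)
category, data_source, freq, cut = "fx", "bloomberg", "daily", "NYC"

key = category + '.' + data_source + '.' + freq + '.' + cut
# key == "fx.bloomberg.daily.NYC"; the dictionaries above map this key to the
# category's fields list, its parsed start date and its revision periods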