    def fetch_group_time_series(self, time_series_request_list):
        """
        fetch_group_time_series - Downloads each TimeSeriesRequest in the list in parallel
        (thread or process pool) and outer-joins the resulting DataFrames

        Parameters
        ----------
        time_series_request_list : list
            TimeSeriesRequest objects describing the time series to be downloaded

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None

        time_series_calcs = TimeSeriesCalcs()

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        if Constants().time_series_factory_thread_technique == "thread":
            from multiprocessing.dummy import Pool
        else:
            # most of the time is spent waiting for Bloomberg to return, so threads (above) are usually sufficient
            # this branch must use the multiprocessing_on_dill library, otherwise objects can't be pickled correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        thread_no = Constants().time_series_factory_thread_no['other']

        if time_series_request_list[0].data_source in Constants().time_series_factory_thread_no:
            thread_no = Constants().time_series_factory_thread_no[time_series_request_list[0].data_source]

        pool = Pool(thread_no)

        # open the market data downloads in their own threads and return the results
        result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
        data_frame_group = result.get()

        pool.close()
        pool.join()

        # collect together all the time series
        if data_frame_group is not None:
            # drop any requests which returned no data
            data_frame_group = [i for i in data_frame_group if i is not None]

            if len(data_frame_group) > 0:
                data_frame_agg = time_series_calcs.pandas_outer_join(data_frame_group)

        return data_frame_agg
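
    # Usage sketch (illustrative only, not part of the library source): one way to drive
    # fetch_group_time_series is to build one TimeSeriesRequest per ticker and hand the
    # whole list to the factory, which downloads each request in its own worker and
    # outer-joins the non-empty results. The helper name, the attribute names set below
    # and the 'bloomberg' data source key are assumptions for illustration; a prototype
    # request is passed in rather than constructed here, since its real constructor
    # depends on the library.
    def example_group_download(self, request_template):
        import copy
        from datetime import datetime

        request_list = []

        for ticker, vendor_ticker in [('EURUSD', 'EURUSD Curncy'), ('GBPUSD', 'GBPUSD Curncy')]:
            req = copy.copy(request_template)           # clone the prototype TimeSeriesRequest
            req.tickers = ticker
            req.vendor_tickers = [vendor_ticker]
            req.data_source = 'bloomberg'               # assumed data source key
            req.start_date = datetime(2015, 1, 1)
            req.finish_date = datetime(2015, 12, 31)

            request_list.append(req)

        # each request is downloaded in parallel; results are outer-joined on their index
        return self.fetch_group_time_series(request_list)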
    def download_intraday_tick(self, time_series_request, loader):
        """
        download_intraday_tick - Loads intraday tick time series from the specified data provider

        Parameters
        ----------
        time_series_request : TimeSeriesRequest
            contains various properties describing the time series to be fetched, including ticker, start & finish date etc.
        loader
            data provider object which does the actual download (must expose a load_ticker method)

        Returns
        -------
        pandas.DataFrame
        """

        data_frame_agg = None
        time_series_calcs = TimeSeriesCalcs()

        ticker_cycle = 0

        data_frame_group = []

        # single threaded version
        # handle intraday ticker calls separately one by one
        if len(time_series_request.tickers) == 1 or Constants().time_series_factory_thread_no['other'] == 1:
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                # we downcast to float32 to avoid memory problems in 32 bit Python
                # data is stored on disk as float32 anyway
                data_frame_single = loader.load_ticker(time_series_request_single)

                # if the vendor doesn't provide any data, don't attempt to append
                if data_frame_single is not None:
                    if not data_frame_single.empty:
                        data_frame_single.index.name = 'Date'
                        data_frame_single = data_frame_single.astype('float32')

                        data_frame_group.append(data_frame_single)

                # note: in-memory caching of results is deliberately disabled for intraday data
            # if you call for returning multiple tickers, be careful with memory considerations!
            if len(data_frame_group) > 0:
                data_frame_agg = time_series_calcs.pandas_outer_join(data_frame_group)

            return data_frame_agg
        else:
            time_series_request_list = []

            # create a list of TimeSeriesRequests
            for ticker in time_series_request.tickers:
                time_series_request_single = copy.copy(time_series_request)
                time_series_request_single.tickers = ticker

                if hasattr(time_series_request, 'vendor_tickers'):
                    time_series_request_single.vendor_tickers = [time_series_request.vendor_tickers[ticker_cycle]]
                    ticker_cycle = ticker_cycle + 1

                time_series_request_list.append(time_series_request_single)

            return self.fetch_group_time_series(time_series_request_list)
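
    # Usage sketch (illustrative only, not part of the library source): download_intraday_tick
    # splits a multi-ticker request into per-ticker requests and, for multiple tickers with more
    # than one worker configured, delegates to fetch_group_time_series above. The helper below
    # just shows the call shape; it assumes `time_series_request` is a populated TimeSeriesRequest
    # (tickers, vendor_tickers, dates etc.) and `loader` is a data provider object exposing
    # load_ticker, both created elsewhere because their real constructors depend on the library.
    def example_intraday_download(self, time_series_request, loader):
        # returns a DataFrame of the requested tick data, or None if the provider returned nothing
        data_frame = self.download_intraday_tick(time_series_request, loader)

        if data_frame is None:
            print('no intraday data returned')
        else:
            print(data_frame.tail())

        return data_frame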