Пример #1
0
    def _get_benchmark(self, prices):
        """
        Extracts the BENCHMARK column from prices as a DataFrame.

        The first field found among Close, Open, Bid, Ask, High, and Low (in
        that order of preference) is used. If prices has a "Time" index
        level, the result is further restricted to BENCHMARK_TIME.

        Parameters
        ----------
        prices : DataFrame
            prices with a "Field" index level (and optionally a "Time"
            level) and a column labeled self.BENCHMARK

        Returns
        -------
        DataFrame
            the benchmark values wrapped in a DataFrame

        Raises
        ------
        MoonshotParameterError
            if none of the candidate fields is present in prices, or if
            prices has a "Time" level but BENCHMARK_TIME is not set

        MoonshotError
            if BENCHMARK or BENCHMARK_TIME cannot be found in prices
        """
        fields = prices.index.get_level_values("Field").unique()
        candidate_fields = ("Close", "Open", "Bid", "Ask", "High", "Low")

        # Try every candidate before giving up; only raise when none of them
        # is available (not merely when the first one is missing).
        field = None
        for candidate in candidate_fields:
            if candidate in fields:
                field = candidate
                break

        if field is None:
            raise MoonshotParameterError(
                "Cannot extract BENCHMARK {0} from {1} without one of {2}".
                format(self.BENCHMARK, self.CODE,
                       ", ".join(candidate_fields)))

        try:
            benchmark = prices.loc[field][self.BENCHMARK]
        except KeyError:
            raise MoonshotError(
                "{0} BENCHMARK ConId {1} is not in backtest data".format(
                    self.CODE, self.BENCHMARK))

        # A "Time" level means the benchmark must be narrowed to a single
        # time of day, which the strategy has to specify.
        if "Time" in prices.index.names:
            if not self.BENCHMARK_TIME:
                raise MoonshotParameterError(
                    "Cannot extract BENCHMARK {0} from {1} because prices contains intraday "
                    "prices but no BENCHMARK_TIME specified".format(
                        self.BENCHMARK, self.CODE))
            try:
                benchmark = benchmark.xs(self.BENCHMARK_TIME, level="Time")
            except KeyError:
                raise MoonshotError(
                    "{0} BENCHMARK_TIME {1} is not in backtest data".format(
                        self.CODE, self.BENCHMARK_TIME))

        return pd.DataFrame(benchmark)
Пример #2
0
    def _infer_timezone(self, prices):
        """
        Determines the strategy timezone from the Timezone field of prices.

        Returns the single timezone found. Raises MoonshotParameterError if
        prices has no Timezone field or if the field holds more than one
        distinct value.
        """
        field_level = prices.index.get_level_values("Field")
        if "Timezone" not in field_level:
            raise MoonshotParameterError(
                "Cannot infer strategy timezone because Timezone field is missing, "
                "please set TIMEZONE parameter or include Timezone in MASTER_FIELDS"
            )

        # Flatten the Timezone rows into one Series and keep distinct values
        unique_timezones = prices.loc["Timezone"].stack().unique()

        if len(unique_timezones) <= 1:
            return unique_timezones[0]

        raise MoonshotParameterError(
            "cannot infer strategy timezone because multiple timezones are present "
            "in data, please set TIMEZONE parameter explicitly (timezones: {0})"
            .format(", ".join(unique_timezones)))
Пример #3
0
    def _load_model(self):
        """
        Reads the file named by MODEL and stores the result on self.model.

        The loader is picked from the file name: joblib when the name
        contains "joblib", keras when it contains "keras.h5", and pickle
        otherwise. Raises MoonshotParameterError if MODEL is not set.
        """
        model_path = self.MODEL
        if not model_path:
            raise MoonshotParameterError("please specify a model file")

        if "joblib" in model_path:
            loaded = joblib.load(model_path)
        elif "keras.h5" in model_path:
            # Imported here so that keras is only needed for keras models
            from keras.models import load_model
            loaded = load_model(model_path)
        else:
            with open(model_path, "rb") as model_file:
                loaded = pickle.load(model_file)

        self.model = loaded
Пример #4
0
    def get_historical_prices(self,
                              start_date,
                              end_date=None,
                              nlv=None,
                              max_cache=None):
        """
        Downloads historical prices from a history db. Downloads security
        details from the master db and broadcasts the values to be shaped
        like the historical prices.

        Parameters
        ----------
        start_date : str (YYYY-MM-DD), required
            the requested start date; if truthy, it is first adjusted by
            _get_start_date_with_lookback

        end_date : str (YYYY-MM-DD), optional
            the end date passed to the history download

        nlv : dict, optional
            mapping of currency to NLV; if not provided, the result of
            _get_nlv() is used. When an NLV mapping is available, an "Nlv"
            field is added to the security details.

        max_cache : optional
            if truthy, prices are loaded from HistoryCache when available
            (the value is passed through to HistoryCache.load), and
            downloads are written to the path given by HistoryCache

        Returns
        -------
        DataFrame
            prices and security details, with ConIds as columns and an index
            of (Field, Date, Time); the Time level is dropped when the bar
            size is "1 day", "1 week", or "1 month"

        Raises
        ------
        MoonshotParameterError
            if the databases in DB do not all have the same bar size

        ValueError
            if the NLV mapping lacks a currency found in the security details
        """
        if start_date:
            start_date = self._get_start_date_with_lookback(start_date)

        dbs = self.DB
        if not isinstance(dbs, (list, tuple)):
            dbs = [dbs]

        # Collect the universes and bar sizes of every database
        db_universes = set()
        db_bar_sizes = set()
        for db in dbs:
            db_config = get_db_config(db)
            db_universes.update(db_config.get("universes", None) or ())
            db_bar_sizes.add(db_config.get("bar_size"))

        db_universes = list(db_universes)
        db_bar_sizes = list(db_bar_sizes)

        if len(db_bar_sizes) > 1:
            # str() each bar size: a config without "bar_size" yields None,
            # which str.join would reject with a TypeError
            raise MoonshotParameterError(
                "databases must contain same bar size but have different bar sizes "
                "(databases: {0}; bar sizes: {1})".format(
                    ", ".join(dbs),
                    ", ".join(str(bar_size) for bar_size in db_bar_sizes)))

        all_prices = []

        for db in dbs:

            kwargs = dict(start_date=start_date,
                          end_date=end_date,
                          universes=self.UNIVERSES,
                          conids=self.CONIDS,
                          exclude_universes=self.EXCLUDE_UNIVERSES,
                          exclude_conids=self.EXCLUDE_CONIDS,
                          times=self.DB_TIME_FILTERS,
                          cont_fut=self.CONT_FUT,
                          fields=self.DB_FIELDS,
                          tz_naive=False)

            if max_cache:
                prices = HistoryCache.load(db, kwargs, max_cache)

                if prices is not None:
                    all_prices.append(prices)
                    continue

                # Nothing usable in the cache: download to the cache's path
                f = HistoryCache.get_filepath(db, kwargs)
            else:
                f = io.StringIO()

            download_history_file(db, f, **kwargs)
            all_prices.append(pd.read_csv(f))

        prices = pd.concat(all_prices)

        # Reshape to ConIds as columns and (Field, Date) as the index
        prices = prices.pivot(index="ConId", columns="Date").T
        prices.index.set_names(["Field", "Date"], inplace=True)

        # Next, get the master file
        universes = self.UNIVERSES
        conids = self.CONIDS
        if not conids and not universes:
            # Fall back to the databases' universes, or failing that, to the
            # ConIds actually present in the prices
            universes = db_universes
            if not universes:
                conids = list(prices.columns)

        f = io.StringIO()
        download_master_file(f,
                             conids=conids,
                             universes=universes,
                             exclude_conids=self.EXCLUDE_CONIDS,
                             exclude_universes=self.EXCLUDE_UNIVERSES,
                             fields=self.MASTER_FIELDS)
        securities = pd.read_csv(f, index_col="ConId")

        nlv = nlv or self._get_nlv()
        if nlv:
            missing_nlvs = set(securities.Currency) - set(nlv.keys())
            if missing_nlvs:
                # str() each value so a non-string currency (e.g. NaN) is
                # reported rather than breaking the join
                raise ValueError(
                    "NLV dict is missing values for required currencies: {0}".
                    format(", ".join(
                        str(currency) for currency in missing_nlvs)))

            securities['Nlv'] = securities.apply(
                lambda row: nlv.get(row.Currency, None), axis=1)

        # Append securities, indexed to the min date, to allow easy ffill on demand
        securities = pd.DataFrame(securities.T, columns=prices.columns)
        securities.index.name = "Field"
        idx = pd.MultiIndex.from_product(
            (securities.index, [prices.index.get_level_values("Date").min()]),
            names=["Field", "Date"])

        securities = securities.reindex(index=idx, level="Field")
        prices = pd.concat((prices, securities))

        timezone = self.TIMEZONE or self._infer_timezone(prices)

        # Parse the dates as UTC, then express them in the strategy timezone
        dates = pd.to_datetime(prices.index.get_level_values("Date"), utc=True)
        dates = dates.tz_convert(timezone)

        prices.index = pd.MultiIndex.from_arrays(
            (prices.index.get_level_values("Field"), dates),
            names=("Field", "Date"))

        # Split date and time
        dts = prices.index.get_level_values("Date")
        # Attach the timezone with tz_localize: assigning to DatetimeIndex.tz
        # is rejected by current pandas, and tz_localize keeps the wall-clock
        # date unchanged.
        dates = pd.to_datetime(dts.date).tz_localize(timezone)
        prices.index = pd.MultiIndex.from_arrays(
            (prices.index.get_level_values("Field"), dates,
             dts.strftime("%H:%M:%S")),
            names=["Field", "Date", "Time"])

        # The Time level is dropped for daily and larger bar sizes
        if db_bar_sizes[0] in ("1 day", "1 week", "1 month"):
            prices.index = prices.index.droplevel("Time")

        return prices