Example #1
import datetime

import numpy as np
import pandas_datareader as pdr
from esig import tosig
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

# Project-local dependencies (import paths assumed): the conditional VAE used
# as the generative model, the lead-lag path transform, and the rough Bergomi
# path sampler.
from cvae import CVAE
from leadlag import leadlag
from rough_bergomi import rough_bergomi


class Generator:
    def __init__(self, corpus, sig_order, **params):
        self.corpus = corpus
        self.order = sig_order

        # Model parameters
        n_latent = params.get("n_latent", 8)
        alpha = params.get("alpha", 0.003)

        self._build_dataset()
        self.generator = CVAE(n_latent=n_latent, alpha=alpha)


    def _logsig(self, path):
        # Truncated log-signature of the path, up to level `self.order`.
        return tosig.stream2logsig(path, self.order)

    def _build_dataset(self):
        # Compute the log-signature of every path in the corpus.
        self.logsigs = np.array([
            self._logsig(path)
            for path in tqdm(self.corpus, desc="Computing log-signatures")
        ])

        # Scale features strictly inside the unit interval, avoiding the
        # endpoints 0 and 1.
        self.scaler = MinMaxScaler(feature_range=(0.00001, 0.99999))
        self.logsigs_norm = self.scaler.fit_transform(self.logsigs)

    def train(self, n_epochs=10000):
        self.generator.train(self.logsigs_norm, data_cond=None, n_epochs=n_epochs)

    def generate(self, n_samples=None, normalised=False):
        generated = self.generator.generate(cond=None, n_samples=n_samples)

        if normalised:
            return generated

        # A single sample comes back as a flat vector; reshape it so the
        # scaler can invert it, then drop the batch dimension again.
        if n_samples is None:
            return self.scaler.inverse_transform(generated.reshape(1, -1))[0]

        return self.scaler.inverse_transform(generated)
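

# A minimal usage sketch for Generator (hypothetical, not part of the original
# module). It assumes each element of `corpus` is a 2-D numpy array of shape
# (length, dim) representing a path, and that CVAE exposes the train/generate
# interface used above.
def _demo_generator():
    rng = np.random.default_rng(0)
    corpus = [np.cumsum(rng.standard_normal((50, 2)), axis=0)
              for _ in range(100)]
    gen = Generator(corpus, sig_order=4, n_latent=8, alpha=0.003)
    gen.train(n_epochs=1000)
    one = gen.generate()                # a single denormalised log-signature
    batch = gen.generate(n_samples=32)  # a batch of denormalised samples
    return one, batch


# For reference, one common definition of the lead-lag transform that the
# project-local `leadlag` helper (used by MarketGenerator below) is assumed
# to implement. It is consistent with the `path[::2, 1]` indexing in
# _build_dataset, which recovers the original values; a sketch only, the
# project's implementation may differ.
def _leadlag_sketch(values):
    doubled = np.repeat(values, 2)
    lead = doubled[1:]   # lead component: takes each new value half a step early
    lag = doubled[:-1]   # lag component: holds the previous value
    return np.c_[lead, lag]
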
class MarketGenerator:
    def __init__(self,
                 ticker,
                 start=datetime.date(2000, 1, 1),
                 end=datetime.date(2019, 1, 1),
                 freq="M",
                 sig_order=4,
                 rough_bergomi=None):

        self.ticker = ticker
        self.start = start
        self.end = end
        self.freq = freq
        self.order = sig_order

        # `rough_bergomi` is an optional dict of parameters for the rough
        # Bergomi sampler; if absent, real market data is downloaded instead.
        if rough_bergomi is not None:
            self._load_rough_bergomi(rough_bergomi)
        else:
            self._load_data()

        self._build_dataset()
        self.generator = CVAE(n_latent=8, alpha=0.003)

    def _load_rough_bergomi(self, params):
        # Grid points per window for each resampling frequency.
        grid_points_dict = {"M": 28, "W": 5, "Y": 252}
        grid_points = grid_points_dict[self.freq]
        # Window horizon in years, relative to 252 trading days.
        params["T"] = grid_points / grid_points_dict["Y"]

        paths = rough_bergomi(grid_points, **params)

        self.windows = [leadlag(path) for path in paths]

    def _load_data(self):
        try:
            self.data = pdr.get_data_yahoo(self.ticker, self.start,
                                           self.end)["Close"]
        except Exception as exc:
            raise RuntimeError(
                f"Could not download data for {self.ticker} from {self.start} to {self.end}."
            ) from exc

        # Split the close prices into windows at the chosen frequency and
        # apply the lead-lag transform to each window.
        self.windows = []
        for _, window in self.data.resample(self.freq):
            values = window.values  # optionally normalise: / window.values[0]
            path = leadlag(values)

            self.windows.append(path)

    def _logsig(self, path):
        return tosig.stream2logsig(path, self.order)

    def _build_dataset(self):
        if self.order:
            self.orig_logsig = np.array([
                self._logsig(path)
                for path in tqdm(self.windows, desc="Computing log-signatures")
            ])
        else:
            # Order 0 falls back to raw log-returns; every other row of
            # column 1 of a lead-lag path recovers the original prices.
            self.orig_logsig = np.array(
                [np.diff(np.log(path[::2, 1])) for path in self.windows])

            # Drop windows with fewer than 4 returns and truncate the rest
            # to a common length.
            self.orig_logsig = np.array(
                [p for p in self.orig_logsig if len(p) >= 4])
            steps = min(map(len, self.orig_logsig))
            self.orig_logsig = np.array(
                [val[:steps] for val in self.orig_logsig])

        # Scale strictly inside the unit interval, then pair each window
        # (target) with its predecessor (condition) for conditional training.
        self.scaler = MinMaxScaler(feature_range=(0.00001, 0.99999))
        logsig = self.scaler.fit_transform(self.orig_logsig)

        self.logsigs = logsig[1:]
        self.conditions = logsig[:-1]

    def train(self, n_epochs=10000):
        self.generator.train(self.logsigs, self.conditions, n_epochs=n_epochs)

    def generate(self, logsig, n_samples=None, normalised=False):
        generated = self.generator.generate(logsig, n_samples=n_samples)

        if normalised:
            return generated

        if n_samples is None:
            return self.scaler.inverse_transform(generated.reshape(1, -1))[0]

        return self.scaler.inverse_transform(generated)
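

# A minimal usage sketch for MarketGenerator (hypothetical): download daily
# closes for a ticker, train the conditional model on monthly windows, then
# sample the next window conditioned on the most recent normalised
# log-signature. Ticker and epoch counts are placeholders.
if __name__ == "__main__":
    mg = MarketGenerator("SPY", freq="M", sig_order=4)
    mg.train(n_epochs=10000)

    condition = mg.logsigs[-1]  # normalised log-signature of the latest window
    next_logsig = mg.generate(condition)               # one denormalised sample
    next_batch = mg.generate(condition, n_samples=16)  # a batch of samples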