Exemplo n.º 1
0
def fbprophet_univariate_best_r() -> float:
    """
      Search for the best fbprophet hyper-parameter r on a randomly chosen data stream.

      Streams are drawn at random until one has more than
      PROPHET_META['n_warm'] + 50 lagged observations and no '~' in its name.
      The search itself is delegated to optimal_r_for_stream; the resulting
      info dict and the prophet parameters implied by the best r are printed.

      Takes about 12 hrs to run

      :return:  best r found by the search
    """
    mr = MicroReader()
    names = mr.get_stream_names()
    okay = False
    while not okay:
        name = random.choice(names)
        n_obs = len(mr.get_lagged_values(name=name))
        okay = n_obs > PROPHET_META['n_warm'] + 50 and '~' not in name
    url = 'https://www.microprediction.org/stream_dashboard.html?stream=' + name.replace(
        '.json', '')
    print('We will find the best fbprophet hyper-parameters for ' + url)
    print('There are ' + str(n_obs) + ' observations in the series.')
    print(
        "Prophet will be fit for most of them, after a burn_in, and for many different hyper-params. Don't hold your breathe."
    )

    # The best objective value is returned by the search but not needed here
    best_r, _best_value, info = optimal_r_for_stream(
        f=fbprophet_univariate_r2,
        name=name,
        k=10,
        optimizer=dlib_default_cube,
        n_burn=PROPHET_META['n_warm'] + 20,
        n_trials=50,
        n_dim=2)
    pprint(info)
    params = prophet_params(r=best_r, dim=2)
    pprint(params)
    # Previously the function fell off the end and returned None despite the
    # declared float return type.
    return best_r
Exemplo n.º 2
0
 def stream_data(name: str, n_obs: int):
     """ Fetch up to n_obs lagged values and times for one stream.

     :param name:   stream name passed to the reader
     :param n_obs:  passed as `count` to the reader
     :return:       (y, t) lists, each in the reverse of the order the reader returned them
     """
     reader = MicroReader()
     values, times = reader.get_lagged_values_and_times(name=name,
                                                        count=n_obs)
     return list(reversed(values)), list(reversed(times))
Exemplo n.º 3
0
 def random_regular(min_len=500):
     """ Pick a random stream that has enough history and no '~' in its name

     Names are drawn with random.choice until one has more than min_len
     lagged values (at most 10000 are requested per candidate).

     :param min_len:  the chosen stream must have strictly more lagged values than this
     :return:  name, url
     """
     reader = MicroReader()
     candidates = reader.get_stream_names()
     while True:
         name = random.choice(candidates)
         history = reader.get_lagged_values(name=name, count=10000)
         if len(history) > min_len and '~' not in name:
             break
     dashboard = 'https://www.microprediction.org/stream_dashboard.html?stream='
     return name, dashboard + name.replace('.json', '')
Exemplo n.º 4
0
    def next_opinionated_forecast(n_train, k, n_recent, multiple, name=None):
        """ Keep fitting prophet to stream windows until an "opinionated" forecast is found

        Data comes from a live stream (a random one when name is None), retried
        until at least 1000 observations are retrieved. If microprediction is
        not installed, the bundled hospital series is used instead.
        Windows of length n_train starting at offsets 0, 100, ..., 900 are
        fitted; the first window whose forecast passes is_opinonated is returned.
        The function loops until such a window is found.

        :param n_train:   number of points used to fit
        :param k:         forecast horizon passed to prophet_iskater_factory
        :param n_recent:  forwarded to is_opinonated
        :param multiple:  forwarded to is_opinonated
        :param name:      stream name, or None to pick streams at random
        :return: forecast, m, stream name, error, avg_error, the k values following the training window
        """
        while True:
            try:
                from microprediction import MicroReader
                n_obs = 0
                while n_obs < 1000:
                    mr = MicroReader()
                    if name is None:
                        names = mr.get_stream_names()
                        random_name = random.choice(names)
                    else:
                        random_name = name
                    lag_values, lag_times = mr.get_lagged_values_and_times(
                        name=random_name, count=2000)
                    y = list(reversed(lag_values))
                    t = list(reversed(lag_times))
                    n_obs = len(y)
            except ImportError:
                from timemachines.skatertools.data import hospital
                y = hospital()
                t = [15 * 60 * i for i in range(len(y))]
                name = 'hospital'
                # random_name is what the code below prints and returns; it was
                # left unbound on this fallback path, raising UnboundLocalError.
                random_name = name

            for i in range(0, 1000, 100):
                print('Looking at ' + random_name + ' ' + str(i) + '/1000')
                if len(y) > i + 2 * k + n_train:
                    y_fit = y[i:i + n_train]
                    t_fit = t[i:i + n_train]
                    y_hats, _, forecast, m = prophet_iskater_factory(y=y_fit,
                                                                     k=k,
                                                                     t=t_fit)
                    if is_opinonated(y=y_fit,
                                     forecast=forecast,
                                     k=k,
                                     n_recent=n_recent,
                                     multiple=multiple):
                        y_3avg = np.mean(
                            y[i + n_train - 3:i +
                              n_train])  # avg of last three points
                        # NOTE(review): this slice covers two points around the
                        # k-step-ahead target, not three — confirm intent
                        y_true_mean = np.mean(
                            y[i + n_train + k - 1:i + n_train + k +
                              1])
                        error = (y_hats[-1] - y_true_mean) / abs(0.01 + y_3avg)
                        avg_error = (y_3avg - y_true_mean) / abs(0.01 + y_3avg)
                        return forecast, m, random_name, error, avg_error, y[
                            i + n_train:i + n_train + k]
            print(random_name + ' is okay.')
Exemplo n.º 5
0
 def random_stream_name(min_len=500, exclude_str=None, include_str=None):
     """ Pick a random stream name satisfying length and substring filters

     :param min_len:      candidates with fewer than min_len lagged values are rejected
     :param exclude_str:  if given, names containing this substring are rejected
     :param include_str:  if given, names lacking this substring are rejected
     :return:  name, url
     """
     reader = MicroReader()
     candidates = reader.get_stream_names()
     while True:
         name = random.choice(candidates)
         # History is fetched for every candidate, before any filter is applied
         n_obs = len(reader.get_lagged_values(name=name, count=10000))
         if exclude_str is not None and exclude_str in name:
             continue
         if include_str is not None and include_str not in name:
             continue
         if n_obs < min_len:
             continue
         break
     dashboard = 'https://www.microprediction.org/stream_dashboard.html?stream='
     return name, dashboard + name.replace('.json', '')
Exemplo n.º 6
0
 def optimal_r_for_stream(
     f,
     name: str,
     k: int,
     evaluator=None,
     optimizer=None,
     n_trials=None,
     n_dim=None,
     n_burn: int = None,
     test_objective_first=True
 ) -> (float, float, dict):  # best_r, best_val, info
     """  Hyper-parameter search for a univariate skater on live data from www.microprediction.org

     The stream's lagged values and times are fetched, reversed, and handed to
     optimal_r together with the remaining arguments, which are forwarded unchanged.

     :param f:          skater to tune
     :param name:       Stream name from https://www.microprediction.org/browse_streams.html with '.json' appended
     :param k:          forwarded to optimal_r
     :param evaluator:  forwarded to optimal_r
     :param optimizer:  forwarded to optimal_r
     :param n_trials:   forwarded to optimal_r
     :param n_dim:      forwarded to optimal_r
     :param n_burn:     forwarded to optimal_r
     :param test_objective_first:  forwarded to optimal_r
     :return: best_r, best_value, info
     """
     reader = MicroReader()
     values, times = reader.get_lagged_values_and_times(name=name)
     search_kwargs = dict(
         f=f,
         y=list(reversed(values)),
         k=k,
         a=None,  # a and e are always passed as None here
         t=list(reversed(times)),
         e=None,
         evaluator=evaluator,
         optimizer=optimizer,
         n_trials=n_trials,
         n_dim=n_dim,
         n_burn=n_burn,
         test_objective_first=test_objective_first,
     )
     return optimal_r(**search_kwargs)
from microprediction import MicroReader
import random

# Live tests: the statements below run at import time against the dev API
# (presumably over the network — the reader is pointed at a remote base_url).
mr = MicroReader(base_url='https://devapi.microprediction.org')
# Key of one randomly chosen (key, value) item from get_streams()
TEST_STREAM = random.choice(list(mr.get_streams().items()))[0]
DIE = 'die.json'  # stream name used by test_getters
print(TEST_STREAM)


def test_getters():
    """ Smoke test of the basic getters on the die stream (not a thorough test) """
    assert mr.get_current_value(DIE)
    assert len(mr.get_lagged_values(DIE)) > 10
    for delay in mr.DELAYS:
        pdf = mr.get_discrete_pdf_lagged(name=DIE, delay=delay)
        assert pdf is not None
        if not pdf.get('x'):
            # Nothing to check when the pdf has no (or empty) 'x'
            continue
        first_prob = pdf['y'][0]
        # 0.1666 is roughly 1/6
        assert abs(first_prob - 0.1666) < 0.1, "Oh man this die market is so inefficient!"


def test_z_getters():
    """ Check the shape of lagged z-values and copulas for a bivariate z2 stream, and that the first copula coordinate lies in [0, 1] """
    stream = 'z2~copula_x~copula_y~70.json'
    zs = mr.get_lagged_zvalues(name=stream, count=3)
    ps = mr.get_lagged_copulas(name=stream, count=3)
    assert len(zs) == 3
    assert len(zs[0]) == 2
    assert len(ps) == 3
    assert len(ps[0]) == 2
    assert 0 <= ps[0][0] <= 1
Exemplo n.º 8
0
def plot_helicopter_lags():
    """ Plot the lagged values of the 'helicopter_psi.json' stream (psi only, a subset of the SciML helicopter challenge data) """
    reader = MicroReader()
    plt.plot(reader.get_lagged_values('helicopter_psi.json'))
Exemplo n.º 9
0
# A collection of mostly standalone functions illustrating copula functionality at dev.microprediction.org

# See also https://github.com/microprediction/PDCI/blob/master/helicopula.ipynb
# See article at https://www.linkedin.com/pulse/helicopulas-peter-cotton-phd/

import pandas as pd
import matplotlib.pyplot as plt
from microprediction import MicroReader, MicroWriter, new_key
from copulas.multivariate import GaussianMultivariate
import numpy as np
from pprint import pprint

# Module-level reader used by get_wind_z2 below
mr = MicroReader()


def get_wind_z2():
    """ Return the lagged values of the bivariate (z2) Seattle wind direction / wind speed stream """
    return mr.get_lagged_values(
        name='z2~seattle_wind_direction~seattle_wind_speed~70.json')


def plot_helicopter_data():
    """ Read the SciML helicopter experimental-data CSV from GitHub and plot it with pandas """
    csv_url = 'https://raw.githubusercontent.com/SciML/HelicopterSciML.jl/master/data/Lab-Helicopter_Experimental-data.csv'
    frame = pd.read_csv(csv_url)
    frame.plot()


def plot_helicopter_lags():
    """ Plot a subset of the SciML helicopter challenge data .. psi only """
Exemplo n.º 10
0
def test_internet():
    """ Connectivity check: the sponsor listing should contain more than 10 entries """
    sponsors = MicroReader().get_sponsors()
    assert len(sponsors) > 10
Exemplo n.º 11
0
def test_imports():
    """ The 'cop.json' stream should return more than 500 lagged values """
    history = MicroReader().get_lagged_values(name='cop.json')
    assert len(history) > 500
Exemplo n.º 12
0
from microprediction.univariate.expnormdist import ExpNormDist, DEFAULT_EXPNORM_LOWER
import os
import numpy as np
from copy import deepcopy
import time
from microprediction import MicroReader

# Reader and stream-name list, both created when this module is imported
mr = MicroReader()
STREAMS = mr.get_stream_names()


# Default parameter values; the LOWER/UPPER dicts below use the same five keys
DEFAULT_EXPNORM_PARAMS = {'g1': 0.5, 'g2': 5.0, 'logK': -2., 'loc': 0.0, 'logScale': 0.0}
# NOTE(review): this rebinds DEFAULT_EXPNORM_LOWER, which is also imported from
# microprediction.univariate.expnormdist at the top of the file — confirm the override is intended
DEFAULT_EXPNORM_LOWER = {'g1': 0.001, 'g2': 0.001, 'logK': -5, 'loc': -0.15, 'logScale': -4}
DEFAULT_EXPNORM_UPPER = {'g1': 1.0, 'g2': 15.0, 'logK': 1, 'loc': 0.15, 'logScale': 4.0}
# Hyper-parameter settings built from deep copies of the bounds above, so later
# mutation of this dict does not alter the DEFAULT_* dicts
OFFLINE_EXPNORM_HYPER = {'lower_bounds': deepcopy(DEFAULT_EXPNORM_LOWER),
                         'upper_bounds': deepcopy(DEFAULT_EXPNORM_UPPER),
                         'space': None, 'algo': None, 'max_evals': 3}


class ExpNormAccumulator(ExpNormDist):

    def __init__(self, **kwargs):
        """ Forward all keyword arguments unchanged to the parent class initializer """
        super().__init__(**kwargs)

    def anchors(self, lagged_values, lagged_times):
        """ Run self.replay over the supplied history with a post_getter that extracts state['anchor'], and return whatever replay returns """
        def _anchor_from_state(state, value):
            # value is ignored; only the anchor stored in the state is reported
            return state['anchor']

        return self.replay(lagged_values=lagged_values,
                           lagged_times=lagged_times,
                           post_getter=_anchor_from_state)

    def manual_loss(self, lagged_values, lagged_times, params, state, burn_in=10):
        """ Loss function for a series of values, calculated manually as check """
Exemplo n.º 13
0
def test_delays_1():
    """ A MicroReader should expose exactly three delays """
    delays = MicroReader().DELAYS
    assert len(delays) == 3
from microprediction import MicroReader
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt


def to_density(cdf):
    """ Convert a sampled CDF into a normalized list of density-like weights

    Each weight is the finite-difference slope dy/dx of the CDF at a sample
    point, divided by the sum of all slopes. The first slope is taken against
    an implicit point one unit to the left of the first x, with CDF value 0.

    :param cdf:  mapping with equal-length sequences 'x' and 'y' (the CDF values at x)
    :return:     list with one weight per sample point; [] if the CDF has no points
    """
    # CDF is a fast, noisy O(1) approximation so this isn't the greatest
    # Copy to lists so that tuples or arrays can be concatenated below
    xs, ys = list(cdf['x']), list(cdf['y'])
    if not xs:
        return []
    dys = np.diff([0] + ys)
    dxs = np.diff([xs[0] - 1.0] + xs)
    dsty = [dy / dx for dx, dy in zip(dxs, dys)]
    # Normalizer computed once rather than once per element
    total = sum(dsty)
    return [d / total for d in dsty]


if __name__ == "__main__":
    # Plot the density implied by the CDF of the altitude stream, then print
    # the stream's dashboard link.
    reader = MicroReader()
    # NOTE(review): index 3 requires at least four delays, yet test_delays_1
    # elsewhere asserts there are three — confirm against the library version
    hour_delay = reader.DELAYS[3]
    altitude_cdf = reader.get_cdf(name='altitude.json', delay=hour_delay)
    # The raw CDF could be plotted instead: plt.plot(cdf['x'], cdf['y'])
    plt.plot(altitude_cdf['x'], to_density(altitude_cdf))
    plt.show()
    dashboard_url = 'https://www.microprediction.org/stream_dashboard.html?stream=altitude.json'
    print(dashboard_url)