def initial_state(self, name, lagged_values, lagged_times, machine_params=None, machine_state=None, **ignore):
    """Build the initial stream state by replaying the lagged history through a fresh machine.

    :param name:            stream name, stored in the state for later lookups
    :param lagged_values:   most-recent-first list of observed values
    :param lagged_times:    most-recent-first list of observation epoch times
    :param machine_params:  optional params passed (deep-copied) to the machine constructor
    :param machine_state:   optional state passed (deep-copied) to the machine constructor
    :return: dict holding the warm machine, the process classification, the watermark
             time 't' (most recent time seen) and an approximate sampling interval 'dt'
    """
    # FIX: this constructor call was commented out although `machine` is used below,
    # which would raise NameError. Deep copies guard the caller's dicts from mutation.
    machine = self.machine_type(params=deepcopy(machine_params),
                                state=deepcopy(machine_state),
                                hyper_params=deepcopy(self.machine_hyper_params))
    chronological_values = list(reversed(lagged_values))
    chronological_times = list(reversed(lagged_times))
    as_process = is_process(chronological_values)
    # For a process, feed first differences (seeded with 0.); otherwise raw values.
    values = list(np.diff([0.] + chronological_values)) if as_process else chronological_values
    # Synthesize a 1.0 gap before the first observation so dts aligns 1:1 with values.
    dts = list(np.diff([chronological_times[0] - 1.0] + chronological_times))
    for value, dt in zip(values, dts):
        machine.update(value=value, dt=dt)
    return {
        't': lagged_times[0],
        'machine': machine,
        # FIX: was hard-coded True; store the computed classification so the paired
        # update_state() differences (or not) consistently with how we warmed up here.
        'as_process': as_process,
        'dt': approx_dt(lagged_times),
        'name': name
    }
def fox_sample(lagged_values, lagged_times, delay, num, name, as_process=None):
    """Elementary but not completely woeful sampler, used by Malaxable Fox.

    :param lagged_values:  most-recent-first list of observed values
    :param lagged_times:   most-recent-first list of observation epoch times
    :param delay:          prediction horizon in the same units as the times
    :param num:            number of sample points to return
    :param name:           stream name; a '~' in the name marks a derived (z-)stream
    :param as_process:     optional override of the process classification
    :return: list of `num` nudged sample values projected onto the lagged lattice
    """
    dt = approx_dt(lagged_times)
    lag = max(10, math.ceil(delay / dt))  # steps ahead implied by the delay
    # FIX: removed leftover debug print('lag = ...') that polluted stdout on every call.
    is_proc = as_process or ('~' not in name and StatsConventions.is_process(lagged_values))
    if len(lagged_values) < 250 + lag or not is_proc:
        # Too little history, or not a process: fall back to an exponential bootstrap.
        values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=as_process)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=values, lagged_values=lagged_values))
    else:
        # Empirical distribution of lag-step changes, rescaled to roughly 175 draws.
        changes = np.diff(list(reversed(lagged_values)), n=lag)
        num_total = len(changes)
        # FIX: dropped the redundant dict(Counter(...)) -> dict(counter) double conversion.
        counts = Counter(changes)
        d1 = {change: round(175 * change_count / num_total) for change, change_count in counts.items()}
        values = list()
        for change, rounded_count in d1.items():
            values.extend([change] * rounded_count)
        # A flat spray of changes adds dispersion beyond what was observed.
        change_spray = list(range(-50, 50))
        values.extend(change_spray)
        change_values = values[:num]
        abs_values = [lagged_values[0] + chg for chg in change_values]
        if not len(abs_values) == num:
            # Too many rounded down ... may not be discrete
            abs_values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=True)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=abs_values, lagged_values=lagged_values))
    return ret_values
def update_state(self, state, lagged_values=None, lagged_times=None, **ignore):
    """Use recently added values to update the digest.

    :param state:          dict produced by initial_state ('t', 'digest', 'as_process', ...)
    :param lagged_values:  optional most-recent-first values; fetched by name if absent
    :param lagged_times:   optional most-recent-first times; fetched by name if absent
    :return: the mutated state dict
    """
    name = state['name']
    times = lagged_times or self.get_lagged_times(name=name)
    values = lagged_values or self.get_lagged_values(name=name)
    state['dt'] = approx_dt(times)
    new_values = [
        v for t, v in zip(times, values) if t > state['t'] - 0.0001
    ]  # Include one previous value in new_values, so we can difference
    # NOTE(review): new_values is in lagged (newest-first) order, so these diffs are
    # sign-reversed chronological changes — consistent with initial_state, but verify
    # the digest is meant to see changes in this orientation.
    new_data = np.diff(
        list(new_values)) if state['as_process'] else new_values[1:]
    for data in new_data:
        state['digest'].update(data)
    # FIX: advance the watermark; without this every later call re-feeds the same
    # observations into the digest (the filter above would match them again).
    if times:
        state['t'] = times[0]  # times[0] is the most recent observation time
    return state
def initial_state(self, name, **ignore):
    """Decide if it is a process or not, and create initial sketch of CDF of values or changes in values.

    :param name: stream name used to fetch lagged values/times
    :return: dict holding the warm TDigest, the process classification, the watermark
             time 't' (most recent time seen) and an approximate sampling interval 'dt'
    """
    # This is one off. Restarting may change the classification !
    values = self.get_lagged_values(name=name)
    times = self.get_lagged_times(name=name)
    digest = TDigest()
    as_process = is_process(values)
    # FIX: reuse the classification computed above instead of calling is_process again.
    # NOTE(review): values are newest-first and two 0. sentinels are appended before
    # differencing — presumably deliberate padding; confirm against the digest's use.
    data = np.diff(list(values) + [0., 0.]) if as_process else values
    for value in data:
        digest.update(value)
    return {
        't': times[0],
        'digest': digest,
        'as_process': as_process,
        'dt': approx_dt(times),
        'name': name
    }
def update_state(self, state, lagged_values=None, lagged_times=None, **ignore):
    """Use recently added values to update the machine.

    :param state:          dict produced by initial_state ('t', 'machine', 'as_process', ...)
    :param lagged_values:  most-recent-first list of observed values
    :param lagged_times:   most-recent-first list of observation epoch times
    :return: the mutated state dict with a freshly updated machine
    """
    machine = state['machine']
    chronological_values = list(reversed(lagged_values))
    chronological_times = list(reversed(lagged_times))
    state['dt'] = approx_dt(chronological_times)
    new_data = [
        (t, v) for t, v in zip(chronological_times, chronological_values)
        if t > state['t'] - 0.0001
    ]  # Include one previous value in new_data, so we can difference
    # FIX: the values are d[1]; the original sliced d[0] (the timestamps), so the
    # machine was being fed time differences instead of value differences.
    new_chronological_values = list(np.diff([
        d[1] for d in new_data
    ])) if state['as_process'] else [d[1] for d in new_data[1:]]
    new_chronological_dt = list(np.diff([d[0] for d in new_data]))
    for value, dt in zip(new_chronological_values, new_chronological_dt):
        machine.update(value=value, dt=dt)
    state['machine'] = machine
    # FIX: advance the watermark; otherwise every later call re-feeds the same
    # observations into the machine (the filter above would match them again).
    if new_data:
        state['t'] = new_data[-1][0]  # last (most recent) chronological time
    return state
def sample(self, lagged_values, lagged_times=None, name=None, delay=None, **ignored):
    """ Use skater to move and scale

    Maintains per-stream skater state in self.stream_state[name]: runs the skater
    over any observations newer than the stored watermark 't', then interpolates
    the k-step-ahead point estimate and std for the requested delay.

    :param lagged_values:  most-recent-first list of observed values
    :param lagged_times:   most-recent-first list of observation epoch times
    :param name:           stream name keying the persistent state
    :param delay:          horizon; must be one of self.DELAYS (used as lookup key)
    :return: whatever self.sample_using_point_estimate returns for the interpolated
             estimate (sample collection, per the surrounding samplers)
    """
    if name not in self.stream_state:
        # Fresh stream: empty skater state, no estimates, no delay lookup yet.
        self.stream_state[name] = {
            'skater_state': {},
            'x': None,
            'x_std': None,
            'dt': None,
            't': None,
            'lookup': None
        }  # Map from delay

    state = self.stream_state[name]

    if state['dt'] is None:
        # Initialize lookups from delay to steps ahead
        state['dt'] = approx_dt(lagged_times)
        # Each delay maps (via split_k, presumably to ((low_k, w), (high_k, w)) —
        # see the unpacking below) to fractional steps-ahead weights.
        state['lookup'] = dict([
            (dly, split_k(max(1, 0.1 + dly / (0.01 + state['dt'])) - 1))
            for dly in self.DELAYS
        ])
        state['k'] = int(math.ceil(
            (self.DELAYS[-1] + 1.0) / state['dt']))  # max k

    # Determine which observations are yet to be processed by the skater
    if state['t'] is None:
        # First call: warm up on the n_warm most recent observations, oldest first.
        ys = reversed(lagged_values[:self.n_warm])
        ts = reversed(lagged_times[:self.n_warm])
    else:
        # Subsequent calls: only observations strictly newer than the watermark.
        all_t = reversed(lagged_times)
        all_y = reversed(lagged_values)
        yt = [(y_, t_) for y_, t_ in zip(all_y, all_t)
              if t_ > state['t'] + 1e-6]
        ys = [yt_[0] for yt_ in yt]
        ts = [yt_[1] for yt_ in yt]

    # Run the skater; self.f returns k-vector estimates, std errors and new state.
    for y_, t_ in zip(ys, ts):
        state['x'], state['x_std'], state['skater_state'] = self.f(
            y=y_, s=state['skater_state'], k=state['k'], a=None, t=t_, e=None)
        state['t'] = t_  # advance watermark as each observation is consumed

    # Interpolate point estimate and std errors
    (low_k, low_k_weight), (high_k, high_k_weight) = state['lookup'][delay]
    x_interp = low_k_weight * state['x'][low_k] + high_k_weight * state[
        'x'][high_k]
    x_std_interp = low_k_weight * state['x_std'][
        low_k] + high_k_weight * state['x_std'][high_k]

    # Save stream state for next invocation
    self.stream_state[name] = state

    # Create a hacky estimate of standard error, if necessary
    if not self.use_std:
        x_std_interp = k_std(lagged_values, k=high_k)

    return self.sample_using_point_estimate(x=x_interp,
                                            x_std=x_std_interp,
                                            k=high_k,
                                            name=name,
                                            delay=delay,
                                            lagged_values=lagged_values,
                                            lagged_times=lagged_times)