示例#1
0
    def _volume_shift_detection(self,
                                mean_list=None,
                                sd_list=None,
                                probability_threshold=0.5):
        """
        Detect the most recent significant shift in the training data volume using a Bayesian offline
        change point detection technique.

        The detection is run independently over the sub-window means and over the sub-window standard
        deviations. For each list the last index whose summed change point probability exceeds
        ``probability_threshold`` is kept, and the larger of the two indices is returned.

        :param list mean_list: The list of means from each training sub-window.
        :param list sd_list: The list of standard deviations from each training sub-window.
        :param float probability_threshold: Threshold for the probability value to be flagged as a change point.
        :return: Largest index flagged as a change point in either list, or an empty list when no index is
            flagged in either list.
        :rtype: int or list
        """
        import numpy as np
        from bayesian_changepoint_detection import offline_changepoint_detection as offcd
        from functools import partial

        def _last_change_point(values):
            # Return the last index of ``values`` flagged as a change point, or None when nothing is flagged.
            _, _, pcp = offcd.offline_changepoint_detection(
                data=np.array(values),
                prior_func=partial(offcd.const_prior, l=(len(values) + 1)),
                observation_log_likelihood_function=offcd.
                gaussian_obs_log_likelihood,
                truncate=-10)

            # A 0 is prepended to the summed probabilities before thresholding
            mask = np.append(0, np.exp(pcp).sum(0)) > probability_threshold
            flagged = mask.nonzero()[0]
            return flagged[-1] if len(flagged) > 0 else None

        # Volume shift detection over the means of the training window
        last_mean_cp = _last_change_point(mean_list)

        # Volume shift detection over the standard deviations of the training window
        last_sd_cp = _last_change_point(sd_list)

        # Change point is the maximum obtained from mean list and the standard deviation list. Only the
        # indices that were actually found are compared: comparing an index with an "empty" placeholder
        # raises TypeError on Python 3.
        candidates = [cp for cp in (last_mean_cp, last_sd_cp) if cp is not None]

        return max(candidates) if candidates else []
def cp_func1(d):
    """Run offline change point detection on ``d['smooth_speed']``.

    The summed change point probabilities (with a leading 0 inserted) are
    returned in a new DataFrame next to the smoothed speed and a selection of
    columns copied from ``d``.

    :param d: data frame providing the columns ``smooth_speed``, ``time``,
        ``segment``, ``date``, ``speed``, ``traveltime``, ``score_30`` and
        ``ref_speed``.
    :return: DataFrame with the columns ``Speed``, ``Prob``, ``time``,
        ``segment``, ``date``, ``raw_speed``, ``Traveltime``, ``score`` and
        ``ref_speed``.
    """
    # Double brackets keep the selection two-dimensional (one column).
    smooth = d[['smooth_speed']].values
    prior = partial(offcd.const_prior, l=(len(smooth) + 1))

    _, _, log_pcp = offcd.offline_changepoint_detection(
        smooth,
        prior,
        offcd.gaussian_obs_log_likelihood,
        truncate=-50)

    # Insert a 0 in front of the summed probabilities.
    prob = np.insert(np.exp(log_pcp).sum(0), 0, 0)

    columns = {
        'Speed': smooth.flatten(),
        'Prob': prob,
        'time': d.time,
        'segment': d.segment,
        'date': d.date,
        'raw_speed': d.speed,
        'Traveltime': d.traveltime,
        'score': d.score_30,
        'ref_speed': d.ref_speed
    }
    return pd.DataFrame(columns)
示例#3
0
def find_changes(series, truncate=-np.inf):
    """Return the summed change point probabilities for ``series``.

    :param series: object exposing ``to_frame()`` (e.g. a pandas Series).
    :param truncate: forwarded unchanged as the ``truncate`` argument of
        ``offcd.offline_changepoint_detection``.
    :return: the ``Change`` column of the frame built from ``series``: a 0
        followed by ``np.exp(Pcp).sum(0)``.
    """
    frame = series.to_frame()
    prior = partial(offcd.const_prior, l=(len(frame) + 1))

    _, _, log_pcp = offcd.offline_changepoint_detection(
        frame,
        prior,
        offcd.gaussian_obs_log_likelihood,
        truncate=truncate)

    # A leading 0 is prepended before the values are stored as a column.
    frame['Change'] = np.append([0], np.exp(log_pcp).sum(0))
    return frame['Change']
示例#4
0
    def get_cp_prob(self, ts):
        """
        Compute change point probabilities for the values of a time series.

        Offline change point detection is run on ``ts.y`` with the prior built
        by ``self.get_prior``; the exponentiated ``Pcp`` output is summed over
        its first axis to give one probability per position.

        :param ts: time series object exposing the sequences ``x`` and ``y``.
        :return: the object created by ``time_series.dist_ts(ts)``, with the
            first ``len(ts.x) - 1`` entries of ``ts.x`` appended to its ``x``
            and the matching probabilities appended to its ``y``.
        """
        values = np.asarray(ts.y)
        prior = self.get_prior(self.prior, self.p, self.k, len(values) + 1)

        # ALERT: the truncate parameter can affect the performance
        _, _, pcp = offcd.offline_changepoint_detection(
            values, prior, offcd.gaussian_obs_log_likelihood, truncate=-90)
        cp_prob = np.exp(pcp).sum(0)

        ts_cp_prob = time_series.dist_ts(ts)
        # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
        for i in range(len(ts.x) - 1):
            ts_cp_prob.x.append(ts.x[i])
            ts_cp_prob.y.append(cp_prob[i])
        return ts_cp_prob
def plot_changepoint(data, col, interval=10, title='', output=''):
    """
    Plot one column of a data frame above its change point probabilities
    and save the figure to disk.

    data: data frame; its index provides the x-axis ticks
    col: column in the data frame to analyse
    interval: index values divisible by this number get a tick on the x-axis
    title: y-axis label of the top panel; also used in the progress message
           and in the default file name
    output: file name of the saved figure; when empty the name is built from
            the lower-cased title and min(data.corpus_N)
    """

    print('Computing change point for', title)

    # Changepoint detection
    prior = partial(offcd.const_prior, l=(len(data[col]) + 1))
    _, _, log_pcp = offcd.offline_changepoint_detection(
        data[col],
        prior,
        offcd.gaussian_obs_log_likelihood,
        truncate=-40)

    print('Plotting...')

    # Tick values, their labels and their positions along the x-axis
    tick_values = data.index[data.index % interval == 0].tolist()
    tick_labels = [str(value) for value in tick_values]
    tick_positions = np.where(data.index.isin(tick_values))[0].tolist()

    # Top panel: smoothed line plus raw points; bottom panel: probabilities
    fig, (top, bottom) = plt.subplots(2, 1)

    smoothed = savgol_filter(data[col], 33, 3)
    positions = range(len(data.index))

    top.plot(positions, smoothed)
    top.plot(positions, data[col], '.', ms=2)
    top.set_xticks(tick_positions)
    top.set_xticklabels(tick_labels)
    top.set_ylabel(title)

    bottom.plot(np.exp(log_pcp).sum(0))
    bottom.set_ylim([0, 1])
    bottom.set_xticks(tick_positions)
    bottom.set_xticklabels(tick_labels)
    bottom.set_ylabel('Probability')

    if not output:
        smallest_sample = min(data.corpus_N)
        target = title.lower() + str(smallest_sample) + '.png'
    else:
        target = output
    plt.savefig(target)
示例#6
0
    def identify_change_points(self):
        """
        Estimate change points on ``self.feature_by_time_matrix_reduced``.

        The method is read from ``self.extra_inputs['cp_params']['method']``
        and defaults to ``'Online'`` when no ``'cp_params'`` entry is present.

        * ``'Online'``: stores ``self.R``, ``self.diff_in_max``,
          ``self.expected_run_len`` and a 0/1 array ``self.change_points`` set
          to 1.0 where the most likely run length jumps by more than 5.
        * ``'Offline'``: stores ``self.cp_prob`` and the boolean array
          ``self.change_points`` (``cp_prob > 0.7``).

        Any other method name leaves the instance untouched.
        """
        #print('estimating change points')

        if 'cp_params' in self.extra_inputs:
            cp_params = self.extra_inputs['cp_params']
            method = cp_params['method']
        else:  # defaults
            # Assignment, not comparison: `method == 'Online'` left the name unbound.
            method = 'Online'

        if method == 'Online':  # online method
            features = self.feature_by_time_matrix_reduced
            n_features = features.shape[1]
            R, _ = oncd.online_changepoint_detection(
                features,
                partial(oncd.constant_hazard, 250),
                oncd.MV_Norm(
                    mu=np.zeros(n_features),
                    Sigma=5.0 * np.diag(np.ones(n_features)),
                    n=np.array([1.0])))

            # looks for differences in most likely run lengths
            diff_in_max = np.abs(np.diff(np.argmax(R, axis=0)))
            expected_run_len = np.dot(R.T, np.arange(len(R)))
            self.R = R
            self.diff_in_max = diff_in_max
            self.expected_run_len = expected_run_len
            # calculate change points
            self.change_points = np.zeros(features.shape[0])
            self.change_points[diff_in_max > 5] = 1.0
        elif method == 'Offline':
            features = self.feature_by_time_matrix_reduced
            _, _, Pcp = offcd.offline_changepoint_detection(
                features,
                partial(offcd.const_prior, l=(features.shape[0] + 1)),
                offcd.fullcov_obs_log_likelihood,
                truncate=-20)
            self.cp_prob = np.exp(Pcp).sum(0)
            self.change_points = self.cp_prob > 0.7
def update_p_cp(world, use_ros):
    """Compute one change point value per joint of ``world``.

    For every joint index ``j`` a signal is derived from the records of the
    current process (``Record.records[pid]``), NaN entries are filled by
    linear interpolation, offline change point detection is run on it, and
    the result of ``get_probability_over_degree`` is collected.

    :param world: object whose ``joints`` attribute is iterated; only the
        position of each joint in that iteration is used.
    :param use_ros: when true, velocities are taken as differences of the
        recorded ``q_<j>`` values; otherwise the recorded ``v_<j>`` values
        are used.
    :return: list with the first value returned by
        ``get_probability_over_degree`` for each joint.
    """
    pid = multiprocessing.current_process().pid
    results = []
    for j, _joint in enumerate(world.joints):
        suffix = str(j)
        if use_ros:
            positions = Record.records[pid]["q_" + suffix].as_matrix()
            forces = Record.records[pid]["applied_force_" + suffix][0:].as_matrix()
            # we can't measure the velocity directly
            velocity = positions[1:] - positions[0:-1]

            shifted = velocity[:] + forces[1:]
            d = np.zeros((velocity.shape[0] + 1,))
            d[1:] = abs((shifted**2 - velocity[:]**2)/(0.1 * shifted))
        else:
            velocity = Record.records[pid]["v_" + suffix][0:].as_matrix()
            forces = Record.records[pid]["applied_force_" + suffix][0:].as_matrix()

            shifted = velocity[:-1] + forces[:-1]
            d = np.zeros(velocity.shape)
            d[1:] = abs((shifted**2 - velocity[1:]**2)/(0.1 * shifted))

        # presumably nan_helper returns a NaN mask and an index converter
        nan_mask, to_index = nan_helper(d)
        d[nan_mask] = np.interp(to_index(nan_mask), to_index(~nan_mask),
                                d[~nan_mask])

        _, _, log_pcp = bcd.offline_changepoint_detection(
            data=d,
            prior_func=partial(bcd.const_prior, l=(len(d)+1)),
            observation_log_likelihood_function=
            bcd.gaussian_obs_log_likelihood,
            truncate=-50)

        # NOTE(review): only the first summed probability is paired with the
        # last recorded q value - confirm this is intended.
        first_prob = np.exp(log_pcp).sum(0)[:1]
        last_q = Record.records[pid]['q_' + suffix][-1:].as_matrix()
        p_cp, _count = get_probability_over_degree(first_prob, last_q)

        results.append(p_cp)
    return results
示例#8
0
def update_p_cp(world, use_ros):
    """Compute one change point value per joint of ``world``.

    For every joint index ``j`` a signal is derived from the records of the
    current process (``Record.records[pid]``), NaN entries are filled by
    linear interpolation, offline change point detection is run on it, and
    the result of ``get_probability_over_degree`` is collected.

    :param world: object whose ``joints`` attribute is iterated; only the
        position of each joint in that iteration is used.
    :param use_ros: when true, velocities are taken as differences of the
        recorded ``q_<j>`` values; otherwise the recorded ``v_<j>`` values
        are used.
    :return: list with the first value returned by
        ``get_probability_over_degree`` for each joint.
    """
    pid = multiprocessing.current_process().pid
    results = []
    for j, _joint in enumerate(world.joints):
        suffix = str(j)
        if use_ros:
            positions = Record.records[pid]["q_" + suffix].as_matrix()
            forces = Record.records[pid]["applied_force_" + suffix][0:].as_matrix()
            # we can't measure the velocity directly
            velocity = positions[1:] - positions[0:-1]

            shifted = velocity[:] + forces[1:]
            d = np.zeros((velocity.shape[0] + 1,))
            d[1:] = abs((shifted**2 - velocity[:]**2)/(0.1 * shifted))
        else:
            velocity = Record.records[pid]["v_" + suffix][0:].as_matrix()
            forces = Record.records[pid]["applied_force_" + suffix][0:].as_matrix()

            shifted = velocity[:-1] + forces[:-1]
            d = np.zeros(velocity.shape)
            d[1:] = abs((shifted**2 - velocity[1:]**2)/(0.1 * shifted))

        # presumably nan_helper returns a NaN mask and an index converter
        nan_mask, to_index = nan_helper(d)
        d[nan_mask] = np.interp(to_index(nan_mask), to_index(~nan_mask),
                                d[~nan_mask])

        _, _, log_pcp = bcd.offline_changepoint_detection(
            data=d,
            prior_func=partial(bcd.const_prior, l=(len(d)+1)),
            observation_log_likelihood_function=
            bcd.gaussian_obs_log_likelihood,
            truncate=-50)

        # NOTE(review): only the first summed probability is paired with the
        # last recorded q value - confirm this is intended.
        first_prob = np.exp(log_pcp).sum(0)[:1]
        last_q = Record.records[pid]['q_' + suffix][-1:].as_matrix()
        p_cp, _count = get_probability_over_degree(first_prob, last_q)

        results.append(p_cp)
    return results
示例#9
0
import numpy as np
import matplotlib.pyplot as plt

import bayesian_changepoint_detection.offline_changepoint_detection as offcd
import bayesian_changepoint_detection.generate_data as gd
from functools import partial

# Script entry point: generate example data, run offline change point
# detection with two observation likelihoods, and start plotting the data.
if __name__ == '__main__':
    show_plot = True

    # `partition` is accumulated below to obtain the positions drawn as
    # vertical lines (presumably the segment lengths - TODO confirm).
    partition, data = gd.generate_xuan_motivating_example(50, 200)
    changes = np.cumsum(partition)

    # Same data and prior, two likelihood functions: `ifm_obs_log_likelihood`
    # first, then `fullcov_obs_log_likelihood`.
    Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(
        data,
        partial(offcd.const_prior, l=(len(data) + 1)),
        offcd.ifm_obs_log_likelihood,
        truncate=-20)
    Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(
        data,
        partial(offcd.const_prior, l=(len(data) + 1)),
        offcd.fullcov_obs_log_likelihood,
        truncate=-20)

    if show_plot:
        # NOTE(review): the axes returned by plt.subplots is replaced right
        # away by the one created with fig.add_subplot.
        fig, ax = plt.subplots(figsize=[18, 16])
        ax = fig.add_subplot(3, 1, 1)
        # One red vertical line per entry of `changes`, spanning the data range.
        for p in changes:
            ax.plot([p, p], [np.min(data), np.max(data)], 'r')
        # Plot the first two columns of the data.
        for d in range(2):
            ax.plot(data[:, d])
示例#10
0
    # Fragment: `x`, `nrows`, `table`, `minn`, `maxn`, `dim` and `show_plot`
    # are defined outside the visible code.
    # `ope` / `clo` collect the raw values of columns 0 and 3 of each row
    # (presumably opening and closing prices - TODO confirm).
    ope = []
    clo = []
    for p in range(nrows):
        # Row p of `table`, read from column index 1 onwards.
        temp = table.row_values(p, start_colx=1)
        ope.append(float(temp[0]))
        clo.append(float(temp[3]))
        # Rescale every value in place using the per-column `minn` / `maxn`.
        for q in range(len(temp)):
            temp[q] = (float(temp[q]) - minn[q]) / (maxn[q] - minn[q])
        x.append(temp)
    arr = np.array(x)
    #print arr

    # Offline change point detection on the rescaled rows.
    Q, P, Pcp = offcd.offline_changepoint_detection(
        arr,
        partial(offcd.const_prior, l=(len(arr) + 1)),
        offcd.gaussian_obs_log_likelihood,
        truncate=-20)
    #Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.ifm_obs_log_likelihood,truncate=-20)

    #Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.fullcov_obs_log_likelihood, truncate=-20)

    if show_plot:
        fig, ax = plt.subplots(figsize=[18, 16])
        #ax = fig.add_subplot(2, 1, 1)
        #  for p in changes:
        #    ax.plot([p,p],[np.min(data),np.max(data)],'r')
        # Plot the first `dim` columns of the rescaled array.
        for d in range(dim):
            ax.plot(arr[:, d])
            #print arr[:,d]
            #print d
''' Example from Xiang Xuan's thesis: Section 3.2'''
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

import bayesian_changepoint_detection.offline_changepoint_detection as offcd
import bayesian_changepoint_detection.generate_data as gd
from functools import partial

# Script entry point: generate example data, run offline change point
# detection with two observation likelihoods, and plot data and results.
if __name__ == '__main__':
  show_plot = True

  # `partition` is accumulated below to obtain the positions drawn as
  # vertical lines (presumably the segment lengths - TODO confirm).
  partition, data = gd.generate_xuan_motivating_example(50,200)
  changes = np.cumsum(partition)

  # Same data and prior, two likelihood functions: `ifm_obs_log_likelihood`
  # first, then `fullcov_obs_log_likelihood`.
  Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(data,partial(offcd.const_prior, l=(len(data)+1)),offcd.ifm_obs_log_likelihood,truncate=-20)
  Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(data,partial(offcd.const_prior, l=(len(data)+1)),offcd.fullcov_obs_log_likelihood, truncate=-20)

  if show_plot:
    # NOTE(review): the axes returned by plt.subplots is replaced right away
    # by the one created with fig.add_subplot.
    fig, ax = plt.subplots(figsize=[18, 16])
    ax = fig.add_subplot(3, 1, 1)
    # One red vertical line per entry of `changes`, spanning the data range.
    for p in changes:
      ax.plot([p,p],[np.min(data),np.max(data)],'r')
    # Plot the first two columns of the data.
    for d in range(2):
      ax.plot(data[:,d])
    plt.legend(['Raw data with Original Changepoints'])
    # Second panel: exp(Pcp_ifm) summed over its first axis.
    ax1 = fig.add_subplot(3, 1, 2, sharex=ax)
    ax1.plot(np.exp(Pcp_ifm).sum(0))
    plt.legend(['Independent Factor Model'])
    # Third panel: the same quantity for the full covariance likelihood.
    ax2 = fig.add_subplot(3, 1, 3, sharex=ax)
    ax2.plot(np.exp(Pcp_full).sum(0))
示例#12
0
  # Fragment: `x`, `nrows`, `table`, `minn`, `maxn`, `dim` and `show_plot`
  # are defined outside the visible code.
  # `ope` / `clo` collect the raw values of columns 0 and 3 of each row
  # (presumably opening and closing prices - TODO confirm).
  ope = []
  clo = []
  for p in range(nrows):
    # Row p of `table`, read from column index 1 onwards.
    temp = table.row_values(p, start_colx=1)
    ope.append(float(temp[0]))
    clo.append(float(temp[3]))
    # Rescale every value in place using the per-column `minn` / `maxn`.
    for q in range(len(temp)):
      temp[q] = (float(temp[q])-minn[q])/(maxn[q]-minn[q])
    x.append(temp)
  arr = np.array(x)
  #print arr



  # Offline change point detection on the rescaled rows.
  Q, P, Pcp = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.gaussian_obs_log_likelihood, truncate=-20)
  #Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.ifm_obs_log_likelihood,truncate=-20)

  #Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.fullcov_obs_log_likelihood, truncate=-20)


  if show_plot:
    fig, ax = plt.subplots(figsize=[18, 16])
    #ax = fig.add_subplot(2, 1, 1)
  #  for p in changes:
  #    ax.plot([p,p],[np.min(data),np.max(data)],'r')
    # Plot the first `dim` columns of the rescaled array.
    for d in range(dim):
      ax.plot(arr[:,d])
      #print arr[:,d]
      #print d
    #ax = fig.add_subplot(2, 1, 2, sharex=ax)
示例#13
0
    # Fragment of a loop body: the loop header and `i`, `x`, `pre`, `table1`,
    # `table2` and `arr1` are defined outside the visible code.
    index = table1.cell_value(i,4)
    temp = float(index)
    # Percentage change against the value kept from the previous iteration;
    # the first row (i == 0) has no predecessor and gets 0.
    if i !=0:
        index = float(float(index)-pre)/pre*100
    else:
        index = 0
    pre = temp
    # NOTE(review): `re` shadows the standard `re` module if it is imported.
    re = table2.cell_value(i,2)
    re = float(re)
    # Each row of `x` is the pair [percentage change, value from table2].
    y = []
    y.append(index)
    y.append(re)
    x.append(y)
  arr2 = np.array(x)

  # Offline change point detection, run separately on `arr1` and `arr2`.
  Q_1, P_1, Pcp_1 = offcd.offline_changepoint_detection(arr1,partial(offcd.const_prior, l=(len(arr1)+1)),offcd.gaussian_obs_log_likelihood, truncate=-20)
  Q_2, P_2, Pcp_2 = offcd.offline_changepoint_detection(arr2,partial(offcd.const_prior, l=(len(arr2)+1)),offcd.gaussian_obs_log_likelihood, truncate=-20)
  #Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.ifm_obs_log_likelihood,truncate=-20)

  #Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.fullcov_obs_log_likelihood, truncate=-20)

  #if show_plot:
  #  fig, ax = plt.subplots(figsize=[18, 16])
  #  ax = fig.add_subplot(2, 1, 1)
  #  for p in changes:
  #    ax.plot([p,p],[np.min(data),np.max(data)],'r')
    #for d in range(dim):
    #  ax.plot(arr[:,d])
      #print arr[:,d]
      #print d
    #ax = fig.add_subplot(2, 1, 2, sharex=ax)
示例#14
0
        # Fragment of a loop body: the loop header and `i`, `index`, `temp`,
        # `x`, `pre`, `table2` and `arr1` are defined outside the visible code.
        # Percentage change against the value kept from the previous
        # iteration; the first row (i == 0) has no predecessor and gets 0.
        if i != 0:
            index = float(float(index) - pre) / pre * 100
        else:
            index = 0
        pre = temp
        # NOTE(review): `re` shadows the standard `re` module if it is imported.
        re = table2.cell_value(i, 2)
        re = float(re)
        # Each row of `x` is the pair [percentage change, value from table2].
        y = []
        y.append(index)
        y.append(re)
        x.append(y)
    arr2 = np.array(x)

    # Offline change point detection, run separately on `arr1` and `arr2`.
    Q_1, P_1, Pcp_1 = offcd.offline_changepoint_detection(
        arr1,
        partial(offcd.const_prior, l=(len(arr1) + 1)),
        offcd.gaussian_obs_log_likelihood,
        truncate=-20)
    Q_2, P_2, Pcp_2 = offcd.offline_changepoint_detection(
        arr2,
        partial(offcd.const_prior, l=(len(arr2) + 1)),
        offcd.gaussian_obs_log_likelihood,
        truncate=-20)
    #Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.ifm_obs_log_likelihood,truncate=-20)

    #Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(arr,partial(offcd.const_prior, l=(len(arr)+1)),offcd.fullcov_obs_log_likelihood, truncate=-20)

    #if show_plot:
    #  fig, ax = plt.subplots(figsize=[18, 16])
    #  ax = fig.add_subplot(2, 1, 1)
    #  for p in changes: