Example #1
File: cusum.py Project: dingfengqian/cdg
def test1():
    import numpy as np
    from cdg.changedetection import GaussianCusum

    # Nominal training data and a change-free test stream from the same distribution.
    x = np.random.uniform(100, 500, size=(10000, 3))
    x_test = np.random.uniform(100, 500, size=(200000, 3))

    # Fit the detector and estimate its threshold via Monte Carlo simulation.
    cdt = GaussianCusum(arl=100, window_size=100)
    cdt.fit(x, estimate_threshold=True, len_simulation=1e3)

    pred, cum_sum = cdt.predict(x_test, reset=True)
    pred = np.array(pred).astype(int)

    # The test stream contains no change, so all true labels are 0.
    y_true = np.zeros(pred.shape)
    y_pred = pred
    # y_pred = pred.reshape(-1, 1000).mean(-1).round().reshape(-1)

    import matplotlib.pyplot as plt
    plt.plot(cum_sum)
    plt.axhline(cdt.threshold)
    plt.show()
    # Uncomment to evaluate against the all-zero ground truth:
    # from sklearn.metrics import confusion_matrix
    # tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    # tpr = tp / (tp + fn)
    # fpr = fp / (fp + tn)
    return
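A possible per-window aggregation of the raw predictions, mirroring the reshape/mean/round pattern used in examples #7 and #8 below; this assumes predict returns one prediction per test sample, which the snippet above does not guarantee:

    # hypothetical: collapse per-sample alarms into one label per window of 100 samples
    y_pred_windowed = pred.reshape(-1, 100).mean(-1).round().reshape(-1)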
Example #2
File: cusum.py Project: dingfengqian/cdg
def precomp_threshold(dof, len_sim=1e5, beta=.75):
    """
    Precompute, once and for all, a set of commonly used thresholds.
    :param dof: list of degrees of freedom.
    :param len_sim: length of the simulated sequence (default is 1e5).
    :param beta: sensitivity parameter (default is .75).
    """
    for d in dof:
        cdt = GaussianCusum(arl=None, beta=beta)
        cdt.fit(x=np.zeros((1, d)), estimate_threshold=True, len_simulation=len_sim,
                verbose=True, precompute_thresholds=True)
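A minimal usage sketch, not taken from the project; the dof values and simulation length below are illustrative:

# precompute and cache thresholds for a few common degrees of freedom
precomp_threshold(dof=[1, 2, 3, 5, 10], len_sim=1e4, beta=.75)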
Example #3
File: cusum.py Project: dingfengqian/cdg
def test3():
    from cdg.graph import DelaunayGraphs, convert
    # Generate Delaunay graphs: 500 of class 0 (nominal) and 50 of class 8 (change).
    no_nodes = 5
    no_graphs = {0: 500, 8: 50}
    model = DelaunayGraphs()
    G = model.get(seed_points=no_nodes, classes=list(no_graphs.keys()),
                  no_graphs=no_graphs, sigma=.3, include_seed_graph=False)

    # Graph edit distance computed on the networkx representation of the graphs.
    from cdg.graph.distance import GraphEditDistanceNX
    ged = GraphEditDistanceNX(node_cost='euclidean', n_jobs=2)
    Gnx = convert(G[0] + G[8], format_in='cdg', format_out='nx')
    G_train, G_test = Gnx[:50], Gnx[50:]

    # Embed the remaining graphs in R^2 by multidimensional scaling on 5 prototypes.
    from cdg.embedding import MultiDimensionalScaling
    mds = MultiDimensionalScaling(emb_dim=2, nprot=5)
    mds.fit(graphs=G_train, dist_fun=ged.get_measure_fun(verbose=True))
    x = mds.transform(G_test)

    # Run the change-detection test on the embedded sequence.
    from cdg.changedetection import GaussianCusum
    cdt = GaussianCusum(window_size=5, arl=20)
    cdt.fit(x[:100])
    y, g = cdt.predict(x, reset=False)
Example #4
def test_cusum_alarm_curve():
    import numpy as np
    # np.random.seed(20190225)

    d = 3
    n = 10000
    arl = 30

    # Random mean and mixing matrix for the Gaussian data stream.
    mu = np.random.randn(d)
    sigma = np.eye(d) + np.random.rand(d, d)
    # sigma += sigma.transpose()

    from cdg.changedetection import GaussianCusum
    cdt = GaussianCusum(arl=arl, window_size=10)
    # First pass: estimate the threshold via simulation; second pass: estimate it from data.
    for i in range(2):

        x_train = mu + np.dot(np.random.randn(n, d), sigma.transpose())
        y_train = x_train[:, 0] * 0

        x = mu + np.dot(np.random.randn(n, d), sigma.transpose())
        y_true = x[:, 0] * 0

        if i == 0:
            cdt.fit(x, estimate_threshold=True)
            th_true = cdt.threshold
            gamma_true = cdt.gamma
        else:
            # cdt.fit(x, estimate_threshold=True, threshold_type='data')
            cdt.fit(x, estimate_threshold=True, gamma_type='data', threshold_type='data')
            print('thresholds:\ttrue={}\test={}'.format(th_true, cdt.threshold))
            # cdt.fit(x, gamma_type='data')
            # print(f'gamma:\ttrue={gamma_true}\test={cdt.gamma}')
        # th_true = cdt.threshold
        #
        # _, _, th = cusum_alarm_curve(cusum=cdt, sequence=x_train, arl=arl, y_true=y_train, verbose=True)[0]
    # cdt._mu_0 = mu
    # cdt._s2_0inv = np.linalg.inv(np.dot(sigma, sigma.transpose()))
    y_predict, cumulative_sums = cdt.predict(x, reset=True, verbose=False)

    # Empirical alarm rate on nominal data; for a calibrated CUSUM this should be
    # small, roughly on the order of 1 / arl.
    print(np.sum(y_predict) / n)
Example #5
File: cusum.py Project: dingfengqian/cdg
def test2():
    import numpy as np
    N = 400
    N_train = 100
    N_change = 320
    alpha = 0.01

    # Inject a unit mean shift at time N_change.
    x = np.random.normal(size=(N, 1))
    x[N_change:] += 1.
    
    # Batch detection: fit on the first N_train (nominal) samples, then scan the stream.
    from cdg.changedetection import GaussianCusum
    cdt = GaussianCusum(arl=round(1. / alpha))
    cdt.fit(x[:N_train])
    y, g = cdt.predict(x, reset=False)

    # Online detection: process the stream one sample at a time with iterate().
    cdt.reset()
    print(cdt.threshold)
    cdt.fit(x[:N_train])
    for t in range(N):
        alarm, _ = cdt.iterate(x[t:t + 1])
        if alarm:
            print("An alarm is raised at time {}".format(t))
            cdt.reset()
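A small follow-up sketch, written as if appended to the end of test2() and reusing only the calls already shown above, that measures how long after the injected change at N_change the first alarm arrives:

    # hypothetical add-on: detection delay of the online detector
    cdt.reset()
    cdt.fit(x[:N_train])
    detection_time = None
    for t in range(N_train, N):
        alarm, _ = cdt.iterate(x[t:t + 1])
        if alarm:
            if t >= N_change:
                detection_time = t
                break
            cdt.reset()  # false alarm before the change point: reset and keep monitoring
    if detection_time is not None:
        print("Detection delay: {} samples".format(detection_time - N_change))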
Example #6
File: cusum.py Project: dingfengqian/cdg
def demo():
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import multivariate_normal
    import cdg.embedding
    import cdg.geometry  # provides SphericalManifold and HyperbolicManifold

    np.random.seed(123)
    # setup
    sample_size = 5000
    arl = 100
    win_size = 10
    cusum = []
    data_train = []
    data_test = []

    # create two multivariate distributions
    rv1 = multivariate_normal(mean=[0., 0.], cov=[[1., 0.], [0., .2]])
    rv2 = multivariate_normal(mean=[0., 0.2], cov=[[1., 0.], [0., 1.]])
    training_stream = rv1.rvs(size=1000)
    x1 = rv1.rvs(size=int(sample_size / 5 * 4))
    x2 = rv2.rvs(size=int(sample_size / 5))
    test_stream = np.concatenate((x1, x2), axis=0)
    
    # univariate
    training_stream_uni = training_stream[:, :1]
    test_stream_uni = test_stream[:, :1]

    # # euclidean data
    # man_euc = cdg.embedding.ccm.EuclideanManifold()
    # tmp = np.random.rand(1, 3) * 5  # wlog generate a mean
    # true_mean = man_euc.clip(X_mat=tmp, radius=man_euc.radius)
    # stream_euc_tr = man_euc.exp_map(x0_mat=true_mean, Nu_mat=training_stream)
    # stream_euc_te = man_euc.exp_map(x0_mat=true_mean, Nu_mat=test_stream)
    
    # spherical data
    man_sph = cdg.geometry.SphericalManifold(man_dim=2, radius=3)
    tmp = np.random.rand(1, 3) * 5  # wlog generate a mean
    true_mean = man_sph.clip(X_mat=tmp, radius=man_sph.radius)
    stream_sph_tr = man_sph.exp_map(x0_mat=true_mean, Nu_mat=training_stream)
    stream_sph_te = man_sph.exp_map(x0_mat=true_mean, Nu_mat=test_stream)

    # hyperbolic data
    man_hyp = cdg.geometry.HyperbolicManifold(man_dim=2, radius=3)
    tmp = np.random.rand(1, 3) * 5  # wlog generate a mean
    true_mean = man_hyp.clip(X_mat=tmp, radius=man_hyp.radius)
    stream_hyp_tr = man_hyp.exp_map(x0_mat=true_mean, Nu_mat=training_stream)
    stream_hyp_te = man_hyp.exp_map(x0_mat=true_mean, Nu_mat=test_stream)

    # gaussian no window
    cusum.append(GaussianCusum(arl=arl))
    data_train.append(training_stream)
    data_test.append(test_stream)
    # gaussian windowed
    cusum.append(GaussianCusum(arl=arl, window_size=win_size))
    data_train.append(training_stream)
    data_test.append(test_stream)
    
    # Placeholders to keep the 3x4 subplot grid aligned.
    for i in range(2):
        cusum.append(None)
        data_train.append(None)
        data_test.append(None)

    # lower
    cusum.append(LowerCusum(arl=arl))
    data_train.append(training_stream_uni)
    data_test.append(test_stream_uni)
    # greater
    cusum.append(GreaterCusum(arl=arl))
    data_train.append(training_stream_uni)
    data_test.append(test_stream_uni)
    # two-sided
    cusum.append(TwoSidedCusum(arl=arl))
    data_train.append(training_stream_uni)
    data_test.append(test_stream_uni)
    # bonferroni on different cusum
    bonf_cusum = BonferroniCusum(arl=arl, cusum_list=[LowerCusum(arl=arl),
                                                      TwoSidedCusum(arl=arl)]) #,
                                                      # GreaterCusum(arl=arl)])
    cusum.append(bonf_cusum)
    data_train.append(training_stream_uni)
    data_test.append(test_stream_uni)

    # euclidean windowed
    # cusum_euc = ManifoldCLTCusum(arl=arl, manifold=man_euc, window_size=win_size)
    cusum_euc = GaussianCusum(arl=arl, window_size=win_size)
    cusum.append(cusum_euc)
    data_train.append(training_stream)
    data_test.append(test_stream)
    # spherical windowed
    cusum_sph = ManifoldCLTCusum(arl=arl, manifold=man_sph, window_size=win_size)
    cusum.append(cusum_sph)
    data_train.append(stream_sph_tr)
    data_test.append(stream_sph_te)
    # hyperbolic windowed
    cusum_hyp = ManifoldCLTCusum(arl=arl, manifold=man_hyp, window_size=win_size)
    cusum.append(cusum_hyp)
    data_train.append(stream_hyp_tr)
    data_test.append(stream_hyp_te)
    # bonferroni on different cusum
    bonf_cusum = BonferroniCusum(arl=arl, cusum_list=[cusum_euc, cusum_sph, cusum_hyp])
    cusum.append(bonf_cusum)
    data_train.append([training_stream, stream_sph_tr, stream_hyp_tr])
    data_test.append([test_stream, stream_sph_te, stream_hyp_te])

   

    fig1 = plt.figure()
    for ci in range(len(cusum)):
        if cusum[ci] is not None:
            cusum[ci].fit(data_train[ci], estimate_threshold=True, len_simulation=1000)
            y_pred, gg = cusum[ci].predict(data_test[ci], reset=False)
            gg = np.mean(gg, axis=1)  # only necessary for bonferroni
            sp = fig1.add_subplot(3, 4, 1 + ci)
            sp.plot(y_pred * max(gg), '+k')
            sp.plot(gg, label='g')
            sp.plot([cusum[ci].threshold] * len(gg), label='h')
            sp.grid(True)
            sp.set_title(str(type(cusum[ci]))[-20:])

    plt.show()
Example #7
def _d_cdt(_path, _c):
    _id = _path.split('/')[-2]
    tpr_avg = []
    fpr_avg = []
    auc_avg = []
    run = 0
    skipped = 0
    crashed = False
    while run < P['N_RUNS'] and (skipped < 100 or skipped /
                                 (run + skipped) < 0.9):
        # Read data
        data = dataset_load(_path)
        try:
            nominal, live, labels = data
        except ValueError:
            # No separate nominal split in the dataset: use the label-0 samples instead.
            live, labels = data
            nominal = live[labels == 0].copy()
        live = live[(labels == 0) | (labels == _c)]
        labels = labels[(labels == 0) | (labels == _c)]
        labels[labels != 0] = 1
        # Trim both streams to a whole number of CUSUM windows and reorder the live
        # stream so that all nominal samples precede the anomalous ones.
        CUSUM_WINDOW_SIZE = int(nominal.shape[0] * P['CUSUM_WINDOW_RATIO'])
        cut = CUSUM_WINDOW_SIZE * (nominal.shape[0] // CUSUM_WINDOW_SIZE)
        nominal = nominal[:cut]
        cut = CUSUM_WINDOW_SIZE * (labels.shape[0] // CUSUM_WINDOW_SIZE)
        live = live[:cut]
        labels = labels[:cut]
        live_n = live[labels == 0].copy()
        live_nn = live[labels == 1].copy()
        live = np.vstack((live_n, live_nn))

        # Compute distances
        distances_nom = []
        distances_test = []
        try:
            for i_, r_ in enumerate(P['radius']):
                start = i_ * P['latent_space']
                stop = start + P['latent_space']
                if r_ > 0.:
                    # Spherical
                    s_mean = SphericalManifold.sample_mean(nominal[:,
                                                                   start:stop],
                                                           radius=r_)
                    d_nom = SphericalManifold.distance(nominal[:, start:stop],
                                                       s_mean,
                                                       radius=r_)
                    d_test = SphericalManifold.distance(live[:, start:stop],
                                                        s_mean,
                                                        radius=r_)
                elif r_ < 0.:
                    # Hyperbolic
                    s_mean = HyperbolicManifold.sample_mean(
                        nominal[:, start:stop], radius=-r_)
                    d_nom = HyperbolicManifold.distance(nominal[:, start:stop],
                                                        s_mean,
                                                        radius=-r_)
                    d_test = HyperbolicManifold.distance(live[:, start:stop],
                                                         s_mean,
                                                         radius=-r_)
                else:
                    # Euclidean
                    s_mean = np.mean(nominal[:, start:stop], 0)
                    d_nom = np.linalg.norm(nominal[:, start:stop] - s_mean,
                                           axis=-1)[..., None]
                    d_test = np.linalg.norm(live[:, start:stop] - s_mean,
                                            axis=-1)[..., None]
                distances_nom.append(d_nom)
                distances_test.append(d_test)
        except FloatingPointError:
            print('D-CDT: FloatingPointError')
            skipped += 1
            continue

        # Combined
        distances_nom = np.concatenate(distances_nom, -1)
        distances_test = np.concatenate(distances_test, -1)

        # Change detection
        cdt = GaussianCusum(arl=P['CUSUM_ARL'], window_size=CUSUM_WINDOW_SIZE)
        cdt.fit(distances_nom,
                estimate_threshold=True,
                len_simulation=P['CUSUM_SIM_LEN'])

        pred, cum_sum = cdt.predict(distances_test, reset=True)
        pred = np.array(pred).astype(int)

        y_true = labels.reshape(-1,
                                CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        y_pred = pred.reshape(-1,
                              CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)

        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        auc, _ = detection_score(y_pred, y_true)

        if auc > 0.:
            tpr_avg.append(tpr)
            fpr_avg.append(fpr)
            auc_avg.append(auc)
            run += 1
        else:
            print('No true positive predictions')
            skipped += 1

    if len(auc_avg) == 0 or np.isnan(np.mean(auc_avg)):
        crashed = True

    result_str = 'crashed' if crashed else 'TPR: {:.5f} FPR: {:.5f} - AUC: {:.3f}'.format(
        np.mean(tpr_avg), np.mean(fpr_avg), np.mean(auc_avg))
    print('Done: {} {} - {}'.format(_id, _c, result_str))

    if not crashed:
        return (_id, _c, np.mean(tpr_avg), np.std(tpr_avg), np.mean(fpr_avg),
                np.std(fpr_avg), np.mean(auc_avg), np.std(auc_avg))
    else:
        return _id, _c, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
Example #8
def _r_cdt(_path, _c):
    _id = _path.split('/')[-2]
    tpr_avg = []
    fpr_avg = []
    auc_avg = []
    run = 0
    skipped = 0
    crashed = False
    while run < P['N_RUNS'] and (skipped < 100 or skipped /
                                 (run + skipped) < 0.9):
        # Read data
        data = dataset_load(_path)
        try:
            nominal, live, labels = data
        except ValueError:
            # No separate nominal split in the dataset: use the label-0 samples instead.
            live, labels = data
            nominal = live[labels == 0].copy()
        live = live[(labels == 0) | (labels == _c)]
        labels = labels[(labels == 0) | (labels == _c)]
        labels[labels != 0] = 1
        CUSUM_WINDOW_SIZE = int(nominal.shape[0] * P['CUSUM_WINDOW_RATIO'])
        cut = CUSUM_WINDOW_SIZE * (nominal.shape[0] // CUSUM_WINDOW_SIZE)
        nominal = nominal[:cut]
        cut = CUSUM_WINDOW_SIZE * (labels.shape[0] // CUSUM_WINDOW_SIZE)
        live = live[:cut]
        labels = labels[:cut]
        live_n = live[labels == 0].copy()
        live_nn = live[labels == 1].copy()
        live = np.vstack((live_n, live_nn))

        # Change detection: one CUSUM per latent block, matched to that block's geometry.
        cusum_list = []
        indices = []
        for i_, r_ in enumerate(P['radius']):
            start = i_ * P['latent_space']
            stop = start + P['latent_space']
            indices.append((start, stop))
            if r_ < 0.:
                # Hyperbolic
                man_tmp = HyperbolicManifold(radius=-r_)
                cusum_list.append(
                    ManifoldCLTCusum(arl=P['CUSUM_ARL'],
                                     manifold=man_tmp,
                                     window_size=CUSUM_WINDOW_SIZE))
            elif r_ > 0.:
                # Spherical
                man_tmp = SphericalManifold(radius=r_)
                cusum_list.append(
                    ManifoldCLTCusum(arl=P['CUSUM_ARL'],
                                     manifold=man_tmp,
                                     window_size=CUSUM_WINDOW_SIZE))
            else:
                # Euclidean
                cusum_list.append(
                    GaussianCusum(arl=P['CUSUM_ARL'],
                                  window_size=CUSUM_WINDOW_SIZE))

        # Bonferroni correction across the per-geometry CUSUM detectors.
        cdt = BonferroniCusum(cusum_list=cusum_list,
                              arl=P['CUSUM_ARL'] // len(P['radius']))
        try:
            cdt.fit([nominal[..., start:stop] for start, stop in indices],
                    estimate_threshold=True,
                    len_simulation=P['CUSUM_SIM_LEN'],
                    radia=P['radius'])
        except FloatingPointError:
            print('R-CDT: FloatingPointError')
            skipped += 1
            continue

        pred, cum_sum = cdt.predict(
            [live[..., start:stop] for start, stop in indices], reset=True)
        pred = np.array(pred).astype(int)

        y_true = labels.reshape(-1,
                                CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        y_pred = pred.reshape(-1,
                              CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)

        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        auc, _ = detection_score(y_pred, y_true)

        if auc > 0.:
            tpr_avg.append(tpr)
            fpr_avg.append(fpr)
            auc_avg.append(auc)
            run += 1
        else:
            print('No true positive predictions')
            skipped += 1

    if len(auc_avg) == 0 or np.isnan(np.mean(auc_avg)):
        crashed = True

    result_str = 'crashed' if crashed else 'TPR: {:.5f} FPR: {:.5f} - AUC: {:.3f}'.format(
        np.mean(tpr_avg), np.mean(fpr_avg), np.mean(auc_avg))
    print('Done: {} {} - {}'.format(_id, _c, result_str))

    if not crashed:
        return (_id, _c, np.mean(tpr_avg), np.std(tpr_avg), np.mean(fpr_avg),
                np.std(fpr_avg), np.mean(auc_avg), np.std(auc_avg))
    else:
        return _id, _c, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan