Example #1
def test_gradients():
    K = 1
    B = 3
    T = 100
    dt = 1.0
    true_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, B=B, dt=dt)
    S, R = true_model.generate(T=T)

    # Test with a standard Hawkes model
    test_model = DiscreteTimeStandardHawkesModel(K=K, B=B, dt=dt)
    test_model.add_data(S)

    # Check gradients with the initial parameters
    def objective(x):
        test_model.weights[0, :] = np.exp(x)
        return test_model.log_likelihood()

    def gradient(x):
        test_model.weights[0, :] = np.exp(x)
        return test_model.compute_gradient(0)

    print "Checking initial gradient: "
    print gradient(np.log(test_model.weights[0, :]))
    check_grad(objective, gradient, np.log(test_model.weights[0, :]))

    print "Checking gradient at true model parameters: "
    test_model.initialize_with_gibbs_model(true_model)

    print(gradient(np.log(test_model.weights[0, :])))
    check_grad(objective, gradient, np.log(test_model.weights[0, :]))
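
Note: both variants of test_gradients rely on scipy.optimize.check_grad, which compares an analytic gradient against a finite-difference approximation of the objective and returns the 2-norm of the discrepancy. A minimal self-contained sketch of the same pattern on a toy objective (f and df are illustrative names, not pyhawkes code):

import numpy as np
from scipy.optimize import check_grad

def f(x):
    # Toy objective: sum of exp(2x)
    return np.sum(np.exp(2 * x))

def df(x):
    # Analytic gradient: d/dx exp(2x) = 2 exp(2x)
    return 2 * np.exp(2 * x)

# Values around 1e-7 or smaller indicate the gradient matches.
print(check_grad(f, df, np.zeros(3)))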
Example #2
def test_gradients():
    K = 1
    B = 3
    T = 100
    dt = 1.0
    true_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, B=B, dt=dt)
    S,R = true_model.generate(T=T)

    # Test with a standard Hawkes model
    test_model = DiscreteTimeStandardHawkesModel(K=K, B=B, dt=dt)
    test_model.add_data(S)

    # Check gradients with the initial parameters
    def objective(x):
        test_model.weights[0,:] = np.exp(x)
        return test_model.log_likelihood()

    def gradient(x):
        test_model.weights[0,:] = np.exp(x)
        return test_model.compute_gradient(0)

    print("Checking initial gradient: ")
    print(gradient(np.log(test_model.weights[0,:])))
    check_grad(objective, gradient,
               np.log(test_model.weights[0,:]))

    print("Checking gradient at true model parameters: ")
    test_model.initialize_with_gibbs_model(true_model)

    print(gradient(np.log(test_model.weights[0,:])))
    check_grad(objective, gradient,
               np.log(test_model.weights[0,:]))
Example #3
def fit_network_hawkes_svi(S, K, C, B, dt, dt_max,
                           output_path,
                           standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="svi2")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        import pdb; pdb.set_trace()
        test_model.add_data(S)


        # Stochastic variational inference
        N_iters = 10000
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1] -start), f, protocol=-1)

        # Save the SVI samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
            print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
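
The SVI loop above uses a Robbins-Monro style step size schedule, stepsize[t] = (t + delay) ** (-forgetting_rate), which decays from roughly 1 toward 0 so early iterations move far and later iterations average out minibatch noise. A small sketch of the schedule (sgd_schedule is an illustrative helper, not a pyhawkes function):

import numpy as np

def sgd_schedule(n_iters, delay=1.0, forgetting_rate=0.5):
    # stepsize[t] = (t + delay) ** (-forgetting_rate), as in the loop above
    return (np.arange(n_iters) + delay) ** (-forgetting_rate)

steps = sgd_schedule(10000)
print(steps[0], steps[99], steps[9999])  # 1.0 0.1 0.01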
Example #4
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'rb') as f:
        S, true_model = pickle.load(f)

    T      = S.shape[0]
    K      = true_model.K
    B      = true_model.B
    dt     = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len   = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers,
                                                            network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)


    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in range(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps             = np.array(lps)

    offset = N_samples // 2
    A_mean       = A_samples[offset:, ...].mean(axis=0)
    W_mean       = W_samples[offset:, ...].mean(axis=0)
    g_mean       = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(), A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()


    plt.ioff()
    plt.show()
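
The demo above scores link prediction by flattening the true binary adjacency matrix A into labels and using posterior summaries (the mean of A or W) as scores for sklearn's roc_auc_score. A toy illustration of that scoring (the values are made up):

import numpy as np
from sklearn.metrics import roc_auc_score

# True binary adjacency as labels, real-valued scores as predictions.
A_true = np.array([[0, 1], [1, 0]])
A_score = np.array([[0.1, 0.8], [0.7, 0.2]])
print(roc_auc_score(A_true.ravel(), A_score.ravel()))  # 1.0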
Example #5
def geweke_test():
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {
        'c': np.array([0], dtype=int),
        'p': 0.5,
        'kappa': 3.0,
        'v': 15.0
    }
    weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0}
    model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=1,
        dt=dt,
        dt_max=dt_max,
        weight_hypers=weight_hypers,
        network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
Example #6
def fit_network_hawkes_gibbs(S, K, C, B, dt, dt_max,
                             output_path,
                             standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="gibbs")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".gibbs.pkl"):
    #     with open(output_path + ".gibbs.pkl", 'r') as f:
    #         print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
    #         (samples, timestamps) = cPickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 1000
        samples = []
        lps = []
        timestamps = [time.time()]
        for itr in range(N_samples):
            lps.append(test_model.log_probability())
            # lps.append(test_model.log_likelihood())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            if itr % 1 == 0:
                print("Iteration ", itr, "\t LL: ", lps[-1])
            #    im.set_data(test_model.weight_model.A * \
            #                test_model.weight_model.W)
            #    plt.pause(0.001)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1]-timestamps[0]), f, protocol=-1)

        # Save the Gibbs timestamps
        timestamps = np.array(timestamps)
        with open(output_path + ".gibbs.timestamps.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.timestamps.pkl")
            cPickle.dump(timestamps, f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps[1:] - timestamps[0]), f, protocol=-1)

    return samples, timestamps
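
The Gibbs loop above writes every sample to its own numbered pickle file so a long run can be inspected or resumed midway. A minimal sketch of that checkpointing pattern (save_checkpoint is an illustrative helper, not part of pyhawkes):

import pickle

def save_checkpoint(output_path, itr, sample, elapsed):
    # One numbered file per iteration; binary mode is required for pickle.
    fname = output_path + ".gibbs.itr%04d.pkl" % itr
    with open(fname, "wb") as f:
        pickle.dump((sample, elapsed), f, protocol=pickle.HIGHEST_PROTOCOL)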
Example #7
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
Example #8
def fit_network_hawkes_vb(S,
                          K,
                          C,
                          B,
                          dt,
                          dt_max,
                          output_path,
                          standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="vb")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing VB results
    # if os.path.exists(output_path + ".vb.pkl.gz"):
    #     with gzip.open(output_path + ".vb.pkl.gz", 'r') as f:
    #         print "Loading vb results from ", (output_path + ".vb.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    #
    #         if isinstance(timestamps, list):
    #             timestamps = np.array(timestamps)

    else:
        print("Fitting the data with a network Hawkes model using Batch VB")

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0 / 20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        test_model.add_data(S)

        # Batch variational inference
        N_iters = 1000
        vlbs = []
        samples = []
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            vlbs.append(test_model.meanfield_coordinate_descent_step())
            print("Batch VB Iter: ", itr, "\tVLB: ", vlbs[-1])
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".vb.itr%04d.pkl" % itr, 'w') as f:
                pickle.dump((samples[-1], timestamps[-1] - start),
                            f,
                            protocol=-1)

        # Save the VB samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".vb.pkl.gz", 'wb') as f:
            print("Saving VB samples to ", (output_path + ".vb.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
Example #9
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'rb') as f:
        S, true_model = pickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in range(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(),
                                  A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()

    plt.ioff()
    plt.show()
Example #10
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    import warnings
    warnings.warn("This test runs but the parameters need to be tuned. "
                  "Right now, the SVI algorithm seems to walk away from "
                  "the MAP estimate and yield suboptimal results. "
                  "I'm not convinced the variational inference with the "
                  "gamma mixture provides the best estimates of the sparsity.")

    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'rb') as f:
        S, true_model = pickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True  # assumed flag; set explicitly in the fuller variants of this demo
    if init_with_map:
        init_len = T
        print("Initializing with BFGS on first ", init_len, " time bins.")
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    # network_hypers = true_model.network_hypers.copy()
    # network_hypers['c'] = None
    # network_hypers['v'] = None
    # network_hypers['m'] = None
    # test_network = StochasticBlockModel(K=K, **network_hypers)
    test_network = StochasticBlockModel(K=K, C=1)
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network=test_network)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with variational Bayesian inference
    ###########################################################
    # VB coordinate descent
    N_iters = 100
    vlbs = []
    samples = []
    for itr in range(N_iters):
        vlbs.append(test_model.meanfield_coordinate_descent_step())
        print("VB Iter: ", itr, "\tVLB: ", vlbs[-1])
        if itr > 0:
            if (vlbs[-2] - vlbs[-1]) > 1e-1:
                print("WARNING: VLB is not increasing!")

        # Resample from variational distribution and plot
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    vlbs = np.array(vlbs)

    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    # Plot the VLBs
    plt.figure()
    plt.plot(np.arange(N_samples), vlbs, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("VLB")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(),
                                  A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()

    plt.ioff()
    plt.show()
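
The VB loop above warns when the variational lower bound drops by more than 0.1 between iterations: batch mean-field coordinate ascent should be monotone up to numerical error, so a persistent decrease signals a bug. A small sketch of that check (check_vlb_monotone is an illustrative helper):

import numpy as np

def check_vlb_monotone(vlbs, tol=1e-1):
    # Indices i where vlbs[i+1] dropped below vlbs[i] by more than tol.
    vlbs = np.asarray(vlbs)
    return np.where(np.diff(vlbs) < -tol)[0]

print(check_vlb_monotone([-10.0, -8.0, -8.5]))  # [1]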
Example #11
def geweke_test():
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {
        'c': np.array([0], dtype=int),
        'p': 0.5,
        'kappa': 3.0,
        'v': 15.0
    }
    model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=1, dt=dt, dt_max=dt_max, network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 10 == 0:
            print("Iteration: ", itr)
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    c_samples = np.array([s.network.c for s in samples])
    p_samples = np.array([s.network.p for s in samples])
    v_samples = np.array([s.network.v for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print "A mean:        ", A_mean
    print "W mean:        ", W_mean
    print "g mean:        ", g_mean
    print "lambda0 mean:  ", lambda0_mean

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1. / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0],
                          bins=20,
                          alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print "Expected p(A):  ", model.network.P
    print "Empirical p(A): ", A_samples.mean(axis=0)

    # Plot the histogram of weight samples
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1. / model.network.v[0, 0])
    _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0],
                          bins=20,
                          alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=1)')

    plt.figure()
    Aeq0 = A_samples[:, 0, 0] == 0
    p_W1 = gamma(model.weight_model.kappa_0,
                 scale=1. / model.weight_model.nu_0)
    _, bins, _ = plt.hist(W_samples[Aeq0, 0, 0],
                          bins=20,
                          alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=0)')

    # Plot the histogram of impulse samples
    plt.figure()
    for b in range(model.B):
        plt.subplot(1, model.B, b + 1)
        # Use separate names for the Beta parameters so the basis index b
        # is not overwritten inside the loop (shadowing bug in the original).
        a_gam = model.impulse_model.gamma[b]
        b_gam = model.impulse_model.gamma.sum() - a_gam
        p_beta11b = beta(a_gam, b_gam)

        _, bins, _ = plt.hist(g_samples[:, 0, 0, b],
                              bins=20,
                              alpha=0.5,
                              density=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_beta11b.pdf(bincenters), 'r--', linewidth=1)
        plt.xlabel('g_%d' % b)
        plt.ylabel('p(g_%d)' % b)

    plt.show()
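
geweke_test above implements Geweke's "getting it right" check: alternately resampling the parameters given the data and regenerating the data given the parameters yields a Markov chain whose stationary distribution is the model prior, so the sampled parameters are compared against their prior densities. A self-contained toy version of the same idea on a conjugate Beta-Bernoulli model (not a pyhawkes API):

import numpy as np

rng = np.random.default_rng(0)
a0, b0, N, n_samples = 3.0, 3.0, 10, 5000

theta = rng.beta(a0, b0)            # draw parameters from the prior
x = rng.binomial(1, theta, size=N)  # draw data given the parameters
thetas = []
for _ in range(n_samples):
    # Resample the parameter from its conditional (conjugate posterior)
    theta = rng.beta(a0 + x.sum(), b0 + N - x.sum())
    # Geweke step: regenerate the data given the new parameter
    x = rng.binomial(1, theta, size=N)
    thetas.append(theta)

# The successive-conditional samples should match the Beta(3, 3) prior:
# mean 0.5 and variance 1/28 ~= 0.0357.
print(np.mean(thetas), np.var(thetas))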
Example #12
        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)


    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
Example #13
def fit_network_hawkes_svi(S, K, C, dt, dt_max,
                           output_path,
                           standard_model=None,
                           N_iters=500):


    # Check for existing SVI results
    # if os.path.exists(output_path + ".svi.pkl.gz"):
    #     with gzip.open(output_path + ".svi.pkl.gz", 'r') as f:
    #         print "Loading SVI results from ", (output_path + ".svi.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    if os.path.exists(output_path + ".svi.itr%04d.pkl" % (N_iters-1)):
        with open(output_path + ".svi.itr%04d.pkl" % (N_iters-1), 'rb') as f:
            print("Loading SVI results from ", (output_path + ".svi.itr%04d.pkl" % (N_iters-1)))
            sample = pickle.load(f)
            samples = [sample]
            timestamps = None
            # (samples, timestamps) = pickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max,
                                                                basis=test_basis,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Plot the block affiliations
        plt.figure(2)
        KC = np.zeros((K,C))
        KC[np.arange(K), test_model.network.c] = 1.0
        im_clus = plt.imshow(KC,
                        interpolation="none", cmap="Greys",
                        aspect=float(C)/K)

        # TODO: Add the data in minibatches
        minibatchsize = 1000
        test_model.add_data(S)


        # Stochastic variational inference
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                plt.figure(1)
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

                plt.figure(2)
                im_clus.set_data(test_model.network.mf_m)
                plt.title("Iteration %d" % itr)
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the SVI samples
        # with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
        #     print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
        #     pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
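
The block-affiliation figure in this example displays the cluster assignments c as a one-hot K x C matrix: row k has a single 1 in the column of node k's block. A minimal sketch of that construction (sizes and assignments are made up):

import numpy as np

K, C = 5, 3
c = np.array([0, 2, 1, 0, 2])
KC = np.zeros((K, C))
KC[np.arange(K), c] = 1.0
print(KC)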
Example #14
def fit_network_hawkes_vb(S, K, C, B, dt, dt_max,
                          output_path,
                          standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="vb")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing VB results
    # if os.path.exists(output_path + ".vb.pkl.gz"):
    #     with gzip.open(output_path + ".vb.pkl.gz", 'r') as f:
    #         print "Loading vb results from ", (output_path + ".vb.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    #
    #         if isinstance(timestamps, list):
    #             timestamps = np.array(timestamps)

    else:
        print "Fitting the data with a network Hawkes model using Batch VB"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        test_model.add_data(S)


        # Batch variational inference
        N_iters = 1000
        vlbs = []
        samples = []
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            vlbs.append(test_model.meanfield_coordinate_descent_step())
            print("Batch VB Iter: ", itr, "\tVLB: ", vlbs[-1])
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".vb.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1] - start), f, protocol=-1)

        # Save the VB samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".vb.pkl.gz", 'wb') as f:
            print("Saving VB samples to ", (output_path + ".vb.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
Example #15
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2 ** 32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K4_C1_T1000.pkl.gz")
    with gzip.open(data_path, "r") as f:
        S, true_model = cPickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True  # assumed flag; set explicitly in the fuller variants of this demo
    if init_with_map:
        init_len = T
        print("Initializing with BFGS on first ", init_len, " time bins.")
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B, alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers["c"] = None
    network_hypers["v"] = None
    network_hypers["m"] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers,
    )
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    do_plot = True  # assumed flag; not defined in this excerpt
    if do_plot:
        ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    ###########################################################
    # Fit the test model with stochastic variational inference
    ###########################################################
    N_iters = 500
    minibatchsize = 100
    delay = 1.0
    forgetting_rate = 0.5
    stepsize = (np.arange(N_iters) + delay) ** (-forgetting_rate)
    samples = []
    for itr in range(N_iters):
        print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
        test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())

        # Update plot
        if itr % 1 == 0 and do_plot:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples)
Example #16
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'rb') as f:
        S, true_model = pickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['c'] = None
    network_hypers['v'] = None
    network_hypers['m'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    ###########################################################
    # Fit the test model with stochastic variational inference
    ###########################################################
    N_iters = 500
    minibatchsize = 500
    delay = 1.0
    forgetting_rate = 0.5
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    samples = []
    for itr in range(N_iters):
        print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
        test_model.sgd_step(minibatchsize=minibatchsize,
                            stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())

        # Update plot
        if itr % 1 == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples)
Example #17
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K4_C1_T1000.pkl.gz")
    with gzip.open(data_path, 'rb') as f:
        S, true_model = pickle.load(f)

    T      = S.shape[0]
    K      = true_model.K
    B      = true_model.B
    dt     = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True  # assumed flag; set explicitly in the fuller variants of this demo
    if init_with_map:
        init_len   = T
        print("Initializing with BFGS on first ", init_len, " time bins.")
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers,
                                                            network_hypers=network_hypers)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with variational Bayesian inference
    ###########################################################
    # VB coordinate descent
    N_iters = 100
    vlbs = []
    samples = []
    for itr in range(N_iters):
        vlbs.append(test_model.meanfield_coordinate_descent_step())
        print("VB Iter: ", itr, "\tVLB: ", vlbs[-1])
        if itr > 0:
            if (vlbs[-2] - vlbs[-1]) > 1e-1:
                print("WARNING: VLB is not increasing!")

        # Resample from variational distribution and plot
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    # Compute sample statistics for second half of samples
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    vlbs            = np.array(vlbs)

    offset = N_samples // 2
    A_mean       = A_samples[offset:, ...].mean(axis=0)
    W_mean       = W_samples[offset:, ...].mean(axis=0)
    g_mean       = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    # Plot the VLBs
    plt.figure()
    plt.plot(np.arange(N_samples), vlbs, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("VLB")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(), A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()


    plt.ioff()
    plt.show()
Example #18
def fit_network_hawkes_gibbs(S,
                             K,
                             C,
                             B,
                             dt,
                             dt_max,
                             output_path,
                             standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="gibbs")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".gibbs.pkl"):
    #     with open(output_path + ".gibbs.pkl", 'r') as f:
    #         print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
    #         (samples, timestamps) = cPickle.load(f)

    else:
        print(
            "Fitting the data with a network Hawkes model using Gibbs sampling"
        )

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0 / 20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 1000
        samples = []
        lps = []
        timestamps = [time.time()]
        for itr in range(N_samples):
            lps.append(test_model.log_probability())
            # lps.append(test_model.log_likelihood())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            if itr % 1 == 0:
                print("Iteration ", itr, "\t LL: ", lps[-1])
            #    im.set_data(test_model.weight_model.A * \
            #                test_model.weight_model.W)
            #    plt.pause(0.001)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                pickle.dump((samples[-1], timestamps[-1] - timestamps[0]),
                            f,
                            protocol=-1)

        # Save the Gibbs timestamps
        timestamps = np.array(timestamps)
        with open(output_path + ".gibbs.timestamps.pkl", 'w') as f:
            print("Saving Gibbs samples to ",
                  (output_path + ".gibbs.timestamps.pkl"))
            pickle.dump(timestamps, f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print("Saving Gibbs samples to ", (output_path + ".gibbs.pkl"))
            pickle.dump((samples, timestamps[1:] - timestamps[0]),
                        f,
                        protocol=-1)

    return samples, timestamps
Example #19
def test_sbm_mf(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 5
    K = 50
    T = 1000
    dt = 1.0
    B = 3
    p = 0.4 * np.eye(C) + (0.05) * (1-np.eye(C))

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K, 'p': p}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    c = true_model.network.c
    perm = np.argsort(c)
    #
    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    test_network_hypers = {'C': C, 'beta': 1.0/K, 'tau0': 0.5, 'tau1': 0.5}
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, B=B,
                                                            network_hypers=test_network_hypers)
    test_model.weight_model.initialize_from_gibbs(true_model.weight_model.A,
                                                  true_model.weight_model.W)

    # Plot the block probabilities
    plt.figure()
    im = plt.imshow(test_model.network.mf_m[perm,:],
                    interpolation="none", cmap="Greys",
                    aspect=float(C)/K)
    plt.xlabel('C')
    plt.ylabel('K')
    plt.show()
    plt.pause(0.001)

    # Run mean field updates for the SBM given a fixed network
    N_iters = 50
    c_samples = []
    vlbs = []
    for itr in range(N_iters):
        if itr % 5 == 0:
            print("Iteration: ", itr)

        # Update the plot
        im.set_data(test_model.network.mf_m[perm,:])
        plt.pause(0.001)


        # Resample from meanfield distribution
        test_model.network.resample_from_mf()
        c_samples.append(copy.deepcopy(test_model.network.c))
        vlbs.append(test_model.network.get_vlb() + test_model.weight_model.get_vlb())

        if itr > 0:

            if vlbs[-1] - vlbs[-2] < -1e-3:
                print "VLBS are not increasing"
                print np.array(vlbs)
                # import pdb; pdb.set_trace()
                # raise Exception("VLBS are not increasing!")


        # Take a mean field step
        test_model.network.meanfieldupdate(test_model.weight_model)

    plt.ioff()

    # Compute sample statistics for second half of samples
    c_samples = np.array(c_samples)
    vlbs = np.array(vlbs)

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_iters), amis, '-r')
    plt.plot(np.arange(N_iters), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")

    plt.figure()
    plt.plot(np.arange(N_iters), vlbs)
    plt.xlabel("Iteration")
    plt.ylabel("VLB")

    plt.show()
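
test_sbm_mf above scores the recovered clusterings with adjusted mutual information and adjusted Rand scores, both of which are invariant to relabeling of the clusters. A toy illustration (the labels are made up):

from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

# A permuted but otherwise identical partition scores 1.0 on both metrics.
c_true = [0, 0, 1, 1, 2, 2]
c_est = [2, 2, 0, 0, 1, 1]
print(adjusted_mutual_info_score(c_true, c_est))  # 1.0
print(adjusted_rand_score(c_true, c_est))         # 1.0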
Example #20
def fit_network_hawkes_svi(S,
                           K,
                           C,
                           B,
                           dt,
                           dt_max,
                           output_path,
                           standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="svi2")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    else:
        print("Fitting the data with a network Hawkes model using SVI")

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0 / 20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        # import pdb; pdb.set_trace()  # debug breakpoint left in; disabled so the example runs
        test_model.add_data(S)

        # Stochastic variational inference
        N_iters = 10000
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        start = time.time()  # time.clock() was removed in Python 3.8
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize,
                                stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample (pickle requires a binary file handle)
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
                pickle.dump((samples[-1], timestamps[-1] - start),
                            f,
                            protocol=-1)

        # Save the SVI samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
            print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
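
Both SVI examples above use the same decaying step-size schedule, stepsize[t] = (t + delay) ** (-forgetting_rate). This is the Robbins-Monro-style schedule standard in stochastic variational inference: an exponent in (0.5, 1] makes the steps non-summable but square-summable, the usual convergence condition; these examples sit at the boundary value 0.5. A minimal sketch of the schedule in isolation (the helper name is illustrative):

import numpy as np

def svi_stepsizes(n_iters, delay=1.0, forgetting_rate=0.5):
    # rho_t = (t + delay) ** (-forgetting_rate), t = 0, ..., n_iters - 1
    return (np.arange(n_iters) + delay) ** (-forgetting_rate)

print(svi_stepsizes(5))  # [1.0, 0.7071..., 0.5774..., 0.5, 0.4472...]
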
Exemplo n.º 21
0
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model with stochastic variational inference (SVI).
    :return:
    """
    import warnings
    warnings.warn("This test runs but the parameters need to be tuned. "
                  "Right now, the SVI algorithm seems to walk away from "
                  "the MAP estimate and yield suboptimal results. "
                  "I'm not convinced the variational inference with the "
                  "gamma mixture provides the best estimates of the sparsity.")

    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = pickle.load(f)

    T      = S.shape[0]
    K      = true_model.K
    B      = true_model.B
    dt     = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True  # not defined in this excerpt; True matches the sibling example below
    if init_with_map:
        init_len   = T
        print("Initializing with BFGS on first ", init_len, " time bins.")
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['C'] = 1
    network_hypers['c'] = None
    network_hypers['v'] = None
    network_hypers['m'] = None
    test_network = StochasticBlockModel(K=K, **network_hypers)
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers,
                                                            network=test_network)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with stochastic variational inference
    ###########################################################
    N_iters = 500
    minibatchsize = 1000
    delay = 1.0
    forgetting_rate = 0.5
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    samples = []
    for itr in range(N_iters):
        print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
        test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples)
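
analyze_samples is a helper defined elsewhere in the original script. Purely as a hypothetical sketch of the usual pattern, one might discard the first half of the samples as burn-in and average the remaining effective weight matrices (A * W, the attributes used throughout these examples) into a posterior-mean estimate:

import numpy as np

def analyze_samples_sketch(true_model, init_model, samples):
    # Hypothetical stand-in for the script's analyze_samples helper.
    W_samples = np.array([s.weight_model.A * s.weight_model.W for s in samples])
    W_mean = W_samples[len(W_samples) // 2:].mean(axis=0)  # drop burn-in
    W_true = true_model.weight_model.A * true_model.weight_model.W
    print("||E[W] - W_true||_F =", np.linalg.norm(W_mean - W_true))
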
Exemplo n.º 22
0
from pyhawkes.models import DiscreteTimeNetworkHawkesModelGammaMixture

if __name__ == "__main__":
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {'c': np.array([0], dtype=int),  # np.int was removed in NumPy 1.24
                      'p': 0.5, 'kappa': 3.0, 'v': 15.0}
    weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0}
    model = DiscreteTimeNetworkHawkesModelGammaMixture(K=1, dt=dt, dt_max=dt_max,
                                                       weight_hypers=weight_hypers,
                                                       network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
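
This loop is the start of a Geweke-style "getting it right" test: alternate between resampling the parameters given the data and regenerating data given the parameters, so the sampled (parameter, data) pairs stay distributed according to the prior. The excerpt ends at model.data_list.pop(); a sketch of the full alternation, assuming the original script regenerates data right after the pop:

for itr in progprint_xrange(N_samples, perline=50):
    model.resample_model(resample_network=False)  # theta ~ p(theta | S)
    samples.append(model.copy_sample())
    lps.append(model.log_probability())
    model.data_list.pop()                         # discard the old dataset
    model.generate(T=T)                           # S ~ p(S | theta)
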
Exemplo n.º 23
0
def demo(seed=None):
    """
    Fit a weakly sparse network Hawkes model with Gibbs sampling.
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = pickle.load(f)  # cPickle is Python 2 only

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['c'] = None
    network_hypers['v'] = None
    network_hypers['m'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in range(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

        # Update plot
        if itr % 1 == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples, lps)