def run_comparison(data_path, test_path, output_path, T_train=None, seed=None):
    """
    Run the comparison on the given data file
    :param data_path:
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    assert os.path.exists(
        os.path.dirname(output_path)), "Output directory does not exist!"

    if data_path.endswith(".gz"):
        with gzip.open(data_path, 'r') as f:
            S, true_model = pickle.load(f)
    else:
        with open(data_path, 'r') as f:
            S, true_model = pickle.load(f)

    # If T_train is given, only use a fraction of the dataset
    if T_train is not None:
        S = S[:T_train, :]

    if test_path.endswith(".gz"):
        with gzip.open(test_path, 'r') as f:
            S_test, test_model = pickle.load(f)
    else:
        with open(test_path, 'r') as f:
            S_test, test_model = pickle.load(f)

    K = true_model.K
    C = true_model.C
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    use_parse_results = True
    if use_parse_results and os.path.exists(output_path +
                                            ".parsed_results.pkl"):
        with open(output_path + ".parsed_results.pkl") as f:
            auc_rocs, auc_prcs, plls, timestamps = pickle.load(f)
            timestamps['svi'] = np.array(timestamps['svi'])

    else:
        # Compute the cross correlation to estimate the connectivity
        W_xcorr = infer_net_from_xcorr(S,
                                       dtmax=true_model.dt_max //
                                       true_model.dt)

        # Fit a standard Hawkes model on subset of data with BFGS
        bfgs_model, bfgs_time = fit_standard_hawkes_model_bfgs(
            S, K, B, dt, dt_max, output_path=output_path)

        # Fit a standard Hawkes model with SGD
        # standard_models, timestamps = fit_standard_hawkes_model_sgd(S, K, B, dt, dt_max,
        #                                                         init_model=init_model)
        #
        # # Save the models
        # with open(output_path + ".sgd.pkl", 'w') as f:
        #     print "Saving SGD results to ", (output_path + ".sgd.pkl")
        #     cPickle.dump((standard_models, timestamps), f, protocol=-1)

        # Fit a network Hawkes model with Gibbs
        gibbs_samples, gibbs_timestamps = fit_network_hawkes_gibbs(
            S,
            K,
            C,
            B,
            dt,
            dt_max,
            output_path=output_path,
            standard_model=bfgs_model)

        # Fit a spike and slab network Hawkes model with Gibbs
        gibbs_ss_samples = gibbs_ss_timestamps = None
        # gibbs_ss_samples, gibbs_ss_timestamps = fit_network_hawkes_gibbs_ss(S, K, C, B, dt, dt_max,
        #                                                          output_path=output_path,
        #                                                          standard_model=bfgs_model)

        # Fit a network Hawkes model with Batch VB
        vb_models, vb_timestamps = fit_network_hawkes_vb(
            S,
            K,
            C,
            B,
            dt,
            dt_max,
            output_path=output_path,
            standard_model=bfgs_model)

        # Fit a network Hawkes model with SVI
        # svi_models = svi_timestamps = None
        svi_models, svi_timestamps = fit_network_hawkes_svi(
            S, K, C, B, dt, dt_max, output_path, standard_model=bfgs_model)

        # Combine timestamps into a dict
        timestamps = {}
        timestamps['bfgs'] = bfgs_time
        timestamps['gibbs'] = gibbs_timestamps
        timestamps['gibbs_ss'] = gibbs_ss_timestamps
        timestamps['svi'] = svi_timestamps
        timestamps['vb'] = vb_timestamps

        amis = compute_clustering_score(true_model,
                                        bfgs_model=bfgs_model,
                                        gibbs_samples=gibbs_samples,
                                        gibbs_ss_samples=gibbs_ss_samples,
                                        svi_models=svi_models,
                                        vb_models=vb_models)
        print("AMIS")
        pprint.pprint(amis)

        auc_rocs = compute_auc(true_model,
                               W_xcorr=W_xcorr,
                               bfgs_model=bfgs_model,
                               gibbs_samples=gibbs_samples,
                               gibbs_ss_samples=gibbs_ss_samples,
                               svi_models=svi_models,
                               vb_models=vb_models)
        print("AUC-ROC")
        pprint.pprint(auc_rocs)

        # Compute area under the precision-recall curve of the inferred network
        auc_prcs = compute_auc_prc(true_model,
                                   W_xcorr=W_xcorr,
                                   bfgs_model=bfgs_model,
                                   gibbs_samples=gibbs_samples,
                                   gibbs_ss_samples=gibbs_ss_samples,
                                   svi_models=svi_models,
                                   vb_models=vb_models)
        print("AUC-PRC")
        pprint.pprint(auc_prcs)

        plls = compute_predictive_ll(S_test,
                                     S,
                                     true_model=true_model,
                                     bfgs_model=bfgs_model,
                                     gibbs_samples=gibbs_samples,
                                     gibbs_ss_samples=gibbs_ss_samples,
                                     svi_models=svi_models,
                                     vb_models=vb_models)

        with open(output_path + ".parsed_results.pkl", 'w') as f:
            print("Saving parsed results to ",
                  output_path + ".parsed_results.pkl")
            pickle.dump((auc_rocs, auc_prcs, plls, timestamps), f, protocol=-1)

    plot_pred_ll_vs_time(plls, timestamps, Z=float(S.size), T_train=T_train)
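
# For reference, a minimal sketch of the per-model quantity a helper like
# compute_predictive_ll presumably reports: the Poisson log-likelihood of the
# held-out counts S_test under a per-bin rate lam (events/sec, bin width dt).
# plot_pred_ll_vs_time then appears to normalize by Z = S.size, the number of
# bin-by-unit cells. The functional form below is an assumption for
# illustration, not the library's actual implementation; it assumes lam > 0.
def poisson_pll_sketch(S_test, lam, dt):
    from scipy.special import gammaln
    mu = lam * dt                              # expected count per (bin, unit)
    return np.sum(S_test * np.log(mu) - mu - gammaln(S_test + 1.0))
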
def run_comparison(data_path, output_path, seed=None, thresh=0.5):
    """
    Run the comparison on the given data file
    :param data_path:
    :return:
    """
    import ipdb
    ipdb.set_trace()
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    assert os.path.exists(
        os.path.dirname(output_path)), "Output directory does not exist!"

    if data_path.endswith("_oopsi.pkl.gz"):
        # The oopsi data has a probability of spike
        with gzip.open(data_path, 'rb') as f:
            P, F, Cf, network, pos = pickle.load(f)
            S_full = P > thresh
            # onespk = np.bitwise_and(P > thresh, Cf < 0.3)
            # twospk = np.bitwise_and(P > thresh, Cf >= 0.3)
            # S_full = np.zeros_like(P)
            # S_full[onespk] = 1
            # S_full[twospk] = 2

    elif data_path.endswith(".gz"):
        with gzip.open(data_path, 'r') as f:
            S_full, F, bins, network, pos = cPickle.load(f)
    else:
        with open(data_path, 'r') as f:
            S_full, F, bins, network, pos = cPickle.load(f)

    # Cast to int
    S_full = S_full.astype(int)

    # Train on the first ten minutes and test on the last ten
    # (20ms time bins = 50Hz, so 10 * 60 * 50 = 30,000 bins each)
    T_train = 10 * 60 * 50
    T_test = 10 * 60 * 50
    # S      = S_full[:-T_test, :]
    S = S_full[:T_train, :]
    S_test = S_full[-T_test:, :]

    K = S.shape[1]
    C = 1
    dt = 0.02
    dt_max = 0.08

    # Compute the cross correlation to estimate the connectivity
    print "Estimating network via cross correlation"
    W_xcorr = infer_net_from_xcorr(S, dtmax=dt_max // dt)

    # HACK! Select the threshold by looking at the data
    test_thresholds = False
    if test_thresholds:
        print "Estimating network via cross correlation"
        F_xcorr = infer_net_from_xcorr(F, dtmax=3)
        aucs, _, _ = compute_auc_roc(network, W_xcorr=F_xcorr)
        print "AUC F: ", aucs["xcorr"]
        for thresh in np.linspace(0.1, 0.95, 20):
            S_thr = (P > thresh).astype(np.int)
            S_train = S_thr[:T_train, :]

            W_tmp = infer_net_from_xcorr(S_train, dtmax=dt_max // dt)
            aucs, _, _ = compute_auc_roc(network, W_xcorr=W_tmp)
            print "AUC (", thresh, "): ", aucs["xcorr"]

        import pdb
        pdb.set_trace()

    # Fit a standard Hawkes model on subset of data with BFGS
    bfgs_model, bfgs_time = \
        fit_standard_hawkes_model_bfgs_noxv(S, K, dt, dt_max,
                                            output_path=output_path,
                                            W_max=None)

    # Fit a network Hawkes model with Gibbs
    gibbs_samples = gibbs_timestamps = None
    gibbs_samples, gibbs_timestamps = \
        fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                                 output_path=output_path,
                                 standard_model=bfgs_model)

    # gibbs_samples, gibbs_timestamps = \
    #     fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max,
    #                              output_path=output_path,
    #                              standard_model=bfgs_model)

    # Fit a network Hawkes model with Batch VB
    # vb_models, vb_timestamps = fit_network_hawkes_vb(S, K, dt, dt_max,
    #                                          standard_model=standard_models[-1])
    #
    # with open(output_path + ".vb.pkl", 'w') as f:
    #     print "Saving VB results to ", (output_path + ".vb.pkl")
    #     cPickle.dump((vb_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with SVI
    # svi_models = None
    svi_models, timestamps = fit_network_hawkes_svi(S,
                                                    K,
                                                    C,
                                                    dt,
                                                    dt_max,
                                                    output_path,
                                                    standard_model=bfgs_model,
                                                    true_network=network)

    # Compute area under roc curve of inferred network
    auc_rocs, fprs, tprs = compute_auc_roc(network,
                                           W_xcorr=W_xcorr,
                                           bfgs_model=bfgs_model,
                                           gibbs_samples=gibbs_samples,
                                           svi_models=svi_models)
    print "AUC-ROC"
    pprint.pprint(auc_rocs)

    plot_roc_curves(fprs, tprs, fig_path=output_path)

    # Compute area under the precision-recall curve of the inferred network
    auc_prcs, precs, recalls = compute_auc_prc(network,
                                               W_xcorr=W_xcorr,
                                               bfgs_model=bfgs_model,
                                               gibbs_samples=gibbs_samples,
                                               svi_models=svi_models)
    print "AUC-PRC"
    pprint.pprint(auc_prcs)

    plot_prc_curves(precs, recalls, fig_path=output_path)

    # Compute the predictive log likelihoods
    plls = compute_predictive_ll(S_test,
                                 S,
                                 bfgs_model=bfgs_model,
                                 gibbs_samples=gibbs_samples,
                                 svi_models=svi_models)

    print "Log Predictive Likelihoods: "
    pprint.pprint(plls)
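
# For orientation, a minimal sketch of the kind of estimate a helper like
# infer_net_from_xcorr could produce: score each directed pair (i, j) by the
# peak cross-covariance of the binned counts over lags 1..dtmax. This is an
# illustrative stand-in under those assumptions, not the library's actual code.
def xcorr_weights_sketch(S, dtmax):
    T, K = S.shape
    Sc = S - S.mean(axis=0)                    # center each unit's counts
    W = np.zeros((K, K))
    for lag in range(1, int(dtmax) + 1):
        # covariance of unit i at time t with unit j at time t + lag
        C = Sc[:-lag].T.dot(Sc[lag:]) / (T - lag)
        W = np.maximum(W, np.abs(C))
    return W
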
def run_comparison(data_path, output_path, seed=None):
    """
    Run the comparison on the given data file
    :param data_path:
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    assert os.path.exists(os.path.dirname(output_path)), "Output directory does not exist!"

    if data_path.endswith("_oopsi.pkl.gz"):
        # The oopsi data has a probability of spike
        thresh = 0.1
        with gzip.open(data_path, 'rb') as f:
            P, F, Cf, network, pos = pickle.load(f)
            S_full = P > thresh
            # onespk = np.bitwise_and(P > thresh, Cf < 0.3)
            # twospk = np.bitwise_and(P > thresh, Cf >= 0.3)
            # S_full = np.zeros_like(P)
            # S_full[onespk] = 1
            # S_full[twospk] = 2

    elif data_path.endswith(".gz"):
        with gzip.open(data_path, 'r') as f:
            S_full, F, bins, network, pos = cPickle.load(f)
    else:
        with open(data_path, 'r') as f:
            S_full, F, bins, network, pos = cPickle.load(f)

    # Cast to int
    S_full = S_full.astype(int)

    # Train on the first five minutes and test on the last ten
    # (20ms time bins = 50Hz)
    T_train = 5 * 60 * 50
    T_test = 10 * 60 * 50
    # S      = S_full[:-T_test, :]
    S      = S_full[:T_train, :]
    S_test = S_full[-T_test:, :]

    K      = S.shape[1]
    C      = 5
    dt     = 0.02
    dt_max = 0.08

    # Estimate the connectivity via cross correlation of the fluorescence...
    print("Estimating network via cross correlation")
    F_xcorr = infer_net_from_xcorr(F[:10000,:], dtmax=3)

    # ... and of the binned spike counts
    W_xcorr = infer_net_from_xcorr(S[:10000], dtmax=dt_max // dt)

    # Fit a standard Hawkes model on subset of data with BFGS
    bfgs_model, bfgs_time = fit_standard_hawkes_model_bfgs(S, K, dt, dt_max,
                                                           output_path=output_path)

    # Fit a standard Hawkes model with SGD
    # standard_models, timestamps = fit_standard_hawkes_model_sgd(S, K, dt, dt_max,
    #                                                         init_model=init_model)
    #
    # # Save the models
    # with open(output_path + ".sgd.pkl", 'w') as f:
    #     print "Saving SGD results to ", (output_path + ".sgd.pkl")
    #     cPickle.dump((standard_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with Gibbs
    gibbs_samples = gibbs_timestamps = None
    gibbs_samples, gibbs_timestamps = fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                                             output_path=output_path,
                                             standard_model=bfgs_model)

    # Fit a network Hawkes model with Batch VB
    # vb_models, vb_timestamps = fit_network_hawkes_vb(S, K, dt, dt_max,
    #                                          standard_model=standard_models[-1])
    #
    # with open(output_path + ".vb.pkl", 'w') as f:
    #     print "Saving VB results to ", (output_path + ".vb.pkl")
    #     cPickle.dump((vb_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with SVI
    svi_models, timestamps = fit_network_hawkes_svi(S, K, C, dt, dt_max,
                                                    output_path,
                                                    standard_model=bfgs_model)

    # Compute area under roc curve of inferred network
    auc_rocs, fprs, tprs = compute_auc_roc(network,
                               W_xcorr=W_xcorr,
                               bfgs_model=bfgs_model,
                               gibbs_samples=gibbs_samples,
                               svi_models=svi_models)
    print "AUC-ROC"
    pprint.pprint(auc_rocs)

    plot_roc_curves(fprs, tprs)

    # Compute area under the precision-recall curve of the inferred network
    auc_prcs, precs, recalls = compute_auc_prc(network,
                               W_xcorr=W_xcorr,
                               bfgs_model=bfgs_model,
                               gibbs_samples=gibbs_samples,
                               svi_models=svi_models)
    print "AUC-PRC"
    pprint.pprint(auc_prcs)

    plot_prc_curves(precs, recalls)
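
# A short sketch of the comparison behind compute_auc_roc, assuming `network`
# exposes a binary adjacency matrix A and that each method is scored by taking
# the magnitude of its inferred weight matrix W as a per-edge confidence.
# Illustrative only; the real helper may treat self-connections and posterior
# samples differently.
def auc_roc_sketch(A, W):
    from sklearn.metrics import roc_auc_score, roc_curve
    y_true = (A > 0).ravel().astype(int)       # true edges, flattened
    y_score = np.abs(W).ravel()                # inferred edge scores
    fpr, tpr, _ = roc_curve(y_true, y_score)
    return roc_auc_score(y_true, y_score), fpr, tpr
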