Example #1
File: ctm.py  Project: ariddell/pgmult
import gzip
import hashlib
import inspect
import os
import pickle
from functools import wraps

import numpy as np
import scipy.sparse

from pgmult.internals.utils import mkdir

cachedir = "cache"  # placeholder value; defined at module level in the original file


def cached(func):
    """Memoize ``func`` to disk, hashing array arguments by content."""
    mkdir(cachedir)
    cachebase = os.path.join(cachedir, func.__module__ + func.__name__)

    def replace_arrays(v):
        # Substitute (sparse) arrays with a digest of their contents so
        # that they can participate in a hashable argument set.
        if isinstance(v, np.ndarray):
            return hashlib.sha1(v).hexdigest()
        if isinstance(v, scipy.sparse.csr_matrix):
            out = hashlib.sha1(v.data)
            out.update(v.indices)
            out.update(v.indptr)
            return out.hexdigest()
        return v

    @wraps(func)
    def wrapped(*args, **kwargs):
        argdict = {k: replace_arrays(v) for k, v in
                   inspect.getcallargs(func, *args, **kwargs).items()}
        closurevals = [replace_arrays(cell.cell_contents)
                       for cell in func.__closure__ or []]

        # Derive a deterministic cache key. The original used the built-in
        # hash(), which is randomized per process for strings in Python 3
        # and would defeat an on-disk cache.
        key = hashlib.sha1(
            repr(sorted(argdict.items()) + closurevals).encode()).hexdigest()
        cachefile = cachebase + '.' + key

        if os.path.isfile(cachefile):
            with gzip.open(cachefile, 'rb') as infile:
                value = pickle.load(infile)
            return value
        else:
            value = func(*args, **kwargs)
            with gzip.open(cachefile, 'wb') as outfile:
                pickle.dump(value, outfile, protocol=-1)
            return value

    return wrapped
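
For reference, a minimal usage sketch of the decorator above; expensive_fit and its arguments are hypothetical illustrations, not part of the original file:

import numpy as np

@cached
def expensive_fit(X, n_iter=100):  # hypothetical example function
    # Stand-in for a costly computation over an array argument.
    return X.sum() * n_iter

X = np.arange(10.0)
print(expensive_fit(X))  # first call computes and writes the gzipped pickle
print(expensive_fit(X))  # second call loads the cached value from disk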
Example #2
        print("Singular vector ", d, " Singular value, ", S[d])
        print("Right: ")
        print(top_k(5, pi_vd))
        print("Left: ")
        print(top_k(5, pi_ud))


if __name__ == "__main__":
    run = 3
    results_dir = os.path.join("results", "ap_indiv", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents
    Xs, words = load()

    # N_docs = 1
    docs = slice(0,20)
    T_split = 10

    # Keep only documents longer than 5 * T_split time steps
    Xfilt = [X for X in Xs if X.shape[0] > 5 * T_split]
    Xtrain = [X[:-T_split] for X in Xfilt[docs]]
    Xtest = [X[-T_split:] for X in Xfilt[docs]]

    # Perform inference for a range of latent state dimensions and models
    N_samples = 200
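
The held-out split in these scripts keeps all but the last T_split time steps of each document for training and the final T_split steps for testing. A minimal sketch of that negative-index split on a toy (T x K) count matrix (the array here is illustrative only):

import numpy as np

T_split = 10
X = np.arange(25).reshape(25, 1)  # toy sequence with T = 25 time steps

Xtrain, Xtest = X[:-T_split], X[-T_split:]
assert Xtrain.shape[0] == 15 and Xtest.shape[0] == 10
assert np.array_equal(np.vstack([Xtrain, Xtest]), X)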
Example #3
                               legendargs={
                                   "columnspacing": 0.75,
                                   "handletextpad": 0.1
                               })
    fig.savefig(os.path.join(results_dir, "legend.pdf"))


if __name__ == "__main__":
    run = 5
    results_dir = os.path.join("results", "dna", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load data
    Xs, key = load_data()

    # Hold out the last T_split time steps of the first sequence for testing
    T_end = Xs[0].shape[0]
    T_split = 10
    Xtrain = [Xs[0][:T_end - T_split, :]]
    Xtest = Xs[0][T_end - T_split:T_end, :]
    K = len(key)

    # Perform inference for a range of latent state dimensions and models
    N_samples = 1000
    all_results = []
    # Ds = np.array([2, 3, 4, 5, 6])
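
The legendargs dict in this example is presumably forwarded to Matplotlib's legend() call by the plotting helper; a minimal sketch under that assumption (the plotted lines are placeholders):

import matplotlib.pyplot as plt

legendargs = {"columnspacing": 0.75, "handletextpad": 0.1}

fig, ax = plt.subplots()
ax.plot([0, 1], label="SBM-LDS")
ax.plot([1, 0], label="HMM")
ax.legend(ncol=2, **legendargs)  # both keys are standard legend() kwargs
fig.savefig("legend.pdf")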
Example #4
File: ap_lds.py  Project: fivejjs/pgmult
def fit_joint_corpus():
    run = 13
    results_dir = os.path.join("results", "ap", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents
    Xs, words = load()

    N_docs = 10
    T_split = 10

    # Filter out documents shorter than 2 * T_split
    Xfilt = [X for X in Xs if X.shape[0] > 2 * T_split]
    Xtrain = [X[:-T_split] for X in Xfilt[:N_docs]]
    Xtest = [X[-T_split:] for X in Xfilt[:N_docs]]

    # Perform inference for a range of latent state dimensions and models
    N_samples = 500
    all_results = []
    Ds = np.array([10])
    models = ["SBM-LDS", "HMM", "Raw LDS", "LNM-LDS", "SBM-LDS (pMCMC)"]
    methods = [fit_lds_model, fit_hmm, fit_gaussian_lds_model, fit_ln_lds_model, fit_lds_model_with_pmcmc]

    for D in Ds:
        D_results = []
        for model, method in zip(models, methods):
            results_file = os.path.join(results_dir, "results_%s_D%d.pkl.gz" % (model, D))
            if os.path.exists(results_file):
                print("Loading from: ", results_file)
                with gzip.open(results_file, "r") as f:
                    D_model_results = pickle.load(f)
            else:
                print("Fitting ", model, " for D=",D)
                D_model_results = method(Xtrain, Xtest, D, N_samples)

                with gzip.open(results_file, "w") as f:
                    print("Saving to: ", results_file)
                    pickle.dump(D_model_results, f, protocol=-1)

            D_results.append(D_model_results)
        all_results.append(D_results)

    # Plot log likelihoods for the results using one D
    res_index = 0
    plot_log_likelihood(all_results[res_index],
                        models,
                        run,
                        outname="train_ll_vs_time_D%d.pdf" % Ds[res_index])

    plot_pred_log_likelihood(all_results[res_index],
                        models,
                        run,
                        outname="pred_ll_vs_time_D%d.pdf" % Ds[res_index])

    # Make a bar chart of all the results
    plot_pred_ll_vs_D(all_results, Ds, Xtrain, Xtest, run)
    plt.show()
Example #5
File: ap_lds.py  Project: yinsenm/pgmult
def fit_joint_corpus():
    run = 13
    results_dir = os.path.join("results", "ap", "run%03d" % run)

    # Make sure the results directory exists
    from pgmult.internals.utils import mkdir
    if not os.path.exists(results_dir):
        print("Making results directory: ", results_dir)
        mkdir(results_dir)

    # Load the AP news documents
    Xs, words = load()

    N_docs = 10
    T_split = 10

    # Filter out documents shorter than 2 * T_split
    Xfilt = [X for X in Xs if X.shape[0] > 2 * T_split]
    Xtrain = [X[:-T_split] for X in Xfilt[:N_docs]]
    Xtest = [X[-T_split:] for X in Xfilt[:N_docs]]

    # Perform inference for a range of latent state dimensions and models
    N_samples = 500
    all_results = []
    Ds = np.array([10])
    models = ["SBM-LDS", "HMM", "Raw LDS", "LNM-LDS", "SBM-LDS (pMCMC)"]
    methods = [
        fit_lds_model, fit_hmm, fit_gaussian_lds_model, fit_ln_lds_model,
        fit_lds_model_with_pmcmc
    ]

    for D in Ds:
        D_results = []
        for model, method in zip(models, methods):
            results_file = os.path.join(results_dir,
                                        "results_%s_D%d.pkl.gz" % (model, D))
            if os.path.exists(results_file):
                print("Loading from: ", results_file)
                with gzip.open(results_file, "r") as f:
                    D_model_results = pickle.load(f)
            else:
                print("Fitting ", model, " for D=", D)
                D_model_results = method(Xtrain, Xtest, D, N_samples)

                with gzip.open(results_file, "w") as f:
                    print("Saving to: ", results_file)
                    pickle.dump(D_model_results, f, protocol=-1)

            D_results.append(D_model_results)
        all_results.append(D_results)

    # Plot log likelihoods for the results using one D
    res_index = 0
    plot_log_likelihood(all_results[res_index],
                        models,
                        run,
                        outname="train_ll_vs_time_D%d.pdf" % Ds[res_index])

    plot_pred_log_likelihood(all_results[res_index],
                             models,
                             run,
                             outname="pred_ll_vs_time_D%d.pdf" % Ds[res_index])

    # Make a bar chart of all the results
    plot_pred_ll_vs_D(all_results, Ds, Xtrain, Xtest, run)
    plt.show()
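
Examples #4 and #5 repeat the same compute-or-load pattern around every model fit. A minimal sketch of that pattern factored into a standalone helper; the name load_or_fit is hypothetical, not part of pgmult:

import gzip
import os
import pickle


def load_or_fit(results_file, fit_fn, *args):
    # Return cached results if the file exists; otherwise fit and cache.
    if os.path.exists(results_file):
        print("Loading from: ", results_file)
        with gzip.open(results_file, "rb") as f:
            return pickle.load(f)
    results = fit_fn(*args)
    print("Saving to: ", results_file)
    with gzip.open(results_file, "wb") as f:
        pickle.dump(results, f, protocol=-1)
    return results

With this helper, the body of the inner loop reduces to a single call:

D_model_results = load_or_fit(results_file, method, Xtrain, Xtest, D, N_samples)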