示例#1
0
def build_models(corpus, corpus_filename, model_path, context_type, krange,
                 n_iterations=200, n_proc=1, seed=None, dry_run=False):
    """Train one LDA model per topic count and return the filename template.

    Model files are named
    ``<corpus>-LDA-K<k>-<context_type>[-<seed>]-<n_iterations>.npz`` and are
    written into ``model_path``.

    Args:
        corpus: Corpus object handed unchanged to ``vsm.model.lda.LDA``.
        corpus_filename: Path of the corpus file; only its base name, minus
            a trailing ``.npz`` extension, is used to name the models.
        model_path: Directory the model files are saved in.
        context_type: Context type passed to ``LDA`` and embedded in the
            model filename.
        krange: Iterable of topic counts; one model is trained per entry.
        n_iterations: Number of training iterations (also part of the name).
        n_proc: Number of processes; values above 1 enable multiprocessing.
        seed: Optional integer seed.  With one process it is passed as-is;
            otherwise process ``p`` receives ``seed + p``.  Non-integer
            values (including ``None`` and booleans) mean "unseeded".
        dry_run: When true, skip training and saving and only compute the
            filename template.

    Returns:
        The model path template containing a ``{0}`` placeholder for the
        topic count; call ``.format(k)`` to get a concrete filename.
    """
    # Strip only a trailing extension: str.replace would also delete any
    # '.npz' that happens to occur in the middle of the corpus name.
    corpus_name = os.path.basename(corpus_filename)
    if corpus_name.endswith('.npz'):
        corpus_name = corpus_name[:-len('.npz')]

    # bool is a subclass of int but is never a meaningful seed.
    has_seed = isinstance(seed, int) and not isinstance(seed, bool)
    if not has_seed:
        seeds = None
    elif n_proc == 1:
        seeds = seed
    else:
        # One distinct, reproducible seed per worker process.
        seeds = [seed + p for p in range(n_proc)]

    # The seed, when given, sits right before the iteration count.
    seed_tag = "-%s" % seed if has_seed else ""
    basefilename = "%s-LDA-K{0}-%s%s-%d.npz" % (
        corpus_name, context_type, seed_tag, n_iterations)
    basefilename = os.path.join(model_path, basefilename)

    if not dry_run:
        # Imported lazily so a dry run does not need the vsm package loaded.
        from vsm.model.lda import LDA
        for k in krange:
            print("Training model for k={0} Topics with {1} Processes".format(k, n_proc))
            m = LDA(corpus, context_type, K=k, multiprocessing=(n_proc > 1),
                    seed_or_seeds=seeds, n_proc=n_proc)
            m.train(n_iterations=n_iterations)
            m.save(basefilename.format(k))
            print(" ")

    return basefilename
示例#2
0
def build_models(corpus, corpus_filename, model_path, context_type, krange,
                 n_iterations=200, n_proc=1, seed=None, dry_run=False):
    """Train an LDA model for every K in ``krange``; return the name template.

    Each model is saved under ``model_path`` as
    ``<corpus>-LDA-K<k>-<context_type>[-<seed>]-<n_iterations>.npz``.

    Args:
        corpus: Corpus object forwarded to ``vsm.model.lda.LDA``.
        corpus_filename: Corpus file path; its base name without a trailing
            ``.npz`` is the prefix of every model filename.
        model_path: Output directory for the saved models.
        context_type: Context type given to ``LDA``; also part of the name.
        krange: Iterable of topic counts to train.
        n_iterations: Training iterations per model (encoded in the name).
        n_proc: Process count; more than one turns multiprocessing on.
        seed: Optional integer seed.  A single process gets ``seed``
            directly, several processes get ``seed + p`` each.  Anything
            that is not an integer (``None``, booleans, ...) is ignored.
        dry_run: If true, nothing is trained or written; only the filename
            template is computed.

    Returns:
        Path template with a ``{0}`` placeholder for the topic count.
    """
    extension = '.npz'
    stem = os.path.basename(corpus_filename)
    # Only drop the extension at the end; str.replace would also strip an
    # '.npz' embedded elsewhere in the corpus name.
    if stem.endswith(extension):
        stem = stem[:-len(extension)]

    # Booleans are ints in Python but are not accepted as seeds.
    seeded = isinstance(seed, int) and not isinstance(seed, bool)

    if seeded and n_proc == 1:
        seeds = seed
    elif seeded:
        # Offset the base seed so every worker process is seeded differently.
        seeds = [seed + p for p in range(n_proc)]
    else:
        seeds = None

    # An accepted seed is recorded just before the iteration count.
    seed_part = "-%s" % seed if seeded else ""
    basefilename = os.path.join(
        model_path,
        "%s-LDA-K{0}-%s%s-%d.npz" % (stem, context_type, seed_part,
                                     n_iterations))

    if dry_run:
        return basefilename

    # Deferred import: a dry run never has to load the vsm package.
    from vsm.model.lda import LDA
    for k in krange:
        print("Training model for k={0} Topics with {1} Processes".format(k, n_proc))
        m = LDA(corpus, context_type, K=k, multiprocessing=(n_proc > 1),
                seed_or_seeds=seeds, n_proc=n_proc)
        m.train(n_iterations=n_iterations)
        m.save(basefilename.format(k))
        print(" ")

    return basefilename
示例#3
0
def build_models(corpus, corpus_filename, model_path, context_type, krange, n_iterations=200, n_proc=1, seed=None):
    """Train one LDA model per topic count and return the filename template.

    Models are saved in ``model_path`` as
    ``<corpus>-LDA-K<k>-<context_type>[-<seed>]-<n_iterations>.npz``.

    Args:
        corpus: Corpus object passed unchanged to ``LDA``.
        corpus_filename: Corpus file path; its base name without a trailing
            ``.npz`` prefixes every model filename.
        model_path: Directory the models are written to.
        context_type: Context type passed to ``LDA`` and used in the name.
        krange: Iterable of topic counts; one model is trained for each.
        n_iterations: Training iterations per model (part of the name).
        n_proc: Process count; more than one enables multiprocessing.
        seed: Optional integer seed.  One process receives ``seed`` itself,
            several receive ``seed + p`` each.  Non-integers (``None``,
            booleans, ...) leave the models unseeded.

    Returns:
        Path template with a ``{0}`` placeholder for the topic count.
    """
    # Remove just the trailing extension; str.replace would also strip an
    # ".npz" appearing in the middle of the corpus name.
    corpus_name = os.path.basename(corpus_filename)
    if corpus_name.endswith(".npz"):
        corpus_name = corpus_name[:-len(".npz")]

    # bool is an int subclass but is not treated as a seed.
    has_seed = isinstance(seed, int) and not isinstance(seed, bool)
    if not has_seed:
        seeds = None
    elif n_proc == 1:
        seeds = seed
    else:
        # Give every worker process its own reproducible seed.
        seeds = [seed + p for p in range(n_proc)]

    # When a seed is used it is recorded right before the iteration count.
    seed_tag = "-%s" % seed if has_seed else ""
    basefilename = "%s-LDA-K{0}-%s%s-%d.npz" % (corpus_name, context_type, seed_tag, n_iterations)
    basefilename = os.path.join(model_path, basefilename)

    for k in krange:
        # Single-argument call form prints identically on Python 2 and 3.
        print("Training model for k={0} Topics with {1} Processes".format(k, n_proc))
        m = LDA(corpus, context_type, K=k, multiprocessing=(n_proc > 1), seed_or_seeds=seeds)
        m.train(n_iterations=n_iterations)
        m.save(basefilename.format(k))

    return basefilename