def build_models(corpus, corpus_filename, model_path, context_type, krange,
                 n_iterations=200, n_proc=1, seed=None, dry_run=False):
    """Train and save one LDA model for every topic count in ``krange``.

    The output filename template is derived from the corpus filename:
    ``<model_path>/<corpus>-LDA-K{0}-<context_type>[-<seed>]-<n_iterations>.npz``
    where ``{0}`` is later filled in with the number of topics.

    :param corpus: corpus object handed to ``LDA`` unchanged.
    :param corpus_filename: path of the corpus file; only its basename
        (with ``.npz`` removed) is used to build the model filename.
    :param model_path: directory the model files are written to.
    :param context_type: context type passed to ``LDA`` and embedded in
        the filename.
    :param krange: iterable of topic counts; one model is trained per item.
    :param n_iterations: number of training iterations per model.
    :param n_proc: number of processes; multiprocessing is enabled in
        ``LDA`` when this is greater than 1.
    :param seed: optional integer seed. When given, it is added to the
        filename and passed to ``LDA`` either as-is (``n_proc == 1``) or as
        the list ``[seed, seed + 1, ..., seed + n_proc - 1]``. Any
        non-integer value (including ``None`` and booleans) means "no seed".
    :param dry_run: when true, nothing is trained or saved; only the
        filename template is computed.
    :returns: the filename template, still containing the ``{0}``
        placeholder for the number of topics.
    """
    basefilename = os.path.basename(corpus_filename).replace('.npz', '')
    basefilename += "-LDA-K%s-%s-%d.npz" % ('{0}', context_type, n_iterations)
    basefilename = os.path.join(model_path, basefilename)

    # isinstance() instead of an exact type comparison so int subclasses are
    # honoured; bool is excluded explicitly because it is an int subclass and
    # was never treated as a seed.
    if isinstance(seed, int) and not isinstance(seed, bool):
        seeds = seed if n_proc == 1 else [seed + p for p in range(n_proc)]
        # Slot the seed in just before the trailing "<n_iterations>.npz" part.
        head, tail = basefilename.rsplit('-', 1)
        basefilename = '-'.join((head, str(seed), tail))
    else:
        seeds = None

    if not dry_run:
        # Imported lazily so that a dry run does not need the vsm package.
        from vsm.model.lda import LDA
        for k in krange:
            print("Training model for k={0} Topics with {1} Processes".format(k, n_proc))
            m = LDA(corpus, context_type, K=k,
                    multiprocessing=(n_proc > 1),
                    seed_or_seeds=seeds, n_proc=n_proc)
            m.train(n_iterations=n_iterations)
            m.save(basefilename.format(k))
            print(" ")

    return basefilename
def build_models(corpus, corpus_filename, model_path, context_type, krange,
                 n_iterations=200, n_proc=1, seed=None):
    """Train and save one LDA model for every topic count in ``krange``.

    The output filename template is derived from the corpus filename:
    ``<model_path>/<corpus>-LDA-K{0}-<context_type>[-<seed>]-<n_iterations>.npz``
    where ``{0}`` is filled in with the number of topics when saving.

    :param corpus: corpus object handed to ``LDA`` unchanged.
    :param corpus_filename: path of the corpus file; only its basename
        (with ``.npz`` removed) is used to build the model filename.
    :param model_path: directory the model files are written to.
    :param context_type: context type passed to ``LDA`` and embedded in
        the filename.
    :param krange: iterable of topic counts; one model is trained per item.
    :param n_iterations: number of training iterations per model.
    :param n_proc: number of processes; multiprocessing is enabled in
        ``LDA`` when this is greater than 1.
    :param seed: optional integer seed. When given, it is added to the
        filename and passed to ``LDA`` either as-is (``n_proc == 1``) or as
        the list ``[seed, seed + 1, ..., seed + n_proc - 1]``. Any
        non-integer value (including ``None`` and booleans) means "no seed".
    :returns: the filename template, still containing the ``{0}``
        placeholder for the number of topics.
    """
    basefilename = os.path.basename(corpus_filename).replace(".npz", "")
    basefilename += "-LDA-K%s-%s-%d.npz" % ("{0}", context_type, n_iterations)
    basefilename = os.path.join(model_path, basefilename)

    # isinstance() instead of an exact type comparison so int subclasses are
    # honoured; bool is excluded explicitly because it is an int subclass and
    # was never treated as a seed.
    if isinstance(seed, int) and not isinstance(seed, bool):
        seeds = seed if n_proc == 1 else [seed + p for p in range(n_proc)]
        # Slot the seed in just before the trailing "<n_iterations>.npz" part.
        head, tail = basefilename.rsplit("-", 1)
        basefilename = "-".join((head, str(seed), tail))
    else:
        seeds = None

    for k in krange:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Training model for k={0} Topics with {1} Processes".format(k, n_proc))
        # NOTE(review): n_proc is not forwarded to LDA here, so the number of
        # seeds may not match the number of worker processes LDA starts --
        # confirm against the LDA signature in scope before changing.
        m = LDA(corpus, context_type, K=k,
                multiprocessing=(n_proc > 1),
                seed_or_seeds=seeds)
        m.train(n_iterations=n_iterations)
        m.save(basefilename.format(k))

    return basefilename