Пример #1
0
def demo_LdaCgsSeq(doc_len=500, V=100000, n_docs=100, K=20,
                   n_iterations=5, corpus_seed=None, model_seed=None):
    """Train an ``LdaCgsSeq`` model on a random corpus and return it.

    The run settings are printed first, then a corpus is generated with
    ``random_corpus`` and the model is trained on its ``'document'``
    context type.

    Parameters
    ----------
    doc_len : reported as "Words per document"; passed to
        ``random_corpus`` as ``doc_len`` and ``doc_len + 1``.
    V : reported as "Words in vocabulary".
    n_docs : reported as "Documents in corpus"; the corpus is generated
        with ``n_docs * doc_len`` as its first argument.
    K : number of topics handed to the model.
    n_iterations : number of training iterations.
    corpus_seed : forwarded as ``seed`` to ``random_corpus``.
    model_seed : forwarded as ``seed`` to ``LdaCgsSeq``.

    Returns
    -------
    The trained ``LdaCgsSeq`` instance.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Report the configuration, one "label value" line per setting.
    settings = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
    )
    for label, value in settings:
        print(label, value)

    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1,
                           seed=corpus_seed)

    model = LdaCgsSeq(corpus, 'document', K=K, seed=model_seed)
    model.train(n_iterations=n_iterations, verbose=2)
    return model
Пример #2
0
    def setUp(self):
        """Create and train the ``BeagleEnvironment`` fixture kept on ``self.m``."""
        from vsm.extensions.corpusbuilders import random_corpus

        # Positional arguments for random_corpus; presumably corpus size,
        # vocabulary size and min/max document length -- confirm against
        # the random_corpus signature.
        corpus_args = (1000, 100, 0, 20)
        corpus = random_corpus(*corpus_args)

        self.m = BeagleEnvironment(corpus, n_cols=100)
        self.m.train()
Пример #3
0
    def setUp(self):
        """Prepare the test fixture.

        Generates a random corpus, wraps it in a ``BeagleEnvironment``
        with ``n_cols=100`` stored as ``self.m``, and trains that model.
        """
        from vsm.extensions.corpusbuilders import random_corpus

        random_docs = random_corpus(1000, 100, 0, 20)

        env_model = BeagleEnvironment(random_docs, n_cols=100)
        self.m = env_model
        self.m.train()
Пример #4
0
def demo_LdaCgsSeq(doc_len=500, V=100000, n_docs=100,
                   K=20, n_iterations=5,
                   corpus_seed=None, model_seed=None):
    """Train an ``LdaCgsSeq`` model on a random corpus and return it.

    The run settings are printed first, then a corpus is generated with
    ``random_corpus`` and the model is trained on its ``'document'``
    context type.

    Parameters
    ----------
    doc_len : reported as "Words per document"; passed to
        ``random_corpus`` as ``doc_len`` and ``doc_len + 1``.
    V : reported as "Words in vocabulary".
    n_docs : reported as "Documents in corpus"; the corpus is generated
        with ``n_docs * doc_len`` as its first argument.
    K : number of topics handed to the model.
    n_iterations : number of training iterations.
    corpus_seed : forwarded as ``seed`` to ``random_corpus``.
    model_seed : forwarded as ``seed`` to ``LdaCgsSeq``.

    Returns
    -------
    The trained ``LdaCgsSeq`` instance.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Each print takes a single pre-formatted string so the call is valid
    # (and prints the same text) on both Python 2 and Python 3; the old
    # ``print 'label', value`` statement form is a SyntaxError on Python 3.
    print('Words per document: %s' % (doc_len,))
    print('Words in vocabulary: %s' % (V,))
    print('Documents in corpus: %s' % (n_docs,))
    print('Number of topics: %s' % (K,))
    print('Iterations: %s' % (n_iterations,))

    c = random_corpus(n_docs*doc_len, V, doc_len, doc_len+1, seed=corpus_seed)
    m = LdaCgsSeq(c, 'document', K=K, seed=model_seed)
    m.train(n_iterations=n_iterations, verbose=2)

    return m
Пример #5
0
def demo_LdaCgsMulti(doc_len=500, V=100000, n_docs=100,
                     K=20, n_iterations=5, n_proc=2,
                     corpus_seed=None, model_seeds=None):
    """Train an ``LdaCgsMulti`` model on a random corpus and return it.

    The run settings are printed first, then a corpus is generated with
    ``random_corpus`` and the model is trained on its ``'document'``
    context type.

    Parameters
    ----------
    doc_len : reported as "Words per document"; passed to
        ``random_corpus`` as ``doc_len`` and ``doc_len + 1``.
    V : reported as "Words in vocabulary".
    n_docs : reported as "Documents in corpus"; the corpus is generated
        with ``n_docs * doc_len`` as its first argument.
    K : number of topics handed to the model.
    n_iterations : number of training iterations.
    n_proc : reported as "Number of processors" and forwarded to the model.
    corpus_seed : forwarded as ``seed`` to ``random_corpus``.
    model_seeds : forwarded as ``seeds`` to ``LdaCgsMulti``.

    Returns
    -------
    The trained ``LdaCgsMulti`` instance.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Report the configuration, one "label value" line per setting.
    report = [
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
        ('Number of processors:', n_proc),
    ]
    for caption, setting in report:
        print(caption, setting)

    corpus = random_corpus(n_docs * doc_len, V, doc_len, doc_len + 1,
                           seed=corpus_seed)
    model = LdaCgsMulti(corpus, 'document',
                        K=K, n_proc=n_proc, seeds=model_seeds)
    model.train(n_iterations=n_iterations, verbose=2)

    return model
Пример #6
0
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100,
                K=20, n_iterations=5, n_threads=1):
    """Train an ``LdaCgs`` model on a random corpus and return it.

    The run settings are printed first; progress messages are printed
    after the corpus is generated and before training starts.

    Parameters
    ----------
    doc_len : reported as "Words per document"; passed to
        ``random_corpus`` as ``doc_len`` and ``doc_len + 1``.
    V : reported as "Words in vocabulary".
    n_docs : reported as "Documents in corpus"; the corpus is generated
        with ``n_docs * doc_len`` as its first argument.
    K : number of topics handed to the model.
    n_iterations : number of training iterations.
    n_threads : forwarded to ``train``.

    Returns
    -------
    The trained ``LdaCgs`` instance.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Report the configuration, one "label value" line per setting.
    for label, value in (('Words per document:', doc_len),
                         ('Words in vocabulary:', V),
                         ('Documents in corpus:', n_docs),
                         ('Number of topics:', K),
                         ('Iterations:', n_iterations)):
        print(label, value)

    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1)
    print('Random corpus generated. Initializing model.')

    model = LdaCgs(corpus, 'document', K=K)
    print('Begin estimation.')

    model.train(n_iterations=n_iterations, n_threads=n_threads)
    return model
Пример #7
0
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100,
                K=20, n_iterations=5, n_threads=1):
    """Train an ``LdaCgs`` model on a random corpus and return it.

    The run settings are printed first; progress messages are printed
    after the corpus is generated and before training starts.

    Parameters
    ----------
    doc_len : reported as "Words per document"; passed to
        ``random_corpus`` as ``doc_len`` and ``doc_len + 1``.
    V : reported as "Words in vocabulary".
    n_docs : reported as "Documents in corpus"; the corpus is generated
        with ``n_docs * doc_len`` as its first argument.
    K : number of topics handed to the model.
    n_iterations : number of training iterations.
    n_threads : forwarded to ``train``.

    Returns
    -------
    The trained ``LdaCgs`` instance.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Each print takes a single pre-formatted string so the call is valid
    # (and prints the same text) on both Python 2 and Python 3; the old
    # ``print 'label', value`` statement form is a SyntaxError on Python 3.
    print('Words per document: %s' % (doc_len,))
    print('Words in vocabulary: %s' % (V,))
    print('Documents in corpus: %s' % (n_docs,))
    print('Number of topics: %s' % (K,))
    print('Iterations: %s' % (n_iterations,))

    c = random_corpus(n_docs*doc_len, V, doc_len, doc_len+1)

    print('Random corpus generated. Initializing model.')
    m = LdaCgs(c, 'document', K=K)

    print('Begin estimation.')
    m.train(n_iterations=n_iterations, n_threads=n_threads)

    return m