Пример #1
0
def test_tags():
    """Check that two differently-specified documents store equal tags.

    Document ``#test1`` has no ``tags`` field and carries tag markup only
    inside its ``text``; document ``#test2`` supplies (name, value) pairs
    through the ``tags`` field. The tags the database reports for the two
    documents must be equal once sorted.
    """
    model = LuminosoModel.make_empty(TEMPDIR + '/test_tags')

    inline_doc = {
        'url': '#test1',
        'name': 'test one',
        'text': '#foo +bar -baz #quux=4',
    }
    model.add_document(inline_doc)

    explicit_doc = {
        'url': '#test2',
        'name': 'test two',
        'text': 'foo',
        'tags': [('foo', None), ('bar', True), ('baz', False), ('quux', 4)],
    }
    model.add_document(explicit_doc)

    # Sort both sides so the comparison does not depend on retrieval order.
    inline_tags = sorted(model.database.get_document_tags('#test1'))
    explicit_tags = sorted(model.database.get_document_tags('#test2'))
    assert_equal(inline_tags, explicit_tags)
Пример #2
0
def test_documents():
    """Add two documents and check document and term counts as they grow."""
    model = LuminosoModel.make_empty(TEMPDIR + '/test_documents')

    first_doc = {'url': '#test1', 'name': 'test one', 'text': 'two three'}
    model.add_document(first_doc)
    # With only the two-word document stored, the '*' count must be 3.
    assert_equal(model.database.count_term('*'), 3)
    assert_equal(model.database.count_documents(), 1)

    second_doc = {'url': '#test2', 'name': 'test two',
                  'text': 'two three four'}
    model.add_document(second_doc)
    assert_equal(model.database.count_documents(), 2)

    # Both documents must be retrievable by their URL.
    for url in ('#test1', '#test2'):
        assert model.database.get_document(url)

    # Term counts expected once both documents are stored.
    expected_counts = [
        ('two', 2),
        ('two three', 2),
        ('three four', 1),
        ('*', 8),
    ]
    for term, expected in expected_counts:
        assert_equal(model.database.count_term(term), expected)
Пример #3
0
def test_small():
    """Exercise ``index_term`` on a model configured with three concepts."""
    settings = {
        'num_concepts': 3,
        'num_axes': 2,
        'iteration': 0,
        'reader': 'simplenlp.en',
    }
    model = LuminosoModel.make_empty(TEMPDIR + '/small', settings)
    assert model.config['num_concepts'] == 3

    # Each row: (term, second argument to index_term, index it must return).
    # The calls are order-dependent, so they run exactly in this sequence.
    index_steps = [
        ('a', 2, 0),
        ('b', 1, 1),
        ('c', 3, 2),
        ('d', 4, 1),
        ('e', 0, 0),
        ('e', 0, 0),
    ]
    for term, value, expected_index in index_steps:
        assert model.index_term(term, value) == expected_index

    assert model.priority.items == ['e', 'd', 'c']
Пример #4
0
def test_read_from_url():
    """Learn from the TestDocuments path and inspect one learned document.

    After learning with ``study=u'test'``, the document ``happytest.txt``
    must report a single ``(u'study', u'test')`` tag and the expected
    (term, value) pairs.
    """
    settings = {
        'num_concepts': 5,
        'num_axes': 2,
        'iteration': 0,
        'reader': 'simplenlp.en',
    }
    model = LuminosoModel.make_empty(TEMPDIR + '/testdocs', settings)

    corpus_path = get_project_filename('test/TestDocuments')
    model.learn_from_url(corpus_path, study=u'test')

    # Project-relative name of the document inspected below.
    happy_doc = 'test/TestDocuments/happytest.txt'

    expected_tags = [(u'study', u'test')]
    tags = model.database.get_document_tags(get_project_filename(happy_doc))
    assert tags == expected_tags

    expected_terms = [('happy', 1.0), ('sad', -0.5)]
    terms = model.get_document_terms(get_project_filename(happy_doc))
    assert terms == expected_terms
Пример #5
0
    err1 = english.learn_assoc(1, 'foo', 'bar')
    assoc1 = english.assoc.entry_named('foo', 'bar')
    err2 = english.learn_assoc(1, 'foo', 'bar')
    assoc2 = english.assoc.entry_named('foo', 'bar')
    # after seeing the same example twice, error should decrease
    assert err2 < err1
    # after seeing the same example twice, association should increase
    assert assoc2 > assoc1

if __name__ == '__main__':
    # Script entry point: profile ten rounds of learning from "TestDocuments"
    # on a freshly created model and print the profile.
    import cProfile
    import simplenlp
    # Call the English NLP tools once before profiling. Presumably this
    # triggers any lazy loading so it does not show up in the profile --
    # TODO confirm.
    en = simplenlp.get_nl('en')
    en.lemma_split('test')
    en.is_stopword('test')
    setup_module()
    # Everything after setup_module() is guarded so that teardown_module()
    # still runs when model creation or the profiled statement raises.
    try:
        model = LuminosoModel.make_empty(
            TEMPDIR + '/testdocs',
            {
                'num_concepts': 5,
                'num_axes': 2,
                'iteration': 0,
                'reader': 'simplenlp.en'
            }
        )
        # cProfile.run executes the statement string in the __main__
        # namespace, so `model` has to remain a module-level name here.
        # sort=2 is the legacy integer sort key for cumulative time.
        # NOTE(review): "TestDocuments" is a path relative to the working
        # directory -- confirm that is the intended location.
        cProfile.run('for i in xrange(10): model.learn_from_url("TestDocuments")', sort=2)
        # Alternative profiling target, currently disabled:
        #model = LuminosoModel('../models/PLDBStudy_test3')
        #cProfile.run("model.learn_from_url('../models/PLDBStudy/Documents')", sort=2)
    finally:
        teardown_module()