def test_tags():
    # Tags written inline in a document's text should be stored the same way
    # as tags supplied explicitly through the ``tags`` field.
    model = LuminosoModel.make_empty(TEMPDIR+'/test_tags')

    # First document: tags appear only as markup inside the text.
    inline_doc = {
        'url': '#test1',
        'name': 'test one',
        'text': '#foo +bar -baz #quux=4',
    }
    model.add_document(inline_doc)

    # Second document: plain text plus an explicit (key, value) tag list.
    explicit_doc = {
        'url': '#test2',
        'name': 'test two',
        'text': 'foo',
        'tags': [('foo', None), ('bar', True), ('baz', False), ('quux', 4)],
    }
    model.add_document(explicit_doc)

    # Compare order-insensitively: both documents must report equal tags.
    inline_tags = sorted(model.database.get_document_tags('#test1'))
    explicit_tags = sorted(model.database.get_document_tags('#test2'))
    assert_equal(inline_tags, explicit_tags)
def test_documents():
    # Adding documents must update both the document count and the term
    # counts reported by the model's database.
    model = LuminosoModel.make_empty(TEMPDIR+'/test_documents')

    # (An earlier, now disabled, variant of this document used the text
    # 'one two three'.)
    first_doc = {'url': '#test1', 'name': 'test one', 'text': 'two three'}
    model.add_document(first_doc)
    assert_equal(model.database.count_term('*'), 3)
    assert_equal(model.database.count_documents(), 1)

    second_doc = {'url': '#test2', 'name': 'test two',
                  'text': 'two three four'}
    model.add_document(second_doc)
    assert_equal(model.database.count_documents(), 2)

    # Both documents must be retrievable by URL.
    for url in ('#test1', '#test2'):
        assert model.database.get_document(url)

    # Expected counts once both documents are stored; '*' is presumably the
    # total over all terms -- confirm against count_term.
    expected_counts = (
        ('two', 2),
        ('two three', 2),
        ('three four', 1),
        ('*', 8),
    )
    for term, expected in expected_counts:
        assert_equal(model.database.count_term(term), expected)
def test_small():
    # Exercise term indexing on a model limited to three concepts.
    settings = {
        'num_concepts': 3,
        'num_axes': 2,
        'iteration': 0,
        'reader': 'simplenlp.en'
    }
    model = LuminosoModel.make_empty(TEMPDIR + '/small', settings)
    assert model.config['num_concepts'] == 3

    # (term, second argument to index_term, expected index), in call order.
    # The first three terms get fresh indices; 'd' and 'e' are expected to
    # land on indices already handed out -- presumably reusing the slot of an
    # evicted term; confirm against index_term. 'e' is indexed twice on
    # purpose and must return the same index both times.
    expectations = [
        ('a', 2, 0),
        ('b', 1, 1),
        ('c', 3, 2),
        ('d', 4, 1),
        ('e', 0, 0),
        ('e', 0, 0),
    ]
    for term, value, expected_index in expectations:
        assert model.index_term(term, value) == expected_index

    assert model.priority.items == ['e', 'd', 'c']
def test_read_from_url():
    # Learn from the bundled test documents, then check the tags and terms
    # recorded for happytest.txt.
    settings = {
        'num_concepts': 5,
        'num_axes': 2,
        'iteration': 0,
        'reader': 'simplenlp.en'
    }
    model = LuminosoModel.make_empty(TEMPDIR + '/testdocs', settings)

    corpus_location = get_project_filename('test/TestDocuments')
    model.learn_from_url(corpus_location, study=u'test')

    # The keyword argument given to learn_from_url is expected to come back
    # as the document's only tag.
    tagged_path = get_project_filename('test/TestDocuments/happytest.txt')
    doc_tags = model.database.get_document_tags(tagged_path)
    assert doc_tags == [(u'study', u'test')]

    terms_path = get_project_filename('test/TestDocuments/happytest.txt')
    doc_terms = model.get_document_terms(terms_path)
    assert doc_terms == [('happy', 1.0), ('sad', -0.5)]
# NOTE(review): the statements down to ``assert assoc2 > assoc1`` look like the
# tail of a test whose ``def`` line (and the definition of ``english``) is not
# visible in this part of the file -- confirm, and restore them under that
# function's indentation.
# Learn the same ('foo', 'bar') association twice, recording the value returned
# by learn_assoc and the stored association entry after each pass.
err1 = english.learn_assoc(1, 'foo', 'bar')
assoc1 = english.assoc.entry_named('foo', 'bar')
err2 = english.learn_assoc(1, 'foo', 'bar')
assoc2 = english.assoc.entry_named('foo', 'bar')

# after seeing the same example twice, error should decrease
assert err2 < err1

# after seeing the same example twice, association should increase
assert assoc2 > assoc1

if __name__ == '__main__':
    # Script entry point: profile repeated learn_from_url calls with cProfile
    # instead of running the tests.
    import cProfile
    import simplenlp

    # The results of these calls are discarded; presumably they warm up
    # simplenlp so its start-up cost is not attributed to the profiled code
    # -- TODO confirm.
    en = simplenlp.get_nl('en')
    en.lemma_split('test')
    en.is_stopword('test')

    # setup_module/teardown_module are defined elsewhere in this file;
    # presumably they create and remove TEMPDIR -- confirm.
    setup_module()
    model = LuminosoModel.make_empty(
        TEMPDIR + '/testdocs',
        {
            'num_concepts': 5,
            'num_axes': 2,
            'iteration': 0,
            'reader': 'simplenlp.en'
        }
    )

    # Profile ten learn_from_url passes over "TestDocuments". The statement is
    # a string, so ``xrange`` (Python 2 only) is resolved only when it runs.
    # NOTE(review): sort=2 is a legacy integer sort key; pstats appears to map
    # it to cumulative time -- confirm.
    cProfile.run('for i in xrange(10): model.learn_from_url("TestDocuments")', sort=2)

    # Disabled alternative: profile learning from the PLDBStudy documents.
    #model = LuminosoModel('../models/PLDBStudy_test3')
    #cProfile.run("model.learn_from_url('../models/PLDBStudy/Documents')", sort=2)
    teardown_module()