Пример #1
0
def index():
	"""Return the response generated by a ``GroupInBoxHandler``.

	The handler is constructed while the ``Corpus_DB``, ``BOW_DB`` and
	``LDA_DB`` contexts are open. After they exit, ``response.delimiters``
	is set to ``[[ ]]`` and the handler generates the response.
	"""
	with Corpus_DB() as corpus_db, BOW_DB() as bow_db, LDA_DB() as lda_db:
		page_handler = GroupInBoxHandler(request, response, corpus_db, bow_db, lda_db)
	# Set only after the handler has been built, as in the original ordering.
	response.delimiters = ('[[', ']]')
	return page_handler.GenerateResponse()
Пример #2
0
def TermFrequencyModel():
	"""Return the term-frequency model serialized as a JSON string.

	The model is obtained from ``TermTopicMatrix1.GetTermFrequencyModel()``
	while the ``BOW_DB`` and ``LDA_DB`` contexts are open. It is serialized
	with sorted keys and a 2-space indent, and the response Content-Type
	header is set to ``application/json``.
	"""
	with BOW_DB() as bow_db, LDA_DB() as lda_db:
		handler = TermTopicMatrix1(request, response, bow_db, lda_db)
		data = handler.GetTermFrequencyModel()
	# No ``encoding`` keyword: 'utf-8' is already the default on Python 2,
	# and Python 3's json.dumps rejects the keyword with a TypeError.
	dataStr = json.dumps(data, indent=2, sort_keys=True)
	response.headers['Content-Type'] = 'application/json'
	return dataStr
Пример #3
0
def GroupInBox():
    """Run the handler's ``LoadGIB`` step and return its generated response.

    A ``GroupInBoxHandler`` is built on the ``Corpus_DB``, ``BOW_DB`` and
    ``LDA_DB`` contexts, and ``LoadGIB()`` is invoked before those contexts
    exit. The response itself is generated afterwards.
    """
    with Corpus_DB() as corpus_db, BOW_DB() as bow_db, LDA_DB() as lda_db:
        handler = GroupInBoxHandler(request, response, corpus_db,
                                    bow_db, lda_db)
        # Load while the database contexts are still open: the handler was
        # constructed from these handles, and a context manager's resources
        # should not be relied upon after ``__exit__`` has run.
        handler.LoadGIB()
    return handler.GenerateResponse()
Пример #4
0
def Inspect():
    """Run the handler's ``InspectModel`` step and return its response.

    A ``GroupInBoxHandler`` is built on the ``Corpus_DB``, ``BOW_DB`` and
    ``LDA_DB`` contexts, and ``InspectModel()`` is invoked before those
    contexts exit. The response itself is generated afterwards.
    """
    with Corpus_DB() as corpus_db, BOW_DB() as bow_db, LDA_DB() as lda_db:
        handler = GroupInBoxHandler(request, response, corpus_db,
                                    bow_db, lda_db)
        # Inspect while the database contexts are still open: the handler
        # was constructed from these handles, and a context manager's
        # resources should not be relied upon after ``__exit__`` has run.
        handler.InspectModel()
    return handler.GenerateResponse()
Пример #5
0
def ImportSTM( app_name, model_path, corpus_path, database_path, is_quiet, force_overwrite ):
	"""Import an STM topic model as a web2py application.

	Unless the app folder already exists (and ``force_overwrite`` is false),
	an app is created via ``CreateApp`` and populated by: copying the corpus
	database and corpus text, extracting sentences, copying the model folder
	and the STM index/matrix files, then computing BOW and LDA statistics.

	Args:
		app_name: Name of the app; its folder is ``apps/<app_name>``.
		model_path: Folder with the STM model, copied to ``<app data>/stm``.
		corpus_path: Folder containing ``corpus.txt`` and the STM index and
			matrix files that are copied into the app's model folder.
		database_path: Folder containing ``corpus.db``.
		is_quiet: Log at INFO level when true, at DEBUG level otherwise.
		force_overwrite: Import even if ``apps/<app_name>`` already exists.

	Returns:
		None.
	"""
	logger = logging.getLogger( 'termite' )
	# Attach a console handler only once: adding a new StreamHandler on every
	# call would print each message once per previous invocation.
	has_console_handler = any(
		isinstance( h, logging.StreamHandler ) and not isinstance( h, logging.FileHandler )
		for h in logger.handlers
	)
	if not has_console_handler:
		logger.addHandler( logging.StreamHandler() )
	logger.setLevel( logging.INFO if is_quiet else logging.DEBUG )
	
	app_path = 'apps/{}'.format( app_name )
	corpus_filename = '{}/corpus.txt'.format( corpus_path )
	database_filename = '{}/corpus.db'.format( database_path )
	logger.info( '--------------------------------------------------------------------------------' )
	logger.info( 'Import an STM topic model as a web2py application...' )
	logger.info( '           app_name = %s', app_name )
	logger.info( '           app_path = %s', app_path )
	logger.info( '         model_path = %s', model_path )
	logger.info( '    corpus_filename = %s', corpus_filename )
	logger.info( '  database_filename = %s', database_filename )
	logger.info( '--------------------------------------------------------------------------------' )
	
	# Nothing to do when the app is already there and overwriting was not requested.
	if not force_overwrite and os.path.exists( app_path ):
		logger.info( '    Already available: %s', app_path )
		return
	
	with CreateApp(app_name) as app:
		db_path = app.GetDatabasePath()
		data_path = app.GetDataPath()
		
		# Import corpus (models/corpus.db, data/corpus.txt, data/sentences.txt)
		app_database_filename = '{}/corpus.db'.format( db_path )
		app_corpus_filename = '{}/corpus.txt'.format( data_path )
		app_sentences_filename = '{}/sentences.txt'.format( data_path )
		logger.info( 'Copying [%s] --> [%s]', database_filename, app_database_filename )
		shutil.copy( database_filename, app_database_filename )
		logger.info( 'Copying [%s] --> [%s]', corpus_filename, app_corpus_filename )
		shutil.copy( corpus_filename, app_corpus_filename )
		logger.info( 'Extracting [%s] --> [%s]', corpus_filename, app_sentences_filename )
		SplitSentences( corpus_filename, app_sentences_filename )
		
		# Import model (data/*)
		app_model_path = '{}/stm'.format( data_path )
		logger.info( 'Copying [%s] --> [%s]', model_path, app_model_path )
		shutil.copytree( model_path, app_model_path )
		# The index/matrix files are read from corpus_path (not model_path)
		# and placed next to the copied model.
		stm_filenames = ( 'doc-index.json', 'term-index.json', 'topic-index.json', 'doc-topic-matrix.txt', 'term-topic-matrix.txt' )
		for stm_filename in stm_filenames:
			source_filename = '{}/{}'.format(corpus_path, stm_filename)
			target_filename = '{}/{}'.format(app_model_path, stm_filename)
			logger.info( 'Copying [%s] --> [%s]', source_filename, target_filename )
			shutil.copy( source_filename, target_filename )
		
		with Corpus_DB(db_path) as corpus_db:
			
			# Create a bag-of-words language model
			with BOW_DB(db_path, isInit=True) as bow_db:
				bow_computer = BOW_ComputeStats(bow_db, corpus_db, app_corpus_filename, app_sentences_filename)
				bow_computer.Execute()
			
			# Compute derived-statistics about an LDA-like topic model
			with LDA_DB(db_path, isInit=True) as lda_db:
				stm_reader = STMReader(lda_db, app_model_path, corpus_db)
				stm_reader.Execute()
				lda_computer = LDA_ComputeStats(lda_db, corpus_db)
				lda_computer.Execute()
Пример #6
0
def gib():
	"""Return the handler's ``content`` as a JSON string.

	A ``GroupInBoxHandler`` is built on the ``Corpus_DB``, ``BOW_DB`` and
	``LDA_DB`` contexts; ``UpdateModel()``, ``InspectModel()`` and
	``LoadGIB()`` are run in that order before the contexts exit. The
	handler's ``content`` is then serialized with sorted keys and a 2-space
	indent, and the response Content-Type is set to ``application/json``.
	"""
	with Corpus_DB() as corpus_db, BOW_DB() as bow_db, LDA_DB() as lda_db:
		handler = GroupInBoxHandler(request, response, corpus_db, bow_db, lda_db)
		# Run the model steps while the database contexts are still open: the
		# handler was constructed from these handles, and a context manager's
		# resources should not be relied upon after ``__exit__`` has run.
		handler.UpdateModel()
		handler.InspectModel()
		handler.LoadGIB()
	# No ``encoding`` keyword: 'utf-8' is already the default on Python 2,
	# and Python 3's json.dumps rejects the keyword with a TypeError.
	dataStr = json.dumps(handler.content, indent=2, sort_keys=True)
	response.headers['Content-Type'] = 'application/json'
	return dataStr
Пример #7
0
def index():
	"""Return the response generated by a ``TermTopicMatrix1`` handler.

	The handler is constructed while the ``BOW_DB`` and ``LDA_DB`` contexts
	are open; the response is generated after they exit.
	"""
	with BOW_DB() as bow_db, LDA_DB() as lda_db:
		matrix_handler = TermTopicMatrix1(request, response, bow_db, lda_db)
	return matrix_handler.GenerateResponse()
Пример #8
0
def index():
    """Return the response generated by a ``BOW_Core`` handler.

    The handler is constructed while the ``BOW_DB`` context is open; the
    response is generated after it exits.
    """
    with BOW_DB() as bow_db:
        core = BOW_Core(request, response, bow_db)
    generated = core.GenerateResponse()
    return generated
Пример #9
0
def SentenceG2():
    """Run ``LoadSentenceG2`` on a ``BOW_Core`` handler and return its response.

    Both the handler construction and the load step happen while the
    ``BOW_DB`` context is open; the response is generated after it exits.
    """
    with BOW_DB() as bow_db:
        core = BOW_Core(request, response, bow_db)
        core.LoadSentenceG2()
    generated = core.GenerateResponse()
    return generated
Пример #10
0
def TermCoProbs():
    """Run ``LoadTermCoProbs`` on a ``BOW_Core`` handler and return its response.

    Both the handler construction and the load step happen while the
    ``BOW_DB`` context is open; the response is generated after it exits.
    """
    with BOW_DB() as bow_db:
        core = BOW_Core(request, response, bow_db)
        core.LoadTermCoProbs()
    generated = core.GenerateResponse()
    return generated