import logging
import math
import sys

# Assumed import path: ModelAPI and SaliencyAPI are the project's I/O helpers
# for reading the model files and writing the saliency output.
from api_utils import ModelAPI, SaliencyAPI


class ComputeSaliency(object):
    """
    Distinctiveness and saliency.

    Compute term distinctiveness and term saliency, based on the term
    probability distributions associated with a set of latent topics.

    Input is a term-topic probability distribution, stored in 3 separate files:
        'term-topic-matrix.txt' contains the entries of the matrix.
        'term-index.txt' contains the terms corresponding to the rows of the matrix.
        'topic-index.txt' contains the topic labels corresponding to the columns of the matrix.

    Output is a list of term distinctiveness and saliency values, in two
    duplicate formats, a tab-delimited file and a JSON object:
        'term-info.txt'
        'term-info.json'

    An auxiliary output is a list of topic weights (i.e., the number of tokens
    in the corpus assigned to each latent topic), in two duplicate formats,
    a tab-delimited file and a JSON object:
        'topic-info.txt'
        'topic-info.json'
    """

    def __init__(self, logging_level):
        self.logger = logging.getLogger('ComputeSaliency')
        self.logger.setLevel(logging_level)
        handler = logging.StreamHandler(sys.stderr)
        handler.setLevel(logging_level)
        self.logger.addHandler(handler)

    def execute(self, data_path):
        assert data_path is not None

        self.logger.info('--------------------------------------------------------------------------------')
        self.logger.info('Computing term saliency...')
        self.logger.info('    data_path = %s', data_path)

        self.logger.info('Connecting to data...')
        self.model = ModelAPI(data_path)
        self.saliency = SaliencyAPI(data_path)

        self.logger.info('Reading data from disk...')
        self.model.read()

        self.logger.info('Computing...')
        self.computeTopicInfo()
        self.computeTermInfo()
        self.rankResults()

        self.logger.info('Writing data to disk...')
        self.saliency.write()

        self.logger.info('--------------------------------------------------------------------------------')

    def computeTopicInfo(self):
        # A topic's weight is its column sum in the term-topic matrix, i.e.,
        # the total number of tokens assigned to that topic.
        topic_weights = [sum(x) for x in zip(*self.model.term_topic_matrix)]
        topic_info = []
        for i in range(self.model.topic_count):
            topic_info.append({
                'topic': self.model.topic_index[i],
                'weight': topic_weights[i]
            })
        self.saliency.topic_info = topic_info

    def computeTermInfo(self):
        """Iterate over the list of terms. Compute frequency, distinctiveness, saliency."""
        # Marginal distribution over topics, P(T), from the topic weights.
        topic_marginal = self.getNormalized(
            [d['weight'] for d in self.saliency.topic_info])
        term_info = []
        for i in range(self.model.term_count):
            term = self.model.term_index[i]
            counts = self.model.term_topic_matrix[i]
            frequency = sum(counts)
            # Conditional distribution over topics for this term, P(T|w).
            probs = self.getNormalized(counts)
            # distinctiveness(w) = KL( P(T|w) || P(T) )
            # saliency(w) = frequency(w) * distinctiveness(w)
            distinctiveness = self.getKLDivergence(probs, topic_marginal)
            saliency = frequency * distinctiveness
            term_info.append({
                'term': term,
                'saliency': saliency,
                'frequency': frequency,
                'distinctiveness': distinctiveness,
                'rank': None,
                'visibility': 'default'
            })
        self.saliency.term_info = term_info

    def getNormalized(self, counts):
        """Rescale a list of counts, so they represent a proper probability distribution."""
        tally = sum(counts)
        if tally == 0:
            # An all-zero count vector cannot be normalized; return a copy
            # unchanged to avoid dividing by zero.
            probs = [d for d in counts]
        else:
            probs = [d / tally for d in counts]
        return probs

    def getKLDivergence(self, P, Q):
        """Compute the Kullback-Leibler divergence KL(P || Q)."""
        assert len(P) == len(Q)
        divergence = 0
        for i in range(len(P)):
            p = P[i]
            q = Q[i]
            assert p >= 0
            assert q >= 0
            # Entries with p == 0 contribute nothing; q must be strictly
            # positive wherever p > 0, or this raises a ZeroDivisionError.
            if p > 0:
                divergence += p * math.log(p / q)
        return divergence

    def rankResults(self):
        """Sort topics by decreasing weight. Sort terms by decreasing saliency."""
        self.saliency.topic_info = sorted(
            self.saliency.topic_info,
            key=lambda topic_weight: -topic_weight['weight'])
        self.saliency.term_info = sorted(
            self.saliency.term_info,
            key=lambda term_freq: -term_freq['saliency'])
        for i, element in enumerate(self.saliency.term_info):
            element['rank'] = i
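
# Worked example of the saliency math on hypothetical counts (the terms and
# numbers below are made up for illustration): of two equally frequent terms,
# the one concentrated in a single topic earns a higher distinctiveness, and
# hence a higher saliency, than the one spread evenly across topics.
def _saliency_demo():
    matrix = [[8, 2],   # 'apple': 10 tokens, skewed toward topic 0
              [5, 5]]   # 'thing': 10 tokens, spread evenly
    topic_weights = [sum(col) for col in zip(*matrix)]   # [13, 7]
    total = float(sum(topic_weights))
    marginal = [w / total for w in topic_weights]        # [0.65, 0.35]
    for term, counts in zip(['apple', 'thing'], matrix):
        frequency = float(sum(counts))
        probs = [c / frequency for c in counts]
        kl = sum(p * math.log(p / q) for p, q in zip(probs, marginal) if p > 0)
        print('%s: distinctiveness=%.3f, saliency=%.3f' % (term, kl, frequency * kl))
    # Prints approximately:
    #   apple: distinctiveness=0.054, saliency=0.542
    #   thing: distinctiveness=0.047, saliency=0.472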
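
# Minimal command-line driver: a sketch, not part of the original class. It
# assumes the script is invoked as `python compute_saliency.py <data_path>`
# and that ModelAPI/SaliencyAPI can resolve that path; adapt it to the
# project's actual entry-point conventions.
def main():
    if len(sys.argv) != 2:
        sys.stderr.write('Usage: python compute_saliency.py data_path\n')
        sys.exit(1)
    ComputeSaliency(logging.INFO).execute(sys.argv[1])


if __name__ == '__main__':
    main()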