Python CoeffTable примеры использования

Язык программирования: Python

Пространство имен/Пакет: passWeightCoeff

Класс/Тип: CoeffTable

Примеров на hotexamples.com: 3

Python CoeffTable — найдено 3 примера. Это лучшие примеры кода на Python для passWeightCoeff.CoeffTable, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

lookUp(1)

pwTable(1)

pwfTable(1)

Пример #1

Показать файл

Файл: passGenReweightedFeatureSet.py Проект: pyongjoo/twitter-research

    def refreshCoeffTableAndFollowing(self):
        '''
        Reload the coefficient table and the following lists if the training
        file changed since the last successful load.

        Reads '<coefficient_dir>/<train_fname>_coeff.json' (the keys 'pws' and
        'pwfs' are used to build the CoeffTable) and
        '<following_dir>/<train_fname>' (one 'user_id<TAB>id id id ...' record
        per line), then stores the results in self.coeff_table and
        self.followings.

        Nothing is done when self.train_fname equals self.prev_train_fname.
        If either file cannot be read or parsed, the exception propagates and
        the previously cached state is left untouched.
        '''
        if self.train_fname == self.prev_train_fname:
            return      # cached data still matches the training file

        # read an appropriate coefficient file
        coeff_fname = os.path.join(self.coefficient_dir,
                self.train_fname + '_coeff.json')
        with open(coeff_fname) as coeff_file:
            tables = json.load(coeff_file)
        coeff_table = CoeffTable(tables = (tables['pws'], tables['pwfs']))

        # read an appropriate following file
        f_fname = os.path.join(self.following_dir, self.train_fname)
        followings = {}     # user_id -> list of following ids
        with open(f_fname) as following_file:
            for line in following_file:
                user_id, f_list = line.rstrip('\n').split('\t')
                followings[user_id] = f_list.split(' ')

        # Publish the new state only after both files were read, so a failure
        # half-way through cannot mark stale data as fresh.
        self.coeff_table = coeff_table
        self.followings = followings
        self.prev_train_fname = self.train_fname

Пример #2

Показать файл

Файл: checkCoeffTable.py Проект: pyongjoo/twitter-research

import yaml
import json
from passWeightCoeff import CoeffTable


# Smoke check for CoeffTable: load one coefficient file, build the table from
# its 'pws' / 'pwfs' entries and call lookUp() on it.
fin_name = '../data/semi/coefficient/train0_coeff.json'
with open(fin_name) as fin:     # close the file as soon as it is parsed
    tables = json.load(fin)
pw = tables['pws']
pwf = tables['pwfs']

# Single-argument print(...) prints the same text under Python 2 and 3.
print('loaded')

coeff = CoeffTable(tables = (pw, pwf))

print('start cal')

coeff.lookUp()

Пример #3

Показать файл

Файл: passGenReweightedFeatureSet.py Проект: pyongjoo/twitter-research

class ReweightingTextFeatureConverter(TextFeatureConverter):
    '''
    Feature converter that rescales word weights of users that are not in the
    hard-label list, using the p(w) and p(w|f) tables of a CoeffTable.

    The attributes that should be set by the caller:

    1. train_fname (instance variable)
    2. hard_label_fname (class variable)
    3. coefficient_dir (class variable)
    4. following_dir (class variable)
    '''

    def __init__(self, getText = None, stopfile = None):
        '''
        Read the hard-label list, so those users can be passed through
        unchanged later. The coefficient and following files are loaded
        lazily by refreshCoeffTableAndFollowing().

        getText and stopfile are forwarded unchanged to the superclass
        constructor.
        '''
        super(ReweightingTextFeatureConverter,
                self).__init__(getText, stopfile)    # if any

        self.hard_label_fname = ReweightingTextFeatureConverter.hard_label_fname
        self.coefficient_dir = ReweightingTextFeatureConverter.coefficient_dir
        self.following_dir = ReweightingTextFeatureConverter.following_dir

        # name of the training file the cached tables were loaded for
        self.prev_train_fname = None

        # read in all the hard labeled users: the user id is the first
        # tab-separated field of every line
        with open(self.hard_label_fname) as hard_label_file:
            self.hard_label_list = [line.split('\t')[0]
                                    for line in hard_label_file]


    def refreshCoeffTableAndFollowing(self):
        '''
        Reload the coefficient table and the following lists if the training
        file changed since the last successful load.

        Reads '<coefficient_dir>/<train_fname>_coeff.json' (the keys 'pws' and
        'pwfs' are used to build the CoeffTable) and
        '<following_dir>/<train_fname>' (one 'user_id<TAB>id id id ...' record
        per line), then stores the results in self.coeff_table and
        self.followings.

        Nothing is done when self.train_fname equals self.prev_train_fname.
        If either file cannot be read or parsed, the exception propagates and
        the previously cached state is left untouched.
        '''
        if self.train_fname == self.prev_train_fname:
            return      # cached data still matches the training file

        # read an appropriate coefficient file
        coeff_fname = os.path.join(self.coefficient_dir,
                self.train_fname + '_coeff.json')
        with open(coeff_fname) as coeff_file:
            tables = json.load(coeff_file)
        coeff_table = CoeffTable(tables = (tables['pws'], tables['pwfs']))

        # read an appropriate following file
        f_fname = os.path.join(self.following_dir, self.train_fname)
        followings = {}     # user_id -> list of following ids
        with open(f_fname) as following_file:
            for line in following_file:
                user_id, f_list = line.rstrip('\n').split('\t')
                followings[user_id] = f_list.split(' ')

        # Publish the new state only after both files were read, so a failure
        # half-way through cannot mark stale data as fresh.
        self.coeff_table = coeff_table
        self.followings = followings
        self.prev_train_fname = self.train_fname


    def additionalPass(self, words_arr, user_id, semi_label):
        '''
        Override superclass method to add another pass. This pass will
        adjust the feature weight, based on the precomputed p(w) and p(w|f)
        values for each class in the observed clean dataset.

        First, we should be able to distinguish if user_id is weakly labeled
        data, or hard labeled data which can be done easily by reading
        hard-label file. *The caller of this class is responsible to set the
        proper training file name*, so this method can know which file to read to
        calculate the coefficient.

        words_arr is an iterable of (word, weight) pairs and semi_label must
        be convertible with int(). Hard-labeled users get words_arr back
        unchanged; for everybody else a new list of (word, weight) pairs is
        returned. Raises KeyError when user_id has no entry in the following
        file.
        '''
        if user_id in self.hard_label_list:     # if clean data
            return words_arr                    # do nothing.

        # refresh coeff table and following list if needed
        self.refreshCoeffTableAndFollowing()

        # pre-load some common variables
        following = self.followings[user_id]
        pw_table = self.coeff_table.pwTable(int(semi_label))
        pwf_table = self.coeff_table.pwfTable(int(semi_label))

        # Now start to process each word in words_arr
        def _reweight(word_weight):
            word, weight = word_weight

            # load numerator, i.e., p(w); words without an entry keep their
            # weight
            if word not in pw_table:
                return (word, weight)

            pw = pw_table[word]        # numerator, or p(w)

            # load denominator, i.e., p(w|f). This part is more involved since
            # we should calculate the noisy-or.

            # for any entry available in the pwf_table, we combine the
            # coefficient.
            # NOTE(review): the division below raises ZeroDivisionError when
            # pw is exactly 1, and truncates under Python 2 if the tables
            # hold ints -- presumably they hold floats < 1; confirm.
            mul_1_q = 1             # PI_i {1 - q_i}
            for f in following:
                try:
                    value = pwf_table[f][word]         # p(w|f)
                except KeyError:
                    continue    # no p(w|f) for this (following, word) pair
                mul_1_q *= (1 - value) / (1 - pw)

            # final constant, capped at 2.0; a product of exactly 1 leaves
            # the weight as it is
            if mul_1_q == 1:
                c_wf = 1
            else:
                c_wf = min(pw / (1 - (1 - pw) * mul_1_q), 2.0)

            return (word, c_wf * weight)

        # a list comprehension returns a list on Python 2 and Python 3 alike
        return [_reweight(word_weight) for word_weight in words_arr]