Exemplo n.º 1
0
def savefeatures(classifier, filename):
    """Extract per-feature statistics from a classifier and save as JSON.

    Accepts a classifier exposing a NaiveBayes-style _feature_probdist
    and a filename string.  Writes the feature list to *filename* in
    JSON format and returns it.  Each entry is a 6-tuple:
        (feature name,
         predictive ratio,
         P(feature | "pos"),
         P(feature | "neg"),
         label maximizing P(label | feature),
         that maximal P(label | feature)).
    """
    probdist = classifier._feature_probdist
    features = []

    for (label, fname) in probdist:
        # Each feature appears once per label; process it on "neg" only.
        if label != "neg":
            continue

        ratio = maxratio(probdist, fname)
        p_pos = p_of_feature_given_label(probdist, "pos", fname)
        p_neg = p_of_feature_given_label(probdist, "neg", fname)

        dist = classifier.prob_classify({fname: True})
        best_label = dist.max()

        features.append(
            (fname, ratio, p_pos, p_neg, best_label, dist.prob(best_label)))

    jsondata.save(filename, features)
    return features
Exemplo n.º 2
0
def tlc_print_func(i, var):
    #print 'y: %s' % var.y
    print 'eta: %s' % var.eta
    print 'ss: %s' % var.sigma_squared
    
    if i % 5 == 0:
        jsondata.save('mytlc-output-%s.dat' % i, var.to_dict())
Exemplo n.º 3
0
def slda_print_func(i, var):
    #print 'y: %s' % var.y
    print 'gamma: %s' % var.gamma
    print 'eta: %s' % var.eta
    print 'ss: %s' % var.sigma_squared

    if i % 5 == 0:
        jsondata.save('slda-output-%s.dat' % i, var.to_dict())
Exemplo n.º 4
0
#!/usr/bin/env python
"""
    converts liu opinion sentiment word lists into json format
    visualizing them
    Copyright (C) 2011 Joseph Perla

    GNU Affero General Public License. See <http://www.gnu.org/licenses/>.
"""
import codecs
import jsondata

filename = 'data/liu_neg_words.txt'

f,ext = filename.rsplit('.', 1)

# Read the word list, closing the file deterministically.
# Lines starting with ';' are comments in the Liu word-list format.
with codecs.open(filename, 'r', 'utf8') as infile:
    lines = infile.readlines()
real = [l.strip('\r\n ') for l in lines if not l.startswith(';')]
real = [l for l in real if l]  # drop blank lines

try:
    jsondata.save(f + '.json', real)
except Exception:
    # Narrowed from a bare except: so Ctrl-C / SystemExit still
    # propagate; any real error drops into the post-mortem debugger.
    import pdb;pdb.post_mortem()

Exemplo n.º 5
0
# topic 015 in ls models/lda_c_2011_10_16/final.* is about restaurants and dining

# I classified the 10k document collection and put those gammas in ccode/lda-c/classify10000-gamma.dat
# topic 015 in ls models/lda_c_2011_10_16/final.* is about restaurants and dining

# this will find the documents which have that topic
import jsondata

gamma_filename = 'models/lda_c_2011_10_16/final.gamma'
gamma_filename = 'ccode/lda-c/classify10000-gamma.dat'  # NOTE: overrides the path above

# Parse one row of space-separated floats per document.
# (Fixed: the inner comprehension previously reused the name `f`,
# shadowing the open file handle — and in Python 2 the comprehension
# variable leaks, leaving `f` rebound to a float afterwards.)
gammas = []
with open(gamma_filename, 'r') as gamma_file:
    gammas = [[float(value) for value in row.split(' ')]
              for row in gamma_file.readlines()]

# Documents with strong weight on topic 15 (the restaurant topic).
restaurant_gammas = [(i,g) for i,g in enumerate(gammas) if g[15] > 100.0]
restaurant_indices = [i for (i,g) in restaurant_gammas]



data_filename = 'data/lda/nytimes_10000_sparse_lda_2011_10_16.dat'
with open(data_filename, 'r') as f:
    docs = f.readlines()

restaurant_docs = [docs[i] for i in restaurant_indices]

# Sparse LDA-C format: "<count> word:freq word:freq ..." per line.
sparse_docs = [[(int(e.split(':')[0]),int(e.split(':')[1])) for e in d.split(' ')[1:]] for d in restaurant_docs]

# NOTE(review): deliberate inspection checkpoint before saving — remove
# once the pipeline is trusted.
import pdb; pdb.set_trace()
jsondata.save('background.nyt_med.json', sparse_docs)
Exemplo n.º 6
0
             generate_complete_overlap,
    ]

    num_points = 10000

    # NOTE(review): fragment — the enclosing function header is not visible
    # here; `pps`, `cs`, `sample_positive` and `calculate_estimators` are
    # presumably defined above this snippet. TODO confirm against full file.
    # Builds one result row per (prior, distribution, labeling-freq) combo.
    table = []
    for pp in pps:
        for d in dists:
            pos, neg = d(num_points, pp)
            for c in cs:
                pos_sample, unlabeled = sample_positive(c, pos, neg)
                # validation set:
                v_p, v_u = sample_positive(c, *d(num_points, pp))
                #v_p, v_u = d(num_points, pp)

                data = (pos_sample, unlabeled, v_p, v_u)
                #data, fixers = normalize_pu_data(*data)

                _, estimators = calculate_estimators(*data, max_iter=100)

                # Row: parameters used, then the estimator outputs.
                t = (pp, d.func_name, c,) + estimators
                print t
                table.append(t)

                #e1, e2, e3, e1_hat, e4_hat = estimators
                
    # save the table for graphing
    import jsondata
    jsondata.save('table.json', table)
        
Exemplo n.º 7
0
    num_points = 10000

    # NOTE(review): fragment — the enclosing function header is not visible
    # here; `pps`, `dists`, `cs`, `sample_positive` and
    # `calculate_estimators` are presumably defined above. TODO confirm.
    # Builds one result row per (prior, distribution, labeling-freq) combo.
    table = []
    for pp in pps:
        for d in dists:
            pos, neg = d(num_points, pp)
            for c in cs:
                pos_sample, unlabeled = sample_positive(c, pos, neg)
                # validation set:
                v_p, v_u = sample_positive(c, *d(num_points, pp))
                #v_p, v_u = d(num_points, pp)

                data = (pos_sample, unlabeled, v_p, v_u)
                #data, fixers = normalize_pu_data(*data)

                _, estimators = calculate_estimators(*data, max_iter=100)

                # Row: parameters used, then the estimator outputs.
                t = (
                    pp,
                    d.func_name,
                    c,
                ) + estimators
                print t
                table.append(t)

                #e1, e2, e3, e1_hat, e4_hat = estimators

    # save the table for graphing
    import jsondata
    jsondata.save('table.json', table)
Exemplo n.º 8
0
def lda_print_func(i, var):
    #print 'phi: %s' % var.phi
    print 'gamma: %s' % var.gamma

    if i % 5 == 0:
        jsondata.save('lda-output-%i.dat' % i, var.to_dict())