def joint_distribution_model(H):
    """Build (but do not fit) a conditional maximum-entropy model from a count matrix.

    Parameters
    ----------
    H : numpy.ndarray
        2-D array of observed counts.  ``H.shape[1]`` is taken as the number
        of contexts; the matrix is flattened row-major into the one long
        row vector the ``conditionalmodel`` constructor expects.
        # assumes rows index contexts and columns index sample points — TODO confirm

    Returns
    -------
    A ``maxent.conditionalmodel`` constructed over a single feature vector
    produced by ``f_ssd``.

    NOTE(review): ``f_ssd`` and ``maxent`` are defined elsewhere in this
    project; elsewhere in this file the module is referred to as
    ``maxentropy`` — verify which name is actually imported.
    """
    numcontexts = H.shape[1]
    # Flatten to one long row vector (the constructor's expected layout).
    counts = H.ravel()
    # Single feature evaluated on the matrix dimensions.
    f0 = f_ssd(H.shape[0], H.shape[1])
    F = np.asarray([f0])
    return maxent.conditionalmodel(F, counts, numcontexts)
# Ideally, this could be stored as a sparse matrix of size C x X, whose ith row
# vector contains all points x_j in the sample space X in context c_i:
#     N = sparse.lil_matrix((len(contexts), len(samplespace)))  # initialized to zero
#     for (c, x) in corpus:
#         N[c, x] += 1
# This would be a nicer input format, but computations are more efficient
# internally with one long row vector. What we really need is for sparse
# matrices to offer a .reshape method so this conversion could be done
# internally and transparently. Then the numcontexts argument to the
# conditionalmodel constructor could also be inferred from the matrix
# dimensions.

# Create a model.
# (The original file contained this entire section twice, back to back —
# the second copy differed only in formatting and would have fit the model
# and printed the results a second time; the duplicate has been removed.)
model = maxentropy.conditionalmodel(F, N, numcontexts)
model.verbose = True

# Fit the model
model.fit()

# Output the distribution.  The prints are written with parentheses so the
# script is valid under Python 3; with a single argument the output is
# byte-identical under Python 2 as well.
print("\nFitted model parameters are:\n" + str(model.params))
p = model.probdist()

# The pmf is one long row vector; slice out the sub-range belonging to a
# single context to view p(x | c) for that context.
print("\npmf table p(x | c), where c is the context 'the':")
c = contexts.index('the')
print(p[c * numsamplepoints:(c + 1) * numsamplepoints])