# Command-line smoke test: build the term-frequency matrix for the sample
# data file and pretty-print it.

# pprint is Python's counterpart of PHP's print_r: just call pprint(obj).
from pprint import pprint

# Project-local modules.
import util
import tokenizer

if __name__ == '__main__':
    doc = util.getFileContent("../examples/TEST.DAT")
    tfm = tokenizer.createTermFrequencyMatrix(doc)
    pprint(tfm)
# CGI script: renders the term-frequency matrix built from
# ../examples/TEST.DAT as an HTML table, one row per document and one cell
# per "term:count" pair.
import cgi
import cgitb
import json
import os
import sys

# Route uncaught exceptions through cgitb so failures are reported in the
# response instead of surfacing only as an opaque server error.
cgitb.enable()
fs = cgi.FieldStorage()

# Alternative JSON response header, kept for reference:
#print("Content-Type: application/json\n\n")
print("Content-Type: text/html\n\n")
# A bare `print` only emits a blank line under Python 2; under Python 3 it is
# a silent no-op. print("") behaves the same on both.
print("")

# The project modules live outside the CGI directory, so the search path has
# to be extended before they can be imported.
scriptPath = "/var/www/clusterator/core/"
sys.path.append(scriptPath)
import util
import tokenizer

docWordsStruct = util.getFileContent("../examples/TEST.DAT")
# Used below as matrix[doc][word]; doc and word are concatenated directly
# into the markup, so they are expected to be strings.
matrix = tokenizer.createTermFrequencyMatrix(docWordsStruct)

# Collect the fragments and join once instead of growing a string with
# repeated concatenation inside nested loops.
parts = ["<table border='1'><thead><tr><th>DOC ID</th><th>TERMS</th></tr></thead><tbody>"]
for doc in matrix:
    parts.append("<tr><td>" + doc + "</td>")
    for word in matrix[doc]:
        parts.append("<td>" + word + ":" + str(matrix[doc][word]) + "</td>")
    parts.append("</tr>")
parts.append("</tbody></table>")
html = "".join(parts)

# Write the table to the response body; without this the page stays empty
# even though the Content-Type header was already sent.
print(html)
#print(json.dumps(message,indent=1))