Exemplo n.º 1
0
# Project-local modules (part of this code base, not the standard library).
import util
import tokenizer

# pprint pretty-prints nested data structures (comparable to PHP's print_r);
# usage: pprint(obj)
from pprint import pprint

# Script entry point: read ../examples/TEST.DAT through util.getFileContent,
# hand the result to tokenizer.createTermFrequencyMatrix and pretty-print
# whatever that returns.
if __name__ == "__main__":
    sample_content = util.getFileContent("../examples/TEST.DAT")
    pprint(tokenizer.createTermFrequencyMatrix(sample_content))
Exemplo n.º 2
0
# CGI script: builds the term-frequency matrix for ../examples/TEST.DAT and
# prints it as an HTML table with one row per key of the matrix and one cell
# per term of that row.

# Report uncaught exceptions in the response instead of failing silently.
cgitb.enable()

# Parse the incoming request; the result is not read anywhere below.
fs = cgi.FieldStorage()

print("Content-Type: text/html\n\n")
# Left exactly as written: under Python 2 this statement prints one more
# blank line, under Python 3 it is an expression with no effect. Rewriting it
# as print() would emit "()" under Python 2.
print

# util and tokenizer are imported only after this directory has been added to
# the module search path.
scriptPath = "/var/www/clusterator/core/"
sys.path.append(scriptPath)

import util
import tokenizer

# Escaping helper for text placed inside the table cells.
try:
    from html import escape
except ImportError:  # Python 2 has no html module
    from cgi import escape

docWordsStruct = util.getFileContent("../examples/TEST.DAT")

matrix = tokenizer.createTermFrequencyMatrix(docWordsStruct)

# Collect the markup fragments and join them once at the end; repeated
# "html = html + ..." copies the whole page for every cell.
fragments = [
    "<table border='1'><thead><tr><th>DOC ID</th><th>TERMS</th></tr></thead><tbody>"
]

for doc in matrix:
    # Document ids and terms come from file content, so they are escaped
    # before being embedded; a "<" or "&" would otherwise break the table.
    fragments.append("<tr><td>" + escape(doc) + "</td>")

    terms = matrix[doc]
    for word in terms:
        fragments.append("<td>" + escape(word) + ":" + str(terms[word]) + "</td>")

    fragments.append("</tr>")

fragments.append("</tbody></table>")

html = "".join(fragments)
print(html)