Exemplo n.º 1
0
def createWordDocCount():
    """Write per-document word counts to ``../initialResult/wordNumber.txt``.

    The file acts as a cache: if it already exists the function returns
    immediately without loading the documents or touching the file.

    Output format: one line per document, terminated by ``"\\r\\n"``. Each
    line holds ``word:count`` entries, each followed by a single space, in
    sorted word order. Documents are tokenized with ``split(" ")``, so empty
    tokens produced by consecutive spaces are counted like any other token;
    a token that is exactly ``"\\n"`` is skipped.

    Assumes ``document.getAllFilesContent()`` returns an iterable of
    strings, one per document -- TODO confirm against the ``document`` module.
    """
    fname = "../initialResult/wordNumber.txt"
    if os.path.isfile(fname):
        return

    # Build the whole payload before creating the file. Opening the file
    # first would leave an empty file behind if anything below raised, and
    # the existence check above would then treat it as a finished result.
    lines = []
    for fileContent in document.getAllFilesContent():
        countDic = Counter(fileContent.split(" "))
        entries = [
            wordName + ":" + str(countDic[wordName]) + " "
            for wordName in sorted(countDic)
            if wordName != "\n"
        ]
        lines.append("".join(entries) + "\r\n")

    # NOTE(review): the file is opened in text mode, so on platforms that
    # translate newlines the explicit "\r\n" may be written as "\r\r\n".
    # Kept as-is because downstream readers may depend on the current bytes.
    with open(fname, 'w') as f:
        f.write("".join(lines))
Exemplo n.º 2
0
def createDocLength():
    """Write the token count of every document to ``./initialResult/docLength.txt``.

    The file acts as a cache: if it already exists the function returns
    immediately without loading the documents or touching the file.

    Output format: one integer per line (terminated by ``"\\r\\n"``), in the
    order the documents are returned. The count is ``len(doc.split(" "))``,
    so empty tokens produced by consecutive spaces are included.

    Assumes ``document.getAllFilesContent()`` returns an iterable of
    strings, one per document -- TODO confirm against the ``document`` module.
    """
    # NOTE(review): this path starts with "./" while the sibling generators
    # write to "../initialResult/" -- confirm which directory is intended.
    fname = "./initialResult/docLength.txt"
    if os.path.isfile(fname):
        return

    # Compute every line before creating the file so that a failure here
    # cannot leave a truncated file that later calls mistake for a
    # complete result.
    lengths = [
        str(len(doc.split(" "))) + "\r\n"
        for doc in document.getAllFilesContent()
    ]

    with open(fname, 'w') as f:
        f.writelines(lengths)
Exemplo n.º 3
0
def createDictionary():
    """Write the set of distinct words to ``../initialResult/dictionary.txt``.

    The file acts as a cache: if it already exists the function returns
    immediately without loading the documents or touching the file.

    Each document is converted with ``str()`` and split on any whitespace.
    The distinct words are written in sorted order, separated by
    ``"\\r\\n"`` (no trailing separator), so repeated runs over the same
    documents produce the same file.

    Assumes ``document.getAllFilesContent()`` returns an iterable with one
    item per document -- TODO confirm against the ``document`` module.
    """
    fname = "../initialResult/dictionary.txt"
    if os.path.isfile(fname):
        return

    # Split each document on its own. Concatenating all documents with an
    # empty separator first would fuse the last word of one document with
    # the first word of the next whenever there is no trailing whitespace.
    wordSet = set()
    for doc in document.getAllFilesContent():
        wordSet.update(str(doc).split())

    # Sorting gives a deterministic file; raw set iteration order is
    # arbitrary and may differ between runs.
    with open(fname, 'w') as f:
        f.write("\r\n".join(sorted(wordSet)))