Exemplo n.º 1
0
        fileDirectory,
        fileName).readText()  # looking to the file and return html text

    parser = DataParser(data)  # create a parser class
    parser.processData()  #process the given data

    visibleText = parser.getProcessTexts(
    )  # get all the visibletext in the document

    wordList = Tokenize(visibleText).extractToken()  #extra all the text
    wPost.addWord(
        wordList)  #add the word, word's frequency, word's position to Posting
    parser.updatePostingTagScore(wPost)  # update the tag score of the Posting

    # Dictionary class will extract information from the Posting as well as update term id for Posting
    wDict.extractAndUpdatePosting(wPost)

    totalWordCount = wPost.counter  # the total Number of word found in document
    numOfUniqueTerm = wPost.uniqueTerm  # total number of unique term found in document

    wDict.updateDictionaryDB()  # update the dictionary Table
    docID = Location(
        location, urlLink, numOfUniqueTerm, parser.getTitleTagContent(),
        totalWordCount).insertToDatabase()  #insert data into location table
    wPost.insertDataToDatabase(
        docID)  # insert new information in Posting Table

    # use to show indexing process
    myCounter.increment()
    myCounter.printPercentforEveryHundred()