Пример #1
0
import Dictionary
import FSBuilder
import AccEval
import glob

# Directory whose *.txt files are collected into TextList below.
DataDirectory = "data/"

# debug gates the progress prints and is forwarded to the builder calls.
# FSCheck decides whether FSSub is built or left as an empty set.
debug = True
FSCheck = True

if debug:
    print("Verbose feedback enabled\n")

Lexicon = Dictionary.BuildLexicon(debug)

# With FSCheck switched off, an empty set stands in for the built one.
FSSub = FSBuilder.BuildFS(Lexicon, debug) if FSCheck else set()

# Starts out empty; nothing in this part of the script appends to it.
report = []

if debug:
    print("Building file list from " + DataDirectory)

TextList = glob.glob(DataDirectory + "*.txt")

itemcount = len(TextList)
print(
    "Preparing score report for "
    + str(itemcount)
    + " items in "
    + DataDirectory
    + "\n"
)
Пример #2
0
else:
    startindex = 0

# Load the list of volume IDs, one per line, from the metadata directory.
HTIDfile = metadatapath + "htids.txt"
with open(HTIDfile, encoding="utf-8") as handle:
    HTIDlist = handle.readlines()

# Nothing left to do once the start position is past the end of the list.
if startindex >= len(HTIDlist):
    print("Finished processing the whole list of volume IDs.")
    quit()

# Clamp the batch so it never runs beyond the last ID.
endindex = min(startindex + batchcount, len(HTIDlist))

Lexicon = Dictionary.BuildLexicon(dictionarypath, debug)

# File name and field delimiter handed to TypeIndex.ReadIndex below.
writename = 'typeindex.txt'
delim = '\t'

# A run starting at 0 begins with an empty index; any later start position
# reads the existing index back from writename.
BigIndex = dict() if startindex == 0 else TypeIndex.ReadIndex(writename, delim, debug)

SortedIndex = []

# Walk this batch's IDs, keeping `index` as the position within HTIDlist.
for index, rawline in enumerate(HTIDlist[startindex:endindex], start=startindex):
    IDtoprocess = rawline.strip()  # drop the trailing newline left by readlines()
    filepath, postfix = FileCabinet.pairtreepath(IDtoprocess, datapath)
Пример #3
0
import Dictionary
import ReadText
import hyphenread1
import hyphenread2
import FSBuilder

# Build the lexicon and the FS set; True is passed as the second/only flag
# argument (presumably a verbose/debug switch -- confirm against the modules).
Lexicon = Dictionary.BuildLexicon(True)
FSSub = FSBuilder.BuildFS(Lexicon, True)

# Disabled alternative reader: ReadText.CleanText("AustBCR.txt", True)
Text = hyphenread1.CleanText("AustBCR.txt", Lexicon, True)

# Match counts every word found in either collection; Sub counts only those
# found in FSSub after missing in Lexicon.
Match = 0
Sub = 0

# Created empty; the visible code never adds to it.
Miss = set()

for word in Text:
    in_lexicon = word in Lexicon
    # Words found in neither collection are simply skipped.
    if not in_lexicon and word not in FSSub:
        continue
    Match += 1
    if not in_lexicon:
        Sub += 1

print("Base Matches: " + str(Match - Sub))
print("Sub. Matches: " + str(Sub))
print("Total Matches: " + str(Match) + "\n")