Пример #1
0
		retcode = subprocess.call(["python", "./pdfImport.py", pdf, pdf + ".txt"])
		if (retcode != 0):
			print("Error while pasing PDF file!")
			exit(1)

		end_time = time.time()
		print("Parsing PDF took {0:.3f}".format(end_time - start_time), "seconds")

	except OSError:
		print("Error while trying to parse pdf file!")
		exit(1)

# Read the word list from the input TXT file and report the elapsed time.
print("\nStarted parsing TXT, wait for a while...")
start_time = time.time()
text = parse.getlist(input_file, enableComments = False)
end_time = time.time()
print("Parsing TXT took {0:.3f}".format(end_time - start_time), "seconds")

# Build the set of words to delete. An empty garbage file name means there
# is nothing to delete, so the set stays empty and no file is parsed.
if garbage_file != '':
	start_time = time.time()
	garbage_words = parse.getlist(garbage_file, enableComments = True)
	end_time = time.time()
	# The timing covers only the parse, not the conversion to a set below.
	print("\nParsing TXT with garbage took {0:.3f}".format(end_time - start_time), "seconds")
	garbage = set(garbage_words)
else:
	garbage = set()
Пример #2
0
    filename = pdf[:pdf.find(".pdf")]
    txt = filename + ".txt"
    try:
        retcode = subprocess.call(["/usr/bin/python", "./pdfImport.py", pdf, txt])
        if (retcode != 0):
            exit()
    except OSError:
        print("Error!")
        exit()
else:
    txt = pdf



# The path of the rubbish-word file is the second command-line argument.
# Look it up before any parsing so a missing argument is reported at once.
try:
    rubbish_path = sys.argv[2]
except IndexError:
    print("Error in arguments!")
    # Exit with a non-zero status so the calling shell can see the failure
    # (a bare exit() reports success).
    sys.exit(1)

# Only the argument lookup is guarded above: an IndexError raised inside
# parse.getlist now surfaces as a traceback instead of being misreported
# as an argument error.
words = parse.getlist(txt, enableComments = False)

# Kept as a set so later membership tests against it are constant-time.
rubbish = set(parse.getlist(rubbish_path, enableComments = True))

# Build two parallel lists: `word` holds each distinct entry of `words` that
# is not in `rubbish`, in order of first appearance, and `number` holds how
# many times that entry occurs in `words`.
from collections import Counter

word = []
number = []

start_time = time.time()
# Tally every entry in a single pass instead of calling words.count() once
# per distinct word, which made the loop quadratic in len(words).
occurrences = Counter(words)
# Entries already emitted; replaces the linear `not in word` list scan.
seen = set()
for current in words:
    if current not in rubbish and current not in seen:
        seen.add(current)
        word.append(current)
        number.append(occurrences[current])