retcode = subprocess.call(["python", "./pdfImport.py", pdf, pdf + ".txt"]) if (retcode != 0): print("Error while pasing PDF file!") exit(1) end_time = time.time() print("Parsing PDF took {0:.3f}".format(end_time - start_time), "seconds") except OSError: print("Error while trying to parse pdf file!") exit(1) #Getting words from a txt file print("\nStarted parsing TXT, wait for a while...") start_time = time.time() text = parse.getlist(input_file, enableComments = False) end_time = time.time() print("Parsing TXT took {0:.3f}".format(end_time - start_time), "seconds") #Getting words for deleting if (garbage_file == ''): garbage = set() pass else: #print("\nStarted parsing TXT with garbage, wait for a while...") start_time = time.time() garbage = parse.getlist(garbage_file, enableComments = True) end_time = time.time() print("\nParsing TXT with garbage took {0:.3f}".format(end_time - start_time), "seconds") garbage = set(garbage)
filename = pdf[:pdf.find(".pdf")] txt = filename + ".txt" try: retcode = subprocess.call(["/usr/bin/python", "./pdfImport.py", pdf, txt]) if (retcode != 0): exit() except OSError: print("Error!") exit() else: txt = pdf try: words = parse.getlist(txt, enableComments = False) rubbish = parse.getlist(sys.argv[2], enableComments = True) except IndexError: print("Error in arguments!") exit() rubbish = set(rubbish) word = [] number = [] start_time = time.time() for i in range(len(words)): if (words[i] not in rubbish) and (words[i] not in word): word.append(words[i]) number.append(words.count(words[i]))