def getQuotes():
    """Return the cleaned quotes, using ``quotes.pkl`` as an on-disk cache.

    When ``quotes.pkl`` does not exist in the current working directory, the
    quotes are produced by ``generateQuotes()``, passed through ``cleanUp()``
    and pickled to that file. Otherwise they are loaded from the file and
    passed through ``cleanUp()`` before being returned.

    Returns:
        The value returned by ``cleanUp()`` for the generated or cached quotes.
    """
    cache_path = 'quotes.pkl'

    if not os.path.exists(cache_path):
        quotes = cleanUp(generateQuotes())
        # Pickle streams are bytes: the file has to be opened in binary mode.
        # Text mode fails on Python 3 and can corrupt the data on Windows.
        with open(cache_path, 'wb') as f:
            pickle.dump(quotes, f)
    else:
        # NOTE(security): pickle.load can execute arbitrary code. This is only
        # acceptable because the file is a local cache written by this
        # function; never point it at data from an untrusted source.
        with open(cache_path, 'rb') as f:
            quotes = pickle.load(f)
        quotes = cleanUp(quotes)

    return quotes
def createSamples(text, n=1):
    """Write ``n`` sample files generated from the template ``text``.

    For every ``i`` in ``range(n)`` a random integer between 1 and 5000
    (inclusive) is appended to each ``'$'`` in the template, the result is
    passed through ``cleanUp()`` and written to ``path + 'sample<i>.txt'``.
    ``path`` is a module-level name defined outside this function.

    Args:
        text: Template string; every ``'$'`` is followed by the random amount.
        n: Number of sample files to create (default 1).
    """
    for i in range(n):
        amt = random.randint(1, 5000)
        # Build each sample from the untouched template. Reassigning ``text``
        # inside the loop made later samples inherit the amounts (and the
        # clean-up) of all earlier ones, e.g. '$' -> '$12' -> '$34512'.
        sample = cleanUp(text.replace('$', '$' + str(amt)))
        # The context manager guarantees the file is flushed and closed.
        with open(path + 'sample' + str(i) + '.txt', 'w+') as sample_file:
            sample_file.write(sample)
def commitCommand():
    '''(None) -> None
    Checks if localRepoPath is a valid path. If it is, syncs the local
    repository, runs the clean up script (its output is written to
    cleanUpReport.txt), copies all the extracted XMLs from Changed_Systems
    into the local repository, and pushes changes to the remote.
    Changes the last update date to current date.
    If the path is not a directory, prints a message and does nothing else.
    '''
    # Local import so this block does not depend on the file's import header.
    import contextlib

    localRepoPath = repoTools.getLocalRepo()
    if not os.path.isdir(localRepoPath):
        # path is not right, print message and stop
        print(
            "Problem in path. Please set the path of local repository using 'repo' command"
        )
        return

    # pull from remote to make sure local is up to date
    git.pull_repo(localRepoPath)

    # Print clean up report in a text file. Opening with 'w' already truncates
    # an existing report, and the context managers guarantee that the file is
    # closed and the real stdout restored even if the script fails.
    with open('cleanUpReport.txt', 'w', encoding='utf-8') as report:
        with contextlib.redirect_stdout(report):
            try:
                cleanUp.cleanUp()
            except Exception:
                # Still redirected here, so (as before) this message ends up
                # in the report file rather than on the console.
                print("Fail to run clean up script")

    # go through changed_systems and copy files to local repo
    source_dir = os.path.join(os.getcwd(), "Changed_Systems")
    print("Copying files...")
    for filename in glob.glob(os.path.join(source_dir, '*.*')):
        try:
            shutil.copy(filename, localRepoPath)
        except OSError:
            # Best effort: report the file that failed and carry on.
            print("Couldn't copy " + filename + " to " + localRepoPath +
                  ". Please close all files and try again")

    # push changes from local to remote
    git.push_all(localRepoPath)

    # change updated date
    today = str(datetime.date.today())
    fname = "last_commit_date.txt"
    with open(fname, 'w') as f:
        f.write(today)
    print("Changes successfully made. Last updated date is now " + today)
def inputSound(input_device_index=1):
    """Record from the microphone until ``cleanup.ended`` says to stop.

    Reads 1024-frame chunks of 16-bit mono audio at 44100 Hz, detects the
    pitch of each chunk with ``analyse.musical_detect_pitch`` and appends the
    note returned by ``cleanup.getNote`` to the module-level ``notesSung``
    list. After the loop the list is passed to ``cleanup.cleanUp``.

    The module-level flag ``recording`` is set to True while this function is
    capturing and reset to False when capturing stops, including on error.

    Args:
        input_device_index: PyAudio index of the input device. The right
            value depends on the machine; the default of 1 is the value that
            was previously hard-coded.

    Returns:
        The module-level ``notesSung`` list.
    """
    global recording
    global notesSung

    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    recording = True
    stream = None
    try:
        # Open input stream, 16-bit mono at 44100 Hz
        stream = pyaud.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input_device_index=input_device_index,
            input=True)

        notes = cleanup.notes
        notesSung = []

        # Parenthesised single-argument print: same output on Python 2 and 3.
        print("*listening*")
        while not cleanup.ended(notesSung):
            # Read raw microphone data
            rawsamps = stream.read(1024)
            # Convert raw data to NumPy array
            # NOTE(review): newer NumPy deprecates binary fromstring in favour
            # of frombuffer -- confirm the NumPy version before switching.
            samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
            freq = analyse.musical_detect_pitch(samps)
            notesSung.append(cleanup.getNote(freq, notes))
        print("*Done*")
    finally:
        # Always clear the flag and release the audio device, even when
        # opening or reading the stream fails.
        recording = False
        if stream is not None:
            stream.stop_stream()
            stream.close()
        pyaud.terminate()

    cleanup.cleanUp(notesSung)
    return notesSung
def inputSound(input_device_index=1):
    """Record from the microphone while ``cleanup.silentEnd`` is truthy.

    Reads 1024-frame chunks of 16-bit mono audio at 44100 Hz, detects the
    pitch of each chunk with ``analyse.musical_detect_pitch`` and collects
    the notes returned by ``cleanup.getNote``. After the loop it prints the
    per-chunk timestamps, passes the notes to ``cleanup.cleanUp``, and prints
    the notes, some list lengths and the result of ``cleanup.removeRepeats``.

    Args:
        input_device_index: PyAudio index of the input device. The right
            value depends on the machine; the default of 1 is the value that
            was previously hard-coded.

    Returns:
        None. Results are only printed.
    """
    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    stream = None
    times = []  # seconds since start, one entry per chunk read
    fun = []    # one "*" per chunk read; only its length is printed
    notes = cleanup.notes
    notesSung = []
    try:
        # Open input stream, 16-bit mono at 44100 Hz
        stream = pyaud.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input_device_index=input_device_index,
            input=True)

        # All prints below use a form with identical output on Python 2 and 3.
        print("*listening*")
        start = time.time()
        # NOTE(review): loops while silentEnd() is truthy -- confirm that is
        # the intended polarity for this version of the cleanup module.
        while cleanup.silentEnd(notesSung):
            times.append(time.time() - start)
            fun.append("*")
            # Read raw microphone data
            rawsamps = stream.read(1024)
            # Convert raw data to NumPy array
            # NOTE(review): newer NumPy deprecates binary fromstring in favour
            # of frombuffer -- confirm the NumPy version before switching.
            samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
            freq = analyse.musical_detect_pitch(samps)
            notesSung.append(cleanup.getNote(freq, notes))
        print("*Done*")
    finally:
        # Release the audio device even when opening or reading fails.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        pyaud.terminate()

    print(times)
    leng = len(notesSung)  # length before cleanUp, for comparison below
    cleanup.cleanUp(notesSung)
    print(notesSung)
    print("%s %s %s %s" % (len(times), leng, len(notesSung), len(fun)))
    print("")
    print(cleanup.removeRepeats(notesSung))
    print(notesSung)
def inputSound(input_device_index=1):
    """Record from the microphone while ``cleanup.silentEnd`` is truthy.

    Reads 1024-frame chunks of 16-bit mono audio at 44100 Hz, detects the
    pitch of each chunk with ``analyse.musical_detect_pitch`` and collects
    the notes returned by ``cleanup.getNote``. After the loop it prints the
    per-chunk timestamps, passes the notes to ``cleanup.cleanUp``, and prints
    the notes, some list lengths and the result of ``cleanup.removeRepeats``.

    Args:
        input_device_index: PyAudio index of the input device. The right
            value depends on the machine; the default of 1 is the value that
            was previously hard-coded.

    Returns:
        None. Results are only printed.
    """
    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    stream = None
    times = []  # seconds since start, one entry per chunk read
    fun = []    # one "*" per chunk read; only its length is printed
    notes = cleanup.notes
    notesSung = []
    try:
        # Open input stream, 16-bit mono at 44100 Hz
        stream = pyaud.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=44100,
                            input_device_index=input_device_index,
                            input=True)

        # All prints below use a form with identical output on Python 2 and 3.
        print("*listening*")
        start = time.time()
        # NOTE(review): loops while silentEnd() is truthy -- confirm that is
        # the intended polarity for this version of the cleanup module.
        while cleanup.silentEnd(notesSung):
            times.append(time.time() - start)
            fun.append("*")
            # Read raw microphone data
            rawsamps = stream.read(1024)
            # Convert raw data to NumPy array
            # NOTE(review): newer NumPy deprecates binary fromstring in favour
            # of frombuffer -- confirm the NumPy version before switching.
            samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
            freq = analyse.musical_detect_pitch(samps)
            notesSung.append(cleanup.getNote(freq, notes))
        print("*Done*")
    finally:
        # Release the audio device even when opening or reading fails.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        pyaud.terminate()

    print(times)
    leng = len(notesSung)  # length before cleanUp, for comparison below
    cleanup.cleanUp(notesSung)
    print(notesSung)
    print("%s %s %s %s" % (len(times), leng, len(notesSung), len(fun)))
    print("")
    print(cleanup.removeRepeats(notesSung))
    print(notesSung)
def _convert_pdfs_with_tika(directory, read_path, unread_path, save_path):
    """Extract text from not-yet-read PDFs via Tika; return (pdf_count, failed_names)."""
    # Snapshot once instead of re-listing the folder for every file.
    already_read = set(os.listdir(read_path))
    pdf_count = 0
    unable_to_convert = []

    for name in os.listdir(directory):
        if name in already_read or '.pdf' not in name:
            continue
        pdf_count += 1
        source = os.path.join(directory, name)
        text = parser.from_file(source)['content']

        if text is None:
            # Tika found no text layer; the file is queued for the OCR pass.
            print('Apache Tika returned None for file: ' + str(name))
            shutil.copyfile(source, os.path.join(unread_path, name))
            unable_to_convert.append(name)
            continue

        text = cleanUp(text)
        if not text:
            # Nothing left after clean up: write no file and do not mark the
            # PDF as read. (The old code called close() on a handle that was
            # unbound or already closed at this point.)
            continue

        txt_path = os.path.join(save_path, name.partition('.')[0] + '.txt')
        with open(txt_path, 'w') as text_file:
            text_file.write(text)
        # copying to read folder
        shutil.copyfile(source, os.path.join(read_path, name))

    return pdf_count, unable_to_convert


def _ocr_unreadable_pdfs(unread_path, read_path, save_path):
    """Render each PDF in unread_path to JPEG pages, OCR them, and append the text to a .txt."""
    for name in os.listdir(unread_path):
        if '.pdf' not in name:
            continue
        pdf_path = os.path.join(unread_path, name)
        stem = name.partition('.')[0]
        try:
            image_paths = []
            for page_number, page in enumerate(convert_from_path(pdf_path), start=1):
                image_path = os.path.join(
                    unread_path, stem + "_page" + str(page_number) + '.jpg')
                page.save(image_path, 'JPEG')
                image_paths.append(image_path)

            with open(os.path.join(save_path, stem + '.txt'), 'a') as text_file:
                # Iterate over every saved page; range(1, totalpages) used to
                # skip the last one (and all of a single-page PDF).
                for image_path in image_paths:
                    with Image.open(image_path) as image:
                        text = cleanUp(str(pytesseract.image_to_string(image)))
                    # Empty pages are skipped without closing the output file,
                    # so later pages can still be written.
                    if text:
                        text_file.write(text)

            # moving to read folder
            shutil.move(pdf_path, os.path.join(read_path, name))
        except (ValueError, PDFPageCountError):
            print('The pdf ' + name + ' could not be read')

    # Remove the temporary page images.
    for leftover in os.listdir(unread_path):
        if leftover.endswith(".jpg"):
            os.remove(os.path.join(unread_path, leftover))


def _convert_templates(template_path, save_path, read_path):
    """Extract text from every file in template_path via Tika and write it to save_path."""
    for name in os.listdir(template_path):
        text = parser.from_file(template_path + name)['content']
        if not text:
            # Tika returned None or an empty string: nothing to write.
            continue
        text = cleanUp(text)
        with open(save_path + name.partition('.')[0] + '.txt', 'w') as text_file:
            text_file.write(text)
        # copying to read folder
        shutil.copyfile(template_path + name, os.path.join(read_path, name))


def readFilesToText():
    """Convert the PDFs in ./data/Docs (and the templates) to text files.

    Steps:
      1. Every PDF in ./data/Docs that is not already in ./data/Read is parsed
         with Tika. Extracted text is cleaned with ``cleanUp()`` and written
         to ./data/Docs_txt; the PDF is copied to ./data/Read. PDFs for which
         Tika returns None are copied to ./data/Unreadable.
      2. If every PDF was converted, the function prints a message and
         returns.
      3. Otherwise the PDFs in ./data/Unreadable are rendered to JPEG pages
         and run through OCR; the text is appended to ./data/Docs_txt and the
         PDF is moved to ./data/Read. Temporary JPEGs are deleted.
      4. Files in ./data/Templates/ are parsed with Tika and written to
         ./data/Templates_txt/.

    Returns:
        None.
    """
    print('Accessing files from data/Docs...')
    directory = './data/Docs'
    read_path = './data/Read'
    unread_path = './data/Unreadable'
    save_path = './data/Docs_txt'

    # The counter now starts at 0; it used to start at 1 and over-report by one.
    countpdf, unableToConvert = _convert_pdfs_with_tika(
        directory, read_path, unread_path, save_path)

    print('\n\n The total number of PDFs were: ', countpdf)
    if not unableToConvert:
        print('All files converted to .txt')
        # NOTE(review): as in the previous version, returning here also skips
        # the template conversion below -- confirm that this is intended.
        return

    print(
        str(len(unableToConvert)) +
        ' files were not converted to text. \nThese files are: ' +
        str(unableToConvert))
    print('Converting the files to JPEG and then trying OCR...')
    _ocr_unreadable_pdfs(unread_path, read_path, save_path)

    _convert_templates('./data/Templates/', './data/Templates_txt/', read_path)