def test(self, inputs, label, timestep):
    '''Test model.

    :param timestep: split into segments of length timestep.
    '''
    for embed in self.embeds:
        embed.eval()
    input = Variable(inputs[0].cuda(), requires_grad=False)
    label = Variable(label.cuda(), requires_grad=False)
    length = input.size(1)

    # Split video into segments
    input, start_indices = utils.get_segments(input, timestep)
    inputs = [input]
    logits, _ = self._forward(inputs)
    logits = utils.to_numpy(logits).squeeze(0)

    all_logits = [[] for i in range(length)]
    for i in range(len(start_indices)):
        s = start_indices[i]
        for j in range(timestep):
            all_logits[s + j].append(logits[i][j])

    # Average logits for each time step.
    final_logits = np.zeros((length, self.n_classes + 1))
    for i in range(length):
        final_logits[i] = np.mean(all_logits[i], axis=0)
    logits = final_logits

    info_acc = self._get_acc([torch.Tensor(logits)], label)
    scores = utils.softmax(logits, axis=1)
    return OrderedDict(info_acc), logits, scores
def perform_vad(wav_file, segs_file, speech_label, **kwargs):
    """Perform VAD for WAV file.

    If an exception is raised during processing, it returns the exception
    as well as the full traceback. Otherwise, returns ``None``.

    Parameters
    ----------
    wav_file : str
        Path to WAV file to perform VAD for.

    segs_file : str
        Path to output segments file.

    speech_label : str
        Label for speech segments.

    kwargs
        Keyword arguments to pass to ``utils.vad``.
    """
    try:
        data, fs = librosa.load(wav_file, sr=None)
        vad_info = utils.vad(data, fs, **kwargs)
        segments = utils.get_segments(vad_info, fs)
        utils.write_segments(segs_file, segments, label=speech_label)
        return None
    except Exception as e:
        tb = traceback.format_exc()
        return e, tb
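
# Hedged usage sketch for perform_vad. The file paths are placeholders, and the
# keyword names forwarded to utils.vad (fs_vad / hoplength / vad_mode) are an
# assumption borrowed from the next example; adjust to whatever utils.vad accepts.
result = perform_vad('speech.wav', 'speech_segments.txt', 'speech',
                     fs_vad=16000, hoplength=30, vad_mode=2)
if result is not None:
    exc, tb = result  # VAD failed: the exception and its full traceback are returned
    print(tb)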
def main_vad(wav_dir, mode, hop):
    if not os.path.exists(wav_dir):
        raise RuntimeError("cannot locate the original directory!")
    wav_files = [os.path.join(wav_dir, line) for line in os.listdir(wav_dir)]
    for wav in wav_files:
        if wav.endswith('.wav'):
            data, fs = load(wav, sr=16000)
            vad_info = utils.vad(data, fs, fs_vad=16000, hoplength=hop, vad_mode=mode)
            segments = utils.get_segments(vad_info, fs)
            # Write one "start end" row (in seconds) per detected speech segment.
            with open(wav.replace('.wav', '.sad'), 'w') as output_file:
                for i in range(segments.shape[0]):
                    start_time = segments[i][0]
                    end_time = segments[i][1]
                    output_file.write("%.3f %.3f \n" % (start_time, end_time))
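
# Hypothetical invocation of main_vad, assuming a folder of 16 kHz WAV files;
# the directory name, VAD aggressiveness (mode) and hop length (hop) are placeholders.
# Each recording gets a companion .sad file with "start end" rows in seconds.
main_vad('recordings/', mode=2, hop=30)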
def preprocess(file, BATCH_SIZE, max_length, tokenizer):
    train_dataset = []
    input_vocab_size = len(tokenizer.vocab)
    f = open(file, 'r')
    words = f.read()
    f.close()
    words = words.replace('\n\n', '.')
    words = words.replace('\n', ' ')
    words = re.split('[;:.!?]', words)

    i = 0
    for _ in range(len(words) // BATCH_SIZE + 1):
        if i + 1 >= len(words):
            break
        input_ids_list = []
        segment_list = []
        is_masked_list = []
        is_next_list = []
        for j in range(BATCH_SIZE):
            if i + 1 >= len(words):
                break
            # Decide whether the 2nd sentence is the actual next sentence or a random one.
            now = int(random.random() > 0.5)
            if now == 1:
                res = ["[CLS]"] + tokenizer.tokenize(words[i]) + ["[SEP]"] \
                      + tokenizer.tokenize(words[i + 1]) + ["[SEP]"]
            else:
                res = ["[CLS]"] + tokenizer.tokenize(words[i]) + ["[SEP]"] \
                      + tokenizer.tokenize(words[random.randint(0, len(words) - 1)]) + ["[SEP]"]
            input_ids = get_ids(res, tokenizer, max_length)
            segment_list.append(get_segments(res, max_length))
            is_next_list.append(now)

            is_masked = [0] * max_length
            for ind in range(max_length):
                if input_ids[ind] == 0:  # if a padding token appears, stop
                    break
                if input_ids[ind] == 101 or input_ids[ind] == 102:  # don't mask [CLS] and [SEP] tokens
                    continue
                if random.random() < 0.15:  # mask 15% of tokens
                    is_masked[ind] = input_ids[ind]
                    if random.random() < 0.8:  # of that 15%, replace 80% with [MASK]
                        input_ids[ind] = 103
                    elif random.random() < 0.5:  # replace half of the rest (10% overall) with a random token
                        input_ids[ind] = random.randint(1000, input_vocab_size)
                    # the remaining tokens keep their original id
            input_ids_list.append(input_ids)
            is_masked_list.append(is_masked)
            if now == 1:
                i += 2
            else:
                i += 1

        input_ids_list = np.array(input_ids_list)
        is_masked_list = np.array(is_masked_list)
        masks = create_padding_mask(input_ids_list)
        segment_list = np.array(segment_list)
        is_next_list = np.array(is_next_list)
        is_next_list = np.reshape(is_next_list, (len(is_next_list), 1))
        train_dataset.append([
            input_ids_list, segment_list, masks, is_next_list, is_masked_list
        ])
    return train_dataset
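
# Sketch of how the returned batches are laid out. The corpus path and the
# HuggingFace tokenizer are assumptions (the original may use any BERT tokenizer
# exposing .vocab and .tokenize); the shape comments follow from the code above.
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
batches = preprocess('corpus.txt', BATCH_SIZE=32, max_length=128, tokenizer=tokenizer)
input_ids, segment_ids, masks, is_next, is_masked = batches[0]
# input_ids:   (batch, max_length) token ids after [MASK]/random replacement
# segment_ids: (batch, max_length) sentence A/B ids from get_segments
# masks:       padding mask built by create_padding_mask
# is_next:     (batch, 1) next-sentence labels (1 = real next sentence)
# is_masked:   (batch, max_length) original ids at masked positions, 0 elsewhere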
FS = 300
LENGTH = 9000
LR = 1e-3
RESAMP = False

try:
    segments = np.load('../data/segment.npy')
except:
    signals, labels = get_ecg(PATH, length=LENGTH)
    segments = np.zeros((245990, 1001))
    k = 0
    for i, record in enumerate(signals):
        rp = qrs_detection(record, sample_rate=FS)
        seg = get_segments(record, rp, labels[i])
        if seg is not None:
            segments[k:k + seg.shape[0], :] = seg
            k += seg.shape[0]
    del signals, labels
    np.save('./data/segment.npy', segments)

X, y = segments[:, :-1], segments[:, -1][:, np.newaxis]
del segments
train, test = build_dataloader(X, y, resamp=RESAMP, batch_size=BATCH_SIZE)
del X, y

net = cnn_feed_lstm()
try:
                    help='path of a text file with a word list')
parser.add_argument('-c', '--config', help='path of config file')
parser.add_argument('-f', '--frequency', action='store_true',
                    help='display frequency of segments')
parser.add_argument('-e', '--check-inventory', action='store_true',
                    help='display if the segment belongs to the segment inventory or not')
parser.add_argument('--version', action='version', version='%(prog)s 1.0')
args = parser.parse_args()

configPath = '../config.cfg' if args.config is None else args.config
config = configparser.ConfigParser()
config.read_file(open(configPath))
lst_charGroups = config.get('SETTINGS', 'complexCharList').split()
lst_phonInventory = config.get('SETTINGS', 'validSegmentList').split()

# Run command
lst_wordList = utils.get_wordlist_from_raw_text(args.input_file)
utils.get_segments(lst_wordList=lst_wordList,
                   lst_charGroups=lst_charGroups,
                   lst_phonInventory=lst_phonInventory,
                   display_frequencyColumn=args.frequency,
                   display_isInInventoryColumn=args.check_inventory)
def _valid_file(filename, threshold=.1):
    try:
        return utils.get_segments(filename)[-1] > threshold
    except:
        return False
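
# Minimal usage sketch: keep only files whose final segment boundary exceeds the
# threshold, and silently drop anything get_segments cannot parse.
# The file names below are placeholders.
candidates = ['a.wav', 'b.wav', 'corrupt.wav']
usable = [f for f in candidates if _valid_file(f, threshold=.1)]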
def new_dream(audio_memory):

    #import matplotlib.pyplot as plt
    #plt.ion()

    try:
        print 'Dreaming - removing wrongly binned filenames'
        mega_filenames_and_indexes = []

        for audio_id, audio_segments in audio_memory.audio_ids.iteritems():
            NAP_detail = 'low'
            filenames_and_indexes = []

            for audio_segment in audio_segments:
                segstart, segend = audio_segment.segment_idxs
                audio_times = utils.get_segments(audio_segments.wav_file)
                norm_segstart = segstart/audio_times[-1]
                norm_segend = segend/audio_times[-1]
                filenames_and_indexes.append([ soundfile, norm_segstart, norm_segend, audio_id, NAP_detail ])

            mega_filenames_and_indexes.extend(filenames_and_indexes)

            k = 2
            print 'Examining audio_id {}'.format(audio_id)

            if len(audio_segments) == 1:
                print 'Just one member in this audio_id, skipping analysis'
                continue

            sparse_codes = mysai.experiment(filenames_and_indexes, k)
            # plt.matshow(sparse_codes, aspect='auto')
            # plt.colorbar()
            # plt.draw()

            coarse = np.mean(sparse_codes, axis=1)
            coarse.shape = (len(coarse), 1)

            codebook,_ = kmeans(coarse, k)
            instances = [ vq(np.atleast_2d(s), codebook)[0] for s in coarse ]

            freqs = itemfreq(instances)
            sorted_freqs = sorted(freqs, key=lambda x: x[1])

            print 'Average sparse codes: {} Class count: {}'.format(list(itertools.chain.from_iterable(coarse)), sorted_freqs)

            if len(sorted_freqs) == 1:
                print 'Considered to be all the same.'
                continue

            fewest_class = sorted_freqs[0][0]
            ousted_audio_segments = [ audio_segment for audio_segment, i in zip(audio_segments, instances) if i == fewest_class ]
            print 'Class {} has fewest members, deleting audio_segments {}'.format(fewest_class, ousted_audio_segments)

            filter(audio_memory.forget, ousted_audio_segments)

        print 'Creating mega super self-organized class'

        for row in mega_filenames_and_indexes:
            row[-1] = 'high'

        high_resolution_k = 256
        clusters = 24

        sparse_codes = mysai.experiment(mega_filenames_and_indexes, high_resolution_k)
        sparse_codes = np.array(sparse_codes)
        # plt.matshow(sparse_codes, aspect='auto')
        # plt.colorbar()
        # plt.draw()

        codebook,_ = kmeans(sparse_codes, clusters)
        instances = [ vq(np.atleast_2d(s), codebook)[0] for s in sparse_codes ]

        cluster_list = {}
        for mega, instance in zip(mega_filenames_and_indexes, instances):
            soundfile,_,_,audio_id,_ = mega
            cluster_list[(soundfile, audio_id)] = instance

        print cluster_list
    except:
        utils.print_exception('NIGHTMARE!')
def new_learn_audio(host, debug=False):
    context = zmq.Context()

    mic = context.socket(zmq.SUB)
    mic.connect('tcp://{}:{}'.format(host, IO.MIC))
    mic.setsockopt(zmq.SUBSCRIBE, b'')

    dreamQ = context.socket(zmq.PUSH)
    dreamQ.connect('tcp://{}:{}'.format(host, IO.DREAM))

    stateQ, eventQ, brainQ = _three_amigos(context, host)

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, IO.EXTERNAL))

    counterQ = context.socket(zmq.REQ)
    counterQ.connect('tcp://{}:{}'.format(host, IO.COUNTER))

    poller = zmq.Poller()
    poller.register(mic, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)
    poller.register(eventQ, zmq.POLLIN)

    audio = deque()

    NAPs = []
    wavs = []
    wav_audio_ids = {}
    NAP_hashes = {}

    audio_classifier = []
    audio_recognizer = []
    global_audio_recognizer = []
    mixture_audio_recognizer = []
    maxlen = []

    deleted_ids = []

    state = stateQ.recv_json()

    black_list = open('black_list.txt', 'a')

    audio_memory = AudioMemory()

    if debug:
        import matplotlib.pyplot as plt
        plt.ion()

    while True:
        events = dict(poller.poll())

        if stateQ in events:
            state = stateQ.recv_json()

        if mic in events:
            new_audio = utils.recv_array(mic)
            if state['record']:
                audio.append(new_audio)

        if eventQ in events:
            pushbutton = eventQ.recv_json()

            if 'learn' in pushbutton:
                try:
                    t0 = time.time()
                    filename = pushbutton['filename']
                    audio_segments = utils.get_segments(filename)

                    print 'Learning {} duration {} seconds with {} segments'.format(filename, audio_segments[-1], len(audio_segments)-1)
                    new_sentence = utils.csv_to_array(filename + 'cochlear')
                    norm_segments = np.rint(new_sentence.shape[0]*audio_segments/audio_segments[-1]).astype('int')

                    audio_ids = []
                    new_audio_hash = []
                    amps = utils.get_amps(filename)
                    most_significant_value = -np.inf
                    most_significant_audio_id = []

                    original_NAP_length = len(NAPs)

                    for segment, new_sound in enumerate([ utils.trim_right(new_sentence[norm_segments[i]:norm_segments[i+1]]) for i in range(len(norm_segments)-1) ]):

                        # We filter out short, abrupt sounds with lots of noise.
                        if np.mean(new_sound) < 2 or new_sound.shape[0] == 0:
                            black_list.write('{} {}\n'.format(filename, segment))
                            print 'BLACKLISTED segment {} in file {}'.format(segment, filename)
                            continue

                        if debug:
                            utils.plot_NAP_and_energy(new_sound, plt)

                        audio_id = audio_memory.learn(new_sound, filename, [ audio_segments[segment], audio_segments[segment+1] ])

                        # START LEGACY
                        try:
                            wavs[audio_id].append(filename)
                        except:
                            wavs.append([filename])
                        wav_audio_ids[(filename, audio_id)] = [ audio_segments[segment], audio_segments[segment+1] ]
                        # END LEGACY

                        audio_ids.append(audio_id)

                        if amps[segment] > most_significant_value:
                            most_significant_audio_id = audio_id
                            most_significant_value = amps[segment]

                    black_list.flush()
                    print 'AUDIO IDs after blacklisting {}'.format(audio_ids)

                    if len(audio_ids):
                        # while len(NAPs) - len(deleted_ids) > AUDIO_MEMORY_SIZE:
                        #     utils.delete_loner(counterQ, NAPs, 'audio_ids_counter', int(AUDIO_MEMORY_SIZE*PROTECT_PERCENTAGE), deleted_ids)

                        # maxlen = max([ m.shape[0] for memory in NAPs for m in memory if len(m) ])
                        # memories = [ np.ndarray.flatten(utils.zero_pad(m, maxlen)) for memory in NAPs for m in memory if len(m) ]
                        # targets = [ i for i,f in enumerate(NAPs) for k in f if len(k) ]
                        # audio_classifier = train_rPCA_SVM(memories, targets)

                        # all_hammings = [ utils.hamming_distance(new_audio_hash[i], new_audio_hash[j])
                        #                  for i in range(len(new_audio_hash)) for j in range(len(new_audio_hash)) if i > j ]
                        # print 'RHYME VALUE', np.mean(sorted(all_hammings)[int(len(all_hammings)/2):])
                        # rhyme = np.mean(sorted(all_hammings)[int(len(all_hammings)/2):]) < RHYME_HAMMERTIME
                        # sender.send_json('rhyme {}'.format(rhyme))

                        brainQ.send_pyobj(['audio_learn', filename, audio_ids, audio_memory, most_significant_audio_id, wavs, wav_audio_ids])
                        print 'Audio learned from {} in {} seconds'.format(filename, time.time() - t0)
                    else:
                        print 'SKIPPING fully blacklisted file {}'.format(filename)
                except:
                    utils.print_exception('Audio learning aborted.')

                audio.clear()

            if 'dream' in pushbutton:
                new_dream(audio_memory)

            if 'save' in pushbutton:
                utils.save('{}.{}'.format(pushbutton['save'], mp.current_process().name),
                           [ deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory ])

            if 'load' in pushbutton:
                deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory = utils.load('{}.{}'.format(pushbutton['load'], mp.current_process().name))
def new_respond(control_host, learn_host, debug=False):
    context = zmq.Context()

    eventQ = context.socket(zmq.SUB)
    eventQ.connect('tcp://{}:{}'.format(control_host, IO.EVENT))
    eventQ.setsockopt(zmq.SUBSCRIBE, b'')

    projector = context.socket(zmq.PUSH)
    projector.connect('tcp://{}:{}'.format(control_host, IO.PROJECTOR))

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(control_host, IO.EXTERNAL))

    brainQ = context.socket(zmq.PULL)
    brainQ.bind('tcp://*:{}'.format(IO.BRAIN))

    counterQ = context.socket(zmq.REQ)
    counterQ.connect('tcp://{}:{}'.format(control_host, IO.COUNTER))

    cognitionQ = context.socket(zmq.PUSH)
    cognitionQ.connect('tcp://{}:{}'.format(control_host, IO.COGNITION))

    association = context.socket(zmq.REQ)
    association.connect('tcp://{}:{}'.format(learn_host, IO.ASSOCIATION))

    snapshot = context.socket(zmq.REQ)
    snapshot.connect('tcp://{}:{}'.format(control_host, IO.SNAPSHOT))

    scheduler = context.socket(zmq.PUSH)
    scheduler.connect('tcp://{}:{}'.format(control_host, IO.SCHEDULER))

    dreamQ = context.socket(zmq.PULL)
    dreamQ.bind('tcp://*:{}'.format(IO.DREAM))

    snapshot.send_json('Give me state!')
    state = snapshot.recv_json()

    poller = zmq.Poller()
    poller.register(eventQ, zmq.POLLIN)
    poller.register(brainQ, zmq.POLLIN)
    poller.register(dreamQ, zmq.POLLIN)

    sound_to_face = []
    wordFace = {}
    face_to_sound = []
    faceWord = {}
    register = {}
    video_producer = {}
    voiceType1 = 1
    voiceType2 = 6
    wordSpace1 = 0.3
    wordSpaceDev1 = 0.3
    wordSpace2 = 0.1
    wordSpaceDev2 = 0.3

    audio_ids = []
    wavs = []
    wav_audio_ids = []
    NAP_hashes = {}

    most_significant_audio_id = []

    if debug:
        import matplotlib.pyplot as plt
        plt.ion()

    while True:
        events = dict(poller.poll())

        if brainQ in events:
            cells = brainQ.recv_pyobj()

            mode = cells[0]
            wav_file = cells[1]

            if wav_file not in register:
                register[wav_file] = [False, False, False]

            if mode == 'audio_learn':
                register[wav_file][0] = cells

            if mode == 'video_learn':
                register[wav_file][1] = cells

            if mode == 'face_learn':
                register[wav_file][2] = cells

            if all(register[wav_file]):
                _, _, audio_ids, audio_memory, most_significant_audio_id, wavs, wav_audio_ids = register[wav_file][0]
                _, _, tarantino = register[wav_file][1]
                _, _, face_id, face_recognizer = register[wav_file][2]

                print 'Audio - video - face recognizers related to {} arrived at responder, total processing time {} seconds'.format(wav_file, time.time() - utils.filetime(wav_file))

                for audio_id in audio_ids: # If audio_ids is empty, none of this will happen
                    video_producer[(audio_id, face_id)] = tarantino
                    if audio_id < len(sound_to_face) and not face_id in sound_to_face[audio_id]: # sound heard before, but not said by this face
                        sound_to_face[audio_id].append(face_id)
                    if audio_id == len(sound_to_face):
                        sound_to_face.append([face_id])

                    wordFace.setdefault(audio_id, [[face_id,0]])
                    found = 0
                    for item in wordFace[audio_id]:
                        if item[0] == face_id:
                            item[1] += 1
                            found = 1
                    if found == 0:
                        wordFace[audio_id].append([face_id,1])

                    # We can't go from a not known face to any of the sounds, that's just the way it is.
                    print 'face_id for audio segment learned', face_id
                    if face_id != -1:
                        if face_id < len(face_to_sound) and not audio_id in face_to_sound[face_id]: # face seen before, but the sound is new
                            face_to_sound[face_id].append(audio_id)
                        if face_id == len(face_to_sound):
                            face_to_sound.append([audio_id])

                        faceWord.setdefault(face_id, [[audio_id,0]])
                        found = 0
                        for item in faceWord[face_id]:
                            if item[0] == audio_id:
                                item[1] += 1
                                found = 1
                        if found == 0:
                            faceWord[face_id].append([audio_id,1])

                del register[wav_file]

                similar_ids = []
                for audio_id in audio_ids:
                    # I SUSPECT THIS IS WRONG, SINCE THERE IS NO SORTING OF THESE HAMMING DISTANCES IN ASSOCIATION.PY
                    new_audio_hash = audio_memory.audio_ids[audio_id][-1].crude_hash
                    similar_ids_for_this_audio_id = [ utils.hamming_distance(new_audio_hash, random.choice(h).crude_hash) for h in audio_memory.audio_ids.itervalues() ]
                    similar_ids.append(similar_ids_for_this_audio_id)

                if len(audio_ids):
                    association.send_pyobj(['analyze',wav_file,wav_audio_ids,audio_ids,wavs,similar_ids,wordFace,faceWord])
                    association.recv_pyobj()
                    sender.send_json('last_most_significant_audio_id {}'.format(most_significant_audio_id))

                cognitionQ.send_pyobj(face_recognizer) # A possibility of recognizing a face that is not connected to any soundfiles

        if eventQ in events:
            pushbutton = eventQ.recv_json()

            if 'respond_single' in pushbutton:
                try:
                    filename = pushbutton['filename']
                    audio_segments = utils.get_segments(filename)

                    print 'Single response to {} duration {} seconds with {} segments'.format(filename, audio_segments[-1], len(audio_segments)-1)
                    new_sentence = utils.csv_to_array(filename + 'cochlear')
                    norm_segments = np.rint(new_sentence.shape[0]*audio_segments/audio_segments[-1]).astype('int')

                    segment_id = utils.get_most_significant_word(filename)

                    NAP = utils.trim_right(new_sentence[norm_segments[segment_id]:norm_segments[segment_id+1]])

                    if debug:
                        plt.imshow(NAP.T, aspect='auto')
                        plt.draw()

                    best_match,_,_,_,_ = audio_memory.find(NAP)
                    soundfile = best_match.wav_file
                    segstart, segend = best_match.segment_idxs

                    voiceChannel = 1
                    speed = 1
                    amp = -3 # voice amplitude in dB
                    _,dur,maxamp,_ = utils.getSoundInfo(soundfile)

                    start = 0
                    voice1 = 'playfile {} {} {} {} {} {} {} {} {}'.format(1, voiceType1, start, soundfile, speed, segstart, segend, amp, maxamp)
                    voice2 = ''

                    print 'Recognized as sound {}'.format(best_match.audio_id)

                    # sound_to_face, video_producer
                    projection = _project(best_match.audio_id, sound_to_face, NAP, video_producer)

                    scheduler.send_pyobj([[ dur, voice1, voice2, projection, FRAME_SIZE ]])
                    print 'Respond time from creation of wav file was {} seconds'.format(time.time() - utils.filetime(filename))
                except:
                    utils.print_exception('Single response aborted.')

            if 'play_sentence' in pushbutton:
                try:
                    sentence = pushbutton['sentence']
                    sentence = eval(sentence)
                    print '*** (play) Play sentence', sentence

                    start = 0
                    nextTime1 = 0
                    play_events = []
                    for i in range(len(sentence)):
                        word_id = sentence[i]
                        soundfile = np.random.choice(wavs[word_id])
                        speed = 1

                        segstart, segend = wav_audio_ids[(soundfile, word_id)]
                        NAP = _extract_NAP(segstart, segend, soundfile)

                        amp = -3 # voice amplitude in dB
                        _,totaldur,maxamp,_ = utils.getSoundInfo(soundfile)
                        dur = segend-segstart
                        if dur <= 0:
                            dur = totaldur

                        # play in both voices
                        voice1 = 'playfile {} {} {} {} {} {} {} {} {}'.format(1, voiceType1, start, soundfile, speed, segstart, segend, amp, maxamp)
                        voice2 = 'playfile {} {} {} {} {} {} {} {} {}'.format(2, voiceType1, start, soundfile, speed, segstart, segend, amp, maxamp)

                        wordSpacing1 = wordSpace1 + np.random.random()*wordSpaceDev1
                        print 'PLAY RESPOND SPACING', wordSpacing1
                        nextTime1 += (dur/speed)+wordSpacing1

                        projection = _project(audio_id, sound_to_face, NAP, video_producer)

                        play_events.append([ dur+wordSpacing1, voice1, voice2, projection, FRAME_SIZE ])

                    scheduler.send_pyobj(play_events)
                except:
                    utils.print_exception('Sentence play aborted.')

            if 'respond_sentence' in pushbutton:
                print 'SENTENCE Respond to', pushbutton['filename'][-12:]

                try:
                    filename = pushbutton['filename']
                    audio_segments = utils.get_segments(filename)

                    print 'Sentence response to {} duration {} seconds with {} segments'.format(filename, audio_segments[-1], len(audio_segments)-1)
                    new_sentence = utils.csv_to_array(filename + 'cochlear')
                    norm_segments = np.rint(new_sentence.shape[0]*audio_segments/audio_segments[-1]).astype('int')

                    segment_id = utils.get_most_significant_word(filename)
                    print '**Sentence selected to respond to segment {}'.format(segment_id)

                    NAP = utils.trim_right(new_sentence[norm_segments[segment_id]:norm_segments[segment_id+1]])

                    best_match,_,_,_,_ = audio_memory.find(NAP)
                    audio_id = best_match.audio_id
                    soundfile = best_match.wav_file

                    numWords = len(audio_segments)-1
                    print numWords

                    association.send_pyobj(['setParam', 'numWords', numWords ])
                    association.recv_pyobj()

                    association.send_pyobj(['makeSentence', audio_id])
                    print 'respond_sentence waiting for association output...',
                    sentence, secondaryStream = association.recv_pyobj()

                    print '*** (respond) Play sentence', sentence, secondaryStream

                    start = 0
                    nextTime1 = 0
                    nextTime2 = 0
                    enableVoice2 = 1

                    play_events = []
                    for i in range(len(sentence)):
                        word_id = sentence[i]
                        soundfile = np.random.choice(wavs[word_id])
                        voiceChannel = 1
                        speed = 1

                        # segment start and end within sound file, if zero, play whole file
                        segstart, segend = wav_audio_ids[(soundfile, word_id)]
                        NAP = _extract_NAP(segstart, segend, soundfile)

                        amp = -3 # voice amplitude in dB
                        #totaldur, maxamp = utils.getSoundParmFromFile(soundfile)
                        _,totaldur,maxamp,_ = utils.getSoundInfo(soundfile)

                        dur = segend-segstart
                        if dur <= 0:
                            dur = totaldur

                        voice1 = 'playfile {} {} {} {} {} {} {} {} {}'.format(voiceChannel, voiceType1, start, soundfile, speed, segstart, segend, amp, maxamp)
                        #start += dur # if we want to create a 'score section' for Csound, update start time to make segments into a contiguous sentence

                        wordSpacing1 = wordSpace1 + np.random.random()*wordSpaceDev1
                        nextTime1 += (dur/speed)+wordSpacing1

                        #print 'voice 2 ready to play', secondaryStream[i], i
                        if enableVoice2:
                            word_id2 = secondaryStream[i]
                            #print 'voice 2 playing', secondaryStream[i]
                            soundfile2 = np.random.choice(wavs[word_id2])
                            voiceChannel2 = 2
                            start2 = 0.7 # set delay between voice 1 and 2
                            speed2 = 0.7
                            amp2 = -10 # voice amplitude in dB
                            try:
                                segstart2, segend2 = wav_audio_ids[(soundfile2, word_id2)]
                                dur2 = segend2-segstart2
                                #totalDur2, maxamp2 = utils.getSoundParmFromFile(soundfile2)
                                _,totalDur2,maxamp2,_ = utils.getSoundInfo(soundfile2)
                                if dur2 <= 0:
                                    dur2 = totalDur2
                                voice2 = 'playfile {} {} {} {} {} {} {} {} {}'.format(voiceChannel2, voiceType2, start2, soundfile2, speed2, segstart2, segend2, amp2, maxamp2)
                                wordSpacing2 = wordSpace2 + np.random.random()*wordSpaceDev2
                                nextTime2 += (dur2/speed2)+wordSpacing2
                            except:
                                voice2 = ''
                                utils.print_exception('VOICE 2 tried to access an illegal soundfile/audio_id combination.')
                            #enableVoice2 = 0

                        # trig another word in voice 2 only if word 2 has finished playing (and sync to start of voice 1)
                        if nextTime1 > nextTime2:
                            enableVoice2 = 1

                        projection = _project(audio_id, sound_to_face, NAP, video_producer)

                        print 'SENTENCE RESPOND SPACING', wordSpacing1
                        play_events.append([ dur+wordSpacing1, voice1, voice2, projection, FRAME_SIZE ])

                    scheduler.send_pyobj(play_events)
                    print 'Sentence respond time from creation of wav file was {} seconds'.format(time.time() - utils.filetime(filename))
                except:
                    utils.print_exception('Sentence response aborted.')

            if 'testSentence' in pushbutton:
                print 'testSentence', pushbutton
                association.send_pyobj(['makeSentence',int(pushbutton['testSentence'])])
                print 'testSentence waiting for association output...'
                sentence, secondaryStream = association.recv_pyobj()
                print '*** Test sentence', sentence, secondaryStream

            if 'assoc_setParam' in pushbutton:
                try:
                    parm, value = pushbutton['assoc_setParam'].split()
                    association.send_pyobj(['setParam', parm, value ])
                    association.recv_pyobj()
                except:
                    utils.print_exception('Assoc set param aborted.')

            if 'respond_setParam' in pushbutton:
                items = pushbutton['respond_setParam'].split()
                if items[0] == 'voiceType':
                    chan = items[1]
                    if chan == '1': voiceType1 = int(items[2])
                    if chan == '2': voiceType2 = int(items[2])
                if items[0] == 'wordSpace':
                    chan = items[1]
                    print 'wordSpace chan', chan, items
                    if chan == '1': wordSpace1 = float(items[2])
                    if chan == '2': wordSpace2 = float(items[2])
                if items[0] == 'wordSpaceDev':
                    chan = items[1]
                    print 'wordSpaceDev1 chan', chan, items
                    if chan == '1': wordSpaceDev1 = float(items[2])
                    if chan == '2': wordSpaceDev2 = float(items[2])

            if 'play_id' in pushbutton:
                try:
                    items = pushbutton['play_id'].split(' ')
                    if len(items) < 3:
                        print 'PARAMETER ERROR: play_id audio_id voiceChannel voiceType'
                    play_audio_id = int(items[0])
                    voiceChannel = int(items[1])
                    voiceType = int(items[2])
                    print 'play_audio_id', play_audio_id, 'voice', voiceChannel
                    print 'wavs[play_audio_id]', wavs[play_audio_id]
                    #print wavs
                    soundfile = np.random.choice(wavs[play_audio_id])

                    speed = 1
                    #print 'wav_audio_ids', wav_audio_ids
                    segstart, segend = wav_audio_ids[(soundfile, play_audio_id)]
                    #segstart = 0 # segment start and end within sound file
                    #segend = 0 # if zero, play whole file
                    amp = -3 # voice amplitude in dB
                    #dur, maxamp = utils.getSoundParmFromFile(soundfile)
                    _,dur,maxamp,_ = utils.getSoundInfo(soundfile)
                    start = 0
                    sender.send_json('playfile {} {} {} {} {} {} {} {} {}'.format(voiceChannel, voiceType, start, soundfile, speed, segstart, segend, amp, maxamp))
                except:
                    utils.print_exception('play_id aborted.')

            if 'print_me' in pushbutton:
                # just for inspecting the contents of objects while running
                print 'printing '+pushbutton['print_me']
                if 'brain ' in pushbutton['print_me']:
                    print_variable = pushbutton['print_me'].split('brain ')[-1]
                    try:
                        print eval(print_variable)
                    except Exception, e:
                        print e, 'print_me in brain failed.'
                elif 'association ' in pushbutton['print_me']:
                    print_variable = pushbutton['print_me'].split('association ')[-1]
                    association.send_pyobj(['print_me',print_variable])

            if 'dream' in pushbutton:
                play_events = []
                for audio_segment in audio_memory.all_segments():
                    segstart, segend = audio_segment.segment_idxs
                    dur = segend - segstart
                    NAP = _extract_NAP(segstart, segend, audio_segment.wav_file)
                    speed = 1
                    amp = -3
                    maxamp = 1
                    start = 0
                    voice1 = 'playfile {} {} {} {} {} {} {} {} {}'.format(1, 6, np.random.rand()/3, audio_segment.wav_file, speed, segstart, segend, amp, maxamp)
                    projection = _project(audio_segment.audio_id, sound_to_face, NAP, video_producer)
                    voice2 = 'playfile {} {} {} {} {} {} {} {} {}'.format(2, 6, np.random.randint(3,6), audio_segment.wav_file, speed, segstart, segend, amp, maxamp)
                    play_events.append([ dur, voice1, voice2, projection, FRAME_SIZE ])
                print 'Dream mode playing back {} memories'.format(len(play_events))
                scheduler.send_pyobj(play_events)

            if 'save' in pushbutton:
                utils.save('{}.{}'.format(pushbutton['save'], mp.current_process().name),
                           [ sound_to_face, wordFace, face_to_sound, faceWord, video_producer, wavs, wav_audio_ids, audio_classifier, maxlen, NAP_hashes, face_id, face_recognizer, audio_memory ])

            if 'load' in pushbutton:
                sound_to_face, wordFace, face_to_sound, faceWord, video_producer, wavs, wav_audio_ids, audio_classifier, maxlen, NAP_hashes, face_id, face_recognizer, audio_memory = utils.load('{}.{}'.format(pushbutton['load'], mp.current_process().name))