def test_complex_query(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        vowels = ['aa']
        obstruents = ['k']
        syllabics = ['aa', 'ih']
        q = g.query_graph(g.phone).filter(g.phone.label.in_(syllabics))
        q.set_type('syllabic')
        q = g.query_graph(g.phone).filter(g.phone.label.in_(vowels))
        q = q.filter(g.phone.following.label.in_(obstruents))
        #q = q.filter(g.phone.following.end == g.word.end)
        #q = q.filter(g.word.end == g.utterance.end)
        q = q.clear_columns().columns(
            g.phone.label.column_name('vowel'),
            g.phone.duration.column_name('vowel_duration'),
            g.phone.begin.column_name('vowel_begin'),
            g.phone.end.column_name('vowel_end'),
            g.utterance.phone.rate.column_name('phone_rate'),
            g.word.phone.count.column_name('num_segments_in_word'),
            g.word.phone.subset_type('syllabic').count.column_name('num_syllables_in_word'),
            g.word.discourse.column_name('discourse'),
            g.word.label.column_name('word'),
            g.word.transcription.column_name('word_transcription'),
            g.word.following.label.column_name('following_word'),
            g.word.following.duration.column_name('following_word_duration'),
            g.pause.following.duration.column_name('following_pause_duration'),
            g.phone.following.label.column_name('following_phone'))
        q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 2)
        assert (results[0].num_segments_in_word == 5)
        assert (results[0].num_syllables_in_word == 2)
def test_add_default_voicing_annotations(acoustic_config):
    with CorpusContext(acoustic_config) as c:
        stops = ('p', 't', 'k', 'b', 'd', 'g')
        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))
        q = q.columns(c.phone.id)
        num = q.count()
        assert (num == 28)
        defaults = [('closure', 0, 0.5, {'checked': False}),
                    ('release', 0.5, 1, {'checked': False})]
        add_default_annotations(c, 'phone', defaults, subset=stops)
        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))
        for a in q.all():
            assert (len(a.closure) == 1)
            assert (len(a.release) == 1)
            assert (all(not x.checked for x in a.closure))
            assert (all(not x.checked for x in a.release))
        assert (q.count() == 28)
        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))
        q = q.preload(c.phone.closure, c.phone.release)
        assert (q.count() == 28)
        for a in q.all():
            print([(x.begin, x.end, x._type) for x in a.closure])
            print([(x.begin, x.end, x._type) for x in a.release])
            assert (len(a.closure) == 1)
            assert (len(a.release) == 1)
def run(self):
    config = self.kwargs['config']
    directory = self.kwargs['directory']
    with CorpusContext(config) as c:
        update_sound_files(c, directory)
        all_found = c.has_all_sound_files()
    self.dataReady.emit(all_found)
def run_query(self):
    config = self.kwargs['config']
    discourse = self.kwargs['discourse']
    with CorpusContext(config) as c:
        audio_file = c.discourse_sound_file(discourse)
        if audio_file is not None:
            c.sql_session.expunge(audio_file)
    return audio_file
def updateConfig(self, config):
    self.config = config
    self.discourseList.clear()
    if self.config is None or self.config.corpus_name == '':
        return
    with CorpusContext(self.config) as c:
        for d in sorted(c.discourses):
            self.discourseList.addItem(d)
def updateConfig(self, config):
    self.config = config
    self.changingDiscourse.emit()
    self.discourseWidget.config = config
    if self.config is None:
        return
    with CorpusContext(self.config) as c:
        self.discourseWidget.hierarchy = c.hierarchy
def test_query_speaking_rate(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.word).filter(g.word.label == 'talking')
        q = q.columns(g.word.utterance.word.rate.column_name('words_per_second'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (abs(results[0].words_per_second - (26 / 6.482261)) < 0.001)
def test_encode_utterances(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'um'])
        g.encode_utterances(min_pause_length=0)
        q = g.query_graph(g.utterance).times().duration().order_by(
            g.utterance.begin)
        print(q.cypher())
        results = q.all()
        print(results)
        expected_utterances = [(1.059223, 7.541484), (8.576511, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (24.174348, 24.706663),
                               (24.980290, 25.251656)]
        assert (len(results) == len(expected_utterances))
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))
        assert (abs(results[0].duration - 6.482261) < 0.001)

        g.encode_pauses(['sil'])
        g.encode_utterances(min_pause_length=0)
        expected_utterances = [(1.059223, 7.541484), (8.016164, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (22.865036, 23.554014),
                               (24.174348, 24.706663), (24.980290, 25.251656)]
        q = g.query_graph(g.utterance).times().duration().order_by(
            g.utterance.begin)
        print(q.cypher())
        results = q.all()
        assert (len(g.query_graph(g.pause).all()) == 11)
        assert (len(results) == len(expected_utterances))
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))

        q = g.query_graph(g.utterance).order_by(g.utterance.begin)
        results = q.all()
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))
            assert (r.label is None)

        q = g.query_graph(g.phone).filter(g.phone.begin == g.phone.utterance.begin)
        q = q.order_by(g.phone.begin)
        results = q.all()
        assert (len(results) == len(expected_utterances))
        expected = ['dh', 'ah', 'l', 'ah', 'ae', 'hh', 'w', 'ah', 'ae', 'th']
        for i, r in enumerate(results):
            assert (r.label == expected[i])
def run(self):
    print('beginning pitch work')
    config = self.kwargs['config']
    algorithm = self.kwargs['algorithm']
    sound_file = self.kwargs['sound_file']
    with CorpusContext(config) as c:
        pitch_list = get_pitch(c, sound_file, algorithm)
        pitch_list = np.array([[x.time, x.F0] for x in pitch_list])
    self.dataReady.emit(pitch_list)
    print('finished pitch work')
def run_query(self):
    a_type = self.kwargs['annotation_type']
    config = self.kwargs['config']
    with CorpusContext(config) as c:
        a_type = getattr(c, a_type)
        query = c.query_graph(a_type)
        query = query.times().columns(a_type.discourse.column_name('discourse'))
        results = query.all()
    return query, results
def test_utterance_nosilence(graph_db, textgrid_test_dir):
    tg_path = os.path.join(textgrid_test_dir, 'phone_word_no_silence.TextGrid')
    with CorpusContext('word_phone_nosilence', **graph_db) as g:
        g.reset()
        parser = inspect_textgrid(tg_path)
        parser.annotation_types[0].linguistic_type = 'phone'
        parser.annotation_types[1].linguistic_type = 'word'
        parser.hierarchy['word'] = None
        parser.hierarchy['phone'] = 'word'
        g.load(parser, tg_path)
        g.encode_utterances()

        q = g.query_graph(g.word).filter(g.word.label == 'b')
        q = q.columns(g.word.following.label.column_name('following_word'))
        print(q.cypher())
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].following_word is None)

        q = g.query_graph(g.word).filter(g.word.begin == g.word.utterance.begin)
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].label == 'a')

        q = g.query_graph(g.phone).filter(g.phone.begin == g.phone.utterance.begin)
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].label == 'a')

        #Things like g.phone.word.following are currently broken in PolyglotDB
        return
        q = g.query_graph(g.phone).filter(g.phone.label == 'b')
        q = q.filter(g.phone.following.label == 'b')
        q = q.columns(g.phone.label, g.phone.id,
                      g.phone.word.following.label.column_name('following_word'))
        print(q.cypher())
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].following_word is None)
def run_query(self):
    config = self.kwargs['config']
    try:
        stops = gp_language_stops[config.corpus_name]
    except KeyError:
        print('Couldn\'t find corpus name in stops, defaulting to p, t, k, b, d, g')
        stops = ['p', 't', 'k', 'b', 'd', 'g']
    with CorpusContext(config) as c:
        # Resolve annotation types from the corpus hierarchy: lowest is the
        # segment level, highest is the utterance level.
        a_type = c.hierarchy.lowest
        w_type = c.hierarchy[a_type]
        utt_type = c.hierarchy.highest
        a_type = getattr(c, a_type)
        w_type = getattr(a_type, w_type)
        utt_type = getattr(a_type, utt_type)
        q = c.query_graph(a_type)
        q = q.order_by(a_type.discourse.name)
        q = q.order_by(a_type.begin)
        q = q.filter(a_type.phon4lab1 == True)
        #print('Number found: {}'.format(q.count()))
        q = q.columns(a_type.label.column_name('Stop'),
                      a_type.begin.column_name('Begin'),
                      a_type.end.column_name('End'),
                      w_type.label.column_name('Word'),
                      a_type.checked.column_name('Annotated'),
                      a_type.speaker.name.column_name('Speaker'),
                      a_type.discourse.name.column_name('Discourse'),
                      a_type.id.column_name('Unique_id'),
                      a_type.notes.column_name('Notes'))
        # Only include subannotation columns that exist in this corpus.
        if 'burst' in c.hierarchy.subannotations[c.hierarchy.lowest]:
            q = q.columns(a_type.burst.begin.column_name('Burst_begin'),
                          a_type.burst.end.column_name('Burst_end'),
                          a_type.burst.duration.column_name('Burst_duration'))
        if 'voicing' in c.hierarchy.subannotations[c.hierarchy.lowest]:
            q = q.columns(a_type.voicing.begin.column_name('Voicing_begin'),
                          a_type.voicing.end.column_name('Voicing_end'),
                          a_type.voicing.duration.column_name('Voicing_duration'))
        #q = q.limit(100)
        results = q.all()
    return q, results
def test_query_with_pause(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'uh', 'um'])
        q = g.query_graph(g.word).filter(g.word.label == 'cares')
        q = q.columns(g.word.following.label.column_name('following'),
                      g.pause.following.label.column_name('following_pause'),
                      g.pause.following.duration.column_name('following_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        print(results)
        assert (len(results) == 1)
        assert (results[0].following == 'this')
        assert (results[0].following_pause == ['sil', 'um'])
        assert (abs(results[0].following_pause_duration - 1.035027) < 0.001)

        q = g.query_graph(g.word).filter(g.word.label == 'this')
        q = q.columns(g.word.previous.label.column_name('previous'),
                      g.pause.previous.label.column_name('previous_pause'),
                      g.pause.previous.begin,
                      g.pause.previous.end,
                      g.pause.previous.duration.column_name('previous_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 2)
        assert (results[1].previous == 'cares')
        assert (results[1].previous_pause == ['sil', 'um'])
        assert (abs(results[1].previous_pause_duration - 1.035027) < 0.001)

        g.encode_pauses(['sil'])
        q = g.query_graph(g.word).filter(g.word.label == 'words')
        q = q.columns(g.word.following.label.column_name('following'),
                      g.pause.following.label.column_name('following_pause'),
                      g.pause.following.duration.column_name('following_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 5)
        assert (results[0].following == 'and')
        assert (results[0].following_pause == ['sil'])
        assert (abs(results[0].following_pause_duration - 1.152438) < 0.001)
def run_query(self):
    a_type = self.kwargs['word_type']
    s_type = self.kwargs['seg_type']
    config = self.kwargs['config']
    discourse = self.kwargs['discourse']
    with CorpusContext(config) as c:
        word = getattr(c, a_type)
        q = c.query_graph(word).filter(word.discourse.name == discourse)
        # Preload subannotations and all lower annotation types so the
        # annotations can be displayed without further queries.
        preloads = []
        if a_type in c.hierarchy.subannotations:
            for s in c.hierarchy.subannotations[a_type]:
                preloads.append(getattr(word, s))
        for t in c.hierarchy.get_lower_types(a_type):
            preloads.append(getattr(word, t))
        q = q.preload(*preloads)
        q = q.order_by(word.begin)
        #annotations = c.query_acoustics(q).pitch('reaper').all()
        annotations = q.all()
    return annotations
def test_query_pitch(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label == 'ow').order_by(
            g.phone.begin.column_name('begin'))
        aq = g.query_acoustics(q).pitch('acousticsim')
        results = aq.all()
        expected_pitch = {4.23: 98.2, 4.24: 390.2, 4.25: 0.0,
                          4.26: 95.8, 4.27: 95.8}
        assert (set(results[0].pitch.keys()) == set(expected_pitch.keys()))
        for k, v in results[0].pitch.items():
            assert (round(v, 1) == expected_pitch[k])
        assert (round(aq.max()[0].max_pitch, 1) == round(max(expected_pitch.values()), 1))
def changeDiscourse(self, discourse):
    if discourse:
        self.changingDiscourse.emit()
        kwargs = {}
        kwargs['config'] = self.config
        kwargs['discourse'] = discourse
        self.audioWorker.setParams(kwargs)
        self.audioWorker.start()

        kwargs = {}
        with CorpusContext(self.config) as c:
            self.discourseWidget.updateHierachy(c.hierarchy)
            kwargs['seg_type'] = c.hierarchy.lowest
            kwargs['word_type'] = c.hierarchy.highest
        kwargs['config'] = self.config
        kwargs['discourse'] = discourse
        self.worker.setParams(kwargs)
        self.worker.start()
def test_utterance_position(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'um'])
        q = g.query_graph(g.pause)
        print(q.all())
        g.encode_utterances(min_pause_length=0)

        q = g.query_graph(g.word)
        q = q.filter(g.word.label == 'this')
        q = q.order_by(g.word.begin)
        q = q.columns(g.word.utterance.word.position.column_name('position'))
        print(q.cypher())
        results = q.all()
        assert (results[0].position == 1)

        q = g.query_graph(g.word)
        q = q.filter(g.word.label == 'talking')
        q = q.order_by(g.word.begin)
        q = q.columns(g.word.utterance.word.position.column_name('position'))
        print(q.cypher())
        results = q.all()
        assert (results[0].position == 7)
        assert (results[1].position == 4)
def test_encode_pause(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        discourse = g.discourse('acoustic_corpus')
        g.encode_pauses(['sil'])
        q = g.query_graph(g.pause)
        print(q.cypher())
        assert (len(q.all()) == 11)
        paused = g.discourse('acoustic_corpus')
        expected = [x for x in discourse if x.label != 'sil']
        for i, d in enumerate(expected):
            print(d.label, paused[i].label)
            assert (d.label == paused[i].label)
        g.reset_pauses()
        new_discourse = g.discourse('acoustic_corpus')
        for i, d in enumerate(discourse):
            assert (d.label == new_discourse[i].label)

        g.encode_pauses(['sil', 'um', 'uh'])
        q = g.query_graph(g.pause)
        print(q.cypher())
        assert (len(q.all()) == 14)
        paused = g.discourse('acoustic_corpus')
        expected = [x for x in discourse if x.label not in ['sil', 'um', 'uh']]
        for i, d in enumerate(expected):
            print(d.label, paused[i].label)
            assert (d.label == paused[i].label)
        g.reset_pauses()
        new_discourse = g.discourse('acoustic_corpus')
        print(discourse)
        print(new_discourse)
        for i, d in enumerate(discourse):
            assert (d.label == new_discourse[i].label)
    'password': '******'
}


def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))


reset = True

if reset:
    print("Getting annotation types..")
    parser = pgio.inspect_textgrid(path_to_gp)
    parser.speaker_parser = FilenameSpeakerParser(5)
    parser.call_back = print

    print('Loading corpus...')
    with CorpusContext('gp_thai', **graph_db) as c:
        c.reset()
        beg = time.time()
        c.load(parser, path_to_gp)
        end = time.time()
        print('Time taken: {}'.format(end - beg))

if __name__ == '__main__':
    with CorpusContext('gp_thai', **graph_db) as g:
        q = g.query_graph(g.phones).filter(g.phones.label == 'd')
        print(q.cypher())
        print(q.count())
sys.path.insert(0, base)

import polyglotdb.io as pgio

from speechtools.corpus import CorpusContext

path_to_timit = r'D:\Data\TIMIT_fixed'

graph_db = {'host': 'localhost', 'port': 7474,
            'user': '******', 'password': '******'}


def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))


parser = pgio.inspect_timit(path_to_timit)
parser.call_back = call_back

with CorpusContext('timit', **graph_db) as c:
    c.reset()
    beg = time.time()
    c.load(parser, path_to_timit)
    end = time.time()
    print('Time taken: {}'.format(end - beg))
def test_query_formants_aggregate_group_by(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label.in_(['aa', 'ae']))
        aq = g.query_acoustics(q).group_by(g.phone.label).formants('acousticsim')
def test_update_sound_files(acoustic_config, textgrid_test_dir):
    with CorpusContext(acoustic_config) as c:
        update_sound_files(c, textgrid_test_dir)
        expected_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.wav')
        assert (c.discourse_sound_file('acoustic_corpus').filepath == expected_path)
def test_analyze_acoustics(graph_db):
    with CorpusContext('acoustic', pause_words=['sil'], **graph_db) as g:
        g.analyze_acoustics()
def test_get_utterances(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil'])
        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0,
                                      min_utterance_length=0)
        expected_utterances = [(1.059223, 7.541484), (8.016164, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (22.865036, 23.554014),
                               (24.174348, 24.706663), (24.980290, 25.251656)]
        print(utterances)
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        utterances = g.get_utterances('acoustic_corpus', min_pause_length=0.5)
        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 22.331874), (22.865036, 23.554014),
                               (24.174348, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0.5,
                                      min_utterance_length=1.0)
        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 23.554014), (24.174348, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0.5,
                                      min_utterance_length=1.1)
        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        g.encode_pauses(['sil', 'um'])
        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0,
                                      min_utterance_length=0)
        expected_utterances = [(1.059223, 7.541484), (8.576511, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (24.174348, 24.706663),
                               (24.980290, 25.251656)]
        print(utterances)
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))
def test_wav_info(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        sf = g.discourse_sound_file('acoustic_corpus')
        assert (sf.sampling_rate == 16000)
        assert (sf.n_channels == 1)
def run(self):
    time.sleep(0.1)
    print('beginning export')
    try:
        config = self.kwargs['config']
        export_path = self.kwargs['path']
        try:
            stops = gp_language_stops[config.corpus_name]
        except KeyError:
            print('Couldn\'t find corpus name in stops, defaulting to p, t, k, b, d, g')
            stops = ['p', 't', 'k', 'b', 'd', 'g']
        with CorpusContext(config) as c:
            # Resolve annotation types from the corpus hierarchy: lowest is
            # the segment level, highest is the utterance level.
            a_type = c.hierarchy.lowest
            w_type = c.hierarchy[a_type]
            utt_type = c.hierarchy.highest
            a_type = getattr(c, a_type)
            w_type = getattr(a_type, w_type)
            utt_type = getattr(a_type, utt_type)
            q = c.query_graph(a_type)
            q = q.order_by(a_type.discourse.name)
            q = q.order_by(a_type.begin)
            q = q.filter(a_type.phon4lab1 == True)
            #print('Number found: {}'.format(q.count()))
            q = q.columns(a_type.label.column_name('Stop'),
                          a_type.begin.column_name('Begin'),
                          a_type.end.column_name('End'),
                          a_type.duration.column_name('Duration'))
            # Only include subannotation columns that exist in this corpus.
            if 'burst' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                q = q.columns(a_type.burst.begin.column_name('Burst_begin'),
                              a_type.burst.end.column_name('Burst_end'),
                              a_type.burst.duration.column_name('Burst_duration'))
            if 'voicing' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                q = q.columns(a_type.voicing.begin.column_name('Voicing_begin'),
                              a_type.voicing.end.column_name('Voicing_end'),
                              a_type.voicing.duration.column_name('Voicing_duration'))
            q = q.columns(w_type.label.column_name('Word'),
                          w_type.begin.column_name('Word_begin'),
                          w_type.end.column_name('Word_end'),
                          w_type.duration.column_name('Word_duration'),
                          w_type.transcription.column_name('Word_transcription'),
                          a_type.following.label.column_name('Following_segment'),
                          a_type.following.begin.column_name('Following_segment_begin'),
                          a_type.following.end.column_name('Following_segment_end'),
                          a_type.following.duration.column_name('Following_segment_duration'),
                          a_type.following.following.label.column_name('Following_following_segment'),
                          a_type.following.following.begin.column_name('Following_following_segment_begin'),
                          a_type.following.following.end.column_name('Following_following_segment_end'),
                          a_type.following.following.duration.column_name('Following_following_segment_duration'),
                          a_type.checked.column_name('Annotated'),
                          a_type.speaker.name.column_name('Speaker'),
                          a_type.discourse.name.column_name('Discourse'),
                          w_type.utterance.phones.rate.column_name('Speaking_rate'),
                          a_type.notes.column_name('Notes'))
            #q = q.limit(100)
            results = q.to_csv(export_path)
    except Exception as e:
        # Report the error to the GUI and exit the worker thread cleanly.
        self.errorEncountered.emit(e)
        return
    print('finished')
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit((q, results))
import sys
import os
import time

base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, base)

import polyglotdb.io as aio

from speechtools.corpus import CorpusContext

from polyglotdb.config import CorpusConfig

graph_db = {'graph_host': 'localhost', 'graph_port': 7474,
            'graph_user': '******', 'graph_password': '******'}

praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

config = CorpusConfig('acoustic', **graph_db)
config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'


def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))


if __name__ == '__main__':
    with CorpusContext(config) as g:
        g.encode_pauses(['sil'])
        g.encode_utterances()
        g.analyze_acoustics()
def test_query_formants(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label == 'aa')
        aq = g.query_acoustics(q).formants('acousticsim')
graph_db = {'host': 'localhost', 'port': 7474,
            'user': '******', 'password': '******'}

first_run = True

from py2neo.packages.httpstream import http
http.socket_timeout = 999

syllabics = set(['aa', 'aan', 'ae', 'aen', 'ah', 'ahn', 'ay', 'ayn',
                 'aw', 'awn', 'ao', 'aon', 'iy', 'iyn', 'ih', 'ihn',
                 'uw', 'uwn', 'uh', 'uhn', 'eh', 'ehn', 'ey', 'eyn',
                 'er', 'el', 'em', 'eng', 'ow', 'own', 'oy', 'oyn'])

import time

with CorpusContext('buckeye', **graph_db) as g:
    if first_run:
        begin = time.time()
        g.encode_pauses('^[<{].*')
        print('Finished encoding pauses in {} seconds'.format(time.time() - begin))
        #g.encode_pauses(['uh','um','okay','yes','yeah','oh','heh','yknow','um-huh',
        #                 'uh-uh','uh-huh','uh-hum','mm-hmm'])
        begin = time.time()
        g.reset_utterances()
        print('Finished resetting utterances in {} seconds'.format(time.time() - begin))
        g.encode_utterances(min_pause_length=0.15)
        print('Finished encoding utterances in {} seconds'.format(time.time() - begin))
    #g.encode_syllables(syllabics)
    begin = time.time()
    q = g.query_graph(g.surface_transcription).filter(
        g.surface_transcription.label.in_(syllabics))
def run(self):
    config = self.kwargs['config']
    with CorpusContext(config) as c:
        all_found = c.has_all_sound_files()
    self.dataReady.emit(all_found)