Пример #1
0
def populate_segs(db, trs_folder):
    """Load every transcript segment found under trs_folder into 'trs_segs'.

    Both '*.trs' and '*.its' files are picked up. trs_folder is pasted
    directly in front of the glob pattern, so it must already end with a path
    separator. One row (file_cd, start, end, speaker) is inserted per segment;
    for .its files the rows come from the utterances of the parsed segments.
    """
    print('Populating db using trs files...')

    filenames = glob.glob('%s*.trs' % (trs_folder))
    filenames += glob.glob('%s*.its' % (trs_folder))
    total_files = len(filenames)

    for file_num, filename in enumerate(filenames, 1):
        print('File %d of %d' % (file_num, total_files))

        is_trs = filename.endswith('.trs')

        if is_trs:
            segs = TRSParser(filename).parse(validate=False)
        else:
            # .its workaround: flatten the parsed segments into their
            # utterances and treat each utterance as a segment from here on
            segs = [
                utter
                for its_seg in ITSParser(filename).parse()
                for utter in its_seg.utters
            ]

        # file code = base name without its 4-character extension, upper-cased
        file_cd = os.path.basename(filename[:-4]).upper()

        for seg in segs:
            # the multi-speaker FUZ check only applies to .trs segments
            if is_trs:
                has_fuz = any(
                    spk.speaker_codeinfo.code == 'FUZ'
                    for spk in seg.speakers)

                if has_fuz and len(seg.speakers) > 1:
                    print('Warning: Found multi-speaker FUZ seg in file: "%s"' % (
                        os.path.basename(filename)))

            # .trs segments carry a speaker list (the first one is stored);
            # flattened .its utterances carry a single speaker
            row = (
                file_cd,
                seg.start,
                seg.end,
                (seg.speakers[0] if is_trs
                 else seg.speaker).speaker_codeinfo.code,
            )
            db.insert('trs_segs', 'file_cd start end speaker'.split(), (row, ))

    print('done.\n')
Пример #2
0
def process_dir(path, env, par_code, writer):
    """Total the child vocalization counts for one directory and write them.

    Every '*.trs' file matching the raw glob prefix ``path`` is parsed. For
    each file that yields segments, the transcriber count and the LENA count
    (looked up in the '<lena_db_path><env>.db' database for the time range
    spanned by those segments) are added to running totals. A single row
    ``[par_code, trans_count, lena_count]`` is then written to ``writer``.
    """
    trs_filenames = glob.glob('%s*.trs' % (path))

    trans_count = 0
    lena_count = 0

    lena_db = Database('%s%s.db' % (lena_db_path, env))

    # try/finally so the database handle is released even when parsing or
    # counting one of the files raises
    try:
        for filename in trs_filenames:
            print('\n\tProcessing file %s' % (os.path.basename(filename)))

            parser = TRSParser(filename)
            segs = get_trans_segs(parser.parse())

            if segs:
                # the examined zone runs from the first segment's start to
                # the last segment's end
                zone_start = segs[0].start
                zone_end = segs[-1].end
                print('\tExamining range: %s (%0.2f) - %s (%0.2f)' % (
                    get_time_str(zone_start), zone_start,
                    get_time_str(zone_end), zone_end))

                trans_count += get_trans_child_vocs(segs)
                lena_count += get_lena_child_vocs(lena_db, filename,
                                                  zone_start, zone_end)
    finally:
        lena_db.close()

    writer.writerow([par_code, trans_count, lena_count])
Пример #3
0
def process_dir(full_item_path, output_dir):
    """Fill a matrix from the .trs files of one directory and write it out.

    ``full_item_path`` is used as a raw glob prefix and must end with '/';
    its last path component names the output file
    '<output_dir><component>-matrix.csv'.
    """
    trs_filenames = glob.glob('%s*.trs' % (full_item_path))
    matrix = build_matrix(TRANS_CODES, LENA_CODES)

    # Start the counters at zero so output_matrix() can still be called when
    # the directory holds no .trs file, or none of them yields segments.
    # Without this the call below raised UnboundLocalError.
    count_single = 0
    count_numbered_multi = 0
    count_unnumbered_multi = 0
    count_angle_brackets = 0

    for filename in trs_filenames:
        print('\n\tProcessing file %s' % (os.path.basename(filename)))

        parser = TRSParser(filename)
        segs = get_trans_segs(parser.parse())
        if not segs:
            continue

        print('\tExamining range: %s - %s' % (
            get_time_str(segs[0].start), get_time_str(segs[-1].end)))

        sm = StateMachine()
        single, numbered_multi, unnumbered_multi = sm.divide_segs(
            segs, use_lena_segmentation=False)

        # NOTE(review): the matrix accumulates over all files, but these
        # counters are overwritten per file, so they describe only the last
        # file that had segments - confirm this is intended.
        count_single = len(single)
        count_numbered_multi = len(numbered_multi)
        count_unnumbered_multi = len(unnumbered_multi)
        count_angle_brackets = (
            count_angle_bracket_segs(single) +
            count_angle_bracket_segs(numbered_multi) +
            count_angle_bracket_segs(unnumbered_multi))

        process_single(single, matrix)
        process_numbered_multi(numbered_multi, matrix)
        process_unnumbered_multi(unnumbered_multi, matrix)

    output_name = '%s%s-matrix.csv' % (output_dir,
                                       full_item_path.split('/')[-2])
    output_matrix(matrix, output_name, count_single, count_numbered_multi,
                  count_unnumbered_multi, count_angle_brackets)
Пример #4
0
def run():
    """Write one '<dir>-matrix.csv' per sub-directory of every environment.

    For each name in ``envs`` the sub-directories of '<input_path><env>/' are
    scanned for .trs files; the resulting matrix goes to
    '<output_path><env>/'. Errors are logged to 'logs/confusion.log'.
    """
    LOGFILE = 'logs/confusion.log'

    #create log file if it doesn't exist
    check_log_file(LOGFILE)
    #set up logging
    logging.basicConfig(level=logging.ERROR,
                        filename=LOGFILE,
                        format='%(asctime)s %(message)s') #prefix each message with a timestamp

    for cur_env in envs:
        print('Processing environment: %s' % (cur_env))
        output_dir = '%s%s/' % (output_path, cur_env)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        input_dir = '%s%s/' % (input_path, cur_env)
        dir_contents = os.listdir(input_dir)

        for item in dir_contents:
            full_item_path = '%s%s/' % (input_dir, item)

            if not os.path.isdir(full_item_path):
                continue

            trs_filenames = glob.glob('%s*.trs' % (full_item_path))
            matrix = build_matrix(TRANS_CODES, LENA_CODES)

            # Reset the counters for every directory. Previously a directory
            # without usable .trs files either raised UnboundLocalError (first
            # directory) or silently reused the counts of the previous one.
            count_single = 0
            count_numbered_multi = 0
            count_unnumbered_multi = 0
            count_angle_brackets = 0

            for filename in trs_filenames:
                print('\n\tProcessing file %s' % (os.path.basename(filename)))

                parser = TRSParser(filename)
                segs = get_trans_segs(parser.parse())
                if not segs:
                    continue

                print('\tExamining range: %s - %s' % (
                    get_time_str(segs[0].start), get_time_str(segs[-1].end)))

                single, numbered_multi, unnumbered_multi = divide_segs(segs)

                # NOTE(review): overwritten per file while the matrix
                # accumulates across files - confirm this is intended.
                count_single = len(single)
                count_numbered_multi = len(numbered_multi)
                count_unnumbered_multi = len(unnumbered_multi)
                count_angle_brackets = (
                    count_angle_bracket_segs(single) +
                    count_angle_bracket_segs(numbered_multi) +
                    count_angle_bracket_segs(unnumbered_multi))

                process_single(single, matrix)
                process_numbered_multi(numbered_multi, matrix)
                process_unnumbered_multi(unnumbered_multi, matrix)

            output_name = '%s%s-matrix.csv' % (output_dir,
                                               full_item_path.split('/')[-2])
            output_matrix(matrix, output_name, count_single,
                          count_numbered_multi, count_unnumbered_multi,
                          count_angle_brackets)
Пример #5
0
class VerificationWindow():
    """Top-level window showing a parsed TRS file as a tree of segments,
    utterances and the parser errors/warnings reported for each utterance.

    The toolbar can restrict the tree to rows that carry errors, expand or
    collapse it, re-parse the file and play the audio of the selected segment.
    """

    # Severity stored in column 2 of the tree model. Rows are compared with
    # '<' and '>', which relies on NONE < WARNING < ERROR
    # (assumes Enum numbers its options in list order - TODO confirm).
    ERROR_STATES = Enum(['NONE', 'WARNING', 'ERROR'])

    def __init__(self, filename, progress_dialog):
        """Parse filename, build the widgets and show the window.

        progress_dialog must provide set_fraction() and next_phase(); both
        are passed to the parser and the tree builder as progress callbacks.
        """
        self.logger = logging.getLogger(__name__)
        self.window = gtk.Window(gtk.WindowType.TOPLEVEL)
        self.window.set_title('Transcription Verifier')
        self.window.connect('destroy', lambda x: self.window.destroy())
        self.window.set_border_width(10)
        self.window.set_default_size(580, 500)

        self.trs_parser = TRSParser(filename)
        self.trs_parser.parse(
            progress_update_fcn=progress_dialog.set_fraction,
            progress_next_phase_fcn=progress_dialog.next_phase,
            remove_bad_trans_codes=False)
        # created lazily, the first time the user plays a segment
        self.wav_parser = None

        progress_dialog.next_phase()
        # must match the initial 'active' state of the toolbar toggle button
        self.filter_errors = True
        self.toolbar = self.build_toolbar()
        self.treeview = self.build_treeview(progress_dialog.set_fraction)
        self.treeview.expand_all()

        scrolled_win = gtk.ScrolledWindow()
        scrolled_win.set_policy(gtk.PolicyType.AUTOMATIC,
                                gtk.PolicyType.AUTOMATIC)
        scrolled_win.add(self.treeview)

        vbox = gtk.VBox(False, 2)
        vbox.pack_start(self.toolbar, False, False, 0)
        vbox.pack_start(scrolled_win, True, True, 0)

        self.window.add(vbox)

        self.window.show_all()

    def _create_tool_button(self, label, icon, handler):
        """Return a ToolButton with the given label and icon whose 'clicked'
        signal is connected to handler."""
        button = gtk.ToolButton()
        image = gtk.Image()
        image.set_from_file(UIUtils.get_icon_path(icon))
        button.set_label(label)
        button.set_icon_widget(image)
        button.connect('clicked', handler)
        return button

    def build_toolbar(self):
        """Create and return the horizontal toolbar.

        The handlers look up self.treeview only when they fire, so the
        toolbar may be built before the treeview exists.
        """
        toolbar = gtk.Toolbar()
        toolbar.set_orientation(gtk.Orientation.HORIZONTAL)

        filter_errors_button = gtk.ToggleToolButton()
        #set this before connecting the toggled handler so it doesn't cause trouble
        filter_errors_button.set_active(True)
        filter_errors_button.connect(
            'toggled', lambda w: self.toggle_filter_errors(w.get_active()))
        filter_errors_icon = gtk.Image()
        filter_errors_icon.set_from_file(
            UIUtils.get_icon_path(UIUtils.BUTTON_ICONS.FLAG))
        filter_errors_button.set_label('Show Errors Only')
        filter_errors_button.set_icon_widget(filter_errors_icon)

        expand_button = self._create_tool_button(
            'Expand All', UIUtils.BUTTON_ICONS.EXPAND,
            lambda w: self.treeview.expand_all())
        collapse_button = self._create_tool_button(
            'Collapse All', UIUtils.BUTTON_ICONS.COLLAPSE,
            lambda w: self.treeview.collapse_all())
        rescan_button = self._create_tool_button(
            'Rescan File', UIUtils.BUTTON_ICONS.REFRESH,
            lambda w: self._rescan_file())
        play_seg_button = self._create_tool_button(
            'Play Seg', UIUtils.BUTTON_ICONS.PLAY,
            lambda w: self.play_selected_seg())
        close_button = self._create_tool_button(
            'Close', UIUtils.BUTTON_ICONS.CLOSE,
            lambda w: self.window.destroy())
        exit_button = self._create_tool_button(
            'Exit', UIUtils.BUTTON_ICONS.EXIT,
            lambda w: gtk.main_quit())

        toolbar.insert(filter_errors_button, -1)
        toolbar.insert(expand_button, -1)
        toolbar.insert(collapse_button, -1)
        toolbar.insert(gtk.SeparatorToolItem(), -1)
        toolbar.insert(play_seg_button, -1)
        toolbar.insert(rescan_button, -1)
        toolbar.insert(gtk.SeparatorToolItem(), -1)
        toolbar.insert(close_button, -1)
        toolbar.insert(exit_button, -1)

        return toolbar

    def _rescan_file(self):
        """Re-parse the TRS file and replace the tree model with fresh data.

        The window is made insensitive while this runs.
        """
        self.window.set_sensitive(False)

        # try/finally: if re-parsing or rebuilding raises, the window must not
        # be left permanently insensitive
        try:
            progress_dialog = ProgressDialog(
                'Processing File...',
                ['Parsing trs file...', 'Validating data...', 'Building UI...'])
            progress_dialog.show()

            #this causes the parser to invalidate all cache, re-open and re-parse the file
            self.trs_parser.re_parse(
                progress_update_fcn=progress_dialog.set_fraction,
                progress_next_phase_fcn=progress_dialog.next_phase)

            #build a new treeview model based on the new data
            progress_dialog.next_phase()
            filter_model = self._build_tree_store(progress_dialog.set_fraction)
            self.treeview.set_model(filter_model)

            #Presumably the most common cause for rescanning is to check if errors have been fixed.
            #If the error filter is on, automatically expand all rows to show any remaining errors.
            if self.filter_errors:
                self.treeview.expand_all()
        finally:
            self.window.set_sensitive(True)

    def _build_tree_store(self, progress_update_fcn):
        """Build the segment > utterance > error tree; return its filter model.

        progress_update_fcn is called after every utterance with the fraction
        of self.trs_parser.total_utters processed so far.
        """
        #segment/utter id, description, error_state (0 = none, 1 = warning, 2 = error)
        tree_store = gtk.TreeStore(gobject.TYPE_INT, gobject.TYPE_STRING,
                                   gobject.TYPE_INT)

        cur_utter = 0
        for seg in self.trs_parser.parse():
            if seg.speakers:
                seg_speakers = ' + '.join(
                    speaker.speaker_codeinfo.get_code()
                    for speaker in seg.speakers)
            else:
                seg_speakers = ' - '

            seg_iter = tree_store.append(None, [
                seg.num,
                '%s [%s - %s]' %
                (seg_speakers, BackendUtils.get_time_str(
                    seg.start), BackendUtils.get_time_str(seg.end)),
                VerificationWindow.ERROR_STATES.NONE
            ])

            for utter in seg.utters:
                # '?' marks an utterance that has a speaker whose code info is
                # missing (utter.speaker is set but its speaker_codeinfo is
                # not); ' - ' marks an utterance with no speaker at all
                speaker_cd = '?'
                if utter.speaker:
                    if utter.speaker.speaker_codeinfo:
                        speaker_cd = utter.speaker.speaker_codeinfo.get_code()
                else:
                    speaker_cd = ' - '

                desc_str = '%s [%s - %s]' % (
                    speaker_cd, BackendUtils.get_time_str(
                        utter.start), BackendUtils.get_time_str(utter.end))
                if utter.lena_notes:
                    desc_str += ' %s' % (utter.lena_notes)
                if utter.trans_phrase:
                    desc_str += ' %s' % (utter.trans_phrase)
                if utter.lena_codes:
                    desc_str += ' |%s|' % ('|'.join(utter.lena_codes))
                if utter.trans_codes:
                    # the lena codes already end with '|'; open the list here
                    # only when there were none
                    if not utter.lena_codes:
                        desc_str += ' |'
                    desc_str += '%s|' % ('|'.join(utter.trans_codes))

                utter_iter = tree_store.append(
                    seg_iter,
                    [utter.id, desc_str, VerificationWindow.ERROR_STATES.NONE])

                cur_utter += 1
                progress_update_fcn(
                    float(cur_utter) / float(self.trs_parser.total_utters))

                #note: these may be errors or warnings
                error_list = self.trs_parser.error_collector.get_errors_by_utter(
                    utter)
                for error in error_list:
                    error_type = VerificationWindow.ERROR_STATES.ERROR
                    if isinstance(error, ParserWarning):
                        error_type = VerificationWindow.ERROR_STATES.WARNING

                    tree_store.append(
                        utter_iter, [-1, '%s' % (error.msg), error_type])

                    # raise the utterance row and the rows above it to at
                    # least this severity
                    parent_it = utter_iter
                    while parent_it:
                        parent_error_type = tree_store.get_value(parent_it, 2)
                        if parent_error_type < error_type:
                            tree_store.set_value(parent_it, 2, error_type)

                        parent_it = tree_store.iter_parent(parent_it)

        filter_model = tree_store.filter_new()
        filter_model.set_visible_func(self.filter)

        return filter_model

    def build_treeview(self, progress_update_fcn):
        """Return a TreeView over a freshly built model.

        Only the description column is visible; the id and error-state
        columns are added hidden.
        """
        filter_model = self._build_tree_store(progress_update_fcn)
        treeview = gtk.TreeView(filter_model)

        col = gtk.TreeViewColumn('ID', gtk.CellRendererText(), text=0)
        col.set_visible(False)
        treeview.append_column(col)

        renderer = gtk.CellRendererText()
        col = gtk.TreeViewColumn('Description', renderer, text=1)
        col.set_cell_data_func(renderer, self.cell_render_fcn)
        treeview.append_column(col)

        col = gtk.TreeViewColumn('Error State', gtk.CellRendererText(), text=2)
        col.set_visible(False)
        treeview.append_column(col)

        return treeview

    def cell_render_fcn(self, col, cell_renderer, model, it, user_data=None):
        """Cell data function: colour a row's text by its error state
        (orange for warnings, red for errors, black otherwise)."""
        error_state = model.get_value(it, 2)
        if error_state == VerificationWindow.ERROR_STATES.WARNING:
            cell_renderer.set_property('foreground', 'orange')
        elif error_state == VerificationWindow.ERROR_STATES.ERROR:
            cell_renderer.set_property('foreground', 'red')
        else:
            cell_renderer.set_property('foreground', 'black')

        return

    #returns true if row pointed to by 'it' should be visible
    def filter(self, model, it, user_data):
        """Visibility function of the filter model: with the error filter on,
        show only rows whose error state is above NONE."""
        result = True
        if self.filter_errors:
            result = model.get_value(it,
                                     2) > VerificationWindow.ERROR_STATES.NONE

        return result

    def toggle_filter_errors(self, filter_errors):
        """Flip the error filter and re-filter the tree.

        The filter_errors argument is ignored; the stored flag is inverted.
        """
        self.filter_errors = not self.filter_errors
        self.treeview.get_model().refilter()

    def play_selected_seg(self):
        """Play the audio of the segment that owns the selected row.

        The first time, the user is asked for the WAV file to play from.
        """
        (model, it) = self.treeview.get_selection().get_selected()
        if not it:
            UIUtils.show_no_sel_dialog()
            return

        #if they've selected an utterance or error row, find the top level parent (the segment) and use it instead
        parent = model.iter_parent(it)
        while parent:
            it = parent
            parent = model.iter_parent(it)

        # column 0 of a top-level row holds seg.num, used here as an index
        # into the parsed segment list
        seg_num = model.get_value(it, 0)
        seg = self.trs_parser.parse()[seg_num]

        if not self.wav_parser:
            dialog = gtk.FileChooserDialog(
                title='Select WAV File',
                action=gtk.FileChooserAction.OPEN,
                buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL,
                         gtk.STOCK_OPEN, gtk.ResponseType.OK))
            dialog.set_default_response(gtk.ResponseType.OK)

            for filter_opt in (('wav Files', '*.wav'), ('All Files', '*')):
                file_filter = gtk.FileFilter()
                file_filter.set_name(filter_opt[0])
                file_filter.add_pattern(filter_opt[1])
                dialog.add_filter(file_filter)

            response = dialog.run()
            if response == gtk.ResponseType.OK:
                filename = dialog.get_filename()
                self.wav_parser = WavParser(filename)

            dialog.destroy()

        if self.wav_parser:
            self.wav_parser.play_seg(seg)
        else:
            # NOTE(review): reached when the file chooser was cancelled; the
            # 'no selection' dialog is reused for that case
            UIUtils.show_no_sel_dialog()
Пример #6
0
class FreqWindow():
    def __init__(self, filename, progress_dialog):
        """Parse filename, filter its segments with WHQFilter and show the
        results window.

        progress_dialog must provide set_fraction(), next_phase() and
        ensure_finish().
        """
        self.window = gtk.Window(gtk.WindowType.TOPLEVEL)
        self.window.set_title('WH-Frequency Counter')
        self.window.set_border_width(10)
        self.window.set_default_size(730, 400)

        self.logger = logging.getLogger(__name__)

        self.count_cols = self._get_initial_count_cols()
        self.trs_parser = TRSParser(filename)
        segments = self.trs_parser.parse(
            progress_update_fcn=progress_dialog.set_fraction,
            progress_next_phase_fcn=progress_dialog.next_phase,
            validate=False,
            seg_filters=[])

        #this object caches original segs and helps with lookup by segment number
        self.filter_manager = FilterManager(segments)
        calc = CountOutputCalc('', CountOutputCalc.COUNT_TYPES.PER_SEG, 1)
        #this object filters and allows us to retrieve the filtered segs
        self.output = Output('', '', [WHQFilter()], calc, False)
        # explicit loop: map() is lazy on Python 3, so using it only for its
        # side effects would add nothing there
        for seg in segments:
            self.output.add_item(seg)

        treeview = self._build_treeview()
        #ensure progress dialog self-destructs even if no utterances are found (in that case the above call never invokes progress_dialog.set_fraction)
        progress_dialog.ensure_finish()

        scrolled_win = gtk.ScrolledWindow()
        # same enum spelling as the other GTK3-style constants in this class
        scrolled_win.set_policy(gtk.PolicyType.AUTOMATIC,
                                gtk.PolicyType.AUTOMATIC)
        scrolled_win.add(treeview)

        export_button = UIUtils.create_button('Export Results',
                                              UIUtils.BUTTON_ICONS.EXPORT)
        export_button.connect('clicked',
                              lambda widget: self._export_results(treeview))

        close_button = UIUtils.create_button('Close',
                                             UIUtils.BUTTON_ICONS.CLOSE)
        close_button.connect('clicked', lambda w: self.window.destroy())

        add_button = UIUtils.create_button('Add Count Column',
                                           UIUtils.BUTTON_ICONS.ADD)
        add_button.connect('clicked', lambda w: self._add_count_col(treeview))

        self.remove_button = UIUtils.create_button('Remove Count Column',
                                                   UIUtils.BUTTON_ICONS.REMOVE)
        self.remove_button.connect('clicked',
                                   lambda w: self._remove_count_col(treeview))
        self._update_remove_button_state()

        options_frame = gtk.Frame(label='Options')
        options_vbox = gtk.VBox()
        self.linked_checkbox = gtk.CheckButton('Group Linked Segments')
        self.linked_checkbox.connect('toggled', self._toggle_seg_grouping,
                                     treeview)
        options_vbox.pack_start(self.linked_checkbox, False, False, 0)

        self.context_checkbox = gtk.CheckButton('Show Context')
        self.context_checkbox.connect('toggled', self._toggle_show_context,
                                      treeview)
        options_vbox.pack_start(self.context_checkbox, False, False, 0)

        options_frame.add(options_vbox)

        self.statusbar = gtk.Statusbar()
        self.statusbar.set_has_resize_grip(False)
        # number of top-level rows in the model
        self.num_whq = treeview.get_model().iter_n_children(None)
        self._update_statusbar()

        vbox = gtk.VBox()

        bbox = gtk.HButtonBox()
        bbox.pack_start(export_button, True, False, 0)
        bbox.pack_start(add_button, True, False, 0)
        bbox.pack_start(self.remove_button, True, False, 0)
        bbox.pack_start(close_button, True, False, 0)

        vbox.pack_start(scrolled_win, True, True, 0)
        vbox.pack_start(self.statusbar, False, False, 0)
        vbox.pack_end(bbox, False, False, 0)
        vbox.pack_end(options_frame, False, False, 0)
        self.window.add(vbox)

        self.window.show_all()

    def _get_initial_count_cols(self):
        return map(lambda word: [word.capitalize(), word, 0],
                   'who what why when where how'.split())

    def _toggle_show_context(self, checkbox, treeview):
        tree_model = self._build_list_store(
            link_segs=self.linked_checkbox.get_active(),
            prev_store=treeview.get_model(),
            show_context=self.context_checkbox.get_active())
        treeview.set_model(tree_model)

    def _toggle_seg_grouping(self, checkbox, treeview):
        tree_model = self._build_list_store(
            link_segs=self.linked_checkbox.get_active(),
            prev_store=None,
            show_context=self.context_checkbox.get_active())
        treeview.set_model(tree_model)
        self.num_whq = treeview.get_model().iter_n_children(None)
        self._update_statusbar()

    def _update_remove_button_state(self):
        self.remove_button.set_sensitive(len(self.count_cols) > 0)

    def _remove_count_col(self, treeview):
        """Ask which count column to drop, then remove it from
        self.count_cols, the model and the treeview.

        Nothing changes unless the dialog is confirmed with OK while a column
        is selected.
        """
        dialog = gtk.Dialog(title='Remove Count Column',
                            buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL,
                                     gtk.STOCK_OK, gtk.ResponseType.OK))
        dialog.set_default_response(gtk.ResponseType.OK)

        vbox = dialog.get_content_area()

        list_store = gtk.ListStore(gobject.TYPE_STRING)
        for count_col in self.count_cols:
            list_store.append([count_col[0]])

        combo = gtk.ComboBox(model=list_store)
        renderer = gtk.CellRendererText()
        # a cell layout takes (cell, expand); the Box-style (child, expand,
        # fill, padding) arguments used before do not apply to a combo box
        combo.pack_start(renderer, True)
        combo.add_attribute(renderer, 'text', 0)
        combo.set_active(0)

        hbox = gtk.HBox()
        hbox.pack_start(gtk.Label('Select Column:'), True, True, 0)
        hbox.pack_start(combo, True, True, 0)

        vbox.pack_start(hbox, True, True, 0)
        vbox.show_all()

        response = dialog.run()
        # read the selection before the dialog (and the combo with it) goes away
        col_index = combo.get_active()
        # always destroy the dialog: it used to stay open when it was closed
        # through the window manager or confirmed without a selection
        dialog.destroy()

        if response != gtk.ResponseType.OK or col_index < 0:
            return

        self.count_cols = self.count_cols[:col_index] + self.count_cols[
            col_index + 1:]

        progress_dialog = ProgressDialog('Removing Column...',
                                         ['Rebuilding UI...'])
        progress_dialog.show()

        tree_model = self._build_list_store(
            link_segs=self.linked_checkbox.get_active(),
            prev_store=treeview.get_model(),
            show_context=self.context_checkbox.get_active())

        # the count columns follow the six fixed columns of the list store
        old_col = treeview.get_column(6 + col_index)
        treeview.remove_column(old_col)

        #update the 'text' property of the cell renderers in all columns after the removed column - otherwise cell values get mixed up
        i = 6 + col_index
        while i < tree_model.get_n_columns():
            col = treeview.get_column(i)
            # get_cells() is the current cell layout accessor
            # (get_cell_renderers() is the deprecated PyGTK spelling)
            renderer = col.get_cells()[0]
            col.set_attributes(renderer, text=i)
            i += 1

        treeview.set_model(tree_model)

        self._update_remove_button_state()
        self._update_statusbar()

        progress_dialog.ensure_finish()

    def _add_count_col(self, treeview):
        """Prompt for a column name and a regular expression, then append a
        matching count column to self.count_cols, the model and the treeview.

        An invalid expression is reported and the dialog is shown again; any
        response other than OK closes the dialog without changes.
        """
        dialog = gtk.Dialog(title='Add Count Column',
                            buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL,
                                     gtk.STOCK_OK, gtk.ResponseType.OK))
        dialog.set_default_response(gtk.ResponseType.OK)

        vbox = dialog.get_content_area()

        # Grid.attach() takes (child, left, top, width, height) only; the
        # 3-pixel padding of the former gtk.Table is expressed as spacing
        grid = gtk.Grid()
        grid.set_row_spacing(3)
        grid.set_column_spacing(3)

        name_label = gtk.Label('Column Name:')
        grid.attach(name_label, 0, 0, 1, 1)

        name_entry = gtk.Entry()
        grid.attach(name_entry, 1, 0, 1, 1)

        regex_label = gtk.Label('Search term:')
        grid.attach(regex_label, 0, 1, 1, 1)

        regex_entry = gtk.Entry()
        grid.attach(regex_entry, 1, 1, 1, 1)

        vbox.pack_start(grid, True, True, 0)
        vbox.show_all()

        done = False
        while not done:
            response = dialog.run()
            if response != gtk.ResponseType.OK:
                # Cancel, or the dialog was closed through the window manager
                # (which previously re-ran the dialog forever)
                dialog.destroy()
                done = True
                continue

            name = name_entry.get_text()
            regex = regex_entry.get_text()

            # validate first, while the dialog is still open, so the user can
            # correct the expression
            try:
                re.compile(regex)
            except re.error as error:
                error_dialog = gtk.MessageDialog(
                    buttons=(gtk.ButtonType.OK),
                    message_format=
                    'The regular expression that has been entered is invalid.'
                )
                error_dialog.run()
                error_dialog.destroy()

                self.logger.error(
                    'Exception in add_column():\n %s\nStacktrace: %s' %
                    (error, traceback.format_exc()))
                continue

            dialog.destroy()
            done = True

            # bound before the try block so the handler below can test it
            # even when ProgressDialog() itself fails
            progress_dialog = None
            try:
                self.count_cols.append([name, regex,
                                        0])  #name, regex, total

                progress_dialog = ProgressDialog(
                    'Adding New Column...', ['Counting occurrances...'])
                progress_dialog.show()
                list_store = self._build_list_store(
                    link_segs=self.linked_checkbox.get_active(),
                    prev_store=treeview.get_model(),
                    show_context=self.context_checkbox.get_active())
                progress_dialog.ensure_finish()

                treeview.set_model(list_store)
                # the new count column is the last column of the store
                col = gtk.TreeViewColumn(name,
                                         gtk.CellRendererText(),
                                         text=list_store.get_n_columns() -
                                         1)
                treeview.append_column(col)
                self._update_remove_button_state()
                self._update_statusbar()

            except Exception as error:
                error_dialog = gtk.MessageDialog(
                    buttons=(gtk.ButtonType.OK),
                    message_format=
                    'The application has encountered an internal error. Please contact your local programmer to assign blame.'
                )
                error_dialog.run()
                error_dialog.destroy()

                if progress_dialog:
                    progress_dialog.destroy()

                self.logger.error(
                    'Exception in add_column():\n %s\nStacktrace: %s' %
                    (error, traceback.format_exc()))

    def _update_statusbar(self):
        context_id = self.statusbar.get_context_id('num_whq')
        self.statusbar.pop(context_id)
        totals = 'Totals: WHQ Count: %d' % (self.num_whq)

        for col in self.count_cols:
            totals += ', %s: %d' % (col[0], col[2])

        self.statusbar.push(context_id, totals)

    def _get_link_chain(self, cur_seg):
        cur = cur_seg
        chain = []
        while cur != None:
            chain.insert(cur, 0)
            cur = cur.prev

        cur = cur_seg.next
        while cur != None:
            chain.append(cur)

        return chain

    def _build_list_store_row(self, utter_id, start_time, end_time,
                              trans_phrase, speaker_str, target_str,
                              whq_count):
        start_time = ('%0.2f' % (start_time)) if start_time != None else ''
        end_time = ('%0.2f' % (end_time)) if end_time != None else ''

        return [
            utter_id,
            '%s - %s' % (start_time, end_time),
            trans_phrase,
            speaker_str,
            target_str,
            whq_count,
        ]

    def _find_utter_index(self, utter):
        utter_index = -1
        i = 0
        while i < len(utter.seg.utters) and utter_index < 0:
            if utter.seg.utters[i] == utter:
                utter_index = i
            i += 1

        return utter_index

    def _append_context(self, bwd_start_utter, fwd_start_utter, cur_phrase):
        #backward
        bwd_phrase = self._get_adjacent_phrase(bwd_start_utter, -1)
        fwd_phrase = self._get_adjacent_phrase(fwd_start_utter, 1)

        return '(%s)\n%s\n(%s)' % (bwd_phrase, cur_phrase, fwd_phrase)

    def _get_adjacent_phrase(self, start_utter, incr):
        utter_index = self._find_utter_index(start_utter) + incr
        seg_index = start_utter.seg.num
        phrase = None

        i_in_bounds = None
        init_j = None
        if incr < 0:
            i_in_bounds = lambda i: i >= 0
            init_j = lambda i, seg: utter_index if i == seg_index else len(
                seg.utters) - 1
            j_in_bounds = lambda j, seg: j >= 0
        else:
            i_in_bounds = lambda i: i < len(self.filter_manager.get_segs())
            init_j = lambda i, seg: utter_index if i == seg_index else 0
            j_in_bounds = lambda j, seg: j < len(seg.utters)

        i = seg_index
        while i_in_bounds(i) and not phrase:
            seg = self.filter_manager.get_seg_by_num(i)
            j = init_j(i, seg)
            while j_in_bounds(j, seg) and not phrase:
                phrase = seg.utters[j].trans_phrase
                j += incr
            i += incr

        return phrase or '-'

    def _build_list_store(self,
                          link_segs=False,
                          prev_store=None,
                          show_context=False):
        """Build a gtk.ListStore holding one row per utterance (or per chain).

        link_segs    -- if True, the filtered segments are converted to
                        utterance chains with FilterManager.get_chains() and
                        each chain becomes a single row (phrases joined with
                        '\\n->', speaker/target descriptions joined with
                        ', '). Otherwise every utterance of every segment
                        gets its own row.
        prev_store   -- if truthy, the whq count (column 5) of each new row
                        is copied from the row with the same index in this
                        store; otherwise it defaults to 1.
        show_context -- if True, the displayed phrase is wrapped with its
                        neighbouring phrases via _append_context().

        Each entry of self.count_cols is indexed here as [0] column name,
        [1] regex handed to re.findall(), [2] running total. The totals in
        [2] are overwritten as a side effect: reset on the first row, then
        accumulated. (If no rows are produced the totals are left untouched.)

        Returns the populated list store.
        """
        #for now, we always grab segs and convert to chains later if needed
        segments = self.output.get_filtered_items()
        list_store = gtk.ListStore(
            gobject.TYPE_INT,  #utterance id
            gobject.TYPE_STRING,  #time
            gobject.TYPE_STRING,  #phrase
            gobject.TYPE_STRING,  #speakers
            gobject.TYPE_STRING,  #target listeners
            gobject.TYPE_INT,  #whq count
            *([gobject.TYPE_INT] * len(self.count_cols)
              )  #user-defined 'count columns'
        )

        #row_num doubles as the index into prev_store and as the "first row" flag used to reset the column totals
        row_num = 0
        if link_segs:
            utter_chains = FilterManager.get_chains(segments)
            for head in utter_chains:
                #seed the row text from the head of the chain...
                cur = head
                #NOTE(review): 'prev' is assigned here and in the loop below but never read
                prev = cur
                trans_phrase = cur.trans_phrase
                speaker_str = DBConstants.SPEAKER_CODES.get_option(
                    cur.speaker.get_codeinfo().get_code(
                    )).desc if cur.speaker else '(Unknown)'
                target_str = DBConstants.TRANS_CODES[1].get_option(
                    cur.trans_codes[1]
                ).desc if cur.trans_codes else '(Unknown)'
                cur = cur.next

                #NOTE(review): 'count_col_vals' is never read in this method
                count_col_vals = [0] * len(self.count_cols)

                #...then append the text of every following link in the chain
                while cur:
                    trans_phrase += '\n->%s' % (cur.trans_phrase)
                    if cur.speaker:
                        speaker_str += ', %s' % (
                            DBConstants.SPEAKER_CODES.get_option(
                                cur.speaker.get_codeinfo().get_code()).desc)
                    if cur.trans_codes:
                        target_str += ', %s' % (
                            DBConstants.TRANS_CODES[1].get_option(
                                cur.trans_codes[1]).desc)
                    prev = cur
                    cur = cur.next

                tail = FilterManager.get_endpoint(
                    FilterManager.ENDPOINT_TYPES.TAIL, head)

                if show_context:
                    trans_phrase = self._append_context(
                        head, tail, trans_phrase)

                whq_count = prev_store[row_num][5] if prev_store else 1
                #the row spans from the start of the head to the end of the tail
                row = self._build_list_store_row(head.id, head.start, tail.end,
                                                 trans_phrase, speaker_str,
                                                 target_str, whq_count)

                #NOTE(review): here the pattern is matched against the displayed phrase (joined chain text, plus context
                #when show_context is set) without lowercasing, whereas the unlinked branch below matches against the
                #lowercased raw utterance phrase - confirm which behaviour is intended
                for j in range(len(self.count_cols)):
                    count = len(re.findall(self.count_cols[j][1],
                                           trans_phrase))
                    #reset column total on first iteration (if _build_list_store() was called in the past, then self.count_cols[j][2] may be > 0)
                    self.count_cols[j][2] = self.count_cols[j][
                        2] + count if row_num else count
                    row.append(count)

                list_store.append(row)
                row_num += 1

        else:
            for i in range(len(segments)):
                for utter in segments[i].utters:
                    trans_phrase = utter.trans_phrase
                    if show_context:
                        #a single utterance is both the backward and the forward starting point
                        trans_phrase = self._append_context(
                            utter, utter, trans_phrase)

                    whq_count = prev_store[row_num][5] if prev_store else 1
                    speaker_str = DBConstants.SPEAKER_CODES.get_option(
                        utter.speaker.speaker_codeinfo.get_code(
                        )).desc if utter.speaker else '(Unknown)'
                    target_str = DBConstants.TRANS_CODES[1].get_option(
                        utter.trans_codes[1]
                    ).desc if utter.trans_codes else '(Unknown)'
                    row = self._build_list_store_row(utter.id, utter.start,
                                                     utter.end, trans_phrase,
                                                     speaker_str, target_str,
                                                     whq_count)

                    #counts are taken from the lowercased raw phrase, so any context added above is not counted
                    for j in range(len(self.count_cols)):
                        count = len(
                            re.findall(self.count_cols[j][1],
                                       utter.trans_phrase.lower()))
                        #reset column total on first iteration (if _build_list_store() was called in the past, then self.count_cols[j][2] may be > 0)
                        self.count_cols[j][2] = self.count_cols[j][
                            2] + count if row_num else count
                        row.append(count)

                    list_store.append(row)
                    row_num += 1

        return list_store

    def _build_treeview(self):
        """Create the results tree view on top of a freshly built list store.

        Columns, in order: a hidden ID column (model column 0), four text
        columns ('Time', 'Phrase', 'Speakers', 'Target Listeners'), an
        editable spin-button 'WHQ Count' column wired to _update_row(), and
        one text column per entry in self.count_cols. Key presses on the
        view are routed to _keypress_callback().

        Returns the configured gtk.TreeView.
        """
        treeview = gtk.TreeView(self._build_list_store())

        #hidden id column
        id_col = gtk.TreeViewColumn('ID', gtk.CellRendererText(), text=0)
        id_col.set_visible(False)
        id_col.set_resizable(True)
        treeview.append_column(id_col)

        #plain text columns occupy model columns 1..4
        titles = ['Time', 'Phrase', 'Speakers', 'Target Listeners']
        for model_idx, title in enumerate(titles, 1):
            text_col = gtk.TreeViewColumn(title,
                                          gtk.CellRendererText(),
                                          text=model_idx)
            text_col.set_resizable(True)
            treeview.append_column(text_col)

        #editable whq count column sits right after the text columns
        whq_renderer = gtk.CellRendererSpin()
        whq_renderer.set_property(
            'adjustment',
            gtk.Adjustment(value=1,
                           lower=0,
                           upper=100,
                           page_incr=5,
                           step_incr=1,
                           page_size=0))
        whq_renderer.set_property('editable', True)
        whq_renderer.connect('edited', self._update_row, treeview)
        whq_col = gtk.TreeViewColumn('WHQ Count',
                                     whq_renderer,
                                     text=(len(titles) + 1))
        whq_col.set_resizable(True)
        treeview.append_column(whq_col)

        #user-defined count columns follow the whq count column
        first_count_idx = len(titles) + 2
        for model_idx, count_col in enumerate(self.count_cols,
                                              first_count_idx):
            user_col = gtk.TreeViewColumn(count_col[0],
                                          gtk.CellRendererText(),
                                          text=model_idx)
            user_col.set_resizable(True)
            treeview.append_column(user_col)

        treeview.connect('key-press-event', self._keypress_callback, treeview)

        return treeview

    def _keypress_callback(self, widget, event, treeview):
        """Advance to the next row and start editing it when Tab is pressed.

        Any other key is ignored. Nothing happens either when no row is
        selected or when the first selected row is already the last one.
        The cursor is placed on the tree view column at index 3.
        """
        if gdk.keyval_name(event.keyval).lower() != 'tab':
            return

        (model, paths) = treeview.get_selection().get_selected_rows()
        total_rows = model.iter_n_children(None)
        if not paths:
            return

        next_row = paths[0][0] + 1
        if next_row < total_rows:
            treeview.set_cursor(next_row,
                                focus_column=treeview.get_column(3),
                                start_editing=True)

    def _update_row(self, widget, path, value, treeview):
        #we must retrieve the model each time this method is called (rather than just passing in a reference to it), since the model is re-defined ever time a count column is added or removed
        model = treeview.get_model()
        old_val = int(model[path][5])
        new_val = int(value)
        self.num_whq += (new_val - old_val)

        model[path][5] = new_val
        self._update_statusbar()

    def _export_results(self, treeview):
        """Ask for a destination file and export the rows of treeview to it.

        A save dialog (with an extra 'Open Immediately' checkbox) is shown.
        Unless the user confirms with OK, nothing is exported. Otherwise
        every row of the tree view's model is written through a
        FreqExporter, followed by the totals (self.num_whq and the per-column
        totals from self.count_cols). Afterwards the file is either opened
        with the configured spreadsheet program or a confirmation dialog is
        shown.
        """
        dialog = gtk.FileChooserDialog(
            title='Save',
            action=gtk.FileChooserAction.SAVE,
            buttons=(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL, gtk.STOCK_SAVE,
                     gtk.ResponseType.OK))
        try:
            dialog.set_default_response(gtk.ResponseType.OK)
            dialog.add_filter(UIUtils.CSV_FILE_FILTER)
            dialog.add_filter(UIUtils.ALL_FILE_FILTER)

            #splice in the 'open immediately checkbox'
            content_area = dialog.get_content_area()
            open_now_checkbox = gtk.CheckButton('Open Immediately')
            open_now_checkbox.set_active(True)
            align = gtk.Alignment(xalign=1.0, yalign=1.0)
            align.add(open_now_checkbox)
            content_area.pack_end(align, False, False, 0)
            open_now_checkbox.show()
            align.show()

            response = dialog.run()
            #anything other than OK (cancel button, closing the window, ...) aborts the export
            if response != gtk.ResponseType.OK:
                return

            #these must be read before the dialog is destroyed
            filename = dialog.get_filename()
            open_now = open_now_checkbox.get_active()
        finally:
            #run() never destroys the dialog itself, so do it on every path -
            #including responses such as a window-manager close
            dialog.destroy()

        #each count column is indexed [0] header, [1] pattern, [2] running total;
        #only the headers and totals are needed here
        if self.count_cols:
            count_col_headers, _patterns, count_col_totals = zip(
                *self.count_cols)
        else:
            count_col_headers, count_col_totals = [], []
        exporter = FreqExporter(filename, self.trs_parser.filename)

        exporter.write_header_row(count_col_headers)
        list_store = treeview.get_model()
        num_model_cols = list_store.get_n_columns()
        tree_it = list_store.get_iter_first()
        while tree_it:
            #we must remove newline chars, otherwise Excel thinks it's the end of a row (even when it's quoted...)
            phrase = list_store.get_value(tree_it, 2).replace(
                '\n', ' ').replace('\r', '')
            time_str = list_store.get_value(tree_it, 1)
            speakers_str = list_store.get_value(tree_it, 3) or '(Unknown)'
            targets_str = list_store.get_value(tree_it, 4) or '(Unknown)'
            #model column 5 holds the row's whq count
            num_utters = int(list_store.get_value(tree_it, 5))
            #model columns 6 and up hold the user-defined count columns
            count_col_vals = [
                int(list_store.get_value(tree_it, col))
                for col in range(6, num_model_cols)
            ]

            exporter.write_count_row(time_str, phrase, speakers_str,
                                     targets_str, num_utters, count_col_vals)

            tree_it = list_store.iter_next(tree_it)

        exporter.finish(self.num_whq, count_col_totals)

        if open_now:
            subprocess.Popen(
                ['%s' % DBConstants.SETTINGS.SPREADSHEET_PATH, filename])
        else:
            result_dialog = gtk.MessageDialog(
                buttons=gtk.ButtonType.OK,
                message_format='Results exported successfully.')
            result_dialog.run()
            result_dialog.destroy()
Пример #7
0
    def export(self, progress_update_fcn=None, progress_next_phase_fcn=None):
        """Run every output of the configuration over the TRS file and write a csv report.

        The report (self.export_filename) starts with a header describing
        the export and the configuration, followed by the rows of each
        output in self.config.outputs. If self.summary_filename is set, one
        summary row (TRS file name plus each output's summary) is appended
        to that file; a header row is written first when the file does not
        exist yet.

        progress_update_fcn     -- optional callable; handed to the parser
                                   and called with 1 after each output.
        progress_next_phase_fcn -- optional callable; handed to the parser
                                   and called before each output is processed.
        """
        #the 'with' block guarantees the csv file is closed even if parsing or an output fails
        with open(self.export_filename, 'wb') as export_file:
            #write header info
            csv_writer = csv.writer(
                export_file, quoting=csv.QUOTE_ALL)  #use Python csv library
            header_lines = [
                'Export Date: %s' % (UIUtils.get_cur_timestamp_str()),
                'Configuration Creation Date: %s' % (self.config.created),
                'TRS Filename: %s' % (self.trs_filename),
                'Output Configuration:',
                'Name: %s' % (self.config.name),
                'Description: %s' % (self.config.desc),
                '',
                'Outputs:',
                '',
            ]
            for line in header_lines:
                csv_writer.writerow([line])

            #parse the trs file
            trs_parser = TRSParser(self.trs_filename)
            segs = trs_parser.parse(progress_update_fcn,
                                    progress_next_phase_fcn,
                                    validate=False)
            chains = None  #this is populated on demand, then cached

            summary_row = [os.path.basename(self.trs_filename)[:-4]]
            summary_head = ["TRS file"]
            #iterate through all outputs in the configuration, adding segments/chains to each one, then writing the output to the spreadsheet file
            for cur_output in self.config.outputs:
                #update progress bar text
                if progress_next_phase_fcn:
                    progress_next_phase_fcn()

                cur_output.reset()  #clear any cached utterances from previous runs

                #if we need chains, parse them from the segment list
                #(compare against None so that an empty result is cached too)
                if cur_output.chained and chains is None:
                    chains = FilterManager.get_chains(segs)

                #add chains/segments to the current output
                items = chains if cur_output.chained else segs
                for item in items:
                    cur_output.add_item(
                        item, filter_utters=True
                    )  #note: filter_utters only affects segs (not chains)

                #note: updating progress individually within the above loop (for every item) slows down the processing considerably (by a factor of ~4) - a compromise is to just set each phase to 100% after it completes.
                if progress_update_fcn:
                    progress_update_fcn(1)

                #grab the output's results and write them to the file
                cur_output.write_csv_rows(csv_writer)

                # get summary from output
                summary_head.append(cur_output.name)
                summary_row.append(cur_output.get_summary())

                csv_writer.writerow([''])

        #an unset (None or empty) summary filename means no summary is wanted
        if self.summary_filename:
            # the header is only needed when the summary file is being created
            need_head = not os.path.isfile(self.summary_filename)
            with open(self.summary_filename, 'at') as fp:
                summary_writer = csv.writer(fp, quoting=csv.QUOTE_ALL)
                if need_head:
                    summary_writer.writerow(summary_head)
                summary_writer.writerow(summary_row)
Пример #8
0
def process_dir(path, env, par_code, writer):
    """Accumulate word/utterance counts over all trs files in path and write one result row.

    path     -- prefix that '*.trs' is appended to when globbing for files.
    env      -- appended (with '.db') to lena_db_path to name the database
                that is opened and handed to get_trans_awc().
    par_code -- value written in the first column of the result row.
    writer   -- object with a writerow() method receiving the result row.

    For every file the transcribed segments are divided by a StateMachine
    into single, numbered-multi and unnumbered-multi groups. The
    'no overlap' totals use the single and unnumbered-multi groups with
    exclude_angle=True; the 'all speech' totals use all three groups with
    exclude_angle=False. The row holds, for the transcriber and then for
    LENA: word count, utterance count and words-per-utterance average
    ('%0.3f') for 'no overlap' followed by 'all speech'.
    """
    trs_filenames = glob.glob('%s*.trs' % (path))

    utter_counts = [0] * len(container_types)
    word_counts = [0] * len(container_types)

    lena_db = Database('%s%s.db' % (lena_db_path, env))
    try:
        for filename in trs_filenames:
            #single-argument print(...) behaves exactly like the print statement
            print('\n\tProcessing file %s' % (os.path.basename(filename)))

            parser = TRSParser(filename)
            segs = get_trans_segs(parser.parse())
            if not segs:
                continue

            print('\tExamining range: %s (%0.2f) - %s (%0.2f)' % (
                get_time_str(segs[0].start), segs[0].start,
                get_time_str(segs[-1].end), segs[-1].end))

            sm = StateMachine()
            single, numbered_multi, unnumbered_multi = sm.divide_segs(segs)

            #(segment group, exclude_angle, transcriber slot, lena slot)
            passes = (
                #for non-overlapping (no numbered_multi)
                (single, True, container_types.TRANS_NO_OVERLAP,
                 container_types.LENA_NO_OVERLAP),
                (unnumbered_multi, True, container_types.TRANS_NO_OVERLAP,
                 container_types.LENA_NO_OVERLAP),
                #for all speech
                (single, False, container_types.TRANS_ALL_SPEECH,
                 container_types.LENA_ALL_SPEECH),
                (numbered_multi, False, container_types.TRANS_ALL_SPEECH,
                 container_types.LENA_ALL_SPEECH),
                (unnumbered_multi, False, container_types.TRANS_ALL_SPEECH,
                 container_types.LENA_ALL_SPEECH),
            )
            for group, exclude_angle, trans_slot, lena_slot in passes:
                trans_awc, lena_awc, utter_count = get_trans_awc(
                    group, lena_db, filename, exclude_angle=exclude_angle)
                word_counts[trans_slot] += trans_awc
                word_counts[lena_slot] += lena_awc
                #utterance counts are only tracked in the transcriber slots
                utter_counts[trans_slot] += utter_count
    finally:
        #release the database even if a file fails to parse
        lena_db.close()

    trans_avg_no_overlap = 0
    trans_avg_all_speech = 0
    lena_avg_no_overlap = 0
    lena_avg_all_speech = 0

    #averages stay at 0 when there are no utterances (avoids dividing by zero)
    no_overlap_utters = utter_counts[container_types.TRANS_NO_OVERLAP]
    if no_overlap_utters > 0:
        trans_avg_no_overlap = word_counts[container_types.TRANS_NO_OVERLAP] / float(no_overlap_utters)
        #note: lena and transcriber measures have matching segments, so count is the same
        lena_avg_no_overlap = word_counts[container_types.LENA_NO_OVERLAP] / float(no_overlap_utters)

    all_speech_utters = utter_counts[container_types.TRANS_ALL_SPEECH]
    if all_speech_utters > 0:
        trans_avg_all_speech = word_counts[container_types.TRANS_ALL_SPEECH] / float(all_speech_utters)
        lena_avg_all_speech = word_counts[container_types.LENA_ALL_SPEECH] / float(all_speech_utters)

    writer.writerow([
        par_code,
        word_counts[container_types.TRANS_NO_OVERLAP],
        utter_counts[container_types.TRANS_NO_OVERLAP],
        '%0.3f' % (trans_avg_no_overlap),
        word_counts[container_types.TRANS_ALL_SPEECH],
        utter_counts[container_types.TRANS_ALL_SPEECH],
        '%0.3f' % (trans_avg_all_speech),

        word_counts[container_types.LENA_NO_OVERLAP],
        utter_counts[container_types.TRANS_NO_OVERLAP],
        '%0.3f' % (lena_avg_no_overlap),
        word_counts[container_types.LENA_ALL_SPEECH],
        utter_counts[container_types.TRANS_ALL_SPEECH],
        '%0.3f' % (lena_avg_all_speech),
    ])
Пример #9
0
    def create_check(self):
        """Validate the form, build a Check from it and open the test window.

        If validate_form() reports an error it is shown and nothing else
        happens. Otherwise the input file is parsed (TRSParser for '.trs'
        names, CSVParser for anything else), the requested number of
        segments is picked (randomly or contiguously), and - provided enough
        segments were found - the check, any not-yet-stored segments and one
        Test per segment are inserted into the database before this window
        is replaced by a TestWindow.
        """
        error_msg = self.validate_form()
        if error_msg:
            UIUtils.show_message_dialog(error_msg)
            return

        filters = self.filters_frame.get_filters()
        form = self.form
        check = Check(
            form.name_entry.get_text(),
            form.input_file_entry.get_text(),
            form.wav_file_entry.get_text(),
            form.num_segs_spinner.get_value_as_int(),
            form.context_pad_spinner.get_value_as_int(),
            [],
            0,
            filters=filters,
            pick_randomly=form.rand_checkbox.get_active(),
        )

        progress_dialog = ProgressDialog(
            title='Loading File',
            phases=['Parsing file...', 'Setting up...'])

        #TRS files get the TRS parser, everything else is treated as CSV
        is_trs = check.input_filename.lower().endswith('.trs')
        parser = (TRSParser if is_trs else CSVParser)(check.input_filename)
        progress_dialog.show()

        parse_args = {
            'progress_update_fcn': progress_dialog.set_fraction,
            'seg_filters': check.filters,
        }
        if is_trs:
            #only the TRS parser is given these two
            parse_args['progress_next_phase_fcn'] = progress_dialog.next_phase
            parse_args['validate'] = False
        segs = parser.parse(**parse_args)

        progress_dialog.next_phase()

        if check.pick_randomly:
            #segs = ParserTools.pick_rand_segs(check.num_segs, segs)
            segs = ParserTools.hacked_pick_rand_segs(
                check.num_segs, segs, os.path.basename(check.input_filename))
        else:
            segs = ParserTools.pick_contiguous_segs(check.num_segs, segs)
        progress_dialog.set_fraction(1.0)

        if len(segs) < check.num_segs:
            #close the progress bar (even though there's still one phase left)
            progress_dialog.ensure_finish()
            UIUtils.show_message_dialog(
                'The input file does not contain enough segments of the specified types.',
                dialog_type=gtk.MessageType.ERROR)
            return

        db = BLLDatabase()
        check.db_insert(db)

        for done, seg in enumerate(segs, 1):
            #segments without a db id have not been stored yet
            if seg.db_id == None:
                seg.db_insert(db)

            test = Test(
                check.db_id,
                None,
                None,
                None,
                seg,
                None,
                check.default_context_padding,
            )
            test.db_insert(db)
            check.tests.append(test)

            progress_dialog.set_fraction(float(done) / float(check.num_segs))

        db.close()
        progress_dialog.ensure_finish()

        self.window.destroy()
        TestWindow(check)