# Exemplo n.º 1
# 0
def generate_fig(main):
    """Generate a wordlist figure (frequency, dispersion or adjusted frequency).

    Checks the selected files, processes the wordlist data on a worker
    thread behind a modal progress dialog, then renders the figure in the
    data_received callback. Shows an error message if file checking fails
    or no results are produced.
    """
    def data_received(tokens_freq_files, tokens_stats_files):
        # tokens_freq_files: {token: frequencies per file}
        # tokens_stats_files: {token: [(dispersion, adjusted_freq) per file]}
        if tokens_freq_files:
            measure_dispersion = settings['generation_settings']['measure_dispersion']
            measure_adjusted_freq = settings['generation_settings']['measure_adjusted_freq']

            # Column captions of the selected measures, used to identify
            # which data source the user picked for the figure.
            col_dispersion = main.settings_global['measures_dispersion'][measure_dispersion]['col']
            col_adjusted_freq = main.settings_global['measures_adjusted_freq'][measure_adjusted_freq]['col']

            if settings['fig_settings']['use_data'] == main.tr('Frequency'):
                wordless_fig_freq.wordless_fig_freq(main, tokens_freq_files,
                                                    settings = settings['fig_settings'],
                                                    label_x = main.tr('Tokens'))
            else:
                # Stats are stored as (dispersion, adjusted_freq) pairs per
                # file; slice out the requested column for plotting.
                if settings['fig_settings']['use_data'] == col_dispersion:
                    tokens_stat_files = {token: numpy.array(stats_files)[:, 0]
                                         for token, stats_files in tokens_stats_files.items()}

                    label_y = col_dispersion
                elif settings['fig_settings']['use_data'] == col_adjusted_freq:
                    tokens_stat_files = {token: numpy.array(stats_files)[:, 1]
                                         for token, stats_files in tokens_stats_files.items()}

                    label_y = col_adjusted_freq
                else:
                    # Guard against an unrecognized "use_data" value, which
                    # would otherwise surface as a confusing NameError on
                    # tokens_stat_files/label_y below.
                    raise ValueError(
                        f"Unknown figure data source: {settings['fig_settings']['use_data']!r}")

                wordless_fig_stat.wordless_fig_stat(main, tokens_stat_files,
                                                    settings = settings['fig_settings'],
                                                    label_x = main.tr('Tokens'),
                                                    label_y = label_y)

            wordless_msg.wordless_msg_generate_fig_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_fig_error(main)

        dialog_progress.accept()

        # Only display the figure window after the progress dialog is closed.
        if tokens_freq_files:
            wordless_fig.show_fig()

    settings = main.settings_custom['wordlist']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(main)

        worker_process_data = Wordless_Worker_Process_Data_Wordlist_Fig(main, dialog_progress, data_received)
        thread_process_data = wordless_threading.Wordless_Thread_Process_Data(worker_process_data)

        thread_process_data.start()

        # Block until the worker finishes and data_received accepts the dialog.
        dialog_progress.exec_()

        thread_process_data.quit()
        thread_process_data.wait()
    else:
        wordless_msg.wordless_msg_generate_fig_error(main)
# Exemplo n.º 2
# 0
def generate_table(main, table):
    """Populate the wordlist table with per-file and total frequency,
    dispersion and adjusted frequency for every token.

    The data is processed on a worker thread behind a modal progress
    dialog; the table is filled in the data_received callback once the
    results arrive. Shows an error message if file checking fails or no
    results are produced.
    """
    def data_received(tokens_freq_files, tokens_stats_files):
        # tokens_freq_files: {token: frequencies per file}
        #     (the last entry is excluded from the files-found count below,
        #      presumably the total -- confirm against the worker)
        # tokens_stats_files: {token: [(dispersion, adjusted_freq) per file]}
        if tokens_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_measure_dispersion = settings['generation_settings'][
                'measure_dispersion']
            text_measure_adjusted_freq = settings['generation_settings'][
                'measure_adjusted_freq']

            # Column captions for the selected dispersion and
            # adjusted-frequency measures.
            text_dispersion = main.settings_global['measures_dispersion'][
                text_measure_dispersion]['col']
            text_adjusted_freq = main.settings_global[
                'measures_adjusted_freq'][text_measure_adjusted_freq]['col']

            # Suspend signals, sorting and repaints while (re)building rows.
            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            # When tagging is enabled, relabel the headers so the second
            # column reads "Tags" instead of tokens.
            if settings['token_settings']['use_tags']:
                table.setHorizontalHeaderLabels([
                    main.tr('Rank'),
                    main.tr('Tags'),
                    main.tr('Number of\nFiles Found')
                ])

            # Insert Columns (Files)
            for file in files:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 num=True,
                                 pct=True,
                                 cumulative=True,
                                 breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_dispersion}'),
                    num=True,
                    breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_adjusted_freq}'),
                    num=True,
                    breakdown=True)

            # Insert Columns (Total)
            table.insert_col(table.columnCount() - 1,
                             main.tr('Total\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_dispersion}'),
                             num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_adjusted_freq}'),
                             num=True)

            # Sort by frequency of the first file
            table.sortByColumn(
                table.find_col(main.tr(f'[{files[0]["name"]}]\nFrequency')),
                Qt.DescendingOrder)

            # Locate the columns just inserted by their caption suffixes;
            # the resulting lists are ordered per file, total last.
            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_dispersion = table.find_cols(main.tr(f'\n{text_dispersion}'))
            cols_adjusted_freq = table.find_cols(
                main.tr(f'\n{text_adjusted_freq}'))
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))

            len_files = len(files)

            table.setRowCount(len(tokens_freq_files))

            for i, (token, freq_files) in enumerate(
                    wordless_sorting.sorted_tokens_freq_files(
                        tokens_freq_files)):
                stats_files = tokens_stats_files[token]

                # Rank
                table.set_item_num_int(i, 0, -1)

                # Tokens
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(token))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num_cumulative(i, cols_freq[j], freq)

                for j, (dispersion, adjusted_freq) in enumerate(stats_files):
                    # Dispersion
                    table.set_item_num_float(i, cols_dispersion[j], dispersion)

                    # Adjusted Frequency
                    table.set_item_num_float(i, cols_adjusted_freq[j],
                                             adjusted_freq)

                # Number of Files Found
                # (count non-zero frequencies, excluding the last entry)
                table.set_item_num_pct(
                    i, col_files_found,
                    len([freq for freq in freq_files[:-1] if freq]), len_files)

            # Restore signals, sorting and repaints now that rows are in.
            table.blockSignals(False)
            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()
            table.update_items_width()

            # Emit itemChanged once so any dependent UI state refreshes.
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()

    settings = main.settings_custom['wordlist']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
            main)

        worker_process_data = Wordless_Worker_Process_Data_Wordlist_Table(
            main, dialog_progress, data_received)
        thread_process_data = wordless_threading.Wordless_Thread_Process_Data(
            worker_process_data)

        thread_process_data.start()

        # Block until the worker finishes and data_received accepts the dialog.
        dialog_progress.exec_()

        thread_process_data.quit()
        thread_process_data.wait()
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
# Exemplo n.º 3
# 0
def generate_table(main, table):
    """Populate the keywords table: per-file and total frequencies plus
    significance test statistic, p-value, Bayes factor and effect size for
    every keyword, measured against a reference file.

    The data is processed on a worker thread behind a modal progress
    dialog; the table is filled in the data_received callback once the
    results arrive. Shows an error message if file checking fails, no
    observed files remain after excluding the reference file, or no
    results are produced.
    """
    def data_received(keywords_freq_files, keywords_stats_files):
        # keywords_freq_files: {keyword: frequencies}
        #     (the files-found count below skips the first and last entries,
        #      presumably the reference file and the total -- confirm
        #      against the worker)
        # keywords_stats_files: {keyword: [(test_stat, p_value, bayes_factor,
        #                                   effect_size) per observed file]}
        if keywords_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_test_significance = settings['generation_settings'][
                'test_significance']
            text_measure_effect_size = settings['generation_settings'][
                'measure_effect_size']

            # Column captions; text_test_stat/text_bayes_factor may be
            # falsy for tests that do not produce those values, in which
            # case the corresponding columns are skipped.
            (text_test_stat, text_p_value,
             text_bayes_factor) = main.settings_global['tests_significance'][
                 'keywords'][text_test_significance]['cols']
            text_effect_size = main.settings_global['measures_effect_size'][
                'keywords'][text_measure_effect_size]['col']

            # Suspend signals, sorting and repaints while (re)building rows.
            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            # Insert columns (Files)
            table.insert_col(table.columnCount() - 1,
                             main.tr(f'[{ref_file["name"]}]\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            for file in files:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 num=True,
                                 pct=True,
                                 cumulative=True,
                                 breakdown=True)

                if text_test_stat:
                    table.insert_col(
                        table.columnCount() - 1,
                        main.tr(f'[{file["name"]}]\n{text_test_stat}'),
                        num=True,
                        breakdown=True)

                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'[{file["name"]}]\n{text_p_value}'),
                                 num=True,
                                 breakdown=True)

                if text_bayes_factor:
                    table.insert_col(
                        table.columnCount() - 1,
                        main.tr(f'[{file["name"]}]\n{text_bayes_factor}'),
                        num=True,
                        breakdown=True)

                table.insert_col(
                    table.columnCount() - 1,
                    main.tr(f'[{file["name"]}]\n{text_effect_size}'),
                    num=True,
                    breakdown=True)

            # Insert columns (Total)
            table.insert_col(table.columnCount() - 1,
                             main.tr('Total\nFrequency'),
                             num=True,
                             pct=True,
                             cumulative=True)

            if text_test_stat:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'Total\n{text_test_stat}'),
                                 num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_p_value}'),
                             num=True)

            if text_bayes_factor:
                table.insert_col(table.columnCount() - 1,
                                 main.tr(f'Total\n{text_bayes_factor}'),
                                 num=True)

            table.insert_col(table.columnCount() - 1,
                             main.tr(f'Total\n{text_effect_size}'),
                             num=True)

            # Sort by p-value of the first file
            table.sortByColumn(
                table.find_col(
                    main.tr(f'[{files[0]["name"]}]\n{text_p_value}')),
                Qt.AscendingOrder)

            # Locate the columns just inserted by their caption suffixes;
            # the resulting lists are ordered per file, total last.
            cols_freq = table.find_cols(main.tr('\nFrequency'))

            if text_test_stat:
                cols_test_stat = table.find_cols(
                    main.tr(f'\n{text_test_stat}'))

            cols_p_value = table.find_cols(main.tr('\np-value'))

            if text_bayes_factor:
                cols_bayes_factor = table.find_cols(main.tr('\nBayes Factor'))

            # Wrap the caption in main.tr() like every other lookup above,
            # so the column is also found when translations are active.
            cols_effect_size = table.find_cols(main.tr(f'\n{text_effect_size}'))
            col_number_files_found = table.find_col(
                main.tr('Number of\nFiles Found'))

            len_files = len(files)

            table.setRowCount(len(keywords_freq_files))

            for i, (keyword, stats_files) in enumerate(
                    wordless_sorting.sorted_keywords_stats_files(
                        keywords_stats_files)):
                keyword_freq_files = keywords_freq_files[keyword]

                # Rank
                table.set_item_num_int(i, 0, -1)

                # Keywords
                table.setItem(i, 1,
                              wordless_table.Wordless_Table_Item(keyword))

                # Frequency
                for j, freq in enumerate(keyword_freq_files):
                    table.set_item_num_cumulative(i, cols_freq[j], freq)

                for j, (test_stat, p_value, bayes_factor,
                        effect_size) in enumerate(stats_files):
                    # Test Statistic
                    if text_test_stat:
                        table.set_item_num_float(i, cols_test_stat[j],
                                                 test_stat)

                    # p-value
                    table.set_item_num_float(i, cols_p_value[j], p_value)

                    # Bayes Factor
                    if text_bayes_factor:
                        table.set_item_num_float(i, cols_bayes_factor[j],
                                                 bayes_factor)

                    # Effect Size
                    table.set_item_num_float(i, cols_effect_size[j],
                                             effect_size)

                # Number of Files Found
                # (count non-zero frequencies, excluding first/last entries)
                table.set_item_num_pct(
                    i, col_number_files_found,
                    len([freq for freq in keyword_freq_files[1:-1] if freq]),
                    len_files)

            # Restore signals, sorting and repaints now that rows are in.
            table.blockSignals(False)
            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()
            table.update_items_width()

            # Emit itemChanged once so any dependent UI state refreshes.
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()

    settings = main.settings_custom['keywords']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        ref_file = main.wordless_files.find_file_by_name(
            settings['generation_settings']['ref_file'], selected_only=True)

        # Observed files are all selected files except the reference file.
        files = [
            file for file in main.wordless_files.get_selected_files()
            if file != ref_file
        ]

        if files:
            dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
                main)

            worker_process_data = Wordless_Worker_Process_Data_Keywords_Table(
                main, dialog_progress, data_received)
            thread_process_data = wordless_threading.Wordless_Thread_Process_Data(
                worker_process_data)

            thread_process_data.start()

            # Block until the worker finishes and data_received accepts
            # the dialog.
            dialog_progress.exec_()

            thread_process_data.quit()
            thread_process_data.wait()
        else:
            wordless_msg_box.wordless_msg_box_missing_observed_file(main)

            wordless_msg.wordless_msg_generate_table_error(main)
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
# Exemplo n.º 4
# 0
def generate_fig(main):
    """Generate a keywords figure (frequency, test statistic, p-value,
    Bayes factor or effect size) against a reference file.

    Checks the selected files, processes the keywords data on a worker
    thread behind a modal progress dialog, then renders the figure in the
    data_received callback. Shows an error message if file checking fails,
    no observed files remain after excluding the reference file, or no
    results are produced.
    """
    def data_received(keywords_freq_files, keywords_stats_files):
        # keywords_freq_files: {keyword: frequencies per file}
        # keywords_stats_files: {keyword: [(test_stat, p_value, bayes_factor,
        #                                   effect_size) per observed file]}
        if keywords_freq_files:
            text_test_significance = settings['generation_settings'][
                'test_significance']
            text_measure_effect_size = settings['generation_settings'][
                'measure_effect_size']

            # Column captions of the selected test/measure, used to
            # identify which data source the user picked for the figure.
            (text_test_stat, text_p_value,
             text_bayes_factor) = main.settings_global['tests_significance'][
                 'keywords'][text_test_significance]['cols']
            text_effect_size = main.settings_global['measures_effect_size'][
                'keywords'][text_measure_effect_size]['col']

            if settings['fig_settings']['use_data'] == main.tr('Frequency'):
                wordless_fig_freq.wordless_fig_freq_ref(
                    main,
                    keywords_freq_files,
                    ref_file=ref_file,
                    settings=settings['fig_settings'],
                    label_x=main.tr('Keywords'))
            else:
                # Stats are stored as 4-tuples per file; slice out the
                # requested column for plotting.
                if settings['fig_settings']['use_data'] == text_test_stat:
                    keywords_stat_files = {
                        keyword: numpy.array(stats_files)[:, 0]
                        for keyword, stats_files in
                        keywords_stats_files.items()
                    }

                    label_y = text_test_stat
                elif settings['fig_settings']['use_data'] == text_p_value:
                    keywords_stat_files = {
                        keyword: numpy.array(stats_files)[:, 1]
                        for keyword, stats_files in
                        keywords_stats_files.items()
                    }

                    label_y = text_p_value
                elif settings['fig_settings']['use_data'] == text_bayes_factor:
                    keywords_stat_files = {
                        keyword: numpy.array(stats_files)[:, 2]
                        for keyword, stats_files in
                        keywords_stats_files.items()
                    }

                    label_y = text_bayes_factor
                elif settings['fig_settings']['use_data'] == text_effect_size:
                    keywords_stat_files = {
                        keyword: numpy.array(stats_files)[:, 3]
                        for keyword, stats_files in
                        keywords_stats_files.items()
                    }

                    label_y = text_effect_size
                else:
                    # Guard against an unrecognized "use_data" value (e.g.
                    # when the selected test has no test statistic), which
                    # would otherwise surface as a confusing NameError on
                    # keywords_stat_files/label_y below.
                    raise ValueError(
                        f"Unknown figure data source: {settings['fig_settings']['use_data']!r}")

                wordless_fig_stat.wordless_fig_stat_ref(
                    main,
                    keywords_stat_files,
                    ref_file=ref_file,
                    settings=settings['fig_settings'],
                    label_y=label_y)

            wordless_msg.wordless_msg_generate_fig_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_fig_error(main)

        dialog_progress.accept()

        # Only display the figure window after the progress dialog is closed.
        if keywords_freq_files:
            wordless_fig.show_fig()

    settings = main.settings_custom['keywords']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        ref_file = main.wordless_files.find_file_by_name(
            settings['generation_settings']['ref_file'], selected_only=True)

        # Observed files are all selected files except the reference file.
        files = [
            file for file in main.wordless_files.get_selected_files()
            if file != ref_file
        ]

        if files:
            dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
                main)

            worker_process_data = Wordless_Worker_Process_Data_Keywords_Fig(
                main, dialog_progress, data_received)
            thread_process_data = wordless_threading.Wordless_Thread_Process_Data(
                worker_process_data)

            thread_process_data.start()

            # Block until the worker finishes and data_received accepts
            # the dialog.
            dialog_progress.exec_()

            thread_process_data.quit()
            thread_process_data.wait()
        else:
            wordless_msg_box.wordless_msg_box_missing_observed_file(main)

            wordless_msg.wordless_msg_generate_fig_error(main)
    else:
        wordless_msg.wordless_msg_generate_fig_error(main)
# Exemplo n.º 5
# 0
def generate_table(main, table):
    """Populate the keyword table (older API variant with separate count and
    percentage columns): per-file and total frequencies plus significance
    test statistic, p-value, Bayes factor and effect size for every keyword,
    measured against a reference file.

    The data is processed on a worker thread behind a progress dialog; the
    table is filled in the update_gui callback once the results arrive.
    """
    def update_gui(keywords_freq_files, keywords_stats_files):
        # keywords_freq_files: {keyword: frequencies}
        #     (the files-found count below skips the first and last entries,
        #      presumably the reference file and the total -- confirm
        #      against the worker)
        # keywords_stats_files: {keyword: [(test_stat, p_value, bayes_factor,
        #                                   effect_size) per observed file]}
        if keywords_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            text_test_significance = settings['generation_settings']['test_significance']
            text_measure_effect_size = settings['generation_settings']['measure_effect_size']

            # Column captions; text_test_stat/text_bayes_factor may be
            # falsy for tests that do not produce those values, in which
            # case the corresponding columns are skipped.
            (text_test_stat,
             text_p_value,
             text_bayes_factor) = main.settings_global['tests_significance']['keyword'][text_test_significance]['cols']
            text_effect_size =  main.settings_global['measures_effect_size']['keyword'][text_measure_effect_size]['col']

            # Insert columns (files)
            table.insert_col(table.columnCount() - 2,
                             main.tr(f'[{ref_file["name"]}]\nFrequency'),
                             is_int = True, is_cumulative = True)
            table.insert_col(table.columnCount() - 2,
                             main.tr(f'[{ref_file["name"]}]\nFrequency %'),
                             is_pct = True, is_cumulative = True)

            for file in files:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 is_int = True, is_cumulative = True, is_breakdown = True)
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency %'),
                                 is_pct = True, is_cumulative = True, is_breakdown = True)

                if text_test_stat:
                    table.insert_col(table.columnCount() - 2,
                                     main.tr(f'[{file["name"]}]\n{text_test_stat}'),
                                     is_float = True, is_breakdown = True)

                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\n{text_p_value}'),
                                 is_float = True, is_breakdown = True)

                if text_bayes_factor:
                    table.insert_col(table.columnCount() - 2,
                                     main.tr(f'[{file["name"]}]\n{text_bayes_factor}'),
                                     is_float = True, is_breakdown = True)

                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\n{text_effect_size}'),
                                 is_float = True, is_breakdown = True)

            # Insert columns (total)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency'),
                             is_int = True, is_cumulative = True)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency %'),
                             is_pct = True, is_cumulative = True)

            if text_test_stat:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'Total\n{text_test_stat}'),
                                 is_float = True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_p_value}'),
                             is_float = True)

            if text_bayes_factor:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'Total\n{text_bayes_factor}'),
                                 is_float = True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_effect_size}'),
                             is_float = True)

            # Sort by p-value of the first file
            table.horizontalHeader().setSortIndicator(
                table.find_col(main.tr(f'[{files[0]["name"]}]\n{text_p_value}')),
                Qt.AscendingOrder
            )

            # Suspend signals, sorting and repaints while filling rows.
            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            # Locate the columns just inserted by their caption suffixes.
            # '\nFrequency' also matches the '\nFrequency %' captions, so
            # the percentage columns are removed from cols_freq below.
            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_freq_pct = table.find_cols(main.tr('\nFrequency %'))

            for col in cols_freq_pct:
                cols_freq.remove(col)

            if text_test_stat:
                cols_test_stat = table.find_cols(main.tr(f'\n{text_test_stat}'))

            cols_p_value = table.find_cols(main.tr('\np-value'))

            if text_bayes_factor:
                cols_bayes_factor = table.find_cols(main.tr('\nBayes Factor'))

            cols_effect_size = table.find_cols(f'\n{text_effect_size}')
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))
            col_files_found_pct = table.find_col(main.tr('Number of\nFiles Found %'))

            # Per-column frequency totals, used as denominators for the
            # percentage columns.
            freq_totals = numpy.array(list(keywords_freq_files.values())).sum(axis = 0)
            len_files = len(files)

            table.setRowCount(len(keywords_freq_files))

            for i, (keyword, stats_files) in enumerate(wordless_sorting.sorted_keywords_stats_files(keywords_stats_files)):
                freq_files = keywords_freq_files[keyword]

                # Rank
                table.set_item_num(i, 0, -1)

                # Keyword
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(keyword))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num(i, cols_freq[j], freq)
                    table.set_item_num(i, cols_freq_pct[j], freq, freq_totals[j])

                for j, (test_stat, p_value, bayes_factor, effect_size) in enumerate(stats_files):
                    # Test Statistic
                    if text_test_stat:
                        table.set_item_num(i, cols_test_stat[j], test_stat)

                    # p-value
                    table.set_item_num(i, cols_p_value[j], p_value)

                    # Bayes Factor
                    if text_bayes_factor:
                        table.set_item_num(i, cols_bayes_factor[j], bayes_factor)

                    # Effect Size
                    table.set_item_num(i, cols_effect_size[j], effect_size)

                # Number of Files Found
                # (count non-zero frequencies, excluding first/last entries)
                num_files_found = len([freq for freq in freq_files[1:-1] if freq])

                table.set_item_num(i, col_files_found, num_files_found)
                table.set_item_num(i, col_files_found_pct, num_files_found, len_files)

            # Restore sorting, repaints and signals now that rows are in.
            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()

            # Emit itemChanged once so any dependent UI state refreshes.
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

    settings = main.settings_custom['keyword']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        ref_file = main.wordless_files.find_file_by_name(
            settings['generation_settings']['ref_file'],
            selected_only = True
        )

        # Observed files are all selected files except the reference file.
        files = [file
                 for file in main.wordless_files.get_selected_files()
                 if file != ref_file]

        if files:
            dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(main)

            worker_keyword_table = Wordless_Worker_Keyword_Table(
                main,
                dialog_progress = dialog_progress,
                update_gui = update_gui
            )

            thread_keyword_table = wordless_threading.Wordless_Thread(worker_keyword_table)
            thread_keyword_table.start_worker()
        else:
            wordless_msg_box.wordless_msg_box_missing_observed_file(main)

            wordless_msg.wordless_msg_generate_table_error(main)
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
# Exemplo n.º 6
# 0
def generate_table(main, table):
    """Generate the overview statistics table for the selected files.

    Validates the selected files, then computes per-file text statistics on
    a background worker thread; ``data_received`` renders the results into
    ``table`` and reports success or failure via the message helpers.
    """
    def data_received(texts_stats_files):
        # Each item of texts_stats_files is a per-file stats sequence
        # (presumably including a trailing "total" entry — TODO confirm
        # against the worker):
        #   0-2: paragraph lengths counted in sentences / clauses / tokens
        #   3-5: sentence / clause / token lengths
        #   6:   type lengths, 7: type-token ratio, 8: standardized TTR
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repainting while the table is rebuilt
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            len_tokens_files = []

            # Insert one breakdown column per file before the "Total" column
            for file in files:
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 breakdown=True)

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_token)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Count of Paragraphs
                table.set_item_num_cumulative(0, i, count_paras)
                # Count of Sentences
                table.set_item_num_cumulative(1, i, count_sentences)
                # Count of Clauses
                table.set_item_num_cumulative(2, i, count_clauses)
                # Count of Tokens
                table.set_item_num_cumulative(3, i, count_tokens)
                # Count of Types
                table.set_item_num_pct(4, i, count_types)
                # Count of Characters
                table.set_item_num_cumulative(5, i, count_chars)
                # Type-token Ratio
                table.set_item_num_float(6, i, ttr)
                # Type-token Ratio (Standardized)
                table.set_item_num_float(7, i, sttr)

                # Paragraph Length — report zeros for files without
                # paragraphs instead of calling numpy on empty input
                if count_paras == 0:
                    for row in range(8, 14):
                        table.set_item_num_float(row, i, 0)
                else:
                    table.set_item_num_float(8, i,
                                             numpy.mean(len_paras_in_sentence))
                    table.set_item_num_float(9, i,
                                             numpy.std(len_paras_in_sentence))
                    table.set_item_num_float(10, i,
                                             numpy.mean(len_paras_in_clause))
                    table.set_item_num_float(11, i,
                                             numpy.std(len_paras_in_clause))
                    table.set_item_num_float(12, i,
                                             numpy.mean(len_paras_in_token))
                    table.set_item_num_float(13, i,
                                             numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num_float(14, i, 0)
                    table.set_item_num_float(15, i, 0)
                else:
                    table.set_item_num_float(14, i, numpy.mean(len_sentences))
                    table.set_item_num_float(15, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num_float(16, i, 0)
                    table.set_item_num_float(17, i, 0)
                else:
                    table.set_item_num_float(16, i, numpy.mean(len_clauses))
                    table.set_item_num_float(17, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num_float(18, i, 0)
                    table.set_item_num_float(19, i, 0)
                else:
                    table.set_item_num_float(18, i, numpy.mean(len_tokens))
                    table.set_item_num_float(19, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num_float(20, i, 0)
                    table.set_item_num_float(21, i, 0)
                else:
                    table.set_item_num_float(20, i, numpy.mean(len_types))
                    table.set_item_num_float(21, i, numpy.std(len_types))

                len_tokens_files.append(collections.Counter(len_tokens))

            # Use tags only: relabel the fixed rows in tag terminology
            if settings['token_settings']['use_tags']:
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Clauses'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-tag Ratio'),
                    main.tr('Type-tag Ratio (Standardized)'),
                    main.tr('Paragraph Length in Sentence (Mean)'),
                    main.tr(
                        'Paragraph Length in Sentence (Standard Deviation)'),
                    # Fixed: rows 10-11 hold clause-based paragraph lengths;
                    # they previously duplicated the sentence-based labels
                    main.tr('Paragraph Length in Clause (Mean)'),
                    main.tr(
                        'Paragraph Length in Clause (Standard Deviation)'),
                    main.tr('Paragraph Length in Tag (Mean)'),
                    main.tr('Paragraph Length in Tag (Standard Deviation)'),
                    main.tr('Sentence Length in Tag (Mean)'),
                    main.tr('Sentence Length in Tag (Standard Deviation)'),
                    main.tr('Clause Length in Tag (Mean)'),
                    main.tr('Clause Length in Tag (Standard Deviation)'),
                    main.tr('Tag Length in Character (Mean)'),
                    main.tr('Tag Length in Character (Standard Deviation)'),
                    main.tr('Tag Type Length in Character (Mean)'),
                    main.tr(
                        'Tag Type Length in Character (Standard Deviation)')
                ])

            # Count of n-length Tokens/Tags
            # Guarded: when no file contains any token, max() on the empty
            # merged mapping would raise ValueError (the other overview
            # generator in this file applies the same guard)
            if any(len_tokens_files):
                len_files = len(files)
                len_tokens_total = wordless_misc.merge_dicts(len_tokens_files)
                len_tokens_max = max(len_tokens_total)

                if settings['token_settings']['use_tags']:
                    for i in range(len_tokens_max):
                        table.insert_row(table.rowCount(),
                                         main.tr(f'Count of {i + 1}-length Tags'),
                                         num=True,
                                         pct=True,
                                         cumulative=True)
                else:
                    for i in range(len_tokens_max):
                        table.insert_row(
                            table.rowCount(),
                            main.tr(f'Count of {i + 1}-length Tokens'),
                            num=True,
                            pct=True,
                            cumulative=True)

                for i in range(len_tokens_max):
                    # merge_dicts maps token length -> per-file counts;
                    # lengths absent from every file default to all zeros
                    freqs = len_tokens_total.get(i + 1, [0] * (len_files + 1))

                    for j, freq in enumerate(freqs):
                        table.set_item_num_cumulative(
                            table.rowCount() - len_tokens_max + i, j, freq)

            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            # Notify itemChanged listeners once after the bulk update
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()

    settings = main.settings_custom['overview']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
            main)

        worker_process_data = Wordless_Worker_Process_Data_Overview_Table(
            main, dialog_progress, data_received)
        thread_process_data = wordless_threading.Wordless_Thread_Process_Data(
            worker_process_data)

        thread_process_data.start()

        dialog_progress.exec_()

        thread_process_data.quit()
        thread_process_data.wait()
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
Exemplo n.º 7
0
def generate_table(main, table):
    """Generate the wordlist table for the currently selected files.

    Checks that the selected files can be loaded, then runs the wordlist
    worker on a background thread; ``update_gui`` fills ``table`` with
    per-file and total frequencies, dispersion and adjusted frequency.
    """
    def update_gui(tokens_freq_files, tokens_stats_files):
        # tokens_freq_files: token -> per-file frequencies (with a trailing
        # total entry — presumably; TODO confirm against the worker);
        # tokens_stats_files: token -> per-file (dispersion, adjusted
        # frequency) pairs.
        if tokens_freq_files:
            table.clear_table()

            table.settings = copy.deepcopy(main.settings_custom)

            # Measure identifiers chosen in the generation settings
            text_measure_dispersion = settings['generation_settings'][
                'measure_dispersion']
            text_measure_adjusted_freq = settings['generation_settings'][
                'measure_adjusted_freq']

            # Display names (column headers) registered for those measures
            text_dispersion = main.settings_global['measures_dispersion'][
                text_measure_dispersion]['col']
            text_adjusted_freq = main.settings_global[
                'measures_adjusted_freq'][text_measure_adjusted_freq]['col']

            if settings['token_settings']['use_tags']:
                table.horizontalHeaderItem(1).setText(main.tr('Tag'))

            # Insert columns (files): frequency, frequency %, dispersion and
            # adjusted frequency per file, before the two trailing columns
            for file in files:
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency'),
                                 is_int=True,
                                 is_cumulative=True,
                                 is_breakdown=True)
                table.insert_col(table.columnCount() - 2,
                                 main.tr(f'[{file["name"]}]\nFrequency %'),
                                 is_pct=True,
                                 is_cumulative=True,
                                 is_breakdown=True)

                table.insert_col(
                    table.columnCount() - 2,
                    main.tr(f'[{file["name"]}]\n{text_dispersion}'),
                    is_float=True,
                    is_breakdown=True)

                table.insert_col(
                    table.columnCount() - 2,
                    main.tr(f'[{file["name"]}]\n{text_adjusted_freq}'),
                    is_float=True,
                    is_breakdown=True)

            # Insert columns (total)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency'),
                             is_int=True,
                             is_cumulative=True)
            table.insert_col(table.columnCount() - 2,
                             main.tr('Total\nFrequency %'),
                             is_pct=True,
                             is_cumulative=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_dispersion}'),
                             is_float=True)

            table.insert_col(table.columnCount() - 2,
                             main.tr(f'Total\n{text_adjusted_freq}'),
                             is_float=True)

            # Sort by frequency of the first file
            table.horizontalHeader().setSortIndicator(
                table.find_col(main.tr(f'[{files[0]["name"]}]\nFrequency')),
                Qt.DescendingOrder)

            # Suspend signals, sorting and repainting during the bulk fill
            table.blockSignals(True)
            table.setSortingEnabled(False)
            table.setUpdatesEnabled(False)

            # Locate the column groups just inserted
            cols_freq = table.find_cols(main.tr('\nFrequency'))
            cols_freq_pct = table.find_cols(main.tr('\nFrequency %'))

            # find_cols('\nFrequency') also matches the "Frequency %"
            # columns, so remove those from the plain-frequency group
            for col in cols_freq_pct:
                cols_freq.remove(col)

            cols_dispersion = table.find_cols(main.tr(f'\n{text_dispersion}'))
            cols_adjusted_freq = table.find_cols(
                main.tr(f'\n{text_adjusted_freq}'))
            col_files_found = table.find_col(main.tr('Number of\nFiles Found'))
            col_files_found_pct = table.find_col(
                main.tr('Number of\nFiles Found %'))

            # Per-column totals used as denominators for "Frequency %"
            freq_totals = numpy.array(list(
                tokens_freq_files.values())).sum(axis=0)
            len_files = len(files)

            table.setRowCount(len(tokens_freq_files))

            for i, (token, freq_files) in enumerate(
                    wordless_sorting.sorted_tokens_freq_files(
                        tokens_freq_files)):
                stats_files = tokens_stats_files[token]

                # Rank (placeholder; filled in by update_ranks below)
                table.set_item_num(i, 0, -1)

                # Token
                table.setItem(i, 1, wordless_table.Wordless_Table_Item(token))

                # Frequency
                for j, freq in enumerate(freq_files):
                    table.set_item_num(i, cols_freq[j], freq)
                    table.set_item_num(i, cols_freq_pct[j], freq,
                                       freq_totals[j])

                for j, (dispersion, adjusted_freq) in enumerate(stats_files):
                    # Dispersion
                    table.set_item_num(i, cols_dispersion[j], dispersion)

                    # Adjusted Frequency
                    table.set_item_num(i, cols_adjusted_freq[j], adjusted_freq)

                # Number of Files Found (the trailing entry is the total,
                # hence the [:-1] slice)
                num_files_found = len(
                    [freq for freq in freq_files[:-1] if freq])

                table.set_item_num(i, col_files_found, num_files_found)
                table.set_item_num(i, col_files_found_pct, num_files_found,
                                   len_files)

            table.setSortingEnabled(True)
            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_ranks()

            # Notify itemChanged listeners once after the bulk update
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

    settings = main.settings_custom['wordlist']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
            main)

        worker_wordlist_table = Wordless_Worker_Wordlist_Table(
            main, dialog_progress=dialog_progress, update_gui=update_gui)

        thread_wordlist_table = wordless_threading.Wordless_Thread(
            worker_wordlist_table)
        thread_wordlist_table.start_worker()
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
Exemplo n.º 8
0
def generate_table(main, table):
    """Generate the overview table (counts, ratios and length statistics)
    for the selected files, one breakdown column per file plus a total.
    """
    def update_gui(texts_stats_files):
        # texts_stats_files: one stats sequence per file; the last entry
        # holds the totals over all files (see the *_total counts below):
        #   0-2: paragraph lengths in sentences / clauses / tokens
        #   3-5: sentence / clause / token lengths
        #   6:   type lengths, 7: type-token ratio, 8: standardized TTR
        if any(itertools.chain.from_iterable(texts_stats_files)):
            table.settings = copy.deepcopy(main.settings_custom)

            # Suspend signals and repainting while the table is rebuilt
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            count_tokens_lens = []

            # Insert column (total)
            for i, file in enumerate(files):
                table.insert_col(table.find_col(main.tr('Total')),
                                 file['name'],
                                 is_breakdown=True)

            # Totals come from the trailing entry of texts_stats_files
            count_paras_total = len(texts_stats_files[-1][0])
            count_sentences_total = len(texts_stats_files[-1][3])
            count_clauses_total = len(texts_stats_files[-1][4])
            count_tokens_total = len(texts_stats_files[-1][5])
            count_types_total = len(texts_stats_files[-1][6])
            count_chars_total = sum(texts_stats_files[-1][5])

            for i, stats in enumerate(texts_stats_files):
                len_paras_in_sentence = stats[0]
                len_paras_in_clause = stats[1]
                len_paras_in_token = stats[2]
                len_sentences = stats[3]
                len_clauses = stats[4]
                len_tokens = stats[5]
                len_types = stats[6]
                ttr = stats[7]
                sttr = stats[8]

                count_paras = len(len_paras_in_sentence)
                count_sentences = len(len_sentences)
                count_clauses = len(len_clauses)
                count_tokens = len(len_tokens)
                count_types = len(len_types)
                count_chars = sum(len_tokens)

                # Count rows come in pairs: absolute value, then the value
                # as a percentage of the corresponding total
                # Count of Paragraphs
                table.set_item_num(0, i, count_paras)
                table.set_item_num(1, i, count_paras, count_paras_total)
                # Count of Sentences
                table.set_item_num(2, i, count_sentences)
                table.set_item_num(3, i, count_sentences,
                                   count_sentences_total)
                # Count of Clauses
                table.set_item_num(4, i, count_clauses)
                table.set_item_num(5, i, count_clauses, count_clauses_total)
                # Count of Tokens
                table.set_item_num(6, i, count_tokens)
                table.set_item_num(7, i, count_tokens, count_tokens_total)
                # Count of Types
                table.set_item_num(8, i, count_types)
                table.set_item_num(9, i, count_types, count_types_total)
                # Count of Characters
                table.set_item_num(10, i, count_chars)
                table.set_item_num(11, i, count_chars, count_chars_total)
                # Type-Token Ratio
                table.set_item_num(12, i, ttr)
                # Type-Token Ratio (Standardized)
                table.set_item_num(13, i, sttr)

                # Paragraph Length — zeros for files without paragraphs
                # (avoids numpy statistics on empty input)
                if count_paras == 0:
                    table.set_item_num(14, i, 0)
                    table.set_item_num(15, i, 0)
                    table.set_item_num(16, i, 0)
                    table.set_item_num(17, i, 0)
                    table.set_item_num(18, i, 0)
                    table.set_item_num(19, i, 0)
                else:
                    table.set_item_num(14, i,
                                       numpy.mean(len_paras_in_sentence))
                    table.set_item_num(15, i, numpy.std(len_paras_in_sentence))
                    table.set_item_num(16, i, numpy.mean(len_paras_in_clause))
                    table.set_item_num(17, i, numpy.std(len_paras_in_clause))
                    table.set_item_num(18, i, numpy.mean(len_paras_in_token))
                    table.set_item_num(19, i, numpy.std(len_paras_in_token))

                # Sentence Length
                if count_sentences == 0:
                    table.set_item_num(20, i, 0)
                    table.set_item_num(21, i, 0)
                else:
                    table.set_item_num(20, i, numpy.mean(len_sentences))
                    table.set_item_num(21, i, numpy.std(len_sentences))

                # Clause Length
                if count_clauses == 0:
                    table.set_item_num(22, i, 0)
                    table.set_item_num(23, i, 0)
                else:
                    table.set_item_num(22, i, numpy.mean(len_clauses))
                    table.set_item_num(23, i, numpy.std(len_clauses))

                # Token Length
                if count_tokens == 0:
                    table.set_item_num(24, i, 0)
                    table.set_item_num(25, i, 0)
                else:
                    table.set_item_num(24, i, numpy.mean(len_tokens))
                    table.set_item_num(25, i, numpy.std(len_tokens))

                # Type Length
                if count_types == 0:
                    table.set_item_num(26, i, 0)
                    table.set_item_num(27, i, 0)
                else:
                    table.set_item_num(26, i, numpy.mean(len_types))
                    table.set_item_num(27, i, numpy.std(len_types))

                count_tokens_lens.append(collections.Counter(len_tokens))

            # Count of n-length Tokens
            # Guard: skip entirely when no file contains any token, so that
            # max() below never sees an empty mapping
            if any(count_tokens_lens):
                len_files = len(files)
                count_tokens_lens_files = wordless_misc.merge_dicts(
                    count_tokens_lens)
                # Per-length totals taken from the last (total) column
                count_tokens_lens_total = {
                    len_token: count_tokens_files[-1]
                    for len_token, count_tokens_files in
                    count_tokens_lens_files.items()
                }
                len_tokens_max = max(count_tokens_lens_files)

                # Two rows per token length: absolute count and percentage
                for i in range(len_tokens_max):
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens'),
                        is_int=True,
                        is_cumulative=True)
                    table.insert_row(
                        table.rowCount(),
                        main.tr(f'Count of {i + 1}-Length Tokens %'),
                        is_pct=True,
                        is_cumulative=True)

                for i in range(len_tokens_max):
                    # Lengths absent from every file default to all zeros
                    counts = count_tokens_lens_files.get(
                        i + 1, [0] * (len_files + 1))

                    for j, count in enumerate(counts):
                        # Rows were appended pairwise, so the row for length
                        # i+1 sits (len_tokens_max - i) * 2 rows from the end
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2,
                                           col=j,
                                           val=count)
                        table.set_item_num(row=table.rowCount() -
                                           (len_tokens_max - i) * 2 + 1,
                                           col=j,
                                           val=count,
                                           total=count_tokens_lens_total.get(
                                               i + 1, 0))

            table.setUpdatesEnabled(True)
            table.blockSignals(False)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()

            # Notify itemChanged listeners once after the bulk update
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

    settings = main.settings_custom['overview']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(
            main)

        worker_overview_table = Wordless_Worker_Overview_Table(
            main, dialog_progress=dialog_progress, update_gui=update_gui)

        thread_overview_table = wordless_threading.Wordless_Thread(
            worker_overview_table)
        thread_overview_table.start_worker()
    else:
        wordless_msg.wordless_msg_generate_table_error(main)
Exemplo n.º 9
0
def generate_table(main, table):
    """Populate the overview table with per-file text statistics.

    Validates the selected files, then runs the overview worker on a
    background thread; ``data_received`` renders counts, ratios, average
    lengths and per-length token counts once the worker finishes.
    """
    def data_received(texts_stats_files, texts_len_tokens_files):
        # texts_len_tokens_files: per-file token-length counters; empty
        # everywhere means there is nothing to show
        if any(texts_len_tokens_files):
            table.settings = copy.deepcopy(main.settings_custom)

            # Freeze signals and repainting during the bulk fill
            table.blockSignals(True)
            table.setUpdatesEnabled(False)

            table.clear_table()

            # One breakdown column per file, inserted before "Total"
            for file in files:
                table.insert_col(table.find_col(main.tr('Total')), file['name'], breakdown = True)

            for i, stats in enumerate(texts_stats_files):
                (count_paras, count_sentences, count_tokens,
                 count_types, count_chars, ttr, sttr) = stats[:7]

                table.set_item_num_cumulative(0, i, count_paras)
                table.set_item_num_cumulative(1, i, count_sentences)
                table.set_item_num_cumulative(2, i, count_tokens)
                table.set_item_num_pct(3, i, count_types)
                table.set_item_num_cumulative(4, i, count_chars)
                table.set_item_num_float(5, i, ttr)
                table.set_item_num_float(6, i, sttr)

                # Average lengths; a zero denominator yields zero
                table.set_item_num_float(
                    7, i, count_sentences / count_paras if count_paras else 0)
                table.set_item_num_float(
                    8, i, count_tokens / count_paras if count_paras else 0)
                table.set_item_num_float(
                    9, i, count_tokens / count_sentences if count_sentences else 0)
                table.set_item_num_float(
                    10, i, count_chars / count_tokens if count_tokens else 0)

            # Count of n-length Tokens
            len_tokens_total = wordless_misc.merge_dicts(texts_len_tokens_files)
            len_tokens_max = max(len_tokens_total)

            if settings['token_settings']['use_tags']:
                # Tags-only mode: relabel the fixed rows in tag terminology
                table.setVerticalHeaderLabels([
                    main.tr('Count of Paragraphs'),
                    main.tr('Count of Sentences'),
                    main.tr('Count of Tags'),
                    main.tr('Count of Tag Types'),
                    main.tr('Count of Characters'),
                    main.tr('Type-Tag Ratio'),
                    main.tr('Type-Tag Ratio (Standardized)'),
                    main.tr('Average Paragraph Length (in Sentence)'),
                    main.tr('Average Paragraph Length (in Tag)'),
                    main.tr('Average Sentence Length (in Tag)'),
                    main.tr('Average Tag Length (in Character)')
                ])

                for n in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {n + 1}-length Tags'),
                                     num = True, pct = True, cumulative = True)
            else:
                for n in range(len_tokens_max):
                    table.insert_row(table.rowCount(),
                                     main.tr(f'Count of {n + 1}-length Tokens'),
                                     num = True, pct = True, cumulative = True)

            num_files = len(files)

            for n in range(len_tokens_max):
                # Lengths absent from every file default to all zeros
                freqs = len_tokens_total.get(n + 1, [0] * (num_files + 1))

                for j, freq in enumerate(freqs):
                    table.set_item_num_cumulative(table.rowCount() - len_tokens_max + n, j, freq)

            table.blockSignals(False)
            table.setUpdatesEnabled(True)

            table.toggle_pct()
            table.toggle_cumulative()
            table.toggle_breakdown()
            table.update_items_width()

            # Notify itemChanged listeners once after the bulk update
            table.itemChanged.emit(table.item(0, 0))

            wordless_msg.wordless_msg_generate_table_success(main)
        else:
            wordless_msg_box.wordless_msg_box_no_results(main)

            wordless_msg.wordless_msg_generate_table_error(main)

        dialog_progress.accept()

    settings = main.settings_custom['overview']
    files = main.wordless_files.get_selected_files()

    if wordless_checking_file.check_files_on_loading(main, files):
        dialog_progress = wordless_dialog_misc.Wordless_Dialog_Progress_Process_Data(main)

        worker_process_data = Wordless_Worker_Process_Data_Overview_Table(main, dialog_progress, data_received)
        thread_process_data = wordless_threading.Wordless_Thread_Process_Data(worker_process_data)

        thread_process_data.start()

        dialog_progress.exec_()

        thread_process_data.quit()
        thread_process_data.wait()
    else:
        wordless_msg.wordless_msg_generate_table_error(main)