Exemplo n.º 1
0
    def build_rs_frequs(self, variables_calculation: list, filter_string: str=None, variables_calculation_avg: list=None, title: str=None,
                        codeplan: CodePlan=None) -> ReportingSet:
        """Build a reporting set based on frequencies.

        The frequencies over all cases are stored under the key ``'TOTAL'`` of
        the returned reporting set. If ``self.result_subsplits`` is set, the
        sub splits are added through ``self.calc_splits_sub``.

        :param variables_calculation: names of the variables to count; every
            name has to be a column of ``self.database_table``.
        :param filter_string: filter handed on unchanged to
            ``self._calc_frequencies`` and ``self.calc_splits_sub``.
        :param variables_calculation_avg: stored on
            ``self.variables_calculation_avg`` before the calculation starts.
        :param title: title of the new reporting set.
        :param codeplan: optional code plan used as main split. Codes that
            occur in the counted data but not in the code plan are appended to
            its ``key_order``. NOTE: the passed object itself becomes
            ``rs.split_main``, so the caller's code plan is modified.
        :return: the populated reporting set, stamped with the current local
            time (``dd.mm.YYYY HH:MM:SS``).
        :raises Exception: if a requested variable is not a column of
            ``self.database_table``.
        """
        self.check_setup()

        table_column_names = self.db.query_table_column_names(self.database_table)
        for variable_name in variables_calculation:
            if variable_name not in table_column_names:
                raise Exception("Variable '{}' not found in {}!".format(variable_name, self.database_table))
        self.variables_calculation_avg = variables_calculation_avg  # XXX rename
        rs = ReportingSet(title=title)
        rs['TOTAL'] = self._calc_frequencies(variables_calculation=variables_calculation, filter_string=filter_string)
        if codeplan:
            rs.split_main = codeplan
            # Add codes that were counted but are missing from the code plan.
            # The previous code built this list with ``+`` and threw the
            # result away, so the missing codes were never added. Keys already
            # present in key_order are skipped so that reusing one code plan
            # for several reporting sets does not produce duplicates.
            key_order = rs.split_main.key_order
            missing_codes = [key for key in rs['TOTAL']['COUNT']
                             if key not in codeplan and key not in key_order]
            rs.split_main.key_order = key_order + missing_codes
        elif len(rs['TOTAL']) > 0:
            counts = rs['TOTAL']['COUNT']
            rs.split_main.data = list(counts.keys())
            if self.order_freq_desc:
                # Sort ascending and then reverse (instead of reverse=True) to
                # keep the established order of codes with equal counts.
                ascending = sorted(counts.items(), key=operator.itemgetter(1))
                rs.split_main.key_order = [key for key, _ in reversed(ascending)]
                if 'OTHER' in rs.split_main.key_order:
                    # 'OTHER' always goes last, whatever its count.
                    rs.split_main.key_order.remove('OTHER')
                    rs.split_main.key_order.append('OTHER')
            rs.split_main.variables = variables_calculation
        if self.result_subsplits:
            self.calc_splits_sub(rs, filter_string, variables_calculation)
        rs.timestamp = datetime.datetime.now().strftime('%d.%m.%Y %H:%M:%S')
        return rs
Exemplo n.º 2
0
    def _build_table_tex(self, rs: ReportingSet, head=None):
        """Render a reporting set as the LaTeX source of one ``longtable``.

        The table has one label column for the main split, one column group
        for the total ('GESAMT') and one column group per code of every table
        head. Each column group holds one column per entry of
        ``self.columns_reporting``. Below the data rows a summary block with
        the column sums and, if present in the 'TOTAL' dataset, the number of
        cases ('Fallzahl') and the average ('Durchschnitt') is written.

        Side effects: ``rs.title`` is filled from the first main split
        variable when empty, ``rs.split_main.key_order`` may be trimmed and
        reordered, ``self.lagTitle`` is set to the rendered title, and for
        ``columns_reporting == ['PCT_GN_W']`` the ``COUNT_TOTAL`` /
        ``COUNT_W_TOTAL`` entries of the datasets are overwritten.

        :param rs: ReportingSet to be rendered.
        :param head: table head; ``None``/empty for a plain table, a
            ``CodePlan`` for a single head or a ``TabHead`` flagged as sub head
            that is iterated as a sequence of heads.
        :return: LaTeX source of the table, followed by a page break.
        :raises Exception: for an unsupported head type, for a ``TabHead``
            that is not a sub head, and for an empty list of table heads.
        """
        codes_displayed_max_number = 90  # FIXME XXX not working.
        mainsplit_width = self.split_main_width_cm
        mainsplit_columns_count = 1
        columns = self.columns_reporting
        columns_count = len(columns)

        def format_count(value):
            # Counts are shown without decimals, with thousands grouping.
            return str(locale.format_string("%.0f", value, True))

        def format_decimal(value, grouping=True, rounded=True):
            # locale.format() was removed in Python 3.12; format_string() is
            # the drop-in replacement for a single format specifier.
            if rounded:
                value = round(value, self.decimals)
            return str(locale.format_string("%." + str(self.decimals) + "f", value, grouping))

        table_heads = []
        data_columns_count = columns_count
        if not head:
            table_heads.append(CodePlan())  # add empty codeplan if no head is present
        elif isinstance(head, CodePlan):
            data_columns_count *= len(head) + 1  # + 1 for total
            table_heads.append(head)
        elif isinstance(head, TabHead):
            if head.is_subhead:  # not nice  FIXME
                table_heads = head
                data_columns_count = columns_count * (sum([len(element) for element in table_heads]) + 1)
            else:  # not nice
                raise Exception("WTF")
        else:
            raise Exception("Unkown type found as table head." + str(type(head)))

        def head_dataset_names():
            # Dataset names in column order: 'TOTAL' first, then one name
            # '<first head variable>_<code>' per code of every table head.
            return ['TOTAL'] + [head_sub.variables[0] + "_" + str(code)
                                for head_sub in table_heads
                                for code in head_sub.key_order]

        # Restrict to the relevant datasets; 'TOTAL' is always one of them.
        datasets = {name: rs[name] for name in head_dataset_names() if name in rs}

        # Clean up the labels: once the number of codes reaches the limit, keep
        # only the codes that actually have values.
        if len(rs.split_main.key_order) >= codes_displayed_max_number:
            total_counts = datasets['TOTAL']['COUNT']
            trimmed_order = [key for key in rs.split_main.key_order
                             if total_counts and key in total_counts.keys()]
            # "Don't know" and "no answer" are moved to the end. TODO parametrize
            codes_trailing = (96, 99)
            for code in codes_trailing:
                if code in trimmed_order:
                    trimmed_order.remove(code)
                    trimmed_order.append(code)
            rs.split_main.key_order = trimmed_order

        #########################################
        # ## Header row 1: titles of the table heads
        #########################################
        if len(table_heads) == 0:
            raise Exception("Warum ist der Tablehead nicht >0?")
        table_head_tex = columns_count * "&"  # skip label column and total columns
        sub_head_data_cols = 0
        for head_sub in table_heads:
            if len(head_sub) > 0:
                table_head_tex += (r"&\multicolumn{" + str(columns_count * len(head_sub)) + "}{l}{"
                                   + str(self._convert_symbols2tex(head_sub.title)) + "}")
                sub_head_data_cols += len(head_sub)
        table_head_tex += r"\\" + "\n "
        if data_columns_count - columns_count > 0:
            # Rule below the head titles, spanning all head columns.
            midrule_start = columns_count + mainsplit_columns_count + 1
            midrule_end = midrule_start + sub_head_data_cols * columns_count - 1
            table_head_tex += r"\cmidrule(l){" + str(midrule_start) + "-" + str(midrule_end) + "}"

        #########################################
        # ## Header row 2: codes as column names
        #########################################
        table_head_tex += r"&\multicolumn{{{span}}}{{l}}{{GESAMT}}".format(span=columns_count)

        codes_head = head_dataset_names()
        # Width of a head cell depends on the number of columns it spans.
        if columns_count < 3:
            width = "1.9cm"
        elif columns_count == 3:
            width = "4cm"
        else:
            width = "6cm"
        for head_sub in table_heads:
            for code in head_sub.key_order:
                code_label = self._convert_symbols2tex(head_sub[code])
                if self._display_label == "both" and str(code) != code_label:
                    code_label = str(code) + " " + code_label
                table_head_tex += r"&\multicolumn{{{columns}}}{{p{{{width}}}}}{{{insert}}}".format(
                    columns=str(columns_count), width=width, insert=code_label)
        table_head_tex += r"\\ " + "\n "

        #########################################
        # ## Header row 3: type of each data column
        #########################################
        table_head_tex += "Code Label" if self._display_label == "both" else "Kategorie"
        column_label_cells = ''.join(
            r"&\multicolumn{1}{c}{\footnotesize{" + self.column_type_labels[reporting_column_type] + "}}"
            for reporting_column_type in columns)
        table_head_tex += column_label_cells  # column group of the total
        # One column group per code of every head. The previous code took the
        # codes of the last head only and multiplied them by the number of
        # heads, which gives the wrong cell count for heads of unequal size.
        for head_sub in table_heads:
            table_head_tex += column_label_cells * len(head_sub.key_order)
        table_head_tex += r"\\"

        #########################################
        # ## Table preamble and caption
        #########################################
        rs_txt = r"\begin{longtable}[l]{"
        # Width of the label column depends on what is displayed in it.
        if self._display_label == "code":
            rs_txt += "p{1cm}"
        else:
            rs_txt += r">{{\raggedright}}p{{{width}cm}}".format(width=mainsplit_width)
        rs_txt += "r" * data_columns_count + "}\n"
        # Only the first of several consecutive tables with the same title gets
        # an entry in the list of tables.
        rs_txt += r"\caption"
        if self.lagTitle == rs.title:
            rs_txt += r"[]"

        title_suffix = ""
        if not rs.title:
            rs.title = rs.split_main.variables[0]  # XXX only the first variable is used
        elif self.debug_content:
            title_suffix = " [{}]".format(rs.split_main.variables[0])  # XXX only the first variable is used
        self.lagTitle = str(rs.title)
        if 'section' in rs.content_misc.keys():
            rs_txt += r"{" + self._convert_symbols2tex(rs.content_misc['section'] + " -- " + rs.title) + r"}\\ "
        else:
            if rs.title is None:
                rs.title = ''
            rs_txt += r"{" + self._convert_symbols2tex(rs.title + title_suffix) + r"}\\ "

        #########################################
        # ## Heads and feet of the longtable
        #########################################
        rs_txt += r"\toprule[1.5pt]\\ " + table_head_tex + r"\midrule[1pt]\addlinespace \endfirsthead "
        head_last = r"\caption[]{" + self._convert_symbols2tex(rs.title) + r" \dots (Fortsetzung)}\\ "
        head_last += r"\toprule[1.5pt]\\ " + table_head_tex + r"\midrule[1pt]\addlinespace \endhead "
        foot_first = r"\bottomrule[1.5pt] "
        foot_first += r"\multicolumn{" + str(data_columns_count + 1) + r"} {l} {\textit{Fortsetzung \ldots}}\\"
        foot_first += r"\endfoot "

        def footnote_row(text):
            # One footnote-sized row spanning the whole table.
            return (r" \multicolumn{" + str(data_columns_count + 1)
                    + r"}{p{0.5\linewidth}}{\footnotesize{" + text + r" }}  \\")

        foot_last = r"\bottomrule[1.5pt] "
        if rs.content_misc.get('subTitle'):
            foot_last += footnote_row(self._convert_symbols2tex(rs.content_misc['subTitle']))
        if rs.content_misc.get('comment'):
            foot_last += footnote_row(self._convert_symbols2tex(rs.content_misc['comment']))
        if rs.content_misc.get('filterText'):
            foot_last += footnote_row("Filter: " + self._convert_symbols2tex(rs.content_misc['filterText']))
        if rs.timestamp:
            foot_last += footnote_row(self._convert_symbols2tex(rs.timestamp))
        foot_last += r"\endlastfoot "

        #########################################
        # ## Data rows
        #########################################
        data_block = ""
        # Column sums per reporting column type, keyed by 1-based column group.
        total = {column: {} for column in columns}

        for row_index, row in enumerate(rs.split_main.key_order):
            # NOTE(review): this lets split_main_codes_max + 1 rows through;
            # kept as is, confirm whether ">=" was intended.
            if row_index > self.split_main_codes_max:
                break
            if self._display_label == "both":
                try:
                    row_label = rs.split_main[row]
                except KeyError:
                    row_label = rs.split_main.get(row, '')
                data_block += self._convert_symbols2tex(str(row)) + " " + self._convert_symbols2tex(row_label)
            else:
                row_label = rs.split_main[row]
                if isinstance(row_label, int):
                    # An int label cannot be concatenated to the text directly
                    # (TypeError), so it is converted first.
                    data_block += str(rs.split_main[str(row)])
                else:
                    data_block += self._convert_symbols2tex(row_label)

            # XXX the datasets are looked up with the code as str; this is
            # probably only necessary because of an inconsistency elsewhere.
            row_key = str(row)
            for column_counter, code in enumerate(codes_head, start=1):
                for reporting_column_type in columns:
                    column_totals = total[reporting_column_type]
                    if column_counter not in column_totals:
                        column_totals[column_counter] = 0
                    if code in datasets and reporting_column_type in datasets[code] \
                            and row_key in datasets[code][reporting_column_type]:
                        value = datasets[code][reporting_column_type][row_key]
                        column_totals[column_counter] += value
                        if reporting_column_type == 'COUNT':
                            data_block += "&" + format_count(value)
                        else:
                            data_block += "&" + format_decimal(value)
                    else:
                        data_block += "&"  # no value: empty cell
            data_block += "\\\\ \n \\addlinespace "
            if self.dataSepLines_enabled:
                data_block += r"   \midrule[0.2pt]"
        if self.dataSepLines_enabled:
            # Drop the separator line after the last row (15 characters).
            data_block = data_block[0:-15]

        #########################################
        # ## Summary block: column sums
        #########################################
        block_summary_tex = r"\midrule\addlinespace "
        block_summary_tex += "GESAMT"
        for column_counter, code in enumerate(codes_head, start=1):
            for reporting_column_type in columns:
                try:
                    column_sum = total[reporting_column_type][column_counter]
                    if reporting_column_type == 'COUNT':
                        cell = format_count(column_sum)
                    elif reporting_column_type == 'PCT':
                        cell = format_decimal(column_sum, grouping=False)
                    else:
                        cell = format_decimal(column_sum)
                except (LookupError, TypeError, ValueError):
                    # No sum available (e.g. no data rows): leave the cell empty.
                    cell = ""
                block_summary_tex += r"&" + cell
        block_summary_tex += r"\\ \addlinespace "

        dataset_total = datasets.get('TOTAL')
        if dataset_total:
            #########################################
            # ## Summary block: number of cases
            #########################################
            if 'COUNT_TOTAL' in dataset_total:
                block_summary_tex += "Fallzahl"
                for code in codes_head:
                    # Head codes without a dataset in rs are shown as 0
                    # instead of raising a KeyError.
                    dataset = datasets.get(code)
                    if dataset:
                        if columns == ['PCT_GN_W']:
                            dataset['COUNT_TOTAL'] = dataset['COUNT_GN_TOTAL']
                            dataset['COUNT_W_TOTAL'] = dataset['COUNT_GN_W_TOTAL']
                        count_total = format_count(dataset['COUNT_TOTAL'])
                    else:
                        count_total = str(0)
                    for reporting_column_type in columns:
                        if reporting_column_type == "COUNT":
                            block_summary_tex += r"&" + count_total
                        elif reporting_column_type == "COUNT_W":
                            try:
                                block_summary_tex += r"&" + format_decimal(datasets[code]['COUNT_W_TOTAL'])
                            except (LookupError, TypeError, ValueError):
                                block_summary_tex += r"&"
                        else:
                            block_summary_tex += r"&"
                block_summary_tex += r"\\ \addlinespace "

            #########################################
            # ## Summary block: average
            #########################################
            if 'AVG_TOTAL' in dataset_total:
                block_summary_tex += "Durchschnitt"
                for code in codes_head:
                    dataset = datasets.get(code)
                    if dataset:
                        avg_total = format_decimal(dataset['AVG_TOTAL'], rounded=False)
                    else:
                        avg_total = str(0)
                    for reporting_column_type in columns:
                        if reporting_column_type == "COUNT":
                            block_summary_tex += r"&" + avg_total
                        elif reporting_column_type == "COUNT_W":
                            # NOTE(review): this prints the weighted number of
                            # cases in the average row; kept as is, confirm
                            # whether a weighted average was intended.
                            try:
                                block_summary_tex += r"&" + format_decimal(datasets[code]['COUNT_W_TOTAL'])
                            except (LookupError, TypeError, ValueError):
                                block_summary_tex += r"&"
                        else:
                            block_summary_tex += r"&"
                block_summary_tex += r"\\ \addlinespace "

        # longtable expects all heads and feet before the first data row. The
        # summary block is part of both feet, so it is repeated on every page.
        rs_txt += head_last + block_summary_tex + foot_first + block_summary_tex + foot_last + data_block
        rs_txt += " \\end{longtable}\n\r\\newpage\n\r"
        return rs_txt